In [23]:
import pandas as pd
import numpy as np
from nptdms import TdmsFile
from pathlib import Path
import os
import matplotlib.pyplot as plt

# Path to the directory containing TDMS files
tdms_dir = r'C:\Users\patry\OneDrive\Pulpit\testv2'

# List to store paths of TDMS files
tdms_list = []

# Walk through the directory and its subdirectories
for root, dirs, files in os.walk(tdms_dir):
    print(f"There are {len(dirs)} directories and {len(files)} file in {root}")
    
    for file in files:
        if file.endswith(".tdms"):
            # If the file ends with '.tdms', add its full path to the list
            tdms_list.append(os.path.join(root, file))

There are 0 directories and 1 file in C:\Users\patry\OneDrive\Pulpit\testv2


In [24]:
from scipy.interpolate import interp1d

# Create an empty list to store data
data_list = []

# Open tdms files and create a list with labels
for tdms_file_path in tdms_list:
    tdms_file = TdmsFile.read(tdms_file_path)
    first_letter = os.path.basename(tdms_file_path)[0]
    
    for group in tdms_file.groups():
        for channel in group.channels():
            data_len = np.shape(channel.data)[0]
            # Check conditions and append data to list
            if (data_len <= 1000000 and data_len >= 10000 and first_letter != 'z'):
                data_list.append({
                    #'anchor_ids': group.name,
                    'class': first_letter,
                    #'driveway': channel.name,
                    'excitation': channel.data,
                    'type_id': 1 if first_letter == 'd' else 0
                })

# Create DataFrame from the list of dictionaries
df = pd.DataFrame(data_list)

df_date = df.loc[df.index % 2 == 0]
df_magnitude = df.loc[df.index % 2 != 0]

In [25]:
# Upewnij się, że wszystkie wartości w kolumnie 'excitation' są listami lub tablicami
df_magnitude['excitation'] = df_magnitude['excitation'].apply(lambda x: x if isinstance(x, (list, np.ndarray)) else [x])

# Znajdź maksymalną długość danych w kolumnie 'excitation'
max_length = max(df_magnitude['excitation'].apply(len))
print(max_length)
# Zero-pad each signal to the maximum length
padded_signals = df_magnitude['excitation'].apply(lambda x: np.pad(x, (0, max_length - len(x)), 'mean')).values.tolist()

df_magnitude['excitation'] = padded_signals

760000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_magnitude['excitation'] = df_magnitude['excitation'].apply(lambda x: x if isinstance(x, (list, np.ndarray)) else [x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_magnitude['excitation'] = padded_signals


In [26]:
len(df_magnitude['excitation'].iloc[0])

760000

In [27]:
from sklearn.preprocessing import StandardScaler

# Standarizing data
data = np.array(df_magnitude['excitation'].tolist())
#print(len(data[0]))
scaler = StandardScaler()
scaler.fit(data)
normalized_data = scaler.transform(data)

df_magnitude['excitation'] = [list(row) for row in normalized_data]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_magnitude['excitation'] = [list(row) for row in normalized_data]


In [28]:
len(df_magnitude['excitation'].iloc[0])

760000

In [29]:
from torch.utils.data import Dataset
import torch
import numpy as np
from scipy.fft import fft, ifft

class Anchors_data(Dataset):
    def __init__(self, df):
        self.df = df

    def _compute_dft(self, signal):
        # Convert the excitation list of lists to a numpy array
        excitation_array = np.array(signal)
        
        # Ensure excitation_array is one-dimensional
        excitation_array = excitation_array.flatten()

        # Compute DFT
        dft_data = np.fft.fft(excitation_array)
        
        # Extract the positive frequencies, excluding the zero frequency component
        positive_frequencies = dft_data[2:(len(dft_data) // 2)]
        
        return positive_frequencies.real  # Return only the real part

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        signal = self.df['excitation'].iloc[idx]
        dft_data = self._compute_dft(signal)
        
        type_id = self.df['type_id'].iloc[idx]
        return torch.tensor(dft_data, dtype=torch.float32).unsqueeze(dim=0), torch.tensor(type_id, dtype=torch.long)

In [30]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Assuming df_magnitude is already defined
train_df, test_df = train_test_split(df_magnitude, test_size=0.3, random_state=42)

train_data = Anchors_data(train_df)
test_data = Anchors_data(test_df)

BATCH_SIZE = 8
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [31]:
next(iter(train_dataloader))[0].size(), next(iter(train_dataloader))[1].size()

(torch.Size([8, 1, 379998]), torch.Size([8]))

In [32]:
from torch import nn
from torchsummary import summary

### model
class SimpleVGG(nn.Module):
    def __init__(self, num_classes=1):
        super(SimpleVGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            
            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            nn.Conv1d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool1d(7)
        self.classifier = nn.Sequential(
            nn.Linear(64 * 7, 128*10),
            nn.ReLU(),
            nn.Linear(128*10, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes),
            nn.Sigmoid()        
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

model = SimpleVGG()
summary(model, (1, 499998))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1           [-1, 16, 499998]              64
              ReLU-2           [-1, 16, 499998]               0
         MaxPool1d-3           [-1, 16, 249999]               0
            Conv1d-4           [-1, 32, 249999]           1,568
              ReLU-5           [-1, 32, 249999]               0
         MaxPool1d-6           [-1, 32, 124999]               0
            Conv1d-7           [-1, 64, 124999]           6,208
              ReLU-8           [-1, 64, 124999]               0
         MaxPool1d-9            [-1, 64, 62499]               0
           Conv1d-10           [-1, 128, 62499]          24,704
             ReLU-11           [-1, 128, 62499]               0
        MaxPool1d-12           [-1, 128, 31249]               0
           Conv1d-13            [-1, 64, 31249]          24,640
             ReLU-14            [-1, 64

In [33]:
from tqdm.auto import tqdm
import torch.nn as nn

# Define the model, loss function, and optimizer
model = SimpleVGG(num_classes=1)
criterion = nn.BCELoss()  # Using Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train(model, train_dataloader, test_dataloader, criterion, optimizer, num_epochs=10):
    for epoch in tqdm(range(num_epochs)):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        
        # Training loop
        for inputs, labels in train_dataloader:
            optimizer.zero_grad()  # Zero the gradients
            outputs = model(inputs)  # Forward pass
            outputs = outputs.view(-1)  # Ensure outputs is of shape (batch_size,)
            labels = labels.float()  # Ensure labels is of shape (batch_size,)
            loss = criterion(outputs, labels)  # Compute the loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update the weights
            running_loss += loss.item() * inputs.size(0)
            
            # Calculate training accuracy
            predicted = (outputs > 0.5).long()
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()
        
        epoch_loss = running_loss / len(train_dataloader.dataset)
        train_accuracy = correct_train / total_train
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

        # Evaluation loop
        model.eval()  # Set the model to evaluation mode
        running_val_loss = 0.0
        correct_val = 0
        total_val = 0
        
        with torch.inference_mode():
            for inputs, labels in test_dataloader:
                outputs = model(inputs)
                outputs = outputs.view(-1)  # Ensure outputs is of shape (batch_size,)
                labels = labels.float()  # Ensure labels is of shape (batch_size,)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item() * inputs.size(0)
                
                predicted = (outputs > 0.5).long()
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_loss = running_val_loss / len(test_dataloader.dataset)
        val_accuracy = correct_val / total_val
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
# Use the training function
train(model, train_dataloader, test_dataloader, criterion, optimizer, num_epochs=10)


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Loss: 0.1462, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [2/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [3/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [4/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [5/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [6/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [7/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [8/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [9/10], Loss: 0.0000, Training Accuracy: 1.0000
Validation Loss: 0.0000, Validation Accuracy: 1.0000
Epoch [10/10], Loss: 0.0000, Training