In [125]:
import torch
import os
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.utils.data import TensorDataset

In [101]:
torch.cuda.is_available(), torch.cuda.device_count(),  torch.cuda.current_device(), torch.cuda.get_device_name(0)

(True, 1, 0, 'NVIDIA GeForce RTX 3060 Laptop GPU')

In [102]:
class SensorDataset(Dataset):
    def __init__(self, directory, label):
        self.data = []
        self.labels = []
        for filename in os.listdir(directory):
            file_path = os.path.join(directory, filename)
            sequence = pd.read_csv(file_path, header=None, usecols=range(1, 19)) # Skip timestamp column

            # Check for NaN or infinite values in the DataFrame
            if sequence.isnull().values.any():
                raise ValueError(f"NaN values found in file {filename}")
            if np.isinf(sequence.values).any():
                raise ValueError(f"Infinite values found in file {filename}")
            
            sequence = sequence.values # Skip timestamp column


            self.data.append(sequence)
            self.labels.append(np.full((sequence.shape[0],), label)) # Label for each sequence
            
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float), torch.tensor(self.labels[idx], dtype=torch.long)


Convert CSVs into Datasets

In [103]:
proper_dir = '../data/clean/clean_proper'
proper_dataset = SensorDataset(proper_dir, 0)

improper_dir = '../data/clean/clean_improper'
improper_dataset = SensorDataset(improper_dir, 1)

full_dataset = torch.utils.data.ConcatDataset([proper_dataset, improper_dataset])


Get Train/Test Split

In [138]:
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

# train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)


Concatenate and Convert Again to Dataset and Dataloaders

In [139]:
# TRAIN

train_features_concatenated = torch.cat([sequence[0] for sequence in train_dataset], dim=0)
train_targets_concatenated = torch.cat([sequence[1] for sequence in train_dataset], dim=0)

train_dataset = TensorDataset(train_features_concatenated, train_targets_concatenated)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)


# TEST

test_features_concatenated = torch.cat([sequence[0] for sequence in test_dataset], dim=0)
test_targets_concatenated = torch.cat([sequence[1] for sequence in test_dataset], dim=0)

test_dataset = TensorDataset(test_features_concatenated, test_targets_concatenated)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)


In [159]:
# Save the training dataset
torch.save({
    'features': train_features_concatenated,
    'targets': train_targets_concatenated
}, 'train_dataset.pt')

# Save the test dataset
torch.save({
    'features': test_features_concatenated,
    'targets': test_targets_concatenated
}, 'test_dataset.pt')

In [158]:
# Define the RNN model
class BinaryClassificationRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(BinaryClassificationRNN, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        # x.shape = (batch_size, sequence_length, input_size)
        out, (hn, cn) = self.rnn(x)
        # Select the output of the last time step
        out = out[:, -1, :]
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Hyperparameters
input_size = 18  # Number of features
hidden_size = 128  # Number of features in hidden state
num_layers = 2  # Number of stacked LSTM layers

# Initialize the model
model = BinaryClassificationRNN(input_size, hidden_size, num_layers)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


for i, (features, labels) in enumerate(train_loader):
    features = features.unsqueeze(1)  # Adding a sequence length of 1
    labels = labels.unsqueeze(1).float()  # Ensure labels are float for BCELoss
    
    outputs = model(features)
    print(outputs)  # Check the range of outputs
    print(labels)   # Check the labels
    
    loss = criterion(outputs, labels)
    # Proceed with backward pass and optimization



# Assuming train_loader is defined and loaded with your dataset
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        # Reshape features to [batch_size, 1, input_size] for single time step
        features = features.unsqueeze(1)  # Adding a sequence length of 1
        labels = labels.unsqueeze(1).float()  # Ensure labels are float for BCELoss
        
        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}], Loss: {loss.item():.4f}')

# Note: This code assumes that your DataLoader `train_loader` is already defined and loaded with your dataset


tensor([[0.4847],
        [0.4834],
        [0.4687],
        [0.4850],
        [0.4667],
        [0.4755],
        [0.4778],
        [0.4760],
        [0.4826],
        [0.4895],
        [0.4825],
        [0.4758],
        [0.4857],
        [0.4801],
        [0.4814],
        [0.4669],
        [0.4583],
        [0.4548],
        [0.4780],
        [0.4789],
        [0.4776],
        [0.4651],
        [0.4725],
        [0.4755],
        [0.4703],
        [0.4760],
        [0.4775],
        [0.4773],
        [0.4778],
        [0.4767],
        [0.4808],
        [0.4709],
        [0.4784],
        [0.4785],
        [0.4817],
        [0.4795],
        [0.4708],
        [0.4763],
        [0.4540],
        [0.4781],
        [0.4666],
        [0.4636],
        [0.4853],
        [0.4705],
        [0.4815],
        [0.4752],
        [0.4794],
        [0.4856],
        [0.4694],
        [0.4785],
        [0.4784],
        [0.4796],
        [0.4837],
        [0.4671],
        [0.4940],
        [0

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
