In [1]:
from tslearn.utils import to_time_series_dataset
import json
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
# Load the label table; later cells read its 'Participant ID' and 'Label' columns.
label_df = pd.read_csv('labels.csv')

In [3]:
# Root directory containing one sub-directory per participant ID.
# Set the FACEMO_DATA_DIR environment variable to run the notebook on another
# machine; without it the original cluster path is used unchanged.
parent_dir = os.environ.get(
    'FACEMO_DATA_DIR',
    '/home/vault/empkins/tpD/D02/processed_data/facemo_data',
)

In [4]:
# os.listdir returns entries in arbitrary order; sort so every run sees the same sequence.
ids = sorted(os.listdir(parent_dir))

In [5]:
# os.listdir returns entries in arbitrary order; sort so every run sees the same sequence.
ids = sorted(os.listdir(parent_dir))

# Keep only entries whose name consists solely of digits.
# The loop variable is not called `id`, to avoid reusing the builtin's name.
numeric_ids = [entry for entry in ids if entry.isdigit()]

In [6]:
# Participant IDs (as ints) for which a non-empty latency.json was loaded.
valid_ids = []

# DataFrames loaded from the json files; kept aligned 1:1 with valid_ids.
coping_list = []

# Iterate through all entries in the directory in a deterministic order
# (os.listdir itself returns entries in arbitrary order).
ids = sorted(os.listdir(parent_dir))
for entry in ids:
    target_path = os.path.join(parent_dir, entry, 'latency.json')
    if not os.path.isfile(target_path):
        continue

    # Resolve the integer ID *before* touching either list. Previously the
    # frame was appended first, so a non-numeric directory name raised
    # ValueError after the append and left coping_list one element longer
    # than valid_ids, shifting every subsequent label.
    try:
        participant_id = int(entry)
    except ValueError:
        continue

    try:
        with open(target_path, 'r') as f:
            coping_data = json.load(f)
        df = pd.DataFrame.from_dict(coping_data)
    except (FileNotFoundError, ValueError, json.JSONDecodeError) as exc:
        # Still best-effort, but report what was skipped instead of hiding it.
        print(f"Skipping participant {entry}: could not load {target_path} ({exc})")
        continue

    if df.empty:
        continue

    coping_list.append(df)
    valid_ids.append(participant_id)

# Create a DataFrame from the valid IDs
id_df = pd.DataFrame({'Participant ID': valid_ids})

# Ensure Participant ID in label_df is an integer so it matches id_df
label_df['Participant ID'] = label_df['Participant ID'].astype(int)

# Left-merge so the labels follow the order of the loaded frames;
# participants without a row in label_df get NaN in the label columns.
reordered_df = id_df.merge(label_df, on='Participant ID', how='left')

# A duplicated 'Participant ID' in label_df would add rows here and silently
# misalign labels with coping_list, so fail loudly instead.
assert len(reordered_df) == len(coping_list), (
    f"{len(reordered_df)} label rows for {len(coping_list)} loaded frames; "
    "check label_df for duplicate Participant IDs"
)

In [7]:
# Sanity check: the row count should equal the number of loaded frames (len(coping_list)).
reordered_df.shape

(100, 2)

In [8]:
# Number of per-participant DataFrames that were loaded.
len(coping_list)

100

In [9]:
# Drop zero-length frames. NOTE(review): only coping_list is filtered here, so
# if this ever removes an entry the labels built from reordered_df will no
# longer line up with it.
coping_list = [frame for frame in coping_list if len(frame) != 0]

In [10]:
# Label column of the merged table, in the same order as the loaded frames.
y = pd.Series(reordered_df['Label'])

In [11]:
# Replace missing labels with the most frequent label.
# NOTE(review): this assigns the majority class to participants that have no
# label at all; dropping those samples may be the safer choice -- confirm.
n_missing = y.isna().sum()
if n_missing > 0:
    print(f"Found {n_missing} NaN values in the labels. Handling them...")
    # Reassign instead of inplace=True: y originates from a reordered_df
    # column, and an in-place fill can write through to that frame or trigger
    # chained-assignment warnings depending on the pandas version.
    y = y.fillna(y.mode()[0])

Found 3 NaN values in the labels. Handling them...


In [12]:
# Map the 0.5 label onto class 0, then cast every label to int.
y = y.mask(y == 0.5, 0).astype(int)

In [13]:
class CustomDataset(Dataset):
    """Dataset of transposed, zero-padded DataFrames paired with labels.

    Every frame is transposed (its columns become rows) and right-padded with
    zeros along the second axis up to the longest sample in the dataset, so
    each item has shape ``(n_columns, max_seq_length)``.

    Args:
        dataframes_list: Sequence of pandas DataFrames, one per sample.
            Presumably all frames share the same columns -- TODO confirm.
        labels: Labels aligned *by position* with ``dataframes_list``. May be
            a list, a NumPy array or a pandas Series with any index.

    Raises:
        ValueError: If the number of labels differs from the number of
            frames, or (raised by ``max``) if ``dataframes_list`` is empty.
    """

    def __init__(self, dataframes_list, labels):
        if len(labels) != len(dataframes_list):
            raise ValueError(
                f"Got {len(dataframes_list)} samples but {len(labels)} labels"
            )
        self.labels = labels

        # Transpose dataframes to switch orientation from frame-wise to feature-wise
        self.data = [df.transpose() for df in dataframes_list]

        # Find the maximum sequence length in the entire dataset
        self.max_seq_length = max(df.shape[1] for df in self.data)

        # Pad all sequences to the maximum length
        self.data = [self.pad_sequence(df) for df in self.data]

    def pad_sequence(self, df):
        """Return ``df`` as a float array, zero-padded on the right to ``max_seq_length`` columns."""
        values = np.asarray(df)
        padded_sequence = np.zeros((values.shape[0], self.max_seq_length))
        padded_sequence[:, :values.shape[1]] = values
        return padded_sequence

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(padded_data, label)`` for sample ``idx`` as float32 tensors."""
        padded_data = torch.tensor(self.data[idx], dtype=torch.float32)

        # Look the label up by position. ``series[idx]`` on a pandas Series is
        # a *label-based* lookup, which raises or returns the wrong row as
        # soon as the index is not the default 0..n-1 (e.g. after filtering).
        if hasattr(self.labels, 'iloc'):
            raw_label = self.labels.iloc[idx]
        else:
            raw_label = self.labels[idx]
        label = torch.tensor(raw_label, dtype=torch.float32)

        return padded_data, label

In [14]:
# Pair each loaded frame with its label; the two sequences must be aligned by position.
custom_dataset = CustomDataset(coping_list, y)

In [15]:
# Inspect the padded feature tensor of the first sample (element 0 of the (data, label) pair).
custom_dataset[0][0]

tensor([[3.8773e+04, 3.8774e+04, 3.8775e+04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.2924e+03, 1.2924e+03, 1.2925e+03,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [3.0000e-02, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]])

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
import matplotlib.pyplot as plt

In [17]:
# 80/20 train/test split.
train_size = int(0.8 * len(custom_dataset))
test_size = len(custom_dataset) - train_size

# Seed the split and the training shuffle explicitly so that a
# Restart & Run All reproduces the same partition and batch order.
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    custom_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [18]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the final time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of output units of the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a ``(batch, seq, input_size)`` tensor to ``(batch, output_size)``."""
        sequence_out, _state = self.lstm(x)
        # Only the output at the last index of the sequence axis feeds the head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [19]:
# Hyperparameters. input_size is presumably the number of features per time
# step in the dataset -- TODO confirm against the loaded frames.
input_size, hidden_size, num_layers, num_classes = 40, 64, 2, 2

# Model, loss and optimiser.
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=num_classes,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training length and per-epoch metric histories.
num_epochs = 10
train_losses, test_accuracies = [], []

In [None]:
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    n_seen = 0
    for inputs, labels in train_loader:
        # The dataset yields (batch, features, time) because each frame is
        # transposed before padding, whereas nn.LSTM(batch_first=True) expects
        # (batch, time, features). Swap the last two axes before the forward pass.
        inputs = inputs.transpose(1, 2).contiguous()
        targets = labels.long()  # CrossEntropyLoss needs integer class indices

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * targets.size(0)
        n_seen += targets.size(0)

    # Record the mean loss over the whole epoch, not just the last batch.
    epoch_loss = running_loss / max(n_seen, 1)
    train_losses.append(epoch_loss)

    # ---- evaluation pass ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.transpose(1, 2).contiguous())
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            # Compare integer predictions against integer targets.
            correct += (predicted == labels.long()).sum().item()
    accuracy = 100 * correct / max(total, 1)
    test_accuracies.append(accuracy)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Test Accuracy: {accuracy:.2f}%')

print(f'Final Train Loss: {train_losses[-1]:.4f}')
print(f'Final Test Accuracy: {test_accuracies[-1]:.2f}%')

: 

In [None]:
# Leftover inspection: `labels` is the loop variable from the training cell
# (the most recent batch of labels), so it only exists after that cell has run.
labels

tensor([0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1.])