In [10]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import copy
import pandas as pd
import time
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Loading in data

In [11]:
# Load the full recording table from disk and report how long the read took.
print("Loading")
# NOTE(review): absolute, machine-specific path; this cell will not run on
# another machine without editing it (consider a configurable data directory).
path_Ben_linux_1 = "/home/rubin/Research/data/metadata_IMU_EMG_allgestures_allusers(1).pkl"

# Wall-clock timing around the read so the load cost shows up in the cell output.
start_time = time.time()
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
data_df = pd.read_pickle(path_Ben_linux_1)
end_time = time.time()
print(f"Completed in {end_time - start_time}s")

Loading
Completed in 0.1108553409576416s


# Split the dataset into train and test sets by participant (impaired and unimpaired users are both included)

In [12]:
# Participant IDs of the impaired cohort. These are matched against the
# 'Participant' column of data_df when the data is split by user.
pIDs_impaired = ['P102', 'P103', 'P104', 'P105', 'P106', 'P107', 'P108', 'P109', 'P110', 'P111',
                 'P112', 'P114', 'P115', 'P116', 'P118', 'P119', 'P121', 'P122', 'P123', 'P124', 
                 'P125', 'P126', 'P127', 'P128', 'P131', 'P132']

# Participant IDs of the unimpaired cohort (split separately from the impaired
# cohort so both groups appear in the train and the test set).
pIDs_unimpaired = ['P004', 'P005', 'P006', 'P008', 'P010', 'P011']

def split_and_preprocess_by_user(data_df, pIDs_impaired, pIDs_unimpaired, test_size=0.2,
                                 emg_start_col=72, random_state=42):
    """Split recordings by participant and return standardized EMG train/test frames.

    Participants (not rows) are split, separately for each cohort, so that no
    participant contributes rows to both the train and the test set. Feature
    columns are standardized with statistics fitted on the training rows only.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Must contain the metadata columns 'Participant', 'Gesture_ID' and
        'Gesture_Num'; every other column is treated as a numeric feature.
    pIDs_impaired, pIDs_unimpaired : sequence of str
        Participant IDs of the two cohorts.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split`` for each cohort.
    emg_start_col : int, default 72
        Positional index, among the feature columns (metadata removed), of the
        first column to keep; all columns from there to the end are returned.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the participant split is
        repeatable.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Train and test frames: the three metadata columns followed by the kept,
        standardized feature columns. The feature columns are labelled with
        their integer position among the features (e.g. 72, 73, ...).

    Raises
    ------
    ValueError
        If ``emg_start_col`` selects no feature columns.
    """
    metadata_cols = ['Participant', 'Gesture_ID', 'Gesture_Num']

    # Split each cohort on its own so both cohorts are represented on each side.
    # list(...) lets callers pass tuples/arrays while keeping the "+" below valid.
    impaired_train, impaired_test = train_test_split(
        list(pIDs_impaired), test_size=test_size, random_state=random_state)
    unimpaired_train, unimpaired_test = train_test_split(
        list(pIDs_unimpaired), test_size=test_size, random_state=random_state)

    training_users = impaired_train + unimpaired_train
    test_users = impaired_test + unimpaired_test

    # Select rows by participant membership.
    train_df = data_df[data_df['Participant'].isin(training_users)]
    test_df = data_df[data_df['Participant'].isin(test_users)]

    # Keep the metadata aside with a fresh 0..n-1 index so it lines up with the
    # freshly indexed scaled frames when concatenated below.
    train_metadata_df = train_df[metadata_cols].reset_index(drop=True)
    test_metadata_df = test_df[metadata_cols].reset_index(drop=True)

    train_features = train_df.drop(metadata_cols, axis=1).reset_index(drop=True)
    test_features = test_df.drop(metadata_cols, axis=1).reset_index(drop=True)

    # Fit the scaler on training rows only, then apply it to both sets, so no
    # test-set statistics leak into preprocessing.
    train_scaler = StandardScaler()
    ppd_train_df = pd.DataFrame(train_scaler.fit_transform(train_features))
    ppd_test_df = pd.DataFrame(train_scaler.transform(test_features))

    # Keep only the trailing block of feature columns (the EMG channels).
    ppd_train_emg_df = ppd_train_df.iloc[:, emg_start_col:]
    ppd_test_emg_df = ppd_test_df.iloc[:, emg_start_col:]
    if ppd_train_emg_df.shape[1] == 0:
        raise ValueError(
            f"emg_start_col={emg_start_col} selects no columns out of "
            f"{ppd_train_df.shape[1]} feature columns"
        )

    # Re-attach the metadata in front of the scaled EMG columns.
    ppd_train_emg_df = pd.concat([train_metadata_df, ppd_train_emg_df], axis=1)
    ppd_test_emg_df = pd.concat([test_metadata_df, ppd_test_emg_df], axis=1)

    return ppd_train_emg_df, ppd_test_emg_df

# Build the scaled EMG train/test frames from both cohorts, using the
# function's default test_size of 0.2 for the per-cohort participant split.
ppd_train_emg_df, ppd_test_emg_df = split_and_preprocess_by_user(data_df, pIDs_impaired, pIDs_unimpaired)


In [13]:
# Sanity check: (rows, columns) of the training frame, then its first rows
# (3 metadata columns followed by the scaled EMG columns).
print(ppd_train_emg_df.shape)
ppd_train_emg_df.head()

(153600, 19)


Unnamed: 0,Participant,Gesture_ID,Gesture_Num,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87
0,P103,move,1,-0.272553,-0.174909,-0.487032,-0.479092,-0.282206,-0.340962,-0.349967,-0.372957,-0.460476,-0.322696,-0.353601,-0.359954,-0.341444,-0.270736,-0.129773,-0.256569
1,P103,move,1,-0.237541,-0.219476,-0.491541,-0.456715,-0.259345,-0.269619,-0.370232,-0.354646,-0.456982,-0.301867,-0.349724,-0.356034,-0.337765,-0.281176,-0.106944,-0.2506
2,P103,move,1,-0.226202,-0.227727,-0.498172,-0.514807,-0.247238,-0.248624,-0.340924,-0.330598,-0.453981,-0.29439,-0.355787,-0.360192,-0.341552,-0.274673,-0.068181,-0.18223
3,P103,move,1,-0.059969,-0.167775,-0.476214,-0.59732,-0.266217,-0.281169,-0.308048,-0.280282,-0.518521,-0.294813,-0.355749,-0.350663,-0.333018,-0.264322,-0.065513,-0.144634
4,P103,move,1,-0.012488,-0.202698,-0.483415,-0.510942,-0.278117,-0.243418,-0.328787,-0.316063,-0.439284,-0.295585,-0.346027,-0.350198,-0.327595,-0.256635,-0.112046,-0.256429


In [14]:
import numpy as np
import torch
from torch.utils.data import Dataset

class EMG_Dataset(Dataset):
    """Fixed-length EMG windows with integer gesture labels.

    The input frame is read as consecutive, non-overlapping windows of
    ``time_units`` rows. Each window becomes one sample of shape
    ``(num_channels_emg, time_units)``; its label is the ``Gesture_ID`` of the
    window's first row. Rows left over after the last complete window are
    ignored.

    Parameters
    ----------
    emg_df : pandas.DataFrame
        Columns 'Participant', 'Gesture_ID', 'Gesture_Num' plus exactly
        ``num_channels_emg`` numeric feature columns.
    num_channels_emg : int, default 16
        Number of feature columns per row.
    time_units : int, default 64
        Number of consecutive rows that form one sample.
    label_map : dict, optional
        Mapping from ``Gesture_ID`` to integer class. When omitted it is built
        from the sorted unique gesture IDs found in ``emg_df``. Pass the
        training set's ``label_map`` when building a test set so both sets are
        guaranteed to share the same class indices.

    Attributes
    ----------
    emg_data : numpy.ndarray, shape (num_samples, num_channels_emg, time_units)
    labels : numpy.ndarray of int64, shape (num_samples,)
    label_map : dict
    index_map : dict mapping (Participant, Gesture_ID, Gesture_Num) -> sample index

    Raises
    ------
    ValueError
        If ``time_units`` is not positive or the number of feature columns does
        not equal ``num_channels_emg``.
    KeyError
        If a supplied ``label_map`` lacks a gesture that occurs in the data.
    """

    METADATA_COLS = ['Participant', 'Gesture_ID', 'Gesture_Num']

    def __init__(self, emg_df, num_channels_emg=16, time_units=64, label_map=None):
        if time_units <= 0:
            raise ValueError(f"time_units must be positive, got {time_units}")

        num_samples_emg = len(emg_df) // time_units
        # Only rows belonging to complete windows are used, for the features as
        # well as for the labels and the lookup table.
        usable_rows = num_samples_emg * time_units

        features = emg_df.drop(self.METADATA_COLS, axis=1).to_numpy()
        if features.shape[1] != num_channels_emg:
            raise ValueError(
                f"expected {num_channels_emg} EMG columns, found {features.shape[1]}"
            )

        # (rows, channels) -> (samples, time, channels) -> (samples, channels, time),
        # the channel-first layout expected by Conv1d.
        self.emg_data = (
            features[:usable_rows]
            .reshape(num_samples_emg, time_units, num_channels_emg)
            .transpose((0, 2, 1))
        )

        gesture_ids = emg_df['Gesture_ID'].to_numpy()
        if label_map is None:
            # np.unique sorts, so the mapping is deterministic for a given label set.
            unique_labels = np.unique(gesture_ids)
            label_map = {label: i for i, label in enumerate(unique_labels)}
        self.label_map = label_map

        # One label per window, taken from the window's first row.
        self.labels = np.array(
            [self.label_map[label] for label in gesture_ids[:usable_rows:time_units]],
            dtype=np.int64,
        )

        # Map (Participant, Gesture_ID, Gesture_Num) -> sample index using the row
        # POSITION (not the DataFrame index label), so the lookup stays correct for
        # frames whose index was not reset. Column-wise iteration avoids the cost
        # of DataFrame.iterrows().
        participants = emg_df['Participant'].tolist()[:usable_rows]
        gestures = emg_df['Gesture_ID'].tolist()
        gesture_nums = emg_df['Gesture_Num'].tolist()
        self.index_map = {
            key: position // time_units
            for position, key in enumerate(zip(participants, gestures, gesture_nums))
        }

        # Sanity check
        print(f"EMG Data shape: {self.emg_data.shape}")
        print(f"Labels shape: {self.labels.shape}")
        print(f"Label mapping: {self.label_map}")

    def __len__(self):
        """Return the number of complete windows (samples)."""
        return len(self.emg_data)

    def __getitem__(self, idx):
        """Return ``(emg, label)`` for an integer index or a metadata tuple.

        ``idx`` may be an integer sample index or a
        ``(Participant, Gesture_ID, Gesture_Num)`` tuple, which is resolved
        through ``index_map`` (KeyError if unknown). ``emg`` is a float32
        tensor of shape ``(num_channels_emg, time_units)``; ``label`` is a
        scalar long tensor.
        """
        if isinstance(idx, tuple):
            idx = self.index_map[idx]

        emg_data = torch.tensor(self.emg_data[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        return emg_data, label


In [15]:
from torch.utils.data import DataLoader

# Create dataset instances (each constructor prints its shapes and label mapping).
# NOTE(review): each EMG_Dataset derives its own label mapping from the gestures
# present in its own frame; the printed mappings should be checked to be identical
# for train and test, otherwise class indices would not be comparable.
train_dataset = EMG_Dataset(ppd_train_emg_df)
test_dataset = EMG_Dataset(ppd_test_emg_df)

# Create DataLoader instances: the training order is reshuffled every epoch,
# the test order is kept fixed.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


EMG Data shape: (2400, 16, 64)
Labels shape: (2400,)
Label mapping: {'close': 0, 'delete': 1, 'duplicate': 2, 'move': 3, 'open': 4, 'pan': 5, 'rotate': 6, 'select-single': 7, 'zoom-in': 8, 'zoom-out': 9}
EMG Data shape: (800, 16, 64)
Labels shape: (800,)
Label mapping: {'close': 0, 'delete': 1, 'duplicate': 2, 'move': 3, 'open': 4, 'pan': 5, 'rotate': 6, 'select-single': 7, 'zoom-in': 8, 'zoom-out': 9}


In [21]:
import torch.nn as nn

class EMG_CNN(nn.Module):
    """1-D CNN classifier for windowed multi-channel EMG.

    Three Conv1d(kernel 3, padding 1) + ReLU + MaxPool1d(2) stages
    (32, 64, 128 feature maps) followed by three fully connected layers.

    Parameters
    ----------
    num_classes : int, default 10
        Size of the output (logit) vector.
    num_channels_emg : int, default 16
        Number of input channels.
    time_units : int, default 64
        Length of each input window. It must be at least 8 so that one time
        step remains after the three halving pool stages.

    Input:  float tensor of shape (batch, num_channels_emg, time_units).
    Output: unnormalized class scores of shape (batch, num_classes).
    """

    def __init__(self, num_classes=10, num_channels_emg=16, time_units=64):
        super().__init__()

        # Each pooling stage halves the time axis (rounding down).
        pooled_steps = time_units // 2 // 2 // 2
        if pooled_steps < 1:
            raise ValueError(
                f"time_units must be at least 8 to survive three pooling stages, got {time_units}"
            )

        # Convolutional block; padding=1 with kernel_size=3 keeps the length unchanged.
        self.emg_conv1 = nn.Conv1d(in_channels=num_channels_emg, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.emg_conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.emg_conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        # Fully connected head; the flattened size is 128 feature maps times the
        # pooled length (128 * 8 for the default 64-step window).
        self.fc1 = nn.Linear(128 * pooled_steps, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, emg_input):
        """Return class logits for a batch of EMG windows."""
        x = torch.relu(self.emg_conv1(emg_input))
        x = self.pool(x)
        x = torch.relu(self.emg_conv2(x))
        x = self.pool(x)
        x = torch.relu(self.emg_conv3(x))
        x = self.pool(x)

        # (batch, 128, pooled_steps) -> (batch, 128 * pooled_steps)
        x = torch.flatten(x, start_dim=1)

        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No softmax here: CrossEntropyLoss expects raw logits.
        x = self.fc3(x)

        return x


In [22]:
def train(model, optimizer, dataloader, criterion, device):
    """Run one optimization pass over ``dataloader``.

    Puts ``model`` in training mode and, for every ``(inputs, labels)`` batch,
    performs a forward pass, back-propagates ``criterion`` and steps
    ``optimizer``.

    Returns
    -------
    (float, float)
        The mean of the per-batch loss values (sum of batch losses divided by
        the number of batches) and the accuracy in percent over all samples
        seen.
    """
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_targets in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()

        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()

        # Predicted class = position of the largest logit in each row.
        predictions = logits.max(1).indices
        n_seen += batch_targets.size(0)
        n_correct += (predictions == batch_targets).sum().item()

    epoch_accuracy = 100. * n_correct / n_seen
    return loss_sum / len(dataloader), epoch_accuracy


In [23]:
def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without updating its weights.

    Puts ``model`` in evaluation mode and runs every ``(inputs, labels)`` batch
    through it with gradient tracking disabled.

    Returns
    -------
    (float, float)
        The mean of the per-batch loss values (sum of batch losses divided by
        the number of batches) and the accuracy in percent over all samples
        seen.
    """
    model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # No gradients are needed for scoring.
    with torch.no_grad():
        for batch_inputs, batch_targets in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_targets).item()

            # Predicted class = position of the largest logit in each row.
            predictions = logits.max(1).indices
            n_seen += batch_targets.size(0)
            n_correct += (predictions == batch_targets).sum().item()

    overall_accuracy = 100. * n_correct / n_seen
    return loss_sum / len(dataloader), overall_accuracy


In [24]:
def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, printing metrics after each one.

    Every epoch runs one training pass over ``train_loader`` (timed) followed
    by an evaluation pass over ``test_loader``. Loss and accuracy for both are
    printed; nothing is returned.
    """
    model.to(device)

    for epoch_number in range(1, num_epochs + 1):
        # Only the training pass is timed; evaluation happens after the clock stops.
        tic = time.time()
        train_loss, train_acc = train(model, optimizer, train_loader, criterion, device)
        epoch_time = time.time() - tic

        print(f"Epoch {epoch_number}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Epoch Time: {epoch_time:.2f} seconds")

        test_loss, test_acc = evaluate(model, test_loader, criterion, device)
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")


In [25]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize EMG_CNN model and move it to the device
# NOTE(review): no random seed is set in the visible cells, so weight
# initialization and batch shuffling differ from run to run.
emg_model = EMG_CNN().to(device)

# Number of epochs
num_epochs = 100

# Loss function and optimizer
# CrossEntropyLoss takes the raw logits produced by the model together with
# integer class labels.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(emg_model.parameters(), lr=0.0001, weight_decay=1e-5)

# Train the EMG model; per-epoch train and test metrics are printed.
# NOTE(review): in the recorded output the test loss rises from the second epoch
# on while the train loss falls, i.e. the model does not generalize to held-out
# participants as configured here.
train_model(emg_model, train_loader, test_loader, criterion, optimizer, num_epochs, device)



Epoch 1/100
Train Loss: 2.3008, Train Acc: 11.33%
Epoch Time: 0.24 seconds
Test Loss: 2.3053, Test Acc: 10.00%
Epoch 2/100
Train Loss: 2.2889, Train Acc: 15.00%
Epoch Time: 0.21 seconds
Test Loss: 2.3524, Test Acc: 11.00%
Epoch 3/100
Train Loss: 2.2561, Train Acc: 18.67%
Epoch Time: 0.19 seconds
Test Loss: 2.6463, Test Acc: 12.38%
Epoch 4/100
Train Loss: 2.1931, Train Acc: 19.46%
Epoch Time: 0.17 seconds
Test Loss: 3.0900, Test Acc: 12.88%
Epoch 5/100
Train Loss: 2.1198, Train Acc: 21.17%
Epoch Time: 0.18 seconds
Test Loss: 3.5506, Test Acc: 12.62%
Epoch 6/100
Train Loss: 2.0539, Train Acc: 23.83%
Epoch Time: 0.17 seconds
Test Loss: 3.8807, Test Acc: 13.12%
Epoch 7/100
Train Loss: 1.9959, Train Acc: 26.50%
Epoch Time: 0.17 seconds
Test Loss: 4.6750, Test Acc: 12.75%
Epoch 8/100
Train Loss: 1.9420, Train Acc: 27.88%
Epoch Time: 0.20 seconds
Test Loss: 4.9501, Test Acc: 12.38%
Epoch 9/100
Train Loss: 1.9047, Train Acc: 28.79%
Epoch Time: 0.19 seconds
Test Loss: 5.4278, Test Acc: 10.62%
E