<a href="https://colab.research.google.com/github/Flo909/GraspandLiftDetection/blob/main/Copy_of_Assignment_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch
import torchvision
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

import tensorflow

from matplotlib import pyplot as plt


# Training will be significantly faster if GPU is available. In Colab, go to Runtime -> Change runtime type -> Hardware accelerator -> GPU
if torch.cuda.is_available():
    print("GPU is available")
else:
    print("GPU is not available")
# set the device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


GPU is available


Import data:
Download the train.zip file from https://www.kaggle.com/c/grasp-and-lift-eeg-detection/data and
explore the dataset. This file contains the first 8 series for each subject. (We will be only using
train.zip for the project.)
There are two files for each subject + series combination:
● the *_data.csv files contain the raw 32 channels EEG data (sampling rate 500Hz)
● the *_events.csv files contains the ground truth frame-wise labels for all events

NOTE: to import the data you have to log into kaggle and create an API token. From there you upload the kaggle.json file that will be downloaded when creating the API token and upload it into the connected google drive account for colab. Then give colab permission to access the files when running the below lines.

In [2]:
! pip install kaggle



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
! mkdir ~/.kaggle

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [7]:
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

In [8]:
! kaggle competitions download grasp-and-lift-eeg-detection --force

Downloading grasp-and-lift-eeg-detection.zip to /content
 99% 1.01G/1.02G [00:11<00:00, 98.2MB/s]
100% 1.02G/1.02G [00:11<00:00, 91.5MB/s]


In [3]:
import zipfile
import pandas as pd
import os

# Open the zip file
with zipfile.ZipFile('grasp-and-lift-eeg-detection.zip', 'r') as zip_ref:
    # Extract all files
    zip_ref.extractall('grasp-and-lift-eeg-detection')

# List the extracted files
extracted_files = zip_ref.namelist()

display(extracted_files)

['sample_submission.csv.zip', 'test.zip', 'train.zip']

In [4]:
# Function to read data and labels
def read_data_and_labels(zip_file):
    train_data = {}
    test_data = {}
    test_labels = {}
    train_labels = {}

    # Extracted folder name will be the same as the zip file name without the extension
    extracted_folder = os.path.splitext(zip_file)[0]

    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # Extract all files
        zip_ref.extractall(extracted_folder)

    extracted_files = zip_ref.namelist()

    for file_name in extracted_files:
        if file_name.endswith('_data.csv'):
            subject_id, series = file_name.split('_')[:2]
            df = pd.read_csv(os.path.join(extracted_folder, file_name))
            if series == 'series7' or series == 'series8':
                test_data.setdefault(subject_id, []).append(df)
            else:
                train_data.setdefault(subject_id, []).append(df)

        elif file_name.endswith('_events.csv'):
            subject_id, series = file_name.split('_')[:2]
            labels_df = pd.read_csv(os.path.join(extracted_folder, file_name))

            if series == 'series7' or series == 'series8':
              test_labels.setdefault(subject_id, []).append(labels_df)
            else:
              train_labels.setdefault(subject_id, []).append(labels_df)


    return train_data, test_data, test_labels, train_labels

# Read data and labels
train_data, test_data, test_labels, train_labels = read_data_and_labels('grasp-and-lift-eeg-detection/train.zip')



In [5]:
print(train_data.keys())
print(test_data.keys())

# Example: Accessing data for subject '1'
subject_id = 'train/subj1'
train_data_subject_1 = pd.concat(train_data[subject_id], ignore_index=True)
test_data_subject_1 = pd.concat(test_data[subject_id], ignore_index=True)
test_labels_subject_1 = pd.concat(test_labels[subject_id], ignore_index=True)
train_labels_subject_1 = pd.concat(train_labels[subject_id], ignore_index=True)

# display(test_labels_subject_1)
# Now you have train and test data along with labels for each subject

dict_keys(['train/subj10', 'train/subj11', 'train/subj12', 'train/subj1', 'train/subj2', 'train/subj3', 'train/subj4', 'train/subj5', 'train/subj6', 'train/subj7', 'train/subj8', 'train/subj9'])
dict_keys(['train/subj10', 'train/subj11', 'train/subj12', 'train/subj1', 'train/subj2', 'train/subj3', 'train/subj4', 'train/subj5', 'train/subj6', 'train/subj7', 'train/subj8', 'train/subj9'])


Unnamed: 0,id,HandStart,FirstDigitTouch,BothStartLoadPhase,LiftOff,Replace,BothReleased
0,subj1_series7_0,0,0,0,0,0,0
1,subj1_series7_1,0,0,0,0,0,0
2,subj1_series7_2,0,0,0,0,0,0
3,subj1_series7_3,0,0,0,0,0,0
4,subj1_series7_4,0,0,0,0,0,0
...,...,...,...,...,...,...,...
236889,subj1_series8_117328,0,0,0,0,0,0
236890,subj1_series8_117329,0,0,0,0,0,0
236891,subj1_series8_117330,0,0,0,0,0,0
236892,subj1_series8_117331,0,0,0,0,0,0




# Preprocess the data
Explore on EEG signal preprocessing methods and make choices on what sort of preprocessing this
dataset would need. You can justify your choices in the report

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader

# Custom Dataset Class
class EEGDataset(Dataset):
    def __init__(self, data_dict, labels_dict):
        self.data_dict = data_dict
        self.labels_dict = labels_dict

        # Extract subject IDs and series
        self.subjects = list(data_dict.keys())
        self.series = ['series1', 'series2', 'series3', 'series4', 'series5', 'series6','series7', 'series8', 'series9', 'series10', 'series11', 'series12']

    def __len__(self):
        return len(self.subjects) * len(self.series)

    def __getitem__(self, idx):
        subject_idx = idx // len(self.series)
        series_idx = idx % len(self.series)
        subject_id = self.subjects[subject_idx]
        series = self.series[series_idx]

        # Get EEG data and labels for the current subject and series
        eeg_data = self.data_dict[subject_id][series_idx]
        labels = self.labels_dict[subject_id]

        return eeg_data.values, labels.values

# Create Dataset instances
train_dataset = EEGDataset(train_data, train_labels)
test_dataset = EEGDataset(test_data, test_labels)

# Create DataLoader instances
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define Neural Network

In [14]:
class EEGModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(EEGModel, self).__init__()
        # Define your neural network architecture here
        # Example:
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Define the forward pass of your network
        # Example:
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [8]:
# Get the input size from the train_loader
for inputs, _ in train_loader:
    input_size = inputs.shape[1]  # Get the number of features from the shape of the input data
    break  # Break after extracting the first batch

# Define the neural network model with the correct input size
hidden_size = 64  # Example hidden size
num_classes = 6  # Example number of classes

model = EEGModel(input_size, hidden_size, num_classes)

IndexError: list index out of range