In [1]:
import os
import sys
import random
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder , StandardScaler
from tqdm import tqdm
import scipy.io as sio
import pickle

In [2]:
#Source: https://github.com/MultiScale-BCI/IV-2a/blob/master/get_data.py

'''	Loads the dataset 2a of the BCI Competition IV 
available on http://bnci-horizon-2020.eu/database/data-sets
'''

__author__ = "Michael Hersche and Tino Rellstab"
__email__ = "herschmi@ethz.ch,tinor@ethz.ch"

def get_data(subject,training,PATH):
    '''Load the artifact-free trials of one subject from a dataset 2a .mat file.

    Keyword arguments:
    subject -- number of subject in [1, .. ,9]
    training -- if True: load training data ('A0<subject>T.mat')
                if False: load testing data ('A0<subject>E.mat')
    PATH -- prefix prepended verbatim to the file name, so a directory
            must include its trailing separator

    Return: data_return numpy matrix (nvalid_trial x 22 x 1750)
            class_return numpy matrix (nvalid_trial)

    Trials whose artifact flag is non-zero are skipped. Errors raised by
    scipy.io.loadmat (e.g. a missing file) propagate to the caller.
    '''
    nchannels = 22
    ntests = 6*48          # capacity of the preallocated output buffers
    windowlength = 7*250   # samples copied per trial, counted from the trial start

    class_return = np.zeros(ntests)
    data_return = np.zeros((ntests,nchannels,windowlength))

    suffix = 'T.mat' if training else 'E.mat'
    a = sio.loadmat(PATH+'A0'+str(subject)+suffix)
    a_data = a['data']

    nvalid_trial = 0
    for ii in range(a_data.size):
        # Each cell wraps a 1x1 struct; its fields are read by position:
        # 0 = signal matrix, 1 = trial start indices, 2 = labels, 5 = artifact flags.
        run = a_data[0,ii][0,0]
        a_X = run[0]
        # loadmat returns these vectors as 2-D matrices; flatten them so that
        # indexing yields true scalars (int() on a 1-element array is deprecated
        # in recent NumPy releases).
        a_trial = np.ravel(run[1])
        a_y = np.ravel(run[2])
        a_artifacts = np.ravel(run[5])
        for trial in range(a_trial.size):
            if a_artifacts[trial] != 0:
                continue
            start = int(a_trial[trial])
            # rows are samples, columns are channels -> store as channels x samples
            data_return[nvalid_trial,:,:] = np.transpose(a_X[start:start+windowlength,:nchannels])
            class_return[nvalid_trial] = int(a_y[trial])
            nvalid_trial += 1

    return data_return[0:nvalid_trial,:,:], class_return[0:nvalid_trial]

In [3]:
# Prefix that get_data prepends verbatim to each .mat file name, so the
# trailing '/' is required.
PATH = 'BCI Competition IV/Dataset2/2a/'
# True selects the 'T' (training) files; False selects the 'E' (testing) files.
training = True

In [4]:
# Load every subject's file; subjs[k] holds [data, labels] for subject k+1,
# as returned by get_data.
subjs = []
num_subjects = 9
for i in range(1,num_subjects+1):  # subject ids in the file names are 1-based
    data, labels = get_data(i,training,PATH)
    subjs.append([data,labels])

In [5]:
# Collect the per-subject trial arrays in X and one [subject index, label] row
# per trial in Y.
X = []
Y = []
for i, (subj_trials, subj_labels) in enumerate(subjs):
    X.append(subj_trials)
    for j in range(len(subj_trials)):
        # subject index is 0-based here; class labels [1,2,3,4] --> [0,1,2,3]
        zero_based_label = int(subj_labels[j] - 1)
        Y.append([i, zero_based_label])

In [6]:
# Join the per-subject arrays along the first (trial) axis and turn the list of
# [subject index, label] rows into a 2-D array.
# NOTE: X and Y are rebound in place, so this cell is meant to run once after
# the lists are built.
X = np.concatenate(X)
Y = np.array(Y)

In [7]:
# Sanity check: number of trials must match between the data and the label table.
X.shape, Y.shape

((2328, 22, 1750), (2328, 2))

In [8]:
# Sanity check: element types of the stacked data and of the label table.
X.dtype, Y.dtype

(dtype('float64'), dtype('int64'))

In [9]:
# Swap the last two axes: (trials, channels, time) -> (trials, time, channels).
# reshape() would only reinterpret the flat buffer under the new dimensions and
# mix samples from different channels; transpose moves each value with its axis.
# ascontiguousarray materialises the result in C order, as reshape's output was.
X = np.ascontiguousarray(np.transpose(X, (0, 2, 1)))

In [10]:
X.shape  # intended layout: trials x time samples x channels

(2328, 1750, 22)

In [11]:
Y.shape  # trials x (subject index, zero-based class label)

(2328, 2)

In [12]:
# Persist [X, Y] as a single pickle. The context manager flushes and closes the
# file handle even if pickling raises, instead of leaving it to garbage collection.
with open('CrossSubjects_MI.pkl', "wb") as pkl_file:
    pickle.dump([X,Y], pkl_file)