In [1]:
import scipy.io as sio
import numpy as np
import pandas as pd
from sklearn.decomposition import FastICA
from sklearn import svm
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
def extract_data(subject, training, artifactius = False):
    """Load one recording session and cut a fixed window out of every trial.

    Reads ``A0<subject>T.mat`` (training) or ``A0<subject>E.mat`` (evaluation)
    from the current working directory and walks over every run stored in the
    file's ``data`` entry. For each trial a window of ``win_length`` samples,
    starting ``offset`` samples after the stored trial marker, is taken from
    the first ``n_channels`` columns of the run's signal matrix.

    Parameters
    ----------
    subject : int or str
        Subject identifier inserted into the file name.
    training : bool
        True loads the ``T`` file, False loads the ``E`` file.
    artifactius : bool, optional
        When true, trials with a non-zero artifact flag are kept as well.
        By default only trials whose artifact flag equals 0 are returned.

    Returns
    -------
    data : numpy.ndarray, shape (n_kept_trials, 22, 750), float64
        One (channels, samples) window per kept trial.
    classes : numpy.ndarray, shape (n_kept_trials,), float64
        Label of every kept trial, in the same order as ``data``.

    Raises
    ------
    ValueError
        If a trial window does not fit inside the recorded run.
    """
    n_channels = 22    #Corresponding to the 22 EEG channels
    offset = 3*250     #start 750 samples after the trial marker (3 s at 250 Hz)
    win_length = 3*250 #keep 750 samples (3 s at 250 Hz)
    # NOTE(review): the sampling rate stored in each run is not checked against
    # the 250 Hz assumed above - confirm all files use it.

    if training:
        Z = sio.loadmat('A0'+str(subject)+'T.mat')
    else:
        Z = sio.loadmat('A0'+str(subject)+'E.mat')
    Z_data = Z["data"]

    # Collected in lists so that the number of trials per session is not
    # capped by a pre-sized buffer.
    windows = []
    labels = []

    for i in range(Z_data.size):
        # Each run is a 1x1 struct; its fields are read by position:
        # 0 = signal matrix, 1 = trial start markers, 2 = labels, 5 = artifact flags.
        run = Z_data[0,i][0,0]
        Z_X = run[0]
        # Flatten to 1-D so that indexing yields true scalars; calling int()
        # on a size-1 array is deprecated in recent NumPy releases.
        Z_trial = np.asarray(run[1]).ravel()
        Z_y = np.asarray(run[2]).ravel()
        Z_artifacts = np.asarray(run[5]).ravel()

        for trial in range(Z_trial.size):
            if Z_artifacts[trial] == 0 or artifactius:
                # NOTE(review): if the stored markers are 1-based (MATLAB
                # convention) this window starts one sample late - confirm.
                start = int(Z_trial[trial]) + offset
                window = Z_X[start:start + win_length, :n_channels]
                if window.shape != (win_length, n_channels):
                    raise ValueError(
                        "trial %d of run %d: window of shape %s does not match "
                        "the expected (%d, %d)"
                        % (trial, i, window.shape, win_length, n_channels)
                    )
                windows.append(np.transpose(window))
                labels.append(int(Z_y[trial]))

    data_return = np.zeros((len(windows), n_channels, win_length))
    for k, window in enumerate(windows):
        data_return[k,:,:] = window
    class_return = np.asarray(labels, dtype=float)

    return data_return, class_return

In [None]:
# Load the training session of subject 1 (artifact-flagged trials excluded).
# testData is the (windows, labels) tuple returned by extract_data; later
# cells index it positionally as testData[0] / testData[1].
testData = extract_data(1,True)
print(testData[0].shape)
print(testData[1].shape)

Classify the raw (unfiltered) data as a sanity check.

In [3]:
def bandwidthpow(Data):
    """Return the natural log of the variance along the last axis.

    Parameters
    ----------
    Data : numpy.ndarray
        Three-dimensional array; the notebook passes
        (trials, channels, samples).

    Returns
    -------
    numpy.ndarray
        float64 array with the shape of the first two axes of ``Data``;
        entry ``[i, j]`` is ``log(var(Data[i, j, :]))``.
    """
    # Unpacking three values keeps the requirement that Data is exactly 3-D.
    n_first, n_second, _ = Data.shape
    output = np.zeros((n_first, n_second))
    # One vectorized reduction over the last axis replaces the per-trial loop.
    output[...] = np.log(np.var(Data, axis=2))
    return output

In [None]:
# Log-variance features of the unfiltered windows, one row per trial.
bandwidths = bandwidthpow(testData[0])

In [None]:
# Fit a support-vector classifier on the log-variance features, using all
# loaded trials (no held-out split is made here).
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(bandwidths, testData[1])

In [None]:
# Predict on the very same feature matrix the classifier was fitted on.
predicts = clf.predict(bandwidths)

In [None]:
# Fraction of trials whose prediction equals the true label; because
# `predicts` comes from the data used for fitting, this is training accuracy.
np.mean(predicts==testData[1])

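Accuracy is well above chance, i.e. the signals do explain the class labels to some degree. Note that this is the accuracy on the same trials the classifier was trained on, so it is only a sanity check and not an estimate of how well the model generalizes.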

# Try bandpass filtering

In [4]:
from scipy.signal import butter, lfilter

# Band-pass design parameters: keep 8-30 Hz of a signal sampled at 250 Hz.
fs = 250 # sampling frequency
lowcut, highcut = 8, 30
order = 3

# butter() receives the band edges as fractions of the Nyquist frequency.
nyq = fs * 0.5
low, high = lowcut / nyq, highcut / nyq

# Numerator (b) and denominator (a) coefficients reused by the later cells.
b, a = butter(order, [low, high], btype='band')

In [5]:
def reshape_signal(arr):
    """Concatenate the blocks of a 3-D array along the last axis.

    An input of shape ``(x, y, z)`` becomes ``(y, x*z)``: row ``j`` of the
    result is ``arr[0, j, :]`` followed by ``arr[1, j, :]`` and so on. In
    this notebook that turns (trials, channels, samples) into one continuous
    (channels, trials*samples) signal.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with exactly three dimensions.

    Returns
    -------
    numpy.ndarray
        Newly allocated array of shape ``(y, x*z)`` with the dtype of ``arr``.
        An input with ``x == 0`` yields an empty ``(y, 0)`` array.
    """
    x,y,z = arr.shape
    # Swap the first two axes and force a fresh C-ordered copy, so that the
    # result never aliases the input, then merge the last two axes.
    swapped = np.array(np.transpose(arr, (1, 0, 2)), order='C')
    return swapped.reshape((y, x*z))

In [None]:
# Concatenate all trials along time: (trials, 22, 750) -> (22, trials*750).
sig = reshape_signal(testData[0])

In [None]:
# Band-pass every row of the concatenated signal with the coefficients b, a.
# lfilter works along the last axis by default, so the filter runs straight
# across the boundaries between consecutive trials.
sig_filter = lfilter(b, a, sig)

In [None]:
# Undo reshape_signal: split the time axis back into 750-sample trials and
# move the trial axis to the front -> (trials, 22, 750).
# NOTE: this overwrites sig_filter with an array of a different shape, so
# re-running this cell without re-running the filtering cell first would
# reshape already-reshaped data and scramble it.
sig_filter = np.stack((sig_filter).reshape(22,sig.shape[1]//750,750),axis=1)

In [None]:
# Log-variance features of the band-pass-filtered trials; this replaces the
# `bandwidths` computed from the unfiltered data.
bandwidths = bandwidthpow(sig_filter)

In [None]:
# Refit the same classifier, now on the features of the filtered signal.
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(bandwidths, testData[1])

In [None]:
# Training accuracy on the filtered features (predictions are made on the
# same trials that were used for fitting).
predicts = clf.predict(bandwidths)
np.mean(predicts==testData[1])

In [None]:
# Compare the shapes of the concatenated signal, the original trials and the
# filtered trials.
print(sig.shape, testData[0].shape, sig_filter.shape)

# CAR (common average reference)

In [6]:
def car(samples):
    """Subtract, at every column, the mean over the first axis.

    The operation is written as a matrix product with the centering matrix
    ``I - (1/d) * ones((d, d))``, where ``d = samples.shape[0]``; for a
    (channels, time) array this removes the average across channels at every
    time point.

    Parameters
    ----------
    samples : numpy.ndarray
        Array whose first axis has length ``d``.

    Returns
    -------
    numpy.ndarray
        The centering matrix multiplied with ``samples``.
    """
    n_rows = samples.shape[0]
    averaging = np.ones((n_rows, n_rows)) / n_rows
    centering = np.eye(n_rows) - averaging
    return np.dot(centering, samples)


# returns basis of A's null space
def null(A, eps=1e-15):
    """Return rows that span the null space of the 2-D matrix ``A``.

    The rows are taken from the third output of ``np.linalg.svd(A)``: a row
    is kept when its singular value is at most ``eps``, or when it has no
    singular value at all because ``A`` has more columns than singular
    values.

    Parameters
    ----------
    A : array_like
        Two-dimensional matrix.
    eps : float, optional
        Absolute threshold below which a singular value counts as zero.

    Returns
    -------
    numpy.ndarray
        Array with one selected row per null-space direction and as many
        columns as ``A``; it has zero rows when nothing is selected.
    """
    _, singular_values, vh = np.linalg.svd(A)
    # Rows of vh beyond the number of singular values always belong to the
    # null space.
    n_unpaired = max(0, np.shape(A)[1] - np.shape(singular_values)[0])
    below_threshold = singular_values <= eps
    always_selected = np.ones((n_unpaired,), dtype=bool)
    null_mask = np.concatenate((below_threshold, always_selected), axis=0)
    return np.compress(null_mask, vh, axis=0)


def carcomplement(samples):
    """Project ``samples`` onto a basis of the complement of the all-ones vector.

    ``null(np.ones((1, d)))`` supplies ``d - 1`` rows that are orthogonal to
    the all-ones vector of length ``d = samples.shape[0]``; multiplying by
    them removes the component shared by all rows and lowers the first
    dimension from ``d`` to ``d - 1``.

    NOTE(review): the basis comes out of an SVD, so the particular rows (sign
    and rotation inside the subspace) may depend on the linear-algebra
    backend - confirm if exact reproducibility across machines matters.
    """
    n_rows = samples.shape[0]
    ones_row = np.ones((1, n_rows))
    complement_basis = null(ones_row)
    return np.dot(complement_basis, samples)

# Cleaning all data

In [7]:
def clean_data(subs = range(1,10),datatype="float32",filt = False,art = False):
    """Load, re-reference and optionally band-pass every session of ``subs``.

    For each subject the training session is processed first and the
    evaluation session second. Per session the trials returned by
    ``extract_data`` are concatenated along time (``reshape_signal``),
    projected with ``carcomplement`` (which removes one dimension from the
    channel axis), optionally filtered with the module-level coefficients
    ``b`` and ``a``, and finally cut back into individual trials.

    When ``filt`` is true the filter is applied to the concatenated signal,
    so it runs across the boundaries between consecutive trials.

    Parameters
    ----------
    subs : iterable
        Subject identifiers forwarded to ``extract_data``.
    datatype : str or numpy dtype
        dtype the stacked signals are cast to.
    filt : bool
        Apply ``lfilter(b, a, ...)`` to the concatenated signal.
    art : bool
        Forwarded to ``extract_data`` as ``artifactius``.

    Returns
    -------
    Signals : numpy.ndarray
        All trials of all sessions stacked along the first axis.
    Classes : numpy.ndarray
        Labels in the same order as ``Signals``.
    Session_lengths : list of int
        Trial count of each session, ordered train, eval, train, eval, ...
    """
    Signal_list = []
    Class_list = []
    Session_lengths = []

    for i in tqdm(subs):
        # Training session first, then the evaluation session.
        for training in (True, False):
            trials, labels = extract_data(i, training, art)
            n_trials, _, n_samples = trials.shape

            #Concatenate signals and project onto the complement of the
            #channel average
            sig = carcomplement(reshape_signal(trials))

            #Filter
            if filt:
                sig = lfilter(b, a, sig)

            #Cut the continuous signal back into trials; the sizes come from
            #the data itself instead of hard-coded channel/sample counts
            sig = np.stack(sig.reshape(sig.shape[0], n_trials, n_samples), axis=1)

            Signal_list.append(sig)
            Class_list.append(labels)
            Session_lengths.append(labels.shape[0])

    Signals = np.concatenate(Signal_list).astype(datatype)
    Classes = np.concatenate(Class_list)

    return Signals,Classes, Session_lengths

In [8]:
# Process both sessions of eight subjects; subject 2 is not in the list.
# Filtering is left at its default (off).
Signals,Classes,n_se = clean_data(subs=[1,3,4,5,6,7,8,9])

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))




In [9]:
# Stacked signal shape, label shape and the per-session trial counts.
print(Signals.shape, Classes.shape,n_se)

(4143, 21, 750) (4143,) [273, 281, 270, 273, 262, 228, 262, 276, 219, 215, 271, 277, 264, 271, 237, 264]


In [10]:
# Persist the re-referenced, unfiltered signals of the eight subjects.
np.save("Signals_CAR_nofilt8.npy", Signals)

In [11]:
# Persist the matching labels (same trial order as the saved signals).
np.save("Class_CAR_nofilt8.npy", Classes)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Demo of the band-pass filter on a synthetic signal: 5 seconds of a 25 Hz
# sine plus a 4 Hz sine (slow component), sampled at 250 Hz.
sample_idx = np.arange(5 * 250)
demo_channel = np.sin(25 * sample_idx * 2 * np.pi / 250) + \
    np.sin(4 * sample_idx * 2 * np.pi / 250 + .42)
# Two identical rows, so that the filter is applied to a 2-D input as in the
# rest of the notebook.
signal = np.array([demo_channel, demo_channel])


# filtered signal (first row only)
fsignal = lfilter(b, a, signal)[0]
fig, axs = plt.subplots(1, 2)
# The loop variable is deliberately not called `sig`: that name holds the
# concatenated recording in earlier cells and must not be overwritten here.
for ax, trace, lab in zip(axs,[signal[0], fsignal],['raw signal', 'filtered signal']):
    ax.plot(trace[:250])  # first 250 samples = first second
    ax.set_title(lab)
    ax.set_xlabel('sample')
plt.show()

In [None]:
def clean_data2(subject, training, artifactius = False):
    '''
    Load one session and band-pass filter every trial on its own.

    The trial windows and labels come from ``extract_data`` (same files,
    offset, window length and channels). Each window is then filtered along
    its time axis with the module-level coefficients ``b`` and ``a``. Because
    every trial is filtered separately, no filter state is carried over from
    one trial to the next.

    Parameters
    ----------
    subject : int or str
        Subject identifier used in the file name.
    training : bool
        True loads the training file, False the evaluation file.
    artifactius : bool, optional
        When true, trials flagged as artifacts are kept as well. The flag is
        forwarded to ``extract_data``.

    Returns
    -------
    data : numpy.ndarray, shape (n_kept_trials, 22, 750)
        Filtered windows.
    classes : numpy.ndarray, shape (n_kept_trials,)
        Labels in the same order as ``data``.
    '''
    data, classes = extract_data(subject, training, artifactius)
    if data.shape[0] == 0:
        # Nothing to filter; hand back the empty arrays unchanged.
        return data, classes
    # Filtering the 3-D array along its last axis treats every
    # (trial, channel) row as an independent sequence.
    return lfilter(b, a, data, axis=-1), classes

In [None]:
# Collect the per-trial band-pass-filtered data of all nine subjects.
Signal_list = []
Class_list = []
Session_lengths = []

for i in tqdm(range(1,10)):
    # Training session first, then the evaluation session. Both go through
    # clean_data2; clean_data takes a list of subjects as its first argument
    # and cannot be called with (subject, training).
    for training in (True, False):
        data = clean_data2(i,training)
        Signal_list.append(data[0])
        Class_list.append(data[1])
        Session_lengths.append((data[1].shape)[0])

In [None]:
# Stack all sessions along the trial axis. This overwrites the Signals and
# Classes variables produced by the earlier clean_data() cell.
Signals = np.concatenate(Signal_list)
Classes = np.concatenate(Class_list)
print(Signals.shape, Classes.shape,Session_lengths)

In [None]:
# Cast to single precision before saving.
Signals = Signals.astype("float32")

In [None]:
# Persist the filtered signals and their labels (same trial order).
np.save("Signals_filt.npy", Signals)
np.save("Class_filt.npy", Classes)