# USING ML on SSVEP

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal

In [2]:
def butter_highpass(cutoff, fs, order=5):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = signal.butter(order, normal_cutoff, btype='high', analog=False)
    return b, a

def butter_highpass_filter(data, cutoff, fs, order=5):
    b, a = butter_highpass(cutoff, fs, order=order)
    y = signal.filtfilt(b, a, data)
    return y

In [17]:
from sklearn.preprocessing import MinMaxScaler

def preprocess(t,label, fileName):
    
    data = np.loadtxt(fileName,delimiter=',' , skiprows=1)
    data = data[:,[22]]
    arr = []
    
    y = data[:,0][256: t*256]

    y = butter_highpass_filter(y,5,256,5)
    ps = np.abs(np.fft.fft(y))**2
    scaler = MinMaxScaler()
    scaler.fit(ps)
    ps = scaler.transform(ps)
    arr.append(ps)
        
    freqs = np.fft.fftfreq( y.size , float(1)/256 )
    arr.append(freqs)
    
    output_arr = np.array(arr)
    output_arr = np.transpose(output_arr)
    
    # DROP WHEN FREQUENCY < 0
    output_arr = output_arr[ np.logical_not( output_arr[:,1] < 0 ) ]
    
    # Add label column
    num_rows, num_cols = output_arr.shape
    label_column = np.full((num_rows, 1), label)  
    output_arr = np.hstack(( output_arr , label_column ))
#     ### *********************************************************************
#     for i in range(0,6):
#         output_arr = output_arr[ np.logical_not( output_arr[:,i] < 100000 ) ]
#     ###
    
#     print output_arr.shape    
    return output_arr

In [18]:
preprocess(2,8, '../Data/SSVEP_Public/SSVEP_8Hz_Trial1_SUBJ3.MAT.csv')



array([[  2.42469610e-03,   0.00000000e+00,   8.00000000e+00],
       [  2.38499800e-03,   1.00000000e+00,   8.00000000e+00],
       [  2.16550445e-03,   2.00000000e+00,   8.00000000e+00],
       [  1.72164342e-03,   3.00000000e+00,   8.00000000e+00],
       [  8.59674851e-03,   4.00000000e+00,   8.00000000e+00],
       [  8.23954594e-03,   5.00000000e+00,   8.00000000e+00],
       [  9.01200973e-02,   6.00000000e+00,   8.00000000e+00],
       [  1.00000000e+00,   7.00000000e+00,   8.00000000e+00],
       [  4.61972364e-01,   8.00000000e+00,   8.00000000e+00],
       [  3.78686963e-01,   9.00000000e+00,   8.00000000e+00],
       [  8.10346056e-01,   1.00000000e+01,   8.00000000e+00],
       [  1.87982430e-01,   1.10000000e+01,   8.00000000e+00],
       [  3.91040495e-01,   1.20000000e+01,   8.00000000e+00],
       [  4.87521148e-01,   1.30000000e+01,   8.00000000e+00],
       [  6.09784148e-01,   1.40000000e+01,   8.00000000e+00],
       [  9.59815262e-02,   1.50000000e+01,   8.0000000

In [19]:
eight_ssvep =   ['../Data/SSVEP_Public/SSVEP_8Hz_Trial1_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial1_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial1_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial1_SUBJ4.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial2_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial2_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial2_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial2_SUBJ4.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial3_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial3_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial3_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial3_SUBJ4.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial4_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial4_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial4_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_8Hz_Trial4_SUBJ4.MAT.csv',]

fourteen_ssvep =   ['../Data/SSVEP_Public/SSVEP_14Hz_Trial1_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial1_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial1_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial1_SUBJ4.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial2_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial2_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial2_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial2_SUBJ4.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial3_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial3_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial3_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial3_SUBJ4.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial4_SUBJ1.MAT.csv',
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial4_SUBJ2.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial4_SUBJ3.MAT.csv', 
                 '../Data/SSVEP_Public/SSVEP_14Hz_Trial4_SUBJ4.MAT.csv',]

In [26]:
time = 5
train_data = np.concatenate((   preprocess(time,8, eight_ssvep[0]),    
                                preprocess(time,8, eight_ssvep[1]),    
                                preprocess(time,8, eight_ssvep[2]),    
                                preprocess(time,8, eight_ssvep[12]), 
                                preprocess(time,14, fourteen_ssvep[0]),  
                                preprocess(time,14, fourteen_ssvep[1]),  
                                preprocess(time,14, fourteen_ssvep[2]),  
                                preprocess(time,14, fourteen_ssvep[12]),
                                preprocess(time,8, eight_ssvep[4]),    
                                preprocess(time,8, eight_ssvep[5]),    
                                preprocess(time,8, eight_ssvep[6]),    
                                preprocess(time,8, eight_ssvep[13]), 
                                preprocess(time,14, fourteen_ssvep[4]),  
                                preprocess(time,14, fourteen_ssvep[5]),  
                                preprocess(time,14, fourteen_ssvep[6]),  
                                preprocess(time,14, fourteen_ssvep[13]),
                                preprocess(time,8, eight_ssvep[8]),    
                                preprocess(time,8, eight_ssvep[9]),    
                                preprocess(time,8, eight_ssvep[10]),    
                                preprocess(time,8, eight_ssvep[14]), 
                                preprocess(time,14, fourteen_ssvep[8]),  
                                preprocess(time,14, fourteen_ssvep[9]),  
                                preprocess(time,14, fourteen_ssvep[10]),  
                                preprocess(time,14, fourteen_ssvep[14])
                            ), axis=0)

test_data = np.concatenate((    
                                preprocess(time,8, eight_ssvep[3]),    
                                preprocess(time,8, eight_ssvep[7]),    
                                preprocess(time,8, eight_ssvep[11]),    
                                preprocess(time,8, eight_ssvep[15]), 
                                preprocess(time,14, fourteen_ssvep[3]),  
                                preprocess(time,14, fourteen_ssvep[7]),  
                                preprocess(time,14, fourteen_ssvep[11]),  
                                preprocess(time,14, fourteen_ssvep[15]),
                           ), axis=0)







In [27]:
print test_data.shape

(4096, 3)


In [28]:
train_X = train_data[:,:-1]
train_Y = train_data[:,-1]

test_X = test_data[:,:-1]
test_Y = test_data[:,-1]

In [29]:
from sklearn.model_selection import train_test_split
# FOR CROSS VALIDATION WITH SPLIT 40%
X_train, X_test, Y_train, Y_test = train_test_split(train_X, train_Y, test_size=0.4, random_state=0)

In [30]:
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import cross_val_predict
from sklearn import svm, datasets
from sklearn.metrics import confusion_matrix
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.ensemble import RandomForestClassifier

classifiers = [ 
                    DecisionTreeClassifier(),
                    KNeighborsClassifier(n_neighbors=9),
                    GaussianNB(),
                    MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=3),
                    NearestCentroid(),
                    RandomForestClassifier()
                    #SVC(decision_function_shape='ovo'), 
                    #SVC(kernel='linear', C=1),
                  ]

In [31]:
for clf in classifiers:
        print ""
        print clf
#         clf.fit(train_X, train_Y)
 
        predicted = cross_val_predict(clf, train_X, train_Y, cv=10)
        print("Cross-validation accuracy: ", metrics.accuracy_score(train_Y, predicted))


DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
('Cross-validation accuracy: ', 0.51871744791666663)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=9, p=2,
           weights='uniform')
('Cross-validation accuracy: ', 0.50813802083333337)

GaussianNB(priors=None)
('Cross-validation accuracy: ', 0.52970377604166663)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=3, shuffle=True,
      