In [277]:
import mne
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Silence MNE messages below ERROR for the rest of the session.
# `use_log_level` is a context manager: building it without a `with` block
# changes nothing, so the persistent setter is used instead.
mne.set_log_level('error')

<mne.utils.use_log_level at 0x129df1d30>

In [278]:
#subject = subject number
#testtype = VR or Non-VR
def loadRawData(subject, testtype):
    """Read one subject's preprocessed recording from disk.

    subject  -- subject number; converted with str() to build the file name.
    testtype -- test label used in the file name ("VR" or "Non-VR").

    Returns the result of mne.io.read_raw_fif for
    Preprocessed_Data/Subject-<subject>_<testtype>.fif.
    """
    fif_path = "".join(["Preprocessed_Data/Subject-", str(subject), "_", testtype, ".fif"])
    return mne.io.read_raw_fif(fif_path, verbose='error')

def getFeatures(raw):
    """Summarise fixed 5-second windows of one channel as statistical features.

    Windows start at 55 s, 65 s, ..., 155 s. In each window the first picked
    EEG channel is reduced to six statistics. The pick keeps every EEG channel
    except CH 1-3 and CH 5-8 (per the original note: "only looking at CH 4").

    Parameters
    ----------
    raw : recording object exposing ``info``, ``time_as_index`` and
        ``raw[picks, start:stop]`` indexing, as returned by ``loadRawData``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed window, with columns
        Mean, Median, StdDev, Mean/StdDev, Skew, Kurt. Empty (but with those
        columns) when no window could be processed.

    Notes
    -----
    A window that fails is skipped, but a RuntimeWarning is emitted so the
    failure is visible. KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    import warnings

    col = ['Mean', 'Median', 'StdDev','Mean/StdDev','Skew','Kurt']

    #only looking at CH 4
    # The channel selection does not depend on the window, so compute it once.
    picks = mne.pick_types(raw.info, eeg=True, exclude=['CH 1','CH 2','CH 3','CH 5','CH 6','CH 7','CH 8'])

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the table per row.
    records = []
    for i in range(55,165,10):
        start, stop = raw.time_as_index([i, i+5])

        try:
            d, t = raw[picks[:], start:stop]
            ds = pd.DataFrame(d[0])

            #normalize
            #ds = (ds-ds.mean())/(ds.max()-ds.min())

            mean = ds.mean()[0]
            median = ds.median()[0]
            std = ds.std()[0]
            mos = mean/std
            skew = ds.skew()[0]
            kurt = ds.kurt()[0]
        except Exception as err:
            # Best-effort: keep going, but say which window was dropped and why.
            warnings.warn(
                "getFeatures: skipped window %s-%s s: %r" % (i, i+5, err),
                RuntimeWarning,
                stacklevel=2,
            )
            continue
        records.append([mean, median, std, mos, skew, kurt])
    return pd.DataFrame(records, columns=col)

In [279]:
# Column layout of the per-window feature table `df` built in the next cell.
# NOTE: the name `columns` is rebound further down for the distance tables,
# so this cell has to be re-run before `df` is rebuilt.
columns = ['Subject','Test', 'Channel', 'Mean', 'Median', 'StdDev','Mean/StdDev','Skew','Kurt']

In [280]:
#testtype = VR or Non-VR
# Build one frame per subject and concatenate once. DataFrame.append was
# removed in pandas 2.0, and appending row by row copied the whole table on
# every iteration.
subject_frames = []
for i in range(1,33):
    data = loadRawData(i,"Non-VR")
    f = getFeatures(data)
    # Tag every window row of this subject with its identifying columns.
    subject_frames.append(f.assign(Subject=i, Test='Non-VR', Channel='CH 4'))

# Reorder to the declared layout. ignore_index yields 0..n-1 row labels,
# matching what the row-by-row append produced.
df = pd.concat(subject_frames, ignore_index=True)[columns]

In [281]:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#    print(df)

In [282]:
# Column names for the (currently disabled) scatter plot below:
# horizontal axis, vertical axis and colour key.
x, y, c = "Mean/StdDev", "Median", "Subject"
#df.plot.scatter(x=x, y=y, c=c, cmap='rainbow', figsize=[10,10])

In [283]:
import itertools

In [284]:
subs = df['Subject'].unique()    # All subjects
all_subs= list(itertools.combinations(subs, 2)) # All possible combination for all subjects

columns = ['Subject', 'Mean', 'Median', 'StdDev','Mean/StdDev','Skew','Kurt', 'Type']
features = ['Mean', 'Median', 'StdDev','Mean/StdDev','Skew','Kurt']

#Intra Distance Computation (Same Person)
# For every subject, take each unordered pair of that subject's rows and store
# the absolute per-feature difference, labelled Type 0.
# The rows are read straight from the subject's own slice of `df` rather than
# by feeding index labels to .iloc, which is only right for a 0..n-1 index.
intra_frames = []
for sub in subs:
    values = df.loc[df['Subject'] == sub, features].to_numpy(dtype=float)
    # triu_indices(n, k=1) enumerates (0,1), (0,2), ..., (1,2), ... which is
    # the same order as itertools.combinations(range(n), 2).
    first, second = np.triu_indices(len(values), k=1)
    pair_frame = pd.DataFrame(np.absolute(values[first] - values[second]), columns=features)
    pair_frame.insert(0, 'Subject', sub)
    pair_frame['Type'] = 0
    intra_frames.append(pair_frame)

# One concat instead of one DataFrame.append per pair (append was removed in
# pandas 2.0). pd.concat rejects an empty list, hence the fallback.
if intra_frames:
    intra_data = pd.concat(intra_frames, ignore_index=True)[columns]
else:
    intra_data = pd.DataFrame(columns = columns)

In [285]:
# Inter Distance Computation (Different Person)
# For every pair of subjects, compare each row of the first subject with each
# row of the second and store the absolute per-feature difference, Type 1.
all_rows=len(df)
inter_frames = []
for sub_pair in all_subs: # Pairs of subjects
    first = df.loc[df['Subject'] == sub_pair[0], features].to_numpy(dtype=float)
    second = df.loc[df['Subject'] == sub_pair[1], features].to_numpy(dtype=float)
    # Broadcasting gives a (rows of first, rows of second, feature) cube.
    # Flattening it keeps the original order: first-subject row outer,
    # second-subject row inner.
    diffs = np.absolute(first[:, None, :] - second[None, :, :])
    diffs = diffs.reshape(len(first) * len(second), len(features))
    pair_frame = pd.DataFrame(diffs, columns=features)
    # dtype=object keeps each cell as the (subject, subject) tuple itself.
    pair_frame.insert(0, 'Subject', pd.Series([sub_pair] * len(pair_frame), dtype=object))
    pair_frame['Type'] = 1
    inter_frames.append(pair_frame)

# One concat instead of one DataFrame.append per row pair (append was removed
# in pandas 2.0). pd.concat rejects an empty list, hence the fallback.
if inter_frames:
    inter_data = pd.concat(inter_frames, ignore_index=True)[columns]
else:
    inter_data = pd.DataFrame(columns = columns)

In [286]:
# 5-second segments are *prohibitively long: 196,000 inter data and took over an hour
for label, table in (("Intra length: ", intra_data), ("Inter length: ", inter_data)):
    print(label + str(len(table)))

Intra length: 1595
Inter length: 49126


In [287]:
# Bare expression: the notebook renders the intra (same-subject) distance table.
intra_data

Unnamed: 0,Subject,Mean,Median,StdDev,Mean/StdDev,Skew,Kurt,Type
0,1.0,0.065975,0.080665,0.546298,0.006713,0.175279,1.342909,0.0
1,1.0,0.013381,0.100218,0.776321,0.001934,0.643952,3.163573,0.0
2,1.0,0.042419,0.108157,1.687331,0.005033,0.085419,0.664817,0.0
3,1.0,0.088414,0.007083,0.750863,0.008801,0.192665,0.598961,0.0
4,1.0,0.100918,0.469482,0.109249,0.011292,0.033216,0.304332,0.0
5,1.0,0.166174,0.013972,0.474042,0.019275,1.080959,4.653585,0.0
6,1.0,0.067410,0.053578,0.078264,0.007515,0.586329,2.621993,0.0
7,1.0,0.133120,0.459462,0.554605,0.014102,0.301855,0.157904,0.0
8,1.0,0.010114,0.123618,0.232436,0.001065,0.360650,1.248571,0.0
9,1.0,0.018313,0.144707,0.007774,0.002030,0.254640,1.501476,0.0


In [288]:
# Bare expression: the notebook renders the inter (different-subject) distance table.
inter_data

Unnamed: 0,Subject,Mean,Median,StdDev,Mean/StdDev,Skew,Kurt,Type
0,"(1, 2)",0.101152,0.354645,0.633117,0.010700,0.765688,3.510582,1
1,"(1, 2)",0.338957,0.566600,236.684461,0.004543,3.902320,30.677867,1
2,"(1, 2)",0.112225,0.258614,33.128860,0.005244,2.657857,8.165087,1
3,"(1, 2)",0.104866,1.305354,27.074810,0.005369,1.812636,1.270056,1
4,"(1, 2)",0.029646,0.699604,0.830731,0.003289,0.869444,3.657114,1
5,"(1, 2)",0.103526,0.804893,84.429794,0.004075,5.210031,38.051268,1
6,"(1, 2)",0.002082,0.237479,0.374460,0.000099,0.792131,3.220317,1
7,"(1, 2)",0.002547,0.029466,0.938419,0.000697,0.925438,4.054272,1
8,"(1, 2)",0.030912,0.007043,0.402912,0.003743,0.780554,4.257179,1
9,"(1, 2)",0.014667,0.511165,0.217926,0.001586,1.125587,3.315135,1


In [289]:
# Three independent, empty tables sharing the distance-table column layout.
subF1, subF2, subF3 = (pd.DataFrame(columns = columns) for _ in range(3))
#intra = intra_data.copy()
#inter = inter_data.copy()

In [290]:
#import random
#for i in range(0,len(intra),9):
#    rands = random.sample(range(i, i+8), 3)
#    for rand in rands:
#        subF1 = subF1.append(intra.iloc[rand])
    

In [291]:
#split intra and inter datasets into 3 combined tables
# Row i of each source goes to table i % 3. Inside every table the intra rows
# come first, followed by the inter rows, with a fresh 0..n-1 index.
# Strided slices plus one concat per table replace the per-row
# DataFrame.append (removed in pandas 2.0, and a full copy per row). The
# tables are rebuilt from scratch, so re-running this cell does not
# duplicate rows.
subF1, subF2, subF3 = (
    pd.concat([intra_data.iloc[k::3], inter_data.iloc[k::3]], ignore_index=True)
    for k in range(3)
)

In [292]:
print(len(intra_data)//3)
print(len(inter_data)//3)

531
16375


In [293]:
import random

#takes subframe and returns a more manageable table for SVM
def get_SVM_Table(subF, n_intra=500, n_inter=800):
    """Draw a random subsample of one split table for SVM training.

    Parameters
    ----------
    subF : pandas.DataFrame
        Table with a 'Type' column: 0 marks intra (same-subject) rows and
        1 marks inter (different-subject) rows.
    n_intra : int, default 500
        Number of Type 0 rows to draw without replacement.
    n_inter : int, default 800
        Number of Type 1 rows to draw without replacement.

    Returns
    -------
    pandas.DataFrame
        The n_intra drawn intra rows followed by the n_inter drawn inter
        rows, with a fresh 0..n-1 index and the columns of `subF`.

    Raises
    ------
    ValueError
        From random.sample, when `subF` holds fewer rows of a class than
        requested.

    Notes
    -----
    Rows are chosen by their 'Type' value. The earlier positional rule
    ("the first len(intra_data)//3 rows are intra") was off by one whenever
    the three-way split left an extra intra row in a table, and it tied the
    function to the globals intra_data and inter_data. Sampling uses the
    `random` module, so call random.seed(...) first for reproducible draws.
    """
    kinds = subF['Type'].to_numpy()
    intra_positions = np.flatnonzero(kinds == 0).tolist()
    inter_positions = np.flatnonzero(kinds == 1).tolist()

    chosen = random.sample(intra_positions, n_intra)
    chosen += random.sample(inter_positions, n_inter)
    # A single positional take replaces one DataFrame.append per sampled row
    # (append was removed in pandas 2.0).
    return subF.iloc[chosen].reset_index(drop=True)

In [294]:
# Subsample each split table in order, then stack the three samples into one.
svm1, svm2, svm3 = (get_SVM_Table(part) for part in (subF1, subF2, subF3))
all_svm = pd.concat((svm1, svm2, svm3), ignore_index=True)

In [295]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [326]:
def svmTest(svm):
    """Fit an RBF-kernel SVC on a 70/30 split of `svm` and print its scores.

    svm -- table with 'Subject' and 'Type' columns. 'Type' is the label;
           every column other than those two is used as a feature.

    Prints the confusion matrix, the classification report and the accuracy
    for the held-out 30%. Returns nothing.
    """
    labels = svm["Type"]
    inputs = svm.drop(['Subject','Type'], axis=1)
    # random_state is fixed, so the same table is always split the same way.
    X_train, X_test, y_train, y_test = train_test_split(inputs, labels, test_size=0.3, random_state=101)

    classifier = SVC(C=1.0, kernel = 'rbf', degree=3, gamma='auto')
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)

    for report in (confusion_matrix, classification_report):
        print(report(y_test, predicted))
    accuracy = accuracy_score(y_test, predicted)
    print("Accuracy: "+str(accuracy))

In [327]:
# Evaluate each sampled table in turn, then the combined one. A blank line
# separates consecutive reports.
labelled_tables = (
    ('Test 1:', svm1),
    ('Test 2:', svm2),
    ('Test 3:', svm3),
    ('Test All:', all_svm),
)
for position, (title, table) in enumerate(labelled_tables):
    if position:
        print('')
    print(title)
    svmTest(table)

Test 1:
[[100  63]
 [ 38 189]]
              precision    recall  f1-score   support

         0.0       0.72      0.61      0.66       163
         1.0       0.75      0.83      0.79       227

   micro avg       0.74      0.74      0.74       390
   macro avg       0.74      0.72      0.73       390
weighted avg       0.74      0.74      0.74       390

Accuracy: 0.7410256410256411

Test 2:
[[ 98  65]
 [ 39 188]]
              precision    recall  f1-score   support

         0.0       0.72      0.60      0.65       163
         1.0       0.74      0.83      0.78       227

   micro avg       0.73      0.73      0.73       390
   macro avg       0.73      0.71      0.72       390
weighted avg       0.73      0.73      0.73       390

Accuracy: 0.7333333333333333

Test 3:
[[103  60]
 [ 43 184]]
              precision    recall  f1-score   support

         0.0       0.71      0.63      0.67       163
         1.0       0.75      0.81      0.78       227

   micro avg       0.74      