In [70]:
import mne
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy import signal
from scipy.stats import entropy
mne.utils.use_log_level('error')

<mne.utils.use_log_level at 0x1297d9860>

In [71]:
#Add new featres in feature list and again in getFeatures method's "flist"
feature_list = ['Mean', 'Median', 'StdDev','CoV','Skew','Kurt']
columns = ['Subject','Test', 'Channel', *feature_list]

In [93]:
#subject = subject number
#testtype = VR or Non-VR
def loadRawData(subject, testtype):
    path = "Preprocessed_Data/Subject-"+str(subject)+"_"+testtype+".fif"
    raw = mne.io.read_raw_fif(path, verbose='error')
    return raw

def getFeatures(raw, time):
    features = pd.DataFrame(columns = feature_list)
    for i in range(time[0],time[1],5): 
        start, stop = raw.time_as_index([i, i+5])
        
        #only looking at CH 4
        picks = mne.pick_types(raw.info, eeg=True, exclude=['CH 1','CH 2','CH 3','CH 5','CH 6','CH 7','CH 8'])
        
        try:    
            d, t = raw[picks[:], start:stop]
            ds = pd.DataFrame(d[0])
            #normalize
            #ds = (ds-ds.mean())/(ds.max()-ds.min())
            
            mean = ds.mean()[0]
            median = ds.median()[0]
            std = ds.std()[0]
            mos = mean/std
            skew = ds.skew()[0]
            kurt = ds.kurt()[0]
            
            #add new features here too 
            flist = [mean, median, std, mos, skew, kurt]
            features = features.append(pd.Series(flist, index=feature_list), ignore_index=True)
        except:
            continue
    return features

def getDataFrame(f, data, s):
    for j in range(len(f)):
        fl = [None]*len(feature_list)
        k=0
        for feature in feature_list:
            fl[k] = f[feature][j]
            k+=1
        data = data.append(pd.Series([s, 'VR', 'CH 4', *fl], index=columns), ignore_index=True)
    return data


def getDiff(df):
    diffDF = df.copy()
    if('Subject' in df.columns):
        diffDF.drop(['Subject'], axis=1, inplace=True)
        diffDF.drop(['Test'], axis=1, inplace=True)
        diffDF.drop(['Channel'], axis=1, inplace=True)
    diffDF = diffDF.diff().dropna()
    return diffDF

In [73]:
#Test 1
#testtype = VR or Non-VR
df1 = pd.DataFrame(columns = columns)
for i in range(1,33):
    data = loadRawData(i,"VR")
    f = getFeatures(raw=data, time=(60,120))
    df1 = getDataFrame(f,df1,i)

In [74]:
df = df1

In [75]:
df.head()

Unnamed: 0,Subject,Test,Channel,Mean,Median,StdDev,CoV,Skew,Kurt
0,1,VR,CH 4,0.016971,-0.569254,7.855022,0.00216,0.756513,4.314372
1,1,VR,CH 4,0.073224,-0.227175,9.066625,0.008076,0.399206,2.627498
2,1,VR,CH 4,-0.05097,-0.411724,7.046158,-0.007234,1.086018,4.213718
3,1,VR,CH 4,0.038742,-0.453701,7.845483,0.004938,1.220575,4.716822
4,1,VR,CH 4,-0.176283,-0.600765,8.047743,-0.021905,0.777629,3.14139


In [76]:
#feature_list = ['Mean', 'Median', 'StdDev','CoV','Skew','Kurt', 
#                'Dif1 Mean', 'Dif1 Median', 'Dif1 StdDev','Dif1 CoV','Dif1 Skew','Dif1 Kurt',
#                'Dif2 Mean', 'Dif2 Median', 'Dif2 StdDev','Dif2 CoV','Dif2 Skew','Dif2 Kurt']
#columns = ['Subject','Test', 'Channel', *feature_list]

#df = pd.concat([df1, df2, df3], axis=1)
#df.columns = columns
#df = df.dropna()

In [77]:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    #print(df)

In [78]:
def distanceCalculation(df):
    subs = df['Subject'].unique()    # All subjects
    all_subs= list(itertools.combinations(subs, 2)) # All possible combination for all subjects

    distance_col = ['Subject',*feature_list, 'Type']
    intra_data = pd.DataFrame(columns = distance_col)

    #Intra Distance Computation (Same Person)
    for sub in subs:
        rows = df.loc[df['Subject'] == sub]
        each_comb = list(itertools.combinations(rows.index, 2))
        for i in range(len(each_comb)):
            comb = each_comb[i]
            fdr = absDistance(df, feature_list, comb[0], comb[1])
            intra_data = intra_data.append(pd.Series([sub,*fdr,0], index=distance_col), ignore_index=True)
    
    inter_data = pd.DataFrame(columns = distance_col)
    # Inter Distance Computation (Different Person) 
    all_rows=len(df)
    for sub_pair in all_subs: # Pairs of subjets
        sp1 = df.loc[df['Subject'] == sub_pair[0]].index
        sp2 = df.loc[df['Subject'] == sub_pair[1]].index
        for i in range(len(sp1)):
            for j in range(len(sp2)):
                fdr = absDistance(df, feature_list, sp1[i], sp2[j])
                inter_data = inter_data.append(pd.Series([sub_pair, *fdr, 1], index=distance_col), ignore_index=True)    
    return intra_data, inter_data

def absDistance(df, features, s1, s2):
    r=0
    fdr = [None]*len(features)
    for feature in features:
        f1 = df.iloc[s1][feature]
        f2 = df.iloc[s2][feature]
        Inter_dis = np.absolute(f1-f2) # absolute difference
        fdr[r] = Inter_dis
        r+=1
    return fdr

In [79]:
#15 min to run (6 fetures), 18 min (8 features) - 4/17 - 15 minutes (18 features)
intra1, inter1 = distanceCalculation(df)


In [80]:
print("Intra length: "+str(len(intra1)))
print("Inter length: "+str(len(inter1)))

Intra length: 1914
Inter length: 58464


In [82]:
import random
random.seed(1234)
#takes subframe and returns a more managble table for SVM
def get_SVM_Table(intra, inter):
    svmTable = pd.DataFrame()
    rands = random.sample(range(0, len(intra)), 1900)
    for rand in rands:
        svmTable = svmTable.append(intra.iloc[rand],ignore_index=True)
    
    rands = random.sample(range(0, len(inter)), 1900)
    for rand in rands:
        svmTable = svmTable.append(inter.iloc[rand],ignore_index=True)
    return svmTable

In [83]:
svm1 = get_SVM_Table(intra1,inter1)

In [84]:
#all_svm = pd.concat([svm1, svm2], ignore_index=True)

In [85]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [159]:
def svmTest(svm):
    X = svm.drop(['Subject','Type'], axis=1)
    y = svm["Type"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)
    
    model = SVC(C=1.0, kernel = 'rbf', degree=3, gamma='auto')
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    confusionMatrix = confusion_matrix(y_test, prediction)
    print(confusionMatrix)
    print(classification_report(y_test, prediction))
    print("Accuracy: "+str(accuracy_score(y_test, prediction)))
    return confusionMatrix, model

In [172]:
cm, model = svmTest(svm1)

[[444 129]
 [194 373]]
              precision    recall  f1-score   support

         0.0       0.70      0.77      0.73       573
         1.0       0.74      0.66      0.70       567

   micro avg       0.72      0.72      0.72      1140
   macro avg       0.72      0.72      0.72      1140
weighted avg       0.72      0.72      0.72      1140

Accuracy: 0.7166666666666667


In [173]:
TN = cm[0][0]
FN = cm[1][0]
TP = cm[1][1]
FP = cm[0][1]
sums = TN+TP+FN+FP

acc = (TN+TP)/sums

print('False Acceptance: '+str(FP/sums))
print('False Rejection: '+str(FN/sums))
print(acc)

False Acceptance: 0.11315789473684211
False Rejection: 0.17017543859649123
0.7166666666666667


In [150]:
#df1 = pd.DataFrame(columns = columns)
#data = loadRawData(12,"VR")
#f = getFeatures(raw=data, time=(140,145))
#df1 = getDataFrame(f,df1,12)

#df2 = pd.DataFrame(columns = columns)
#data = loadRawData(29,"VR")
#f = getFeatures(raw=data, time=(145,150))
#df2 = getDataFrame(f,df2,29)

In [151]:
def classify(test):
    if(model.predict([test]))==0:
        return True
    else:
        return False

In [152]:
#dft = pd.DataFrame(columns = columns)
#dft = pd.concat([df1,df2], ignore_index=True)

In [153]:
#test = absDistance(dft, feature_list, 0, 1)
#test

In [154]:
#print(classify(test))

In [155]:
#dft