In [11]:
import mne
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy import signal
from scipy.stats import entropy
# Silence MNE messages below ERROR for the whole session.
# (mne.utils.use_log_level is a context manager; calling it without `with`
# only creates the object and leaves the log level unchanged.)
mne.set_log_level('error')

<mne.utils.use_log_level at 0xd1b27b8>

In [12]:
# To add a new feature, add its name to feature_list here AND compute it in getFeatures (same order).
feature_list = ['Mean', 'Median', 'StdDev','CoV','Skew','Kurt']
columns = ['Subject','Test', 'Channel', *feature_list]

In [13]:
def loadRawData(subject, testtype):
    """Open one preprocessed recording with MNE.

    subject  -- subject number; converted with str() to build the file name
    testtype -- test label used in the file name ("VR" or "Non-VR")

    Returns whatever mne.io.read_raw_fif returns for
    Preprocessed_Data/Subject-<subject>_<testtype>.fif
    """
    pieces = ["Preprocessed_Data/Subject-", str(subject), "_", testtype, ".fif"]
    return mne.io.read_raw_fif("".join(pieces), verbose='error')

def getFeatures(raw, time, segment=10):
    """Compute summary statistics of channel 'CH 4' over consecutive windows.

    Parameters
    ----------
    raw : object returned by loadRawData
        Must provide ``info``, ``time_as_index`` and ``raw[picks, start:stop]``.
    time : (int, int)
        Start and stop of the analysed span; the values are passed to
        ``raw.time_as_index`` (presumably seconds -- confirm against MNE docs).
    segment : number, default 10
        The window length is ``int(segment * 0.25)``, with a floor of 1 so
        that small values cannot produce a zero step.

    Returns
    -------
    pandas.DataFrame
        One row per window with the columns
        Mean, Median, StdDev, CoV, Skew, Kurt (in that order).
        Windows whose data cannot be read or summarised are skipped.
    """
    # Column labels live next to the values they describe, so this function
    # keeps working even if the module-level feature_list is rebound later.
    names = ['Mean', 'Median', 'StdDev', 'CoV', 'Skew', 'Kurt']

    # int(segment*.25) is 0 for segment < 4, and range() rejects a zero step.
    inc = max(1, int(segment*.25))

    #only looking at CH 4 (does not depend on the window, so computed once)
    picks = mne.pick_types(raw.info, eeg=True, exclude=['CH 1','CH 2','CH 3','CH 5','CH 6','CH 7','CH 8'])

    rows = []
    for begin in range(time[0], time[1], inc):
        end = min(begin+inc, time[1])   # clip the last window to the span
        start, stop = raw.time_as_index([begin, end])

        try:
            d, _ = raw[picks, start:stop]
            samples = pd.Series(d[0])

            mean = samples.mean()
            median = samples.median()
            std = samples.std()
            # NOTE(review): stored under 'CoV' but computed as mean/std; the
            # usual coefficient of variation is std/mean -- confirm intent.
            mos = mean/std
            skew = samples.skew()
            kurt = samples.kurt()

            #add new features here too (and to `names` above)
            rows.append([mean, median, std, mos, skew, kurt])
        except Exception:
            # Best effort: a window that cannot be read or summarised is
            # skipped, but KeyboardInterrupt/SystemExit still propagate.
            continue

    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows, columns=names)

def getDataFrame(f, data, subject=None, testtype='VR', channel='CH 4'):
    """Append the feature rows in ``f`` to ``data`` with identifying columns.

    Parameters
    ----------
    f : pandas.DataFrame
        Feature rows (e.g. the result of getFeatures). All of its columns
        are kept, in their existing order.
    data : pandas.DataFrame
        Table collected so far; it is not modified.
    subject : optional
        Value for the 'Subject' column. When omitted, the module-level
        variable ``i`` is used, which is what earlier versions of this
        function always did.
    testtype : str, default 'VR'
        Value for the 'Test' column.
    channel : str, default 'CH 4'
        Value for the 'Channel' column.

    Returns
    -------
    pandas.DataFrame
        ``data`` followed by the new rows, with a fresh 0..n-1 index.
        ``data`` itself is returned when ``f`` has no rows.

    Raises
    ------
    ValueError
        If ``subject`` is omitted and no global ``i`` exists.
    """
    if len(f) == 0:
        return data

    if subject is None:
        # Legacy fallback: callers used to rely on a loop variable `i`
        # leaking from the notebook namespace.
        subject = globals().get('i')
        if subject is None:
            raise ValueError("subject was not given and no global 'i' is defined")

    rows = f.reset_index(drop=True)
    # Inserted in reverse so the final order is Subject, Test, Channel, ...
    rows.insert(0, 'Channel', channel)
    rows.insert(0, 'Test', testtype)
    rows.insert(0, 'Subject', subject)

    if len(data) == 0:
        # Keep any columns the empty accumulator already declares, without
        # concatenating an empty frame (which would blur the dtypes).
        extra = [c for c in rows.columns if c not in data.columns]
        return rows.reindex(columns=[*data.columns, *extra])

    # One concat per call instead of one append per row.
    return pd.concat([data, rows], ignore_index=True)


def getDiff(df):
    """Return absolute row-to-row differences of the numeric feature columns.

    The identifying columns 'Subject', 'Test' and 'Channel' are removed when
    present (any subset of them), each remaining column is differenced
    against the previous row, rows containing NaN (at least the first one)
    are dropped and the absolute value is taken.

    The input frame is not modified. The result keeps the original row
    labels, so its index does not start at the first label of ``df``.

    NOTE(review): differences are taken across the whole table, including
    the row where one subject's data ends and the next begins -- confirm
    that this is intended.
    """
    meta = [c for c in ('Subject', 'Test', 'Channel') if c in df.columns]
    # drop() returns a new frame, so no explicit copy/inplace is needed;
    # DataFrame.abs() replaces the deprecated applymap(np.abs).
    return df.drop(columns=meta).diff().dropna().abs()

In [14]:
#Test 1: collect the per-window features of the VR recording of subjects 1..32
#testtype = VR or Non-VR
df1 = pd.DataFrame(columns = columns)
# NOTE: the loop variable must stay named `i` -- getDataFrame reads the global `i`
# to fill the 'Subject' column when no subject is passed explicitly.
for i in range(1,33):
    data = loadRawData(i,"VR")
    f = getFeatures(raw=data, time=(60,120))
    df1 = getDataFrame(f,df1)

In [15]:
# First- and second-order absolute differences of the feature table.
df2 = getDiff(df1)
df3 = getDiff(df2)

# Renumber every table from 0 so that rows can later be matched by position.
df1, df2, df3 = (frame.reset_index(drop=True) for frame in (df1, df2, df3))


In [16]:
# Rows per subject.
# NOTE(review): 29 is hard-coded, while the loading loop iterates range(1,33)
# (32 subjects) -- confirm which subject count is intended.
len(df1)/29

30.0

In [17]:
# NOTE: this rebinds the globals feature_list and columns (previously the 6 base
# features) to the 18-feature versions; code that reads these globals sees the
# new values from here on.
feature_list = ['Mean', 'Median', 'StdDev','CoV','Skew','Kurt', 
                'Dif1 Mean', 'Dif1 Median', 'Dif1 StdDev','Dif1 CoV','Dif1 Skew','Dif1 Kurt',
                'Dif2 Mean', 'Dif2 Median', 'Dif2 StdDev','Dif2 CoV','Dif2 Skew','Dif2 Kurt']
columns = ['Subject','Test', 'Channel', *feature_list]

# Place the base features, first differences and second differences side by
# side; rows are matched on the reset 0..n-1 index of each table.
df = pd.concat([df1, df2, df3], axis=1)
df.columns = columns
# df2 and df3 have fewer rows than df1 (each getDiff call drops at least its
# first row), so the unmatched trailing rows contain NaN and are removed.
df = df.dropna()

In [18]:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    #print(df)
# Preview the first rows of the combined feature table.
df.head()

Unnamed: 0,Subject,Test,Channel,Mean,Median,StdDev,CoV,Skew,Kurt,Dif1 Mean,...,Dif1 StdDev,Dif1 CoV,Dif1 Skew,Dif1 Kurt,Dif2 Mean,Dif2 Median,Dif2 StdDev,Dif2 CoV,Dif2 Skew,Dif2 Kurt
0,1,VR,CH 4,-0.016555,-0.853062,7.532125,-0.002198,1.284384,6.718239,0.080915,...,1.264397,0.009514,1.109892,4.897208,0.025475,0.213015,0.680844,0.003937,0.015292,2.38697
1,1,VR,CH 4,0.064359,-0.30938,8.796523,0.007316,0.174492,1.821031,0.10639,...,1.945242,0.013451,1.0946,2.510239,0.201895,0.012938,2.393204,0.016478,0.200489,0.335245
2,1,VR,CH 4,-0.042031,-0.640048,6.851281,-0.006135,1.269092,4.331269,0.308285,...,4.338446,0.029929,0.894111,2.845484,0.004509,0.046104,0.516354,0.000182,0.582203,0.592728
3,1,VR,CH 4,0.266254,-0.296442,11.189727,0.023795,0.374981,1.485785,0.312794,...,3.822092,0.030111,0.311909,2.252756,0.208722,0.190916,2.962448,0.013286,1.267148,1.101471
4,1,VR,CH 4,-0.04654,0.093268,7.367635,-0.006317,0.063072,3.738542,0.104072,...,0.859644,0.016826,1.579057,3.354227,0.011449,0.36724,0.290256,0.001256,0.576975,2.44241


In [27]:
import random
random.seed(14)

def distanceCalculation(df, features=None):
    """Build intra-subject and inter-subject feature-distance tables.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Subject' column and every column named in
        ``features``. Any index is accepted; rows are addressed by position.
    features : list of str, optional
        Feature columns to compare. Defaults to the module-level
        ``feature_list``.

    Returns
    -------
    (intra_data, inter_data) : tuple of pandas.DataFrame
        Both have the columns ['Subject', *features, 'Type'].
        ``intra_data`` holds one row per unordered pair of rows belonging to
        the same subject (Subject = that subject, Type = 0).
        ``inter_data`` holds twice as many rows as ``intra_data``, each for a
        randomly drawn pair of rows from two different subjects
        (Subject = (subject_a, subject_b), Type = 1). Pairs are drawn with
        the ``random`` module, so seed it for reproducible output.
        ``inter_data`` is empty when ``df`` has fewer than two subjects.
    """
    if features is None:
        features = feature_list
    features = list(features)
    distance_col = ['Subject', *features, 'Type']

    # Pull the data out once; element-wise DataFrame lookups in the loops
    # below were the dominant cost.
    subjects = df['Subject'].to_numpy()
    values = df[features].to_numpy(dtype=float)
    unique_subjects = df['Subject'].unique()

    #Intra Distance Computation (Same Person)
    intra_rows = []
    for sub in unique_subjects:
        # Positions (not index labels), so a non-default index is handled.
        positions = np.flatnonzero(subjects == sub)
        for a, b in itertools.combinations(positions, 2):
            intra_rows.append([sub, *np.abs(values[a] - values[b]), 0])
    intra_data = pd.DataFrame(intra_rows, columns=distance_col)

    #Inter Distance Computation (Different Person)
    inter_rows = []
    # With a single subject no cross-subject pair exists; drawing would
    # never terminate, so produce an empty table instead.
    inter_total = len(intra_rows)*2 if len(unique_subjects) >= 2 else 0
    last = len(df) - 1
    while len(inter_rows) < inter_total:
        v1 = random.randint(0, last)
        v2 = random.randint(0, last)
        while subjects[v1] == subjects[v2]:
            v1 = random.randint(0, last)
            v2 = random.randint(0, last)
        sub_pair = (subjects[v1], subjects[v2])
        inter_rows.append([sub_pair, *np.abs(values[v1] - values[v2]), 1])
    inter_data = pd.DataFrame(inter_rows, columns=distance_col)

    return intra_data, inter_data

def absDistance(df, features, s1, s2):
    """Absolute per-feature difference between two rows of ``df``.

    ``s1`` and ``s2`` are row positions (used with ``df.iloc``); ``features``
    is the sequence of column names to compare. Returns a list with one
    value per feature, in the order given.
    """
    return [
        np.absolute(df.iloc[s1][name] - df.iloc[s2][name])  # absolute difference
        for name in features
    ]

In [None]:
#15 min to run (6 features), 18 min (8 features) - 4/17 - 15 minutes (18 features)
intra1, inter1 = distanceCalculation(df)


In [None]:
# Report how many intra-subject and inter-subject distance rows were produced.
print("Intra length: "+str(len(intra1)), "Inter length: "+str(len(inter1)), sep="\n")

In [127]:
random.seed(1234)
def get_SVM_Table(intra, inter):
    """Sample the distance tables down to a more manageable table for the SVM.

    Parameters
    ----------
    intra, inter : pandas.DataFrame
        Intra-subject and inter-subject distance tables with matching columns.

    Returns
    -------
    pandas.DataFrame
        ``len(intra)`` rounded down to a multiple of 100 randomly chosen rows
        of ``intra``, followed by 1.5 times as many randomly chosen rows of
        ``inter`` (capped at ``len(inter)``), with a fresh 0..n-1 index.
        Rows are drawn without replacement using the ``random`` module, so
        seed it for reproducible output.
    """
    len1 = (len(intra)//100)*100
    # Integer arithmetic: random.sample needs an int sample size. len1 is a
    # multiple of 100, so the halving is exact. The cap avoids a ValueError
    # when inter has fewer rows than requested.
    len2 = min((len1//2)*3, len(inter))

    intra_pick = random.sample(range(0, len(intra)), len1)
    inter_pick = random.sample(range(0, len(inter)), len2)

    # Select all sampled rows at once instead of appending one row at a time.
    return pd.concat([intra.iloc[intra_pick], inter.iloc[inter_pick]], ignore_index=True)

In [128]:
# Sample rows of the intra/inter distance tables into the table used to train and test the SVM.
svm1 = get_SVM_Table(intra1,inter1)

In [129]:
#all_svm = pd.concat([svm1, svm2], ignore_index=True)

In [130]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [131]:
def svmTest(svm):
    """Fit an RBF-kernel SVC on a distance table and report test metrics.

    ``svm`` must contain the columns 'Subject' and 'Type'; 'Type' is the
    label and every column other than those two is used as an input.
    30% of the rows are held out with a fixed split (random_state=101).

    Prints the confusion matrix, the classification report and the accuracy
    for the held-out rows, and returns (confusion matrix, fitted model).
    """
    labels = svm["Type"]
    inputs = svm.drop(['Subject','Type'], axis=1)
    split = train_test_split(inputs, labels, test_size=0.3, random_state=101)
    X_train, X_test, y_train, y_test = split

    model = SVC(C=1.0, kernel = 'rbf', degree=3, gamma='auto')
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)

    confusionMatrix = confusion_matrix(y_test, prediction)
    print(confusionMatrix)
    print(classification_report(y_test, prediction))
    print("Accuracy: "+str(accuracy_score(y_test, prediction)))
    return confusionMatrix, model

In [132]:
# Train and evaluate the SVM on the sampled distance table; keep the confusion matrix and the fitted model.
print('Test 1:')
vr_cm, vr_model = svmTest(svm1)

Test 1:
[[1027  484]
 [ 434 1805]]
              precision    recall  f1-score   support

         0.0       0.70      0.68      0.69      1511
         1.0       0.79      0.81      0.80      2239

   micro avg       0.76      0.76      0.76      3750
   macro avg       0.75      0.74      0.74      3750
weighted avg       0.75      0.76      0.75      3750

Accuracy: 0.7552


In [133]:
# Unpack the 2x2 confusion matrix (first index = true label, second = predicted
# label, per scikit-learn's confusion_matrix convention).
TN, FP = vr_cm[0][0], vr_cm[0][1]
FN, TP = vr_cm[1][0], vr_cm[1][1]
sums = TN+TP+FN+FP

acc = (TN+TP)/sums

# NOTE(review): both figures are fractions of ALL test samples, not per-class
# rates (FP/(FP+TN), FN/(FN+TP)) -- confirm this is the intended definition.
# NOTE(review): label 0 is the intra (same-subject) class and 1 the inter
# class, so check that "acceptance"/"rejection" map to the intended cells.
print('False Acceptance: '+str(FP/sums), 'False Rejection: '+str(FN/sums), acc, sep='\n')

False Acceptance: 0.12906666666666666
False Rejection: 0.11573333333333333
0.7552
