In [1]:
import mne
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy import signal
from scipy.stats import entropy
# use_log_level is a context manager: constructing it without a `with` block
# leaves the logging level untouched. set_log_level applies the level directly.
mne.set_log_level('error')

<mne.utils.use_log_level at 0x10f3a3278>

In [2]:
# To add a new feature, add its name to feature_list here and its value to
# "flist" in getFeatures. Both lists must stay in the same order, because
# getFeatures pairs the values with these names positionally.
feature_list = ['Mean', 'Median', 'StdDev','Mean/StdDev','Skew','Kurt']
# Column layout of the per-window tables: identifying columns first, then the features.
columns = ['Subject','Test', 'Channel', *feature_list]

In [3]:
def loadRawData(subject, testtype):
    """Read the preprocessed recording of one subject.

    subject  -- subject number; converted with str() when building the file name.
    testtype -- test label used in the file name ("VR" or "Non-VR").

    Returns the object produced by mne.io.read_raw_fif for
    Preprocessed_Data/Subject-<subject>_<testtype>.fif.
    """
    name_parts = ["Preprocessed_Data/Subject-", str(subject), "_", testtype, ".fif"]
    return mne.io.read_raw_fif("".join(name_parts), verbose='error')

def getFeatures(raw, time, window=5):
    """Compute summary statistics of channel 'CH 4' over consecutive windows.

    Parameters
    ----------
    raw : object returned by loadRawData
        Must provide ``raw.info``, ``raw.time_as_index`` and
        ``raw[picks, start:stop]`` indexing.
    time : tuple of int
        ``(begin, end)``; windows start at begin, begin + window, ... while the
        start is below end. Values are interpreted by ``raw.time_as_index``
        (seconds for MNE raw objects).
    window : int, default 5
        Length of each window and step between window starts. Must be an
        integer because it is used as the step of ``range``.

    Returns
    -------
    pandas.DataFrame
        One row per readable window, with the columns named in ``feature_list``.
        Windows that cannot be read or contain no samples are skipped.
    """
    # The channel selection is the same for every window, so compute it once.
    # Only 'CH 4' is kept: every other EEG channel is excluded.
    picks = mne.pick_types(raw.info, eeg=True, exclude=['CH 1','CH 2','CH 3','CH 5','CH 6','CH 7','CH 8'])

    rows = []
    for begin in range(time[0], time[1], window):
        start, stop = raw.time_as_index([begin, begin + window])

        try:
            d, t = raw[picks, start:stop]
            samples = pd.Series(d[0])
        except (ValueError, IndexError):
            # Best-effort: skip a window that cannot be sliced (presumably one
            # lying outside the recording) or for which no channel was picked.
            # Any other error is a real problem and is allowed to surface.
            continue
        if samples.empty:
            continue

        mean = samples.mean()
        median = samples.median()
        std = samples.std()
        # np.divide keeps NumPy semantics (inf/nan) when std is zero.
        mos = np.divide(mean, std)
        skew = samples.skew()
        kurt = samples.kurt()

        # Add new features here too, in the same order as feature_list.
        rows.append([mean, median, std, mos, skew, kurt])

    # Build the table once instead of growing a DataFrame row by row.
    return pd.DataFrame(rows, columns=feature_list)

def getDataFrame(f, data, subject=None, test='VR', channel='CH 4'):
    """Append labelled copies of the feature rows in ``f`` to ``data``.

    Parameters
    ----------
    f : pandas.DataFrame
        Feature table with the columns named in ``feature_list``.
    data : pandas.DataFrame
        Accumulated table laid out as ``columns``.
    subject : optional
        Value for the 'Subject' column. When omitted, the notebook-level
        variable ``i`` is used, which is what callers that do not pass a
        subject rely on.
    test : str, default 'VR'
        Value for the 'Test' column.
    channel : str, default 'CH 4'
        Value for the 'Channel' column.

    Returns
    -------
    pandas.DataFrame
        ``data`` followed by one row per row of ``f``, with a fresh 0..n-1
        index. ``data`` itself is returned when ``f`` has no rows.
    """
    if len(f) == 0:
        return data
    if subject is None:
        # Legacy fallback: read the caller's global loop variable.
        subject = i

    labelled = (
        f[feature_list]
        .reset_index(drop=True)
        .assign(Subject=subject, Test=test, Channel=channel)
    )
    # Reorder to the shared column layout, then add all rows in one concat.
    return pd.concat([data, labelled[columns]], ignore_index=True)

In [4]:
#Test 1
#testtype = VR or Non-VR
# df1 collects the feature rows for time=(55,110) and df2 those for
# time=(110,165), over the "VR" recordings of subjects 1 through 32.
df1 = pd.DataFrame(columns = columns)
df2 = pd.DataFrame(columns = columns)
# NOTE: the loop variable must stay named `i` -- getDataFrame reads the global
# `i` to fill the 'Subject' column when no subject is passed to it.
for i in range(1,33):
    data = loadRawData(i,"VR")
    f = getFeatures(raw=data, time=(55,110))
    df1 = getDataFrame(f,df1)
    
    f = getFeatures(raw=data, time=(110,165))
    df2 = getDataFrame(f,df2)

In [5]:
def distanceCalculation(df):
    """Compute per-feature absolute distances between pairs of rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'Subject' column and the columns named in ``feature_list``.

    Returns
    -------
    (intra_data, inter_data) : tuple of pandas.DataFrame
        Both have the columns ``['Subject', *feature_list, 'Type']``.
        ``intra_data`` has one row per unordered pair of rows belonging to the
        same subject ('Subject' is that subject, 'Type' is 0).
        ``inter_data`` has one row per pair of rows belonging to two different
        subjects ('Subject' is the ``(subject_a, subject_b)`` tuple, 'Type' is 1).
    """
    subject_column = df['Subject']
    subs = subject_column.unique()    # All subjects
    distance_col = ['Subject', *feature_list, 'Type']

    # absDistance indexes rows with iloc, so work with positional row numbers
    # rather than index labels; the two only coincide for a default RangeIndex.
    positions = {
        sub: np.flatnonzero((subject_column == sub).to_numpy())
        for sub in subs
    }

    # Intra distance computation (same person).
    intra_rows = []
    for sub in subs:
        for first, second in itertools.combinations(positions[sub], 2):
            fdr = absDistance(df, feature_list, first, second)
            intra_rows.append([sub, *fdr, 0])

    # Inter distance computation (different persons), over every subject pair.
    inter_rows = []
    for sub_pair in itertools.combinations(subs, 2):
        for first in positions[sub_pair[0]]:
            for second in positions[sub_pair[1]]:
                fdr = absDistance(df, feature_list, first, second)
                inter_rows.append([sub_pair, *fdr, 1])

    # Build each table once; appending row by row copies the frame every time.
    intra_data = pd.DataFrame(intra_rows, columns=distance_col)
    inter_data = pd.DataFrame(inter_rows, columns=distance_col)
    return intra_data, inter_data

def absDistance(df, features, s1, s2):
    """Return the absolute per-feature difference between two rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns.
    features : sequence
        Column labels to compare; the result follows this order.
    s1, s2 : int
        Positional row numbers (the rows are fetched with ``df.iloc``).

    Returns
    -------
    list
        One ``np.absolute(f1 - f2)`` value per entry of ``features``.
    """
    # Fetch each row once instead of once per feature.
    first_row = df.iloc[s1]
    second_row = df.iloc[s2]
    return [np.absolute(first_row[feature] - second_row[feature]) for feature in features]

In [6]:
# Runtime note: 15 min to run both calls with 6 features, 18 min with 8 features - 4/17
# For each segment table, intraN holds the same-subject pair distances and
# interN the different-subject pair distances.
intra1, inter1 = distanceCalculation(df1)
intra2, inter2 = distanceCalculation(df2)

In [7]:
# Report how many same-subject (intra) and different-subject (inter) pairs
# each segment produced.
for sample_title, intra_table, inter_table in (
    ("Sample 1:", intra1, inter1),
    ("Sample 2:", intra2, inter2),
):
    print(sample_title)
    print("Intra length: " + str(len(intra_table)))
    print("Inter length: " + str(len(inter_table)))

Sample 1:
Intra length: 1595
Inter length: 49126
Sample 2:
Intra length: 1595
Inter length: 49126


In [8]:
import random
def get_SVM_Table(intra, inter, n_samples=1500, seed=None):
    """Draw an equal-sized random subset of both tables for the SVM.

    Parameters
    ----------
    intra, inter : pandas.DataFrame
        Distance tables to sample from.
    n_samples : int, default 1500
        Number of rows drawn, without replacement, from each table.
    seed : optional
        When given, sampling uses a private ``random.Random(seed)`` so the
        result is reproducible. When omitted, the module-level ``random``
        generator is used.

    Returns
    -------
    pandas.DataFrame
        The ``n_samples`` rows drawn from ``intra`` followed by the
        ``n_samples`` rows drawn from ``inter``, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        From ``sample`` when a table has fewer than ``n_samples`` rows.
    """
    rng = random if seed is None else random.Random(seed)
    drawn = []
    for table in (intra, inter):
        chosen = rng.sample(range(len(table)), n_samples)
        # Select all chosen rows at once instead of appending them one by one.
        drawn.append(table.iloc[chosen])
    return pd.concat(drawn, ignore_index=True)

In [9]:
# One sampled intra/inter table per segment (segment 1 and segment 2).
svm1 = get_SVM_Table(intra1,inter1)
svm2 = get_SVM_Table(intra2,inter2)

In [10]:
# Stack both sampled tables into one and renumber the rows from 0.
all_svm = pd.concat([svm1, svm2], ignore_index=True)

In [11]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [12]:
def svmTest(svm):
    """Fit an RBF-kernel SVC on a 70/30 split of ``svm`` and print its scores.

    svm -- table with 'Subject' and 'Type' columns. 'Type' is the label and
           every column other than these two is used as a predictor.

    Prints the confusion matrix, the classification report and the accuracy
    measured on the held-out 30%. Nothing is returned.
    """
    labels = svm["Type"]
    predictors = svm.drop(['Subject','Type'], axis=1)

    # Fixed random_state keeps the split identical between runs.
    split = train_test_split(predictors, labels, test_size=0.3, random_state=101)
    train_inputs, test_inputs, train_labels, test_labels = split

    classifier = SVC(C=1.0, kernel = 'rbf', degree=3, gamma=0.1)
    classifier.fit(train_inputs, train_labels)
    predicted = classifier.predict(test_inputs)

    print(confusion_matrix(test_labels, predicted))
    print(classification_report(test_labels, predicted))
    print("Accuracy: "+str(accuracy_score(test_labels, predicted)))

In [13]:
# Evaluate each segment on its own, then both segments pooled together.
evaluations = (
    ('Test 1:', svm1),
    ('Test 2:', svm2),
    ('Test All:', all_svm),
)
for position, (title, table) in enumerate(evaluations):
    if position > 0:
        # Blank line between consecutive reports, none after the last one.
        print('')
    print(title)
    svmTest(table)

Test 1:
[[330 148]
 [117 305]]
              precision    recall  f1-score   support

         0.0       0.74      0.69      0.71       478
         1.0       0.67      0.72      0.70       422

   micro avg       0.71      0.71      0.71       900
   macro avg       0.71      0.71      0.71       900
weighted avg       0.71      0.71      0.71       900

Accuracy: 0.7055555555555556

Test 2:
[[368 110]
 [111 311]]
              precision    recall  f1-score   support

         0.0       0.77      0.77      0.77       478
         1.0       0.74      0.74      0.74       422

   micro avg       0.75      0.75      0.75       900
   macro avg       0.75      0.75      0.75       900
weighted avg       0.75      0.75      0.75       900

Accuracy: 0.7544444444444445

Test All:
[[668 240]
 [260 632]]
              precision    recall  f1-score   support

         0.0       0.72      0.74      0.73       908
         1.0       0.72      0.71      0.72       892

   micro avg       0.72    