In [1]:
import mne
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy import signal
from scipy.stats import entropy
# use_log_level is a context manager: calling it without `with` only builds the
# object and leaves MNE's verbosity untouched. set_log_level applies the level
# for the whole session (and returns nothing, so the cell has no output).
mne.set_log_level('error')

In [2]:
# To add a new feature, name it here and also add its value to "flist"
# inside getFeatures (same position in both lists).
feature_list = [
    'Mean', 'Median', 'StdDev', 'Mean/StdDev',
    'Skew', 'Kurt', 'PSD Mean', 'PSE',
]
# Identifying columns come first, followed by one column per feature.
columns = ['Subject', 'Test', 'Channel'] + feature_list

In [22]:
def loadRawData(subject, testtype):
    """Open the preprocessed FIF recording of one subject.

    subject  -- subject number (converted with str() to build the file name)
    testtype -- "VR" or "Non-VR"; inserted verbatim into the file name

    Returns whatever mne.io.read_raw_fif returns for
    Preprocessed_Data/Subject-<subject>_<testtype>.fif.
    """
    pieces = ("Preprocessed_Data/Subject-", str(subject), "_", testtype, ".fif")
    return mne.io.read_raw_fif("".join(pieces), verbose='error')

def getFeatures(raw, time):
    """Compute summary features of channel 'CH 4' over consecutive 5 s windows.

    The seconds in range(time[0], time[1], 5) are used as window starts; each
    window that can be processed contributes one row.

    Parameters
    ----------
    raw : recording object as returned by loadRawData; it must provide
        ``info``, ``time_as_index`` and ``raw[picks, start:stop]`` indexing.
    time : (start, stop) pair of whole seconds.

    Returns
    -------
    pandas.DataFrame with one row per processed window and the columns named
    in ``feature_list`` (empty, but with those columns, if no window worked).
    """
    window = 5  # window length in seconds

    # Only CH 4 is analysed. The pick does not depend on the window, so it is
    # computed once instead of on every iteration.
    picks = mne.pick_types(raw.info, eeg=True, exclude=['CH 1','CH 2','CH 3','CH 5','CH 6','CH 7','CH 8'])

    rows = []
    for i in range(time[0], time[1], window):
        start, stop = raw.time_as_index([i, i + window])

        try:
            d, t = raw[picks[:], start:stop]
            ds = pd.Series(d[0])  # samples of the first (only) picked channel
            #normalize
            #ds = (ds-ds.mean())/(ds.max()-ds.min())

            mean = ds.mean()
            median = ds.median()
            std = ds.std()
            mos = mean/std
            skew = ds.skew()
            kurt = ds.kurt()

            # nperseg is 2*sf.
            # NOTE(review): the sampling rate is hard-coded to 100 Hz -- confirm
            # it matches raw.info['sfreq'] for these recordings.
            freqs, psd = signal.welch(d[0], 100, nperseg=200)

            # With fs=100 and nperseg=200 the bins are 0.5 Hz apart, so the
            # first 61 bins cover 0-30 Hz.
            psd_mean = psd[:61].mean()
            pse = entropy(psd)

            #add new features here too
            flist = [mean, median, std, mos, skew, kurt, psd_mean, pse]
        except Exception:
            # Best effort, as before: a window that cannot be processed is
            # skipped. Unlike a bare `except:`, this lets KeyboardInterrupt and
            # SystemExit through so a long run can still be stopped.
            continue
        rows.append(flist)

    # Build the frame once; DataFrame.append inside the loop copied the whole
    # table on every window and no longer exists in pandas >= 2.0.
    return pd.DataFrame(rows, columns=feature_list)

def getDataFrame(f, data, subject=None, test='VR', channel='CH 4'):
    """Append the feature rows of `f` to `data`, tagged with subject/test/channel.

    Parameters
    ----------
    f : DataFrame holding (at least) the columns named in ``feature_list``.
    data : DataFrame accumulated so far; it is not modified.
    subject : value stored in the 'Subject' column. When omitted, the
        notebook-level loop variable ``i`` is used, which is what this
        function always did before the parameter existed.
    test, channel : values stored in the 'Test' and 'Channel' columns.

    Returns
    -------
    A DataFrame containing the rows of `data` followed by one new row per row
    of `f`, laid out according to ``columns``. `data` itself is returned when
    `f` has no rows.
    """
    if subject is None:
        # Backward compatibility: existing calls pass only (f, data) and rely on
        # the global `i` of the calling loop. Prefer passing `subject` explicitly.
        subject = i

    if len(f) == 0:
        return data

    new_rows = pd.DataFrame(
        [[subject, test, channel, *values]
         for values in f[feature_list].itertuples(index=False, name=None)],
        columns=columns,
    )

    if len(data) == 0:
        # Nothing to concatenate with yet; keep any columns `data` already
        # declares, in the order a concatenation would have produced.
        ordered = list(data.columns) + [c for c in new_rows.columns if c not in data.columns]
        return new_rows.reindex(columns=ordered)

    # One concat per call replaces the per-row DataFrame.append, which was
    # removed in pandas 2.0.
    return pd.concat([data, new_rows], ignore_index=True)

In [29]:
# Test 1: extract features from the "VR" recordings of subjects 1-32.
# (loadRawData's testtype is VR or Non-VR; only "VR" is loaded here.)
# df1 collects the windows taken from seconds 55-110, df2 those from 110-165.
df1 = pd.DataFrame(columns = columns)
df2 = pd.DataFrame(columns = columns)
# NOTE: the loop variable has to stay named `i` -- getDataFrame(f, data) reads
# the notebook-level `i` to fill the 'Subject' column.
for i in range(1,33):
    data = loadRawData(i,"VR")
    # First time window of this subject.
    f = getFeatures(raw=data, time=(55,110))
    df1 = getDataFrame(f,df1)
    
    # Second time window of the same recording.
    f = getFeatures(raw=data, time=(110,165))
    df2 = getDataFrame(f,df2)

In [31]:
# Display the feature table built from the 110-165 s windows.
df2

Unnamed: 0,Subject,Test,Channel,Mean,Median,StdDev,Mean/StdDev,Skew,Kurt,PSD Mean,PSE
0,1,VR,CH 4,-0.020631,-0.716390,7.532904,-0.002739,1.501903,5.410669,1.894352,3.777068
1,1,VR,CH 4,-0.061582,-0.550611,7.926098,-0.007770,0.852723,2.609989,2.054986,3.598592
2,1,VR,CH 4,0.023579,0.064222,9.579325,0.002461,0.070000,0.620118,2.932391,3.514742
3,1,VR,CH 4,0.055466,-0.838362,9.691808,0.005723,1.232621,4.967096,3.199319,3.436080
4,1,VR,CH 4,-0.036695,-0.361576,7.943127,-0.004620,0.772337,2.623669,2.253540,3.538570
5,1,VR,CH 4,-0.032738,-0.406328,9.104772,-0.003596,0.767533,3.957892,2.814384,3.446087
6,1,VR,CH 4,0.053810,-0.188338,9.780690,0.005502,0.059022,1.793608,3.523189,3.209330
7,1,VR,CH 4,0.002187,-0.260940,9.685363,0.000226,0.417638,1.346142,3.437854,3.436168
8,1,VR,CH 4,-0.042562,-0.700742,8.348107,-0.005098,0.401145,2.583479,2.314133,3.557519
9,1,VR,CH 4,0.024441,-0.115121,9.288553,0.002631,0.586502,4.074112,2.647119,3.476713


In [None]:
def distanceCalculation(df, features=None):
    """Build intra- and inter-subject absolute feature-distance tables.

    Parameters
    ----------
    df : DataFrame with a 'Subject' column and one numeric column per feature.
    features : names of the feature columns to compare. Defaults to the
        notebook-level ``feature_list``.

    Returns
    -------
    (intra_data, inter_data) : two DataFrames with columns
    ['Subject', *features, 'Type'].

    * intra_data has one row per unordered pair of rows belonging to the same
      subject; 'Subject' is that subject and 'Type' is 0.
    * inter_data has one row per pair of rows belonging to two different
      subjects; 'Subject' is the (subject_a, subject_b) tuple and 'Type' is 1.

    Each feature column holds the absolute difference of that feature between
    the two rows. Rows are addressed by position, so `df` may carry any index.
    """
    feature_names = list(feature_list if features is None else features)

    # Output layout: subject label, one distance per feature, class label.
    # (Labelling these values with `columns`, which also contains 'Test' and
    # 'Channel', does not match their count and raises ValueError.)
    distance_col = ['Subject', *feature_names, 'Type']

    def _as_frame(label, diffs, kind):
        # One block of output rows sharing the same 'Subject' label and 'Type'.
        block = pd.DataFrame(diffs, columns=feature_names)
        block.insert(0, 'Subject', [label] * len(block))
        block['Type'] = kind
        return block

    def _stack(blocks):
        # Concatenate once at the end instead of appending row by row.
        if not blocks:
            return pd.DataFrame(columns=distance_col)
        return pd.concat(blocks, ignore_index=True)

    subs = df['Subject'].unique()    # All subjects, in order of first appearance
    values = df[feature_names].to_numpy(dtype=float)
    # Row positions of every subject, parallel to `subs`.
    positions = [np.flatnonzero((df['Subject'] == sub).to_numpy()) for sub in subs]

    # Intra distance computation (same person): every unordered pair of rows,
    # in the same order itertools.combinations would yield them.
    intra_blocks = []
    for sub, pos in zip(subs, positions):
        first, second = np.triu_indices(len(pos), k=1)
        if first.size:
            diffs = np.abs(values[pos[first]] - values[pos[second]])
            intra_blocks.append(_as_frame(sub, diffs, 0))

    # Inter distance computation (different persons): every row of the first
    # subject against every row of the second, for all subject pairs.
    inter_blocks = []
    for a, b in itertools.combinations(range(len(subs)), 2):
        left, right = values[positions[a]], values[positions[b]]
        n_pairs = len(left) * len(right)
        if n_pairs:
            diffs = np.abs(left[:, None, :] - right[None, :, :])
            diffs = diffs.reshape(n_pairs, len(feature_names))
            inter_blocks.append(_as_frame((subs[a], subs[b]), diffs, 1))

    return _stack(intra_blocks), _stack(inter_blocks)

def absDistance(df, features, s1, s2):
    """Return the absolute per-feature difference between two rows of `df`.

    df       -- DataFrame holding the feature columns
    features -- column names to compare
    s1, s2   -- positional (iloc) row numbers of the two rows

    Returns a list with one |value at s1 - value at s2| entry per feature, in
    the order given by `features`.
    """
    features = list(features)
    if not features:
        return []
    # Fetch each row once; looking it up again for every feature rebuilt the
    # whole row Series each time.
    row_a = df.iloc[s1]
    row_b = df.iloc[s2]
    return [np.absolute(row_a[feature] - row_b[feature]) for feature in features]

In [None]:
# Timing note (4/17): about 15 min to run both with 6 features, 18 min with 8 features.
(intra1, inter1), (intra2, inter2) = (
    distanceCalculation(table) for table in (df1, df2)
)

In [None]:
# Report how many intra-/inter-subject distance rows each sample produced.
for heading, same_person, other_person in (
    ("Sample 1:", intra1, inter1),
    ("Sample 2:", intra2, inter2),
):
    print(heading)
    print("Intra length: " + str(len(same_person)))
    print("Inter length: " + str(len(other_person)))

In [None]:
import random
#takes the distance tables and returns a more manageable table for the SVM
def get_SVM_Table(intra, inter, n=1500, seed=None):
    """Draw a random subset of rows from both distance tables and stack them.

    Parameters
    ----------
    intra, inter : DataFrames to sample from.
    n : number of rows to draw from each table (default 1500). If a table has
        fewer than `n` rows, all of its rows are used (in random order) instead
        of raising ValueError.
    seed : optional seed for a private random generator, for reproducible
        draws. When omitted, the module-level `random` state is used.

    Returns
    -------
    A DataFrame with the sampled `intra` rows followed by the sampled `inter`
    rows, re-indexed 0..len-1.
    """
    rng = random if seed is None else random.Random(seed)

    def _pick(table):
        # Sample row positions without replacement, then fetch them in one go.
        size = min(n, len(table))
        chosen = rng.sample(range(len(table)), size)
        return table.iloc[chosen]

    # A single concat replaces thousands of DataFrame.append calls (removed in
    # pandas 2.0, and quadratic in the number of rows).
    return pd.concat([_pick(intra), _pick(inter)], ignore_index=True)

In [None]:
# One sampled SVM table per time window (1 = first window, 2 = second window).
svm1, svm2 = (
    get_SVM_Table(same_person, other_person)
    for same_person, other_person in ((intra1, inter1), (intra2, inter2))
)

In [None]:
# Stack both sampled tables into a single table with a fresh 0..n-1 index.
all_svm = pd.concat((svm1, svm2), axis=0, ignore_index=True)

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
def svmTest(svm):
    """Fit an RBF-kernel SVC on a distance table and print its test metrics.

    svm -- DataFrame with a 'Type' column (the label) plus 'Subject',
           'PSD Mean' and the remaining feature columns.

    'Subject', 'Type' and 'PSD Mean' are left out of the predictors. 70% of the
    rows are used for training and 30% for testing (fixed random_state=101).
    Prints the confusion matrix, the classification report and the accuracy;
    returns nothing.
    """
    target = svm["Type"]
    predictors = svm.drop(columns=['Subject','Type','PSD Mean'])
    split = train_test_split(predictors, target, test_size=0.3, random_state=101)
    X_train, X_test, y_train, y_test = split

    classifier = SVC(C=1.0, kernel = 'rbf', degree=3, gamma=0.1)
    classifier.fit(X_train, y_train)
    prediction = classifier.predict(X_test)

    for report in (confusion_matrix, classification_report):
        print(report(y_test, prediction))
    print("Accuracy: "+str(accuracy_score(y_test, prediction)))

In [None]:
# Evaluate each window on its own, then both windows combined.
runs = (('Test 1:', svm1), ('Test 2:', svm2), ('Test All:', all_svm))
for position, (title, table) in enumerate(runs):
    if position:
        print('')  # blank line between consecutive reports
    print(title)
    svmTest(table)