In [1]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix


In [2]:
df = pd.read_csv("dataset_eeg.csv")

In [3]:
df.drop(['Unnamed: 0'],axis=1, inplace=True)

In [4]:
df = df.set_index(['id_user','session','video'])

In [5]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8,9,...,1410,1411,1412,1413,1414,1415,1416,1417,1418,emotion
id_user,session,video,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
10,1,1,0.000459,0.002594,0.001768,0.000890,0.000771,0.000439,0.000206,0.000186,0.000154,0.000096,...,0.681261,0.680017,0.680084,0.680192,0.680615,0.680930,0.680136,0.680227,0.679984,1
10,1,2,0.000101,0.001035,0.001001,0.000736,0.000356,0.000234,0.000099,0.000098,0.000140,0.000098,...,0.680135,0.680342,0.679829,0.680472,0.681057,0.680634,0.680616,0.680224,0.680036,2
10,1,3,0.000478,0.002222,0.001565,0.000634,0.000343,0.000313,0.000261,0.000199,0.000135,0.000096,...,0.680331,0.680164,0.680400,0.680197,0.680177,0.680187,0.680268,0.680641,0.681036,3
10,1,4,0.000384,0.001675,0.001930,0.001100,0.000564,0.000379,0.000327,0.000239,0.000152,0.000083,...,0.681704,0.681042,0.680627,0.680690,0.680684,0.680954,0.681885,0.681888,0.682665,0
10,1,5,0.000335,0.001552,0.001229,0.000712,0.000556,0.000369,0.000169,0.000108,0.000080,0.000126,...,0.680988,0.680125,0.681325,0.681589,0.681848,0.681047,0.680562,0.680181,0.679801,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,3,20,0.000075,0.000462,0.000521,0.000377,0.000186,0.000138,0.000128,0.000105,0.000066,0.000085,...,0.683585,0.683690,0.683770,0.683633,0.683790,0.684027,0.683810,0.683845,0.683834,0
9,3,21,0.000125,0.000570,0.000366,0.000513,0.000389,0.000254,0.000157,0.000104,0.000094,0.000066,...,0.683630,0.683688,0.683734,0.683707,0.683570,0.683824,0.683602,0.683535,0.683567,2
9,3,22,0.000090,0.000342,0.000350,0.000284,0.000187,0.000151,0.000155,0.000173,0.000142,0.000111,...,0.683840,0.683795,0.683588,0.683668,0.683906,0.683953,0.683855,0.683831,0.683782,0
9,3,23,0.000127,0.000779,0.000857,0.000584,0.000386,0.000325,0.000230,0.000073,0.000042,0.000037,...,0.683671,0.683764,0.683896,0.683653,0.683651,0.683636,0.683633,0.683599,0.683631,1


## Split train e test
### Subject Dependent
16 trials train e 8 test
2 testing group for labels 

In [6]:
df_sd = df.reset_index()
# generiamo 8 video su cui eseguiamo i test
test_videos = np.random.randint(1, 25, size=8)
print(test_videos)
X_testSD = df_sd[df_sd['video'].isin(test_videos)]
print(X_testSD.emotion.unique()) #check se sono rappresentate tutte le emozioni
X_testSD.set_index(['id_user','session','video'], inplace=True)
y_testSD = X_testSD.emotion
X_testSD = X_testSD.drop(['emotion'],axis=1)
X_trainSD = df_sd[~df_sd['video'].isin(test_videos)]
X_trainSD.set_index(['id_user','session','video'], inplace=True)
y_trainSD = X_trainSD.emotion
X_trainSD = X_trainSD.drop(['emotion'],axis=1)

[24 22  6  7 18 15 24 10]
[0 1 2 3]


In [7]:
clfSD = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0).fit(X_trainSD, y_trainSD)

In [8]:
clfSD.score(X_testSD, y_testSD)

0.28888888888888886

### Subject Indipendent
Leave One Out Cross Validation
#### GradientBoostingClassifier

In [6]:
df_indip = df.reset_index()
groups = df_indip['id_user']

In [7]:
groups

0       10
1       10
2       10
3       10
4       10
        ..
1075     9
1076     9
1077     9
1078     9
1079     9
Name: id_user, Length: 1080, dtype: int64

In [8]:
model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

In [9]:
loo = LeaveOneOut()

In [10]:
scores = cross_val_score(model, df_indip.drop(['emotion','id_user','session','video'],axis=1), df_indip.emotion, cv=loo, verbose=1 , groups = groups, n_jobs = -1)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed: 34.9min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 83.2min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed: 144.8min
[Parallel(n_jobs=-1)]: Done 1080 out of 1080 | elapsed: 196.8min finished


In [13]:
scores
scori = pd.DataFrame(scores)
scori.to_csv("scores.csv")


PermissionError: [Errno 13] Permission denied: 'scores.csv'

In [15]:
import statistics

print(statistics.mean(scores))

0.3287037037037037


### Subject-Biased Experiment
train_test_split

In [28]:
X_trainSB, X_testSB, y_trainSB, y_testSB = train_test_split(df.drop('emotion',axis=1), df.emotion, test_size=0.20, random_state=42)

In [29]:
X_testSB

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8,9,...,1409,1410,1411,1412,1413,1414,1415,1416,1417,1418
id_user,session,video,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
12,2,16,0.000305,0.001928,0.000870,0.000505,0.000328,0.000328,0.000489,0.000376,0.000193,0.000207,...,0.680079,0.680667,0.680658,0.680106,0.681802,0.682166,0.679579,0.678482,0.682893,0.685609
2,2,21,0.000474,0.001760,0.001627,0.001237,0.000806,0.000365,0.000249,0.000304,0.000246,0.000150,...,0.681692,0.682242,0.681388,0.681752,0.682317,0.681962,0.682208,0.681929,0.681844,0.681459
10,3,12,0.000718,0.003106,0.003392,0.003272,0.001834,0.001151,0.000634,0.000448,0.000262,0.000269,...,0.683742,0.683580,0.683671,0.683832,0.683243,0.682858,0.682967,0.683616,0.684239,0.684053
4,2,13,0.001133,0.005724,0.004970,0.002677,0.001593,0.000809,0.000546,0.000612,0.000344,0.000172,...,0.681066,0.682130,0.682051,0.681252,0.680855,0.680208,0.682095,0.682052,0.681295,0.681579
8,3,20,0.000309,0.001668,0.001472,0.001170,0.000661,0.000462,0.000365,0.000332,0.000229,0.000192,...,0.681727,0.681631,0.680784,0.680844,0.681285,0.680967,0.681324,0.681479,0.681403,0.681408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,3,4,0.022472,0.063170,0.026366,0.009884,0.007091,0.003794,0.002043,0.001941,0.001595,0.000799,...,0.681943,0.681975,0.682550,0.682506,0.681908,0.681647,0.682894,0.680831,0.681417,0.681640
3,3,5,0.000778,0.003987,0.000821,0.000059,0.000009,0.000006,0.000002,0.000002,0.000001,0.000001,...,0.683866,0.683932,0.683794,0.683775,0.683942,0.683794,0.683750,0.683597,0.684092,0.684181
11,3,10,0.000301,0.001965,0.001265,0.000396,0.000424,0.000434,0.000127,0.000117,0.000056,0.000076,...,0.684171,0.684283,0.684662,0.684572,0.684762,0.684406,0.684115,0.684002,0.684144,0.683960
12,2,2,0.001520,0.014837,0.011639,0.005375,0.002257,0.001106,0.000762,0.000617,0.000315,0.000217,...,0.682203,0.683904,0.683107,0.682763,0.683646,0.683313,0.682855,0.682270,0.681508,0.681069


#### GradientBoostingClassifier

In [30]:
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0).fit(X_trainSB, y_trainSB)

In [31]:
clf.score(X_testSB, y_testSB)

0.3425925925925926

#### SVM
neutral, sad, fear, and happy => 0 , 1 , 2 , 3

In [None]:
neutral = svm.SVC()
sad = svm.SVC()
fear = svm.SVC()
happy = svm.SVC()