# Test NSEG dataset

In [1]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("./dataset_eeg_des.csv")
df.drop(['Unnamed: 0'],axis=1, inplace=True)
df = df.set_index(['id_user','session','video'])

In [3]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8,9,...,492,493,494,495,496,497,498,499,500,emotion
id_user,session,video,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
10,1,1,2.135436,2.887686,3.465504,3.810436,3.955764,4.053275,3.646478,3.300221,3.606062,3.223825,...,-1.011859,-1.020659,-0.602725,-0.584323,-0.602491,-0.579400,-1.200410,-0.910379,-1.369580,1
10,1,2,2.549241,2.254038,3.489210,3.876865,3.767604,3.407996,3.706311,3.790671,3.835916,3.726078,...,-0.654046,-0.287855,-0.064083,-0.382718,-0.854156,-0.538963,-0.978321,-0.520367,-1.108300,2
10,1,3,2.436902,3.456774,4.163457,4.401829,4.272075,3.567253,3.214838,3.632954,3.966298,3.774067,...,-0.102593,-0.080281,-0.138853,0.001005,-0.083322,-0.067288,-0.063967,0.042125,-0.123110,3
10,1,4,1.841611,2.513253,3.124816,3.421180,3.492676,3.306529,3.099885,3.100102,3.221046,3.320871,...,-0.990079,-0.927056,-0.745584,-0.978420,-1.031638,-0.682465,-0.665910,-1.033460,-1.373817,0
10,1,5,2.320207,2.961560,3.857012,4.111162,3.868319,3.717754,3.698447,3.367067,3.021535,3.351307,...,-0.247413,-0.758625,-1.643860,-1.252678,-0.605932,-0.750933,-1.202377,-0.686867,-1.160245,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,3,20,1.398441,2.604782,3.687751,3.648057,3.009953,3.942856,3.935395,2.950334,3.604765,3.776026,...,-0.389243,-0.347923,-0.460666,-0.483391,-0.797307,-0.593601,-0.401147,-0.498099,-0.751132,0
9,3,21,1.571485,1.894192,2.786308,3.305627,3.571761,3.696536,3.393062,2.770488,2.746998,2.790538,...,-0.804578,-1.045797,-1.104529,-0.949304,-0.719607,-0.668664,-0.650504,-0.838495,-1.361561,2
9,3,22,1.270886,2.200624,2.915375,3.471395,3.427353,2.811443,3.023582,2.642579,3.219103,3.161054,...,-0.531922,-0.662631,-0.867511,-1.125926,-0.903615,-1.054831,-0.916811,-1.142544,-1.251611,0
9,3,23,2.621798,3.373337,3.905828,3.839850,3.868095,3.649109,3.723545,3.259310,3.605300,3.623675,...,-0.544234,-0.343033,-0.551949,-0.976702,-1.112032,-0.834240,-0.739685,-0.623760,-1.128035,1


In [4]:
def preprocessing_for_LSTM(X_train, X_test, y_train, y_test):
    X_train = np.array(X_train).reshape((X_train.shape[0], X_train.shape[1], 1))
    y_train = to_categorical(y_train)
    X_test = np.array(X_test).reshape((X_test.shape[0], X_test.shape[1], 1))
    y_test = to_categorical(y_test)
    return X_train, X_test, y_train, y_test

# Subject dependent

In [5]:
def subject_dependent_split(df,session):
    df = df.reset_index()
    np.random.seed(75)
    test_videos = np.random.choice(np.arange(1, 25), replace=False, size=(8))
    df_sess = df.loc[df['session'] == session]
    X_test = df_sess[df_sess['video'].isin(test_videos)].set_index(['id_user','session','video']).drop('emotion',axis=1)
    y_test = df_sess[df_sess['video'].isin(test_videos)].set_index(['id_user','session','video']).emotion
    X_train = df_sess[~df_sess['video'].isin(test_videos)].set_index(['id_user','session','video']).drop('emotion',axis=1)
    y_train = df_sess[~df_sess['video'].isin(test_videos)].set_index(['id_user','session','video']).emotion
    return X_train, X_test, y_train, y_test

In [7]:
accuracy_test = []
for i in range(1,4):
    X_trainSD, X_testSD, y_trainSD, y_testSD = subject_dependent_split(df,i)
    X_trainSD, X_testSD, y_trainSD, y_testSD = preprocessing_for_LSTM(X_trainSD, X_testSD, y_trainSD, y_testSD)
    model = Sequential()
    model.add(LSTM(units=64, input_shape=(X_trainSD.shape[1], 1)))
    model.add(Dense(units=4, activation='relu'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_trainSD, y_trainSD, epochs=10, batch_size=32)
    loss, accuracy = model.evaluate(X_testSD, y_testSD)
    accuracy_test.append(accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
print("ACCURACY: ", np.array(accuracy_test).mean())

ACCURACY:  0.25555556019147235
LOSS:  nan


# Subject-Biased Experiment


In [10]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('emotion',axis=1), df.emotion, test_size=0.20, random_state=22)
X_train, X_test, y_train, y_test = preprocessing_for_LSTM(X_train, X_test, y_train, y_test)

In [11]:
del model
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], 1)))
model.add(Dense(units=4, activation='relu'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x149d2a0c610>

In [12]:
loss, accuracy = model.evaluate(X_test, y_test)



# Subject Indipendent

In [None]:
def subject_independent_split(df,session):
    df = df.reset_index()
    df_sess = df.loc[df['session'] == session]
    groups = df_sess['id_user']
    X = df_sess.set_index(['id_user','session','video']).drop('emotion',axis=1)
    y = df_sess.set_index(['id_user','session','video']).emotion
    return X, y, groups

# TO DO: da implementare kfold
https://stackoverflow.com/questions/48085182/cross-validation-in-keras


"""
from sklearn.model_selection import RepeatedKFold, cross_val_score
from tensorflow.keras.models import * 
from tensorflow.keras.layers import * 
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

def buildmodel():
    model= Sequential([
        Dense(10, activation="relu"),
        Dense(5, activation="relu"),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return(model)

estimator= KerasRegressor(build_fn=buildmodel, epochs=100, batch_size=10, verbose=0)
kfold= RepeatedKFold(n_splits=5, n_repeats=100)
results= cross_val_score(estimator, x, y, cv=kfold, n_jobs=2)  # 2 cpus
results.mean()  # Mean MSE
"""

In [None]:
scores_test = []
logo = LeaveOneGroupOut()
for i in range(1,4):
    X_SI,y_SI,groups = subject_independent_split(df,i)
    X_SI = np.array(X_SI).reshape((X_SI.shape[0], X_SI.shape[1], 1))
    y_SI = to_categorical(y_SI)
    del model
    model = Sequential()
    model.add(LSTM(units=64, input_shape=(int(X_SI.shape[1])-15, 1)))
    model.add(Dense(units=4, activation='relu'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    scores_gbc = cross_val_score(model, X_SI, y_SI, cv=logo, verbose=1 , groups = groups, n_jobs = -1)
    scores_test.append(rfSD.scores.mean())

In [None]:
print("TEST:", np.array(scores_test).mean())