# How to use this notebook

1. Change the datapath and load the data (For the scatter plot we are working with the data with 48 brain regions and a threshold of 0.5, the full set of data is available in the Drive in the Folder Split_48_Neutre, be careful to use these data, there was a mistake in previous ones)


2. Run the preprocessing (Both steps)


3. Train one of the models, skip the others


4. Run the two cells of the section Analysis of Prediction, they will return a table with all accuracies per emotion

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics

## Load the data

In [35]:
#--------data paths------------------------------------------------------------------------------------------
data_train_path = 'C:/Users/manon/Desktop/Projet_2 _ML/code/datas/Split_48_neutre/Combined_All_Train_80.csv'
data_test_path = 'C:/Users/manon/Desktop/Projet_2 _ML/code/datas/Split_48_neutre/Combined_All_Test_80.csv'

#--------loading the data and spliting between features and predictions--------------------------------------
tx_train = pd.read_csv(data_train_path,sep=",",squeeze=True)
X_train=tx_train.iloc[:, :48]
ytr = pd.read_csv(data_train_path,sep=",",usecols=[48],squeeze=True)

tx_test = pd.read_csv(data_test_path,sep=",",squeeze=True)
X_test=tx_test.iloc[:, :48]
yte = pd.read_csv(data_test_path,sep=",",usecols=[48],squeeze=True)

#---------Adding features name (i.e. number of the brain region represented by the feature)------------------
def add_column_names(data):
    
    liste=[]
    for i in range(48):
        liste.append(str(i+1))
    data.columns=liste
    return data,liste

X_train,liste = add_column_names(X_train)
X_test,liste = add_column_names(X_test)

#---------Convert y to int to be compatible with future prediction--------------------------------------------
ytr = ytr.astype(np.int64)
yte = yte.astype(np.int64)

# Preprocessing
###  1. Data standardization

In [36]:
def standardize(x):
    """Standardize the original data set."""
    mean_x = np.mean(x)
    x = x - mean_x
    std_x = np.std(x)
    x = x / std_x
    return x

In [37]:
Xtrain=standardize(X_train)
X_test=standardize(X_test)

### 2. Feature Scaling

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training
### 1. Random Forest

In [38]:
classifier = RandomForestClassifier(n_estimators = 500, criterion = 'gini', max_depth=9,random_state = 42)
classifier.fit(X_train, ytr)
Y_pred = classifier.predict(X_test)
print("Accuracy:",metrics.accuracy_score(yte, Y_pred))

Accuracy: 0.330603889457523


### 2. Decision Tree Classifier

In [12]:
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = 4, criterion='gini').fit(X_train, ytr)
dtree_predictions = dtree_model.predict(X_test)
print("Accuracy:",metrics.accuracy_score(yte, dtree_predictions))

Accuracy: 0.283179802115319


### 3. SVM

In [14]:
from sklearn.svm import SVC
svm_model_linear = SVC(kernel = 'linear', C = 0.025).fit(X_train, ytr) #c=0.02 for linear,0.33 for poly, 0.01 for sigmoid
svm_predictions = svm_model_linear.predict(X_test)
accuracy = svm_model_linear.score(X_test, yte)
print("Accuracy:",accuracy)

Accuracy: 0.2811327192084613


### 4. KNN

In [16]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 873, weights='distance').fit(X_train, ytr)
accuracy = knn.score(X_test, yte)
print("Accuracy:",accuracy)

Accuracy: 0.2739679290344592


### 5. Logistic Regression (if we decide to keep it)

In [18]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver = 'saga',multi_class='auto')
logreg.fit(X_train,ytr)
accuracy = logreg.score(X_test, yte)
print("Accuracy:",accuracy)

Accuracy: 0.2517911975435005




### 6. Convolutional NN

In [28]:
X_train=tf.expand_dims(X_train, axis=-1)
X_test=tf.expand_dims(X_test, axis=-1)
y_train = tf.keras.utils.to_categorical(ytr,num_classes=14)
y_test = tf.keras.utils.to_categorical(yte,num_classes=14)

n_samples, n_features = X_train.shape[0], X_train.shape[1]
n_outputs=14

In [32]:
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense

model = tf.keras.Sequential()
model.add(Conv1D(filters=12, kernel_size=5, activation='tanh',kernel_initializer='he_uniform',input_shape=(n_features, 1)))
model.add(Conv1D(filters=64, kernel_size=5, activation='tanh',kernel_initializer='he_uniform'))
model.add(Dropout(0.4))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(60, input_dim=199, activation='tanh'))
model.add(Dense(14, activation='softmax'))
    
    # Compile model
optimizer = tf.keras.optimizers.Nadam(lr=0.01)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [33]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, model):
    epochs, batch_size = 10, 1
    n_samples, n_features = X_train.shape[0], X_train.shape[1]
    n_outputs=14
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size)
    return accuracy

In [34]:
evaluate_model(X_train, y_train, X_test, y_test, model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


0.26680314540863037

### 7. Fully Connected NN

In [42]:
X_train=tf.expand_dims(X_train, axis=-1)
X_test=tf.expand_dims(X_test, axis=-1)
y_train = tf.keras.utils.to_categorical(ytr,num_classes=14)
y_test = tf.keras.utils.to_categorical(yte,num_classes=14)

n_samples, n_features = X_train.shape[0], X_train.shape[1]
n_outputs=14

In [43]:
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense

model = tf.keras.Sequential()
model.add(Dense(6, input_dim=48, activation='relu',kernel_initializer='zero'))
model.add(Dense(14, activation='softmax'))
    
# Compile model
optimizer = tf.keras.optimizers.Adagrad(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [44]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy, model):
    epochs, batch_size = 10, 100
    n_samples, n_features = X_train.shape[0], X_train.shape[1]
    n_outputs=14
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size)
    return accuracy

In [45]:
evaluate_model(X_train, y_train, X_test, y_test, model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


0.26475605368614197

# Analysis of predictions

In [43]:
Yte = np.expand_dims(yte, axis=1)
#Y_pred = np.expand_dims(Y_pred, axis=1)
Yte=pd.DataFrame(Yte,columns = ['Emotions'])
Y_pred=pd.DataFrame(Y_pred,columns = ['Emotions'])

dict = pd.DataFrame({0:'Anger',1:'Sad',2:'Guilt',3:'Shame',4:'Disgust',5:'Anxiety',6:'Fear',7:'Surprise',8:'Contempt',9:'Satisfaction',
            10:'WarmHeart.',11:'Happiness',12:'Love',13:'Neutral'}, index=[0])

Yte=Yte.replace({"Emotions": dict})
Y_pred=Y_pred.replace({"Emotions": dict})


In [44]:
from sklearn import metrics
print(metrics.classification_report(Yte, Y_pred))

              precision    recall  f1-score   support

       Anger       0.41      0.25      0.31        60
     Anxiety       0.32      0.05      0.09       148
    Contempt       0.66      0.09      0.15       240
     Disgust       0.56      0.20      0.29       192
        Fear       0.23      0.04      0.08       156
   Happiness       0.32      0.79      0.46       776
        Love       0.21      0.05      0.08       104
     Neutral       0.44      0.37      0.40       208
         Sad       0.38      0.24      0.29       168
Satisfaction       0.21      0.15      0.17       315
       Shame       0.32      0.27      0.30       304
    Surprise       0.41      0.19      0.26        72
  WarmHeart.       0.12      0.01      0.01       188

    accuracy                           0.33      2931
   macro avg       0.35      0.21      0.22      2931
weighted avg       0.35      0.33      0.27      2931

