In [224]:
import pandas as pd
from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [225]:
dfe = pd.read_csv("../Data/Data_cleaned/emotion_cleaned_rudy.csv")[["target", "clean_text"]].dropna()

In [226]:
encoder = LabelEncoder()
dfe["cible"] = encoder.fit_transform(dfe["target"])

In [227]:
liste = dfe["target"].unique()

## Spliting des données

In [228]:
X_train, X_test, y_train, y_test = train_test_split(dfe["clean_text"], dfe["cible"], train_size=0.8, random_state=1, stratify=dfe["target"])
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, train_size=0.5, stratify=y_train)

## Vectorisation

In [229]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer

In [230]:
vectorizer =CountVectorizer()
vectorizer.fit(X_train)

CountVectorizer()

In [231]:
X_train_t = vectorizer.transform(X_train)
X_test_t = vectorizer.transform(X_test)
X_val_t = vectorizer.transform(X_val)

## Neural Network

In [232]:
model = keras.Sequential()

initializer = keras.initializers.HeNormal()
regularizer = keras.regularizers.L2(0.005)

model.add(keras.layers.Dense(64, input_dim=X_train_t.shape[1], activation="relu",
                             kernel_initializer=initializer,kernel_regularizer=regularizer))
model.add(keras.layers.AlphaDropout(20))
model.add(keras.layers.Dense(32, input_dim=X_train_t.shape[1],kernel_regularizer=regularizer, activation="relu"))
model.add(keras.layers.AlphaDropout(20))
model.add(keras.layers.Dense(6, activation="softmax"))

In [233]:
model.compile(
    loss= keras.losses.SparseCategoricalCrossentropy(),
    optimizer= keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)

In [234]:
history = model.fit(
    X_train_t.toarray(),
    y_train,
    epochs=100,
    callbacks=keras.callbacks.EarlyStopping(patience=3, monitor="val_loss"),
    batch_size=8,
    validation_data=(X_val_t.toarray(), y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100


In [235]:
from sklearn.metrics import f1_score
import numpy as np

In [236]:
print(47*'-'+"weighted"+47*'-')
print("Train f1_score:", f1_score(np.argmax(model.predict(X_train_t), axis=1), y_train, average="weighted"))
print("Val f1_score:", f1_score(np.argmax(model.predict(X_val_t), axis=1), y_val, average="weighted"))
print(47*'-'+"macro"+47*'-')
print("Train f1_score:", f1_score(np.argmax(model.predict(X_train_t), axis=1), y_train, average="macro"))
print("Val f1_score:", f1_score(np.argmax(model.predict(X_val_t), axis=1), y_val, average="macro"))
print(47*'-'+"micro"+47*'-')
print("Train f1_score:", f1_score(np.argmax(model.predict(X_train_t), axis=1), y_train, average="micro"))
print("Val f1_score:", f1_score(np.argmax(model.predict(X_val_t), axis=1), y_val, average="micro"))

-----------------------------------------------weighted-----------------------------------------------
Train f1_score: 0.9811063143302133
Val f1_score: 0.8491216190389638
-----------------------------------------------macro-----------------------------------------------
Train f1_score: 0.9743873574500634
Val f1_score: 0.8090466434290912
-----------------------------------------------micro-----------------------------------------------
Train f1_score: 0.9811232812864135
Val f1_score: 0.8487532043812631


## F1 score weighted par classe

In [237]:
data_score = pd.DataFrame()
data_score["y_pred"] = np.argmax(model.predict(X_val_t),axis=1)
data_score["y_true"] = y_val.reset_index()["cible"]

In [238]:
def f1_score_classe(df,classe):
    df_score = df[df.y_true==classe]
    return f1_score(df_score["y_pred"],df_score["y_true"], average="weighted")

In [239]:
for i in range(6):
    print(f"Classe {liste[i]} : {f1_score_classe(data_score,i)}")

Classe sadness : 0.7497544422243098
Classe anger : 0.7533854057973406
Classe love : 0.8547412460536069
Classe surprise : 0.6331385881305125
Classe fear : 0.8002884725481115
Classe happy : 0.5079801654196117


In [240]:
predictions_test = encoder.inverse_transform(y_val)

In [241]:
predictions_test

array(['sadness', 'happy', 'fear', ..., 'sadness', 'happy', 'happy'],
      dtype=object)

In [242]:
y_val

9800     4
4153     2
1117     1
16054    4
19678    2
        ..
3766     0
586      0
2821     4
15910    2
3402     2
Name: cible, Length: 8582, dtype: int32