In [119]:
import pandas as pd
from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [120]:
dfe = pd.read_csv("../Data/Data_cleaned/emotion_cleaned_rudy.csv")[["target", "clean_text"]].dropna()

In [121]:
encoder = LabelEncoder()
dfe["cible"] = encoder.fit_transform(dfe["target"])

In [122]:
liste = dfe["target"].unique()

## Spliting des données

In [123]:
X_train, X_test, y_train, y_test = train_test_split(dfe["clean_text"], dfe["cible"], train_size=0.8, random_state=1, stratify=dfe["target"])
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, train_size=0.5, stratify=y_train)

## Vectorisation

In [124]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer

In [125]:
vectorizer =CountVectorizer()
vectorizer.fit(X_train)

CountVectorizer()

In [126]:
X_train_t = vectorizer.transform(X_train).toarray()
X_test_t = vectorizer.transform(X_test).toarray()
X_val_t = vectorizer.transform(X_val).toarray()

## Neural Network

In [127]:
model = keras.Sequential()

initializer = keras.initializers.HeNormal()
regularizer = keras.regularizers.L2(0.005)

model.add(keras.layers.Dense(128, input_dim=X_train_t.shape[1], activation="relu",
                             kernel_initializer=initializer,kernel_regularizer=regularizer))
model.add(keras.layers.Dense(64, input_dim=X_train_t.shape[1],kernel_regularizer=regularizer, activation="relu"))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(6, activation="softmax"))

In [128]:
model.compile(
    loss= keras.losses.SparseCategoricalCrossentropy(),
    optimizer= keras.optimizers.Adam(learning_rate=0.0001),
    metrics=["accuracy"]
)

In [129]:
history = model.fit(
    X_train_t,
    y_train,
    epochs=100,
    callbacks=keras.callbacks.EarlyStopping(patience=3, monitor="val_loss"),
    batch_size=8,
    validation_data=(X_val_t, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


In [130]:
from sklearn.metrics import f1_score
import numpy as np

In [135]:
print(45*'-'+"weighted"+46*'-')
print("Train f1_score:", f1_score(np.argmax(model.predict(X_train_t), axis=1), y_train, average="weighted"))
print("Val f1_score:", f1_score(np.argmax(model.predict(X_val_t), axis=1), y_val, average="weighted"))
print(47*'-'+"macro"+47*'-')
print("Train f1_score:", f1_score(np.argmax(model.predict(X_train_t), axis=1), y_train, average="macro"))
print("Val f1_score:", f1_score(np.argmax(model.predict(X_val_t), axis=1), y_val, average="macro"))
print(47*'-'+"micro"+47*'-')
print("Train f1_score:", f1_score(np.argmax(model.predict(X_train_t), axis=1), y_train, average="micro"))
print("Val f1_score:", f1_score(np.argmax(model.predict(X_val_t), axis=1), y_val, average="micro"))

---------------------------------------------weighted----------------------------------------------
Train f1_score: 0.9923186928369349
Val f1_score: 0.8764107022133538
-----------------------------------------------macro-----------------------------------------------
Train f1_score: 0.9893361799452013
Val f1_score: 0.8292262811961892
-----------------------------------------------micro-----------------------------------------------
Train f1_score: 0.9923094849685388
Val f1_score: 0.8749708692612445


## F1 score weighted par classe

In [136]:
data_score = pd.DataFrame()
data_score["y_pred"] = np.argmax(model.predict(X_val_t),axis=1)
data_score["y_true"] = y_val.reset_index()["cible"]

In [137]:
def f1_score_classe(df,classe):
    df_score = df[df.y_true==classe]
    return f1_score(df_score["y_pred"],df_score["y_true"], average="weighted")

In [138]:
for i in range(6):
    print(f"Classe {liste[i]} : {f1_score_classe(data_score,i)}")

Classe sadness : 0.7403957483008866
Classe anger : 0.7732853908514686
Classe love : 0.9003156920208354
Classe surprise : 0.6433697306597842
Classe fear : 0.8772956561205646
Classe happy : 0.4759086516555761


## TextVectorization


In [182]:
max_features = 20000
embedding_dim = 256
sequence_length = 100

In [183]:
from keras.layers import TextVectorization

In [184]:
vectorize_layer = TextVectorization(
    max_tokens=max_features,
    output_mode="int",
    output_sequence_length=sequence_length,
)

In [185]:
vectorize_layer.adapt(X_train)

In [186]:
def vectorize_text(text):
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text)

In [187]:
train_ds = X_train.map(vectorize_text)
val_ds = X_val.map(vectorize_text)
test_ds = X_test.map(vectorize_text)

KeyboardInterrupt: 

In [None]:
for item in train_ds:
    print(item)

tf.Tensor(
[[ 252  127    8   22 1143   70    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0]], shape=(1, 100), dtype=int64)
tf.Tensor(
[[ 1697   923   174    81  6705   247 10843  2056   348  4539  8196  5037
  11049   551  4625     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     

In [206]:
text_input = tf.keras.Input(shape=(1,), dtype=tf.string, name='text')

inputs = vectorize_layer(text_input)
x = keras.layers.Embedding(max_features, embedding_dim)(inputs)

x = keras.layers.Conv1D(128, 7, padding="same", activation="relu", strides=3)(x)
x = keras.layers.Conv1D(128, 7, padding="same", activation="relu", strides=3)(x)

# x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dense(64, activation="relu")(x)
# x = keras.layers.BatchNormalization()(x)

outputs = keras.layers.Dense(5, activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)

In [207]:
model.compile(
    loss= keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)

In [208]:
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=8,
    callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=3)]
)

Epoch 1/50


UnimplementedError: Graph execution error:

Detected at node 'model_11/Cast' defined at (most recent call last):
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\asyncio\base_events.py", line 596, in run_forever
      self._run_once()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\asyncio\base_events.py", line 1890, in _run_once
      handle._run()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
      await result
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
      result = self._run_cell(
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
      return runner(coro)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\APPREN~1\AppData\Local\Temp/ipykernel_8100/3745915319.py", line 1, in <module>
      model.fit(
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\functional.py", line 451, in call
      return self._run_internal_graph(
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\functional.py", line 571, in _run_internal_graph
      y = self._conform_to_reference_input(y, ref_input=x)
    File "c:\Users\Apprenant\anaconda3\envs\rb_ds\lib\site-packages\keras\engine\functional.py", line 671, in _conform_to_reference_input
      tensor = tf.cast(tensor, dtype=ref_input.dtype)
Node: 'model_11/Cast'
Cast string to int64 is not supported
	 [[{{node model_11/Cast}}]] [Op:__inference_train_function_5537225]