In [None]:
!pip install datasets

In [None]:
#  Imports
import tensorflow as tf
from tensorflow.keras.layers import Dense, LayerNormalization, Dropout, Embedding, MultiHeadAttention
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from datasets import load_dataset


In [None]:
# Base model hyperparameters

embed_dim = 512 # embedding dimension (token and position embeddings; also passed as attention key_dim)
num_heads = 8 # number of attention heads in MultiHeadAttention (not a count of words)
ff_dim = 2048 # hidden width of the feed-forward network (MLP) inside the Transformer block
maxlen = 200 # length every tokenized sentence is padded/truncated to
vocab_size = 10000 # vocabulary size (tokenizer num_words and token Embedding input_dim)
batch_size = 32 # mini-batch size passed to model.fit

In [None]:
# Load the "emotion" dataset and pull the text / label columns out of the
# train and test splits.
dataset = load_dataset("emotion")

train_texts, train_labels = dataset['train']['text'], dataset['train']['label']
test_texts, test_labels = dataset['test']['text'], dataset['test']['label']


In [None]:
# Fit the word-level tokenizer on the training texts only; words outside the
# num_words budget (or unseen at fit time) map to the "<OOV>" token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

# Texts -> integer id sequences -> fixed-length arrays, padded at the end.
x_train = pad_sequences(
    tokenizer.texts_to_sequences(train_texts), maxlen=maxlen, padding='post'
)
x_test = pad_sequences(
    tokenizer.texts_to_sequences(test_texts), maxlen=maxlen, padding='post'
)

# Integer class labels as NumPy arrays.
y_train = np.array(train_labels)
y_test = np.array(test_labels)


In [None]:
# TransformerBlock (encoder block)
class TransformerBlock(tf.keras.layers.Layer):
    """Single Transformer encoder block: self-attention, then a feed-forward network.

    Each of the two sub-layers is wrapped as dropout -> residual addition ->
    layer normalization.

    Args:
        embed_dim: Size of the last axis of the input. Also used as the per-head
            `key_dim` of the attention layer and as the output width of the
            feed-forward network, so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) layer of the feed-forward network.
        rate: Dropout rate applied to the output of each sub-layer.
        **kwargs: Standard Keras layer arguments (e.g. `name`, `dtype`),
            forwarded to the base `Layer` constructor.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        """Apply the block to `inputs` and return a tensor fed by two residual paths.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Whether the dropout layers are active.

        Returns:
            The layer-normalized sum of the attention sub-layer output and the
            feed-forward sub-layer output.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be rebuilt from its config."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config


In [None]:
# Full Transformer classifier
class Transformer(tf.keras.Model):
    """Transformer-encoder text classifier.

    Token ids are embedded, summed with learned position embeddings, passed
    through one `TransformerBlock`, pooled into one vector per sequence and
    classified by a softmax layer.

    Args:
        vocab_size: Number of rows of the token embedding table.
        embed_dim: Embedding width, shared by token and position embeddings.
        num_heads: Number of attention heads of the encoder block.
        ff_dim: Hidden width of the encoder block's feed-forward network.
        rate: Dropout rate of the encoder block.
        max_len: Number of positions in the position embedding table, i.e. the
            longest sequence the model accepts. Defaults to the notebook-level
            `maxlen`, which is what was previously hard-wired.
        num_classes: Number of output classes (defaults to 6, the value that
            was previously hard-coded).
        **kwargs: Standard Keras model arguments (e.g. `name`).
    """

    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, rate=0.1,
                 max_len=None, num_classes=6, **kwargs):
        super().__init__(**kwargs)
        # Fall back to the notebook-level `maxlen` so existing call sites keep working.
        self.max_len = maxlen if max_len is None else max_len
        self.embedding = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=self.max_len, output_dim=embed_dim)
        self.trans_block = TransformerBlock(embed_dim, num_heads, ff_dim, rate)
        self.final_layer = Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Return class probabilities for a batch of padded token-id sequences.

        Args:
            inputs: Integer tensor of token ids indexed as (batch, position),
                where id 0 marks padding. The number of positions must not
                exceed `max_len`.
            training: Whether dropout is active.

        Returns:
            Tensor of softmax probabilities, one row per input sequence.
        """
        x = self.embedding(inputs)

        # Build the position ids from the actual input length rather than a
        # global constant, so shorter batches are accepted as well.
        seq_len = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        x = x + self.pos_emb(positions)

        x = self.trans_block(x, training=training)

        # The sequences are padded at the end, so the last position is a padding
        # slot for nearly every sample; reading it alone ignores the sentence's
        # own token positions. Average over the real (non-zero id) tokens instead.
        token_mask = tf.expand_dims(tf.cast(tf.not_equal(inputs, 0), x.dtype), axis=-1)
        summed = tf.reduce_sum(x * token_mask, axis=1)
        # Clamp to 1 so an all-padding row yields zeros instead of dividing by zero.
        token_count = tf.maximum(tf.reduce_sum(token_mask, axis=1), 1.0)
        x = summed / token_count

        return self.final_layer(x)


In [None]:
# Compile and train the model
model = Transformer(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
)
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",  # labels are integer class ids
    metrics=["accuracy"],
)

# The test split is also used as the validation data during training.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=3,
    validation_data=(x_test, y_test),
)


In [None]:
# Persist the trained model to an HDF5 file in the current working directory.
# NOTE(review): `model` is a subclassed tf.keras.Model; some tf.keras versions
# refuse to save subclassed models in HDF5 format. Confirm this runs on the
# target version, or switch to `model.save_weights(...)` / the `.keras` format.
model.save("mon_modele_transformer.h5")


In [None]:
# from tensorflow.keras.models import load_model

# # Load the .h5 model
# model = load_model("D:/Projet_Deep_Learning/emotion_transformer_project/Models/mon_modele_transformer.h5")

In [None]:
# Helper that predicts the emotion of a single sentence
def predict_emotion(text):
    """Print `text` together with the emotion label the model predicts for it.

    The sentence goes through the same tokenizer and post-padding as the
    training data, and the highest-scoring class index is mapped to its name
    via the label feature of the dataset's train split.

    Args:
        text: The sentence to classify.

    Returns:
        None; the result is printed.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(token_ids, maxlen=maxlen, padding='post')
    class_probs = model.predict(model_input)
    # A single sentence is predicted, so the flat argmax is the class index.
    emotion = dataset['train'].features['label'].names[np.argmax(class_probs)]
    print("Texte :", text)
    print("Émotion prédite :", emotion)


In [None]:
# Prediction smoke test
for sentence in (
    "I feel so happy and excited today!",
    "I'm scared and I don't know what to do.",
):
    predict_emotion(sentence)


In [None]:
# More manual checks
for sentence in (
    "What a beautiful day, I can't stop smiling!",
    "I just got promoted, I'm so proud of myself.",
):
    predict_emotion(sentence)


In [None]:
# More manual checks
for sentence in (
    "I feel like crying, everything is falling apart.",
    "Nothing brings me joy anymore, I feel empty.",
):
    predict_emotion(sentence)


In [None]:
# Extra manual check on an informal, profane input.
predict_emotion("i hate those fuckers")

In [None]:
# Class balance of the training labels.
import collections

# Count plain Python ints: iterating the NumPy array directly makes the keys
# NumPy scalars, which print as `np.int64(...)` on recent NumPy versions.
print(collections.Counter(y_train.tolist()))