In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

2023-05-12 15:21:04.663798: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
df = pd.read_csv("emotion_dataset_2.csv")

In [3]:
df

Unnamed: 0.1,Unnamed: 0,Emotion,Text,Clean_Text
0,0,neutral,Why ?,
1,1,joy,Sage Act upgrade on my to do list for tommorow.,Sage Act upgrade list tommorow
2,2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...,WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...
3,3,joy,Such an eye ! The true hazel eye-and so brill...,eye true hazel eyeand brilliant Regular feat...
4,4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...,ugh babe hugggzzz u babe naamazed nga ako e...
...,...,...,...,...
34787,34787,surprise,@MichelGW have you gift! Hope you like it! It'...,gift Hope like it hand wear Itll warm Lol
34788,34788,joy,The world didnt give it to me..so the world MO...,world didnt meso world DEFINITELY cnt away
34789,34789,anger,A man robbed me today .,man robbed today
34790,34790,fear,"Youu call it JEALOUSY, I call it of #Losing YO...",Youu JEALOUSY #Losing YOU


In [4]:
train_texts, test_texts, train_labels, test_labels = train_test_split(df["Text"], df["Emotion"], test_size=0.2)

In [5]:
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_texts)

train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

In [6]:
maxlen = 50
train_padded = pad_sequences(train_sequences, maxlen=maxlen, padding='post', truncating='post')
test_padded = pad_sequences(test_sequences, maxlen=maxlen, padding='post', truncating='post')

In [7]:
df['Emotion'].unique()

array(['neutral', 'joy', 'sadness', 'fear', 'surprise', 'anger', 'shame',
       'disgust'], dtype=object)

In [8]:
label_dict = {"neutral": 0, "joy": 1, "sadness": 2, "anger": 5, "fear": 3,"surprise":4,"shame":6,"disgust":7}
num_classes = len(label_dict)
train_labels = train_labels.map(label_dict)
test_labels = test_labels.map(label_dict)
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=50, input_length=maxlen),
    tf.keras.layers.GRU(units=64, dropout=0.2, recurrent_dropout=0.2),
    tf.keras.layers.Dense(units=num_classes, activation='softmax')
])

In [10]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(train_padded, train_labels, epochs=10, batch_size=32, validation_data=(test_padded, test_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13dfc9e50>

In [12]:
test_loss, test_accuracy = model.evaluate(test_padded, test_labels)
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

from sklearn.metrics import classification_report
test_predictions = model.predict(test_padded)
test_pred_labels = np.argmax(test_predictions, axis=1)
test_true_labels = np.argmax(test_labels, axis=1)
label_names = list(label_dict.keys())
print(classification_report(test_true_labels, test_pred_labels, target_names=label_names))

Test loss: 1.191728115081787
Test accuracy: 0.6335680484771729
              precision    recall  f1-score   support

     neutral       0.72      0.70      0.71       469
         joy       0.67      0.75      0.71      2184
     sadness       0.58      0.58      0.58      1353
       anger       0.67      0.67      0.67      1083
        fear       0.51      0.45      0.47       780
    surprise       0.62      0.59      0.61       884
       shame       0.93      0.86      0.89        29
     disgust       0.62      0.19      0.29       177

    accuracy                           0.63      6959
   macro avg       0.66      0.60      0.62      6959
weighted avg       0.63      0.63      0.63      6959

