In [23]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping

In [24]:
data = pd.read_csv('combined_emotion.csv')
data.head()

Unnamed: 0,sentence,emotion
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sad
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sad
4,i am a kindergarten teacher and i am thoroughl...,fear


In [25]:
data.describe()

Unnamed: 0,sentence,emotion
count,422746,422746
unique,393822,6
top,i feel more adventurous willing to take risks,joy
freq,16,143067


In [26]:
data['emotion'].unique()

array(['fear', 'sad', 'love', 'joy', 'suprise', 'anger'], dtype=object)

In [27]:
data.duplicated().sum()
data = data.drop_duplicates()

In [28]:
texts = data['sentence']
labels = data['emotion']

label_map = {'fear': 0, 'sad': 1, 'love': 2, 'joy': 3, 'suprise': 4, 'anger': 5 }
labels = labels.map(label_map)

In [29]:
max_words = 10000  # Maximum number of words for tokenization
max_len = 1000
embedding_dim = 128  # Dimension of embedding vectors
num_classes = len(label_map) # Number of emotion classes

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)

X = tokenizer.texts_to_sequences(texts)
X = pad_sequences(X, maxlen=max_len)

y = to_categorical(labels, num_classes=num_classes)

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
class_weights_array = class_weight.compute_class_weight('balanced', classes=np.unique(labels), y=labels)
class_weights_dict = {i: class_weights_array[i] for i in range(len(class_weights_array))}

In [31]:
model = models.Sequential([
    layers.Embedding(input_dim=max_words, output_dim=embedding_dim),
    layers.LSTM(128, return_sequences=True),
    layers.LSTM(64),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax') ])

In [32]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping( monitor='val_loss', patience=10, restore_best_weights=True  )
model.fit(
    X_train, y_train,
    epochs=200,
    batch_size= 1024,
    class_weight=class_weights_dict,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping]
)

Epoch 1/200
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 634ms/step - accuracy: 0.5462 - loss: 1.1737 - val_accuracy: 0.9118 - val_loss: 0.2219
Epoch 2/200
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 636ms/step - accuracy: 0.9224 - loss: 0.1648 - val_accuracy: 0.9297 - val_loss: 0.1511
Epoch 3/200
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 641ms/step - accuracy: 0.9361 - loss: 0.1213 - val_accuracy: 0.9339 - val_loss: 0.1319
Epoch 4/200
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 646ms/step - accuracy: 0.9410 - loss: 0.1085 - val_accuracy: 0.9339 - val_loss: 0.1331
Epoch 5/200
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 668ms/step - accuracy: 0.9413 - loss: 0.1074 - val_accuracy: 0.9360 - val_loss: 0.1244
Epoch 6/200
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 657ms/step - accuracy: 0.9433 - loss: 0.1003 - val_accuracy: 0.9363 - val_loss: 0.130

<keras.src.callbacks.history.History at 0x7fda2832b130>

In [33]:
sentences = [
    "I am so happy to see this!",
    "This makes me really sad.",
    "I am absolutely furious right now!",
    "I love this so much!",
    "I feel scared about the future."
]

for i in range(len(sentences)):
  input_sequence = tokenizer.texts_to_sequences([sentences[i]])
  input_padded = pad_sequences(input_sequence, maxlen=max_len)
  predicted_probs = model.predict(input_padded)
  predicted_class_idx = np.argmax(predicted_probs, axis=1)[0]
  reverse_label_map = {v: k for k, v in label_map.items()}
  predicted_emotion = reverse_label_map[predicted_class_idx]
  print(f"Predicted Emotion: {predicted_emotion}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
Predicted Emotion: joy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Predicted Emotion: sad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Predicted Emotion: anger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Predicted Emotion: anger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Predicted Emotion: fear
