<a href="https://colab.research.google.com/github/Phantom9029/emotiondetection/blob/main/dl_emotional.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# 🔹 Load dataset
df = pd.read_csv("goemotions_subset.csv")

# 🔹 Reduce labels to 6 categories (Grouping emotions)
emotion_map = {
    "joy": "joy", "happiness": "joy", "contentment": "joy",
    "anger": "anger", "annoyance": "anger", "rage": "anger",
    "sadness": "sadness", "grief": "sadness",
    "fear": "fear", "nervousness": "fear",
    "surprise": "surprise", "shock": "surprise",
    "neutral": "neutral"
}
df["label"] = df["label"].map(emotion_map).fillna("neutral")  # Map emotions

# 🔹 Encode labels
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["label"])

# 🔹 Split dataset
X_train, X_test, y_train, y_test = train_test_split(df["text"], df["label"], test_size=0.2, random_state=42)

# 🔹 Tokenization
vocab_size = 20000  # Increased vocab size
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# 🔹 Padding sequences
max_length = 100  # Increased length to capture more context
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding="post")
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length, padding="post")

# 🔹 Build Optimized LSTM Model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=256, input_length=max_length),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dense(len(label_encoder.classes_), activation="softmax")
])

# 🔹 Compile Model
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# 🔹 Train Model
model.fit(X_train_pad, y_train, epochs=20, batch_size=64, validation_data=(X_test_pad, y_test))





Epoch 1/20




[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 507ms/step - accuracy: 0.6918 - loss: 1.1873 - val_accuracy: 0.7264 - val_loss: 1.0320
Epoch 2/20
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 513ms/step - accuracy: 0.7175 - loss: 1.0535 - val_accuracy: 0.7264 - val_loss: 1.0395
Epoch 3/20
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 510ms/step - accuracy: 0.7095 - loss: 1.0705 - val_accuracy: 0.7264 - val_loss: 1.0262
Epoch 4/20
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 501ms/step - accuracy: 0.7170 - loss: 1.0557 - val_accuracy: 0.7264 - val_loss: 1.0340
Epoch 5/20
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 509ms/step - accuracy: 0.7063 - loss: 1.0760 - val_accuracy: 0.7264 - val_loss: 1.0511
Epoch 6/20
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 516ms/step - accuracy: 0.7136 - loss: 1.0566 - val_accuracy: 0.7264 - val_loss: 1.0407
Epoch 7/20
[1m130/13



✅ Accuracy: 72.64%


In [7]:
# 🔹 Evaluate Model
loss, acc = model.evaluate(X_test_pad, y_test)
print(f"✅ Accuracy: {acc * 100:.2f}%")

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 96ms/step - accuracy: 0.7266 - loss: 1.0506
✅ Accuracy: 72.64%
