In [20]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [21]:
from gensim.models import Word2Vec

# Load Gensim Word2Vec model correctly
w2v_model = Word2Vec.load("D:/Projects/Emotion Detection/model/word2vec_model.bin")


In [22]:
## Load the datatset
df=pd.read_csv('dataset\combined_emotion.csv')

# Extract text and labels
texts = df["sentence"].values
labels = df["emotion"].values

# Encode labels into integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)  # Convert labels to numerical values

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

In [42]:
labels

array([1, 4, 1, ..., 2, 1, 0])

In [49]:
label_encoder.inverse_transform([4])

array(['sad'], dtype=object)

In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 422746 entries, 0 to 422745
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   sentence  422746 non-null  object
 1   emotion   422746 non-null  object
dtypes: object(2)
memory usage: 6.5+ MB


In [23]:
# Tokenization
max_words = 10000  # Vocabulary size
max_len = 100      # Maximum sequence length

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding sequences to ensure uniform input size
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')


In [25]:
# Define model
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),  # Word embeddings
    LSTM(128, return_sequences=True),  # LSTM layer
    Dropout(0.5),
    LSTM(64),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(len(np.unique(labels)), activation='softmax')  # Output layer (adjust based on label count)
])

# Compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary
#model.summary()


In [28]:
# Define early stopping
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [30]:
# Train the model
history = model.fit(
    X_train_pad, y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_pad, y_test),
    callbacks=[early_stopping]
)



Epoch 1/10
[1m10569/10569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1369s[0m 130ms/step - accuracy: 0.3369 - loss: 1.5762 - val_accuracy: 0.3404 - val_loss: 1.5759
Epoch 2/10
[1m10569/10569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1455s[0m 138ms/step - accuracy: 0.3380 - loss: 1.5764 - val_accuracy: 0.3404 - val_loss: 1.5759
Epoch 3/10
[1m10569/10569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1148s[0m 109ms/step - accuracy: 0.3362 - loss: 1.5760 - val_accuracy: 0.3404 - val_loss: 1.5758
Epoch 4/10
[1m10569/10569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4065s[0m 385ms/step - accuracy: 0.3379 - loss: 1.5738 - val_accuracy: 0.3404 - val_loss: 1.5759
Epoch 5/10
[1m10569/10569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1268s[0m 120ms/step - accuracy: 0.3378 - loss: 1.5760 - val_accuracy: 0.3404 - val_loss: 1.5758
Epoch 6/10
[1m10569/10569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1178s[0m 111ms/step - accuracy: 0.3380 - loss: 1.5751 - val_accuracy: 

In [32]:
# Evaluate on test data
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Make predictions
predictions = model.predict(X_test_pad)
predicted_labels = np.argmax(predictions, axis=1)

# Decode predicted labels
predicted_emotions = label_encoder.inverse_transform(predicted_labels)


[1m2643/2643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 30ms/step - accuracy: 0.3398 - loss: 1.5762
Test Accuracy: 0.3404
[1m2643/2643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 32ms/step


In [None]:
def predict_emotion(sentence):
    # Load tokenizer and label encoder
    #with open("tokenizer.pkl", "rb") as file:
        #tokenizer = pickle.load(file)

    #with open("label_encoder.pkl", "rb") as file:
        #label_encoder = pickle.load(file)

    # Preprocess input sentence
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')

    # Load model
    #model = tf.keras.models.load_model("emotion_lstm_model.h5")

    # Make prediction
    prediction = model.predict(padded_sequence)
    predicted_label = np.argmax(prediction)
    predicted_emotion = label_encoder.inverse_transform([predicted_label])[0]

    return predicted_emotion




In [55]:
# Example usage
example_sentences =  [
        "The weather is wonderful today!",
        "I am feeling really sad and lonely.",
        "What an exciting game we had last night!",
        "I can't believe how much I've learned this year!",
        "I was so angry during the meeting today.",
        "I can't believe how everything turned out today, it's a bit overwhelming.",
        "I guess I'm doing okay, but it's not as good as I hoped.",
        "That was such a fantastic presentation, even though I made some mistakes.",
        "I'm so glad I made it through the day, but I'm exhausted!",
        "I can't stop thinking about how much better things could have been.",
        "It was a long day, but I had a lot of fun!",
        "My cat died today."
    ]

for i in example_sentences:
    print(i,":",predict_emotion(i))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
The weather is wonderful today! : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
I am feeling really sad and lonely. : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
What an exciting game we had last night! : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
I can't believe how much I've learned this year! : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
I was so angry during the meeting today. : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
I can't believe how everything turned out today, it's a bit overwhelming. : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
I guess I'm doing okay, but it's not as good as I hoped. : 2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
That was such a fantastic presentation, even though I made some mi