In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [2]:
## Load the datatset
df=pd.read_csv('dataset\combined_emotion.csv')

# Extract text and labels
texts = df["sentence"].values
labels = df["emotion"].values

# Encode labels into integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)  # Convert labels to numerical values

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
# Tokenization
max_words = 10000  # Vocabulary size
max_len = 100      # Maximum sequence length

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding sequences to ensure uniform input size
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

In [3]:
from gensim.models import Word2Vec

# Load Gensim Word2Vec model
w2v_model = Word2Vec.load("D:/Projects/Emotion Detection/model/word2vec_model.bin")

# Create an embedding matrix for Keras Embedding layer
embedding_dim = w2v_model.vector_size
embedding_matrix = np.zeros((max_words, embedding_dim))

word_index = tokenizer.word_index
for word, i in word_index.items():
    if i < max_words:  # Limit to max_words
        if word in w2v_model.wv:
            embedding_matrix[i] = w2v_model.wv[word]

# Now use this matrix in the embedding layer
embedding_layer = Embedding(input_dim=max_words, 
                            output_dim=embedding_dim, 
                            weights=[embedding_matrix], 
                            input_length=max_len, 
                            trainable=False)  # Set trainable=False to keep Word2Vec embeddings fixed




In [4]:
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Embedding, LayerNormalization, LeakyReLU
from tensorflow.keras.models import Sequential

model = Sequential([
    embedding_layer,  # Using the pre-trained Word2Vec embeddings
    Bidirectional(LSTM(256, return_sequences=True)),
    LayerNormalization(),
    Dropout(0.3),
    Bidirectional(LSTM(128)),
    Dense(64),
    LeakyReLU(),
    Dropout(0.5),
    Dense(len(np.unique(labels)), activation='softmax')  # Output layer
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
# TRAIN MODEL
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(
    X_train_pad, y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_pad, y_test),
    callbacks=[early_stopping]
)

## Training done on Google Colab.

In [5]:
from tensorflow.keras.models import load_model

model = load_model("../model/biLSTM.keras")

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
def predict_emotion(sentence):
    # Load tokenizer and label encoder
    #with open("tokenizer.pkl", "rb") as file:
        #tokenizer = pickle.load(file)

    #with open("label_encoder.pkl", "rb") as file:
        #label_encoder = pickle.load(file)

    # Preprocess input sentence
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')

    # Make prediction
    prediction = model.predict(padded_sequence)
    predicted_label = np.argmax(prediction)
    predicted_emotion = label_encoder.inverse_transform([predicted_label])[0]

    return predicted_emotion




In [7]:
# Example usage
example_sentences =  [
        "The weather is wonderful today!",
        "I am feeling really sad and lonely.",
        "What an exciting game we had last night!",
        "I can't believe how much I've learned this year!",
        "I was so angry during the meeting today.",
        "I can't believe how everything turned out today, it's a bit overwhelming.",
        "I guess I'm doing okay, but it's not as good as I hoped.",
        "That was such a fantastic presentation, even though I made some mistakes.",
        "I'm so glad I made it through the day, but I'm exhausted!",
        "I can't stop thinking about how much better things could have been.",
        "It was a long day, but I had a lot of fun!",
        "My cat died today."
    ]

for i in example_sentences:
    print(i,":",predict_emotion(i))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 374ms/step
The weather is wonderful today! : joy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
I am feeling really sad and lonely. : sad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
What an exciting game we had last night! : anger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
I can't believe how much I've learned this year! : sad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
I was so angry during the meeting today. : anger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
I can't believe how everything turned out today, it's a bit overwhelming. : sad
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
I guess I'm doing okay, but it's not as good as I hoped. : joy
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
That was such a fantastic presentation, even th

In [11]:
import joblib
import os
model_path = "../model"
os.makedirs(model_path, exist_ok=True)


joblib.dump(tokenizer, os.path.join(model_path,'tokenizer.joblib'))
joblib.dump(label_encoder, os.path.join(model_path,'label_encoder.joblib'))

['../model\\label_encoder.joblib']