In [5]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

In [6]:
df=pd.read_csv('emotion_dataset.csv', encoding='latin-1')

In [7]:
df.head()

Unnamed: 0,text,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [8]:
def preprocess_text(text):
    text = re.sub(r'[^a-zA-Z]', '', text)
    text = re.sub(r"@\w+", '', text)
    text = re.sub(r'#\w+', '', text)
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r"\S+", '', text).strip()
    return text

In [9]:
df['text']=df['text'].astype(str).apply(preprocess_text)

In [11]:
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

In [13]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
vocab_size = len(tokenizer.word_index) + 1
max_len = 100

X= tokenizer.texts_to_sequences(df['text'])
X= pad_sequences(X, maxlen=max_len, padding='post')
Y= df['label'].values

In [14]:
ros= RandomOverSampler(random_state=42)
X_resampled, Y_resampled = ros.fit_resample(X, Y)

In [15]:
X_train, X_test, Y_train, Y_test = train_test_split(X_resampled, Y_resampled, test_size=0.2, random_state=42)

In [17]:
model=Sequential(
    [
        Embedding(vocab_size, 100),
        Bidirectional(LSTM(128, return_sequences=True)),
        BatchNormalization(),
        Dropout(0.3),
        Bidirectional(LSTM(64)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dense(len(np.unique(Y_resampled)), activation='softmax')
    
    ]
)

In [19]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history=model.fit(X_train, Y_train, epochs=10, batch_size=32,validation_split=0.1,verbose=1)

Epoch 1/10
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 327ms/step - accuracy: 0.2506 - loss: 1.4932 - val_accuracy: 0.2363 - val_loss: 1.3932
Epoch 2/10
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 267ms/step - accuracy: 0.2485 - loss: 1.4227 - val_accuracy: 0.2363 - val_loss: 1.5212
Epoch 3/10
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 194ms/step - accuracy: 0.2426 - loss: 1.4052 - val_accuracy: 0.2483 - val_loss: 1.4119
Epoch 4/10
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 198ms/step - accuracy: 0.2472 - loss: 1.3978 - val_accuracy: 0.2697 - val_loss: 1.3937
Epoch 5/10
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 361ms/step - accuracy: 0.2472 - loss: 1.3942 - val_accuracy: 0.2483 - val_loss: 1.5428
Epoch 6/10
[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 220ms/step - accuracy: 0.2504 - loss: 1.3919 - val_accuracy: 0.2483 - val_loss: 1.3870
Epoch 

In [23]:
import joblib
model.save('emotion_model.h5')
joblib.dump(tokenizer, 'tokenizer.jb')
joblib.dump(label_encoder, 'label_encoder.jb')



['label_encoder.jb']