In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

emo_df=pd.read_csv('data/text.csv')
emo_df

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4
...,...,...,...
416804,416804,i feel like telling these horny devils to find...,2
416805,416805,i began to realize that when i was feeling agi...,3
416806,416806,i feel very curious be why previous early dawn...,5
416807,416807,i feel that becuase of the tyranical nature of...,3


In [2]:
emo_df.drop(columns=['Unnamed: 0'],inplace=True)
# 0: sadness 1:joy 2:love 3:anger 4:fear 5:surprise

In [4]:
emo_df['label'].value_counts()

label
1    141067
0    121187
3     57317
4     47712
2     34554
5     14972
Name: count, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
train_set,val_set=train_test_split(emo_df,test_size=0.2,random_state=42,stratify=emo_df['label'])
train_set.shape,val_set.shape

((333447, 2), (83362, 2))

In [7]:
from tensorflow.keras.layers import TextVectorization,Embedding,Dense,Input

In [8]:
train_inputs=train_set['text'].to_numpy()
val_inputs=val_set['text'].to_numpy()
train_target=train_set['label'].to_numpy()
val_target=val_set['label'].to_numpy()

In [9]:
text_vectorizer=TextVectorization(max_tokens=None,output_mode='int',split='whitespace',ngrams=None,output_sequence_length=None,
                                  standardize='lower_and_strip_punctuation')

In [15]:
max_vocab=10000
tl=[len(i.split()) for i in train_inputs]
print(np.percentile(tl,[50,75,90,95,99]))
max_length=35

[17. 25. 35. 41. 52.]


In [16]:
text_vectorizer=TextVectorization(max_tokens=max_vocab,output_sequence_length=max_length)

In [17]:
text_vectorizer.adapt(train_inputs)

In [18]:
embed=Embedding(input_dim=max_vocab,output_dim=128)

In [19]:
import mlflow
import mlflow.keras

In [22]:
from tensorflow.keras.layers import Dense,GRU,LSTM,Bidirectional,Input,Dropout
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau

In [23]:
mlflow.set_experiment('Emotion_Detection')

2025/10/27 00:52:42 INFO mlflow.tracking.fluent: Experiment with name 'Emotion_Detection' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/KM/Coding%21%21%21/vs%20code/Scikit%20Learn/Emotions%20Sentiment%20Analysis/mlruns/914653073148891092', creation_time=1761506562096, experiment_id='914653073148891092', last_update_time=1761506562096, lifecycle_stage='active', name='Emotion_Detection', tags={}>

In [27]:
from sklearn.metrics import classification_report
def model_evaluate(model):
    train_preds=np.argmax(model.predict(train_inputs),axis=1)
    val_preds=np.argmax(model.predict(val_inputs),axis=1)
    train_report=classification_report(train_target,train_preds,output_dict=True)
    test_report=classification_report(val_target,val_preds,output_dict=True)

    train_acc = train_report['accuracy']
    train_prec = train_report['weighted avg']['precision']
    train_rec = train_report['weighted avg']['recall']
    train_f1 = train_report['weighted avg']['f1-score']

    test_acc = test_report['accuracy']
    test_prec = test_report['weighted avg']['precision']
    test_rec = test_report['weighted avg']['recall']
    test_f1 = test_report['weighted avg']['f1-score']

    print(f'TRAIN Metrics: \n {train_report}')
    print(f'TEST Metrics: \n {test_report}')
    mlflow.log_metric('Train_Accuracy',train_acc)
    mlflow.log_metric('Train_Precision',train_prec)
    mlflow.log_metric('Train_Recall',train_rec)
    mlflow.log_metric('Train_F1',train_f1)
    mlflow.log_metric('Test_Accuracy',test_acc)
    mlflow.log_metric('Test_Precison',test_prec)
    mlflow.log_metric('Test_Recall',test_rec)
    mlflow.log_metric('Test_F1',test_f1)

In [26]:
from sklearn.utils.class_weight import compute_class_weight
class_weights=compute_class_weight(class_weight='balanced',classes=np.unique(train_target),y=train_target)
class_weights=dict(enumerate(class_weights))

In [None]:

with mlflow.start_run(run_name='GRU'):
    inputs=Input(shape=(1,),dtype=tf.string)
    x=text_vectorizer(inputs)
    x=embed(x)
    x=GRU(32,activation='tanh',return_sequences=True,recurrent_dropout=0.2,dropout=0.3)(x)
    x=GRU(32,activation='tanh',recurrent_dropout=0.2,dropout=0.3)(x)
    x=Dropout(0.3)(x)
    outputs=Dense(6,activation='softmax')(x)
    model_1=Model(inputs,outputs)
    model_1.compile(loss=SparseCategoricalCrossentropy(),optimizer=Adam(),metrics=['accuracy'])
    early_stopping=EarlyStopping(monitor='val_loss',patience=2,restore_best_weights=True)
    reduce_lr=ReduceLROnPlateau(monitor='val_loss',patience=2,factor=0.5,min_lr=1e-6)
    model_1.fit(train_inputs,train_target,epochs=25,validation_data=(val_inputs,val_target),callbacks=[early_stopping,reduce_lr],batch_size=64,class_weight=class_weights)

    mlflow.log_params({'epochs':25,'loss':'SparseCategoricalCrossentropy','optimizer':'Adam','learning_rate':'0.001',
                       'model_type':'GRU','activation':'tanh','layers':2,'units':'[32,32]','batch_size':64,'recurrent_dropout':0.2,'dropout':0.3,
                       'callbacks':'EarlyStopping,ReduceLROnPlateau'})
    model_evaluate(model_1)
    mlflow.set_tag('model_type','GRU')

Epoch 1/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 17ms/step - accuracy: 0.7562 - loss: 0.5565 - val_accuracy: 0.9263 - val_loss: 0.1715 - learning_rate: 0.0010
Epoch 2/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 16ms/step - accuracy: 0.9273 - loss: 0.1641 - val_accuracy: 0.9275 - val_loss: 0.1557 - learning_rate: 0.0010
Epoch 3/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 17ms/step - accuracy: 0.9302 - loss: 0.1507 - val_accuracy: 0.9280 - val_loss: 0.1578 - learning_rate: 0.0010
Epoch 4/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 17ms/step - accuracy: 0.9315 - loss: 0.1433 - val_accuracy: 0.9279 - val_loss: 0.1609 - learning_rate: 0.0010
[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 5ms/step
[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step
TRAIN Metrics: 
 {'0': {'precision': 0.9981513689714392, 'recall': 0.9412165159001

In [30]:

with mlflow.start_run(run_name='GRU v2'):
    inputs=Input(shape=(1,),dtype=tf.string)
    x=text_vectorizer(inputs)
    x=embed(x)
    x=GRU(64,activation='tanh',return_sequences=True,recurrent_dropout=0.3,dropout=0.3)(x)
    x=GRU(64,activation='tanh',recurrent_dropout=0.3,dropout=0.3)(x)
    x=Dropout(0.3)(x)
    outputs=Dense(6,activation='softmax')(x)
    model_2=Model(inputs,outputs)
    model_2.compile(loss=SparseCategoricalCrossentropy(),optimizer=Adam(),metrics=['accuracy'])
    early_stopping=EarlyStopping(monitor='val_loss',patience=2,restore_best_weights=True)
    reduce_lr=ReduceLROnPlateau(monitor='val_loss',patience=2,factor=0.5,min_lr=1e-6)
    model_2.fit(train_inputs,train_target,epochs=25,validation_data=(val_inputs,val_target),callbacks=[early_stopping,reduce_lr],batch_size=64,class_weight=class_weights)

    mlflow.log_params({'epochs':25,'loss':'SparseCategoricalCrossentropy','optimizer':'Adam','learning_rate':'0.001',
                       'model_type':'GRU','activation':'tanh','layers':2,'units':'[64,64]','batch_size':64,'recurrent_dropout':0.3,'dropout':0.3,
                       'callbacks':'EarlyStopping,ReduceLROnPlateau'})
    model_evaluate(model_2)
    mlflow.set_tag('model_type','GRU')

Epoch 1/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 21ms/step - accuracy: 0.8634 - loss: 0.3117 - val_accuracy: 0.9272 - val_loss: 0.1570 - learning_rate: 0.0010
Epoch 2/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 21ms/step - accuracy: 0.9306 - loss: 0.1436 - val_accuracy: 0.9267 - val_loss: 0.1505 - learning_rate: 0.0010
Epoch 3/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 23ms/step - accuracy: 0.9328 - loss: 0.1345 - val_accuracy: 0.9278 - val_loss: 0.1484 - learning_rate: 0.0010
Epoch 4/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 24ms/step - accuracy: 0.9345 - loss: 0.1288 - val_accuracy: 0.9274 - val_loss: 0.1522 - learning_rate: 0.0010
Epoch 5/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 24ms/step - accuracy: 0.9356 - loss: 0.1242 - val_accuracy: 0.9279 - val_loss: 0.1556 - learning_rate: 0.0010
[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━

In [31]:

with mlflow.start_run(run_name='LSTM v1'):
    inputs=Input(shape=(1,),dtype=tf.string)
    x=text_vectorizer(inputs)
    x=embed(x)
    x=LSTM(32,activation='tanh',return_sequences=True,recurrent_dropout=0.2,dropout=0.3)(x)
    x=LSTM(32,activation='tanh',recurrent_dropout=0.2,dropout=0.3)(x)
    x=Dropout(0.3)(x)
    outputs=Dense(6,activation='softmax')(x)
    model_3=Model(inputs,outputs)
    model_3.compile(loss=SparseCategoricalCrossentropy(),optimizer=Adam(),metrics=['accuracy'])
    early_stopping=EarlyStopping(monitor='val_loss',patience=2,restore_best_weights=True)
    reduce_lr=ReduceLROnPlateau(monitor='val_loss',patience=2,factor=0.5,min_lr=1e-6)
    model_3.fit(train_inputs,train_target,epochs=25,validation_data=(val_inputs,val_target),callbacks=[early_stopping,reduce_lr],batch_size=64,class_weight=class_weights)

    mlflow.log_params({'epochs':25,'loss':'SparseCategoricalCrossentropy','optimizer':'Adam','learning_rate':'0.001',
                       'model_type':'LSTM','activation':'tanh','layers':2,'units':'[32,32]','batch_size':64,'recurrent_dropout':0.2,'dropout':0.3,
                       'callbacks':'EarlyStopping,ReduceLROnPlateau'})
    model_evaluate(model_3)
    mlflow.set_tag('model_type','LSTM')

Epoch 1/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 20ms/step - accuracy: 0.8567 - loss: 0.3533 - val_accuracy: 0.9249 - val_loss: 0.1608 - learning_rate: 0.0010
Epoch 2/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 20ms/step - accuracy: 0.9303 - loss: 0.1463 - val_accuracy: 0.9270 - val_loss: 0.1546 - learning_rate: 0.0010
Epoch 3/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 20ms/step - accuracy: 0.9325 - loss: 0.1333 - val_accuracy: 0.9270 - val_loss: 0.1481 - learning_rate: 0.0010
Epoch 4/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 20ms/step - accuracy: 0.9345 - loss: 0.1264 - val_accuracy: 0.9272 - val_loss: 0.1444 - learning_rate: 0.0010
Epoch 5/25
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 20ms/step - accuracy: 0.9354 - loss: 0.1222 - val_accuracy: 0.9281 - val_loss: 0.1544 - learning_rate: 0.0010
Epoch 6/25
[1m5211/5211[0m [32m━━━━━━━━━━━

In [33]:
model_3.save('Models/lstm.keras')