Installing libraries

In [None]:
!pip install numpy pandas neattext seaborn matplotlib scikit-learn joblib

Defaulting to user installation because normal site-packages is not writeable


Importing libraries

In [1]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import neattext as nt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

Loading the data

In [2]:
# The split files have no header row and separate the two fields with ";",
# so the column names are supplied explicitly.
column_names = ["content", "sentiment"]


def _read_split(path):
    """Read one dataset split from `path` into a two-column DataFrame."""
    return pd.read_csv(path, delimiter=";", names=column_names, encoding="utf-8")


train_data = _read_split("data/train.txt")
test_data = _read_split("data/test.txt")
val_data = _read_split("data/val.txt")

# Quick look at the first training rows.
print(train_data.head())

# Size of each split as (rows, columns).
print(f"Train Data: {train_data.shape}")
print(f"Test Data: {test_data.shape}")
print(f"Validation Data: {val_data.shape}")

# Missing-value count per column, in train / test / validation order.
for _split in (train_data, test_data, val_data):
    print(_split.isnull().sum())

# Class distribution of the training labels.
print(train_data["sentiment"].value_counts())

                                             content sentiment
0                            i didnt feel humiliated   sadness
1  i can go from feeling so hopeless to so damned...   sadness
2   im grabbing a minute to post i feel greedy wrong     anger
3  i am ever feeling nostalgic about the fireplac...      love
4                               i am feeling grouchy     anger
Train Data: (16000, 2)
Test Data: (2000, 2)
Validation Data: (2000, 2)
content      0
sentiment    0
dtype: int64
content      0
sentiment    0
dtype: int64
content      0
sentiment    0
dtype: int64
sentiment
joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: count, dtype: int64


Text preprocessing

In [3]:
def clean_text(text):
    """Return `text` with punctuation and stopwords removed (via neattext), lowercased.

    The steps run in this order: remove punctuation, remove stopwords, then
    lowercase the remaining text.
    """
    return nt.TextCleaner(text).remove_puncts().remove_stopwords().text.lower()


# Apply the same cleaning to every split so train, test and validation features
# are built from identically processed text.
for _split in (train_data, test_data, val_data):
    _split["cleaned_content"] = _split["content"].apply(clean_text)

print(train_data[["content", "cleaned_content"]].head())

                                             content  \
0                            i didnt feel humiliated   
1  i can go from feeling so hopeless to so damned...   
2   im grabbing a minute to post i feel greedy wrong   
3  i am ever feeling nostalgic about the fireplac...   
4                               i am feeling grouchy   

                               cleaned_content  
0                        didnt feel humiliated  
1  feeling hopeless damned hopeful cares awake  
2    im grabbing minute post feel greedy wrong  
3    feeling nostalgic fireplace know property  
4                              feeling grouchy  


Converting text to vectors

In [4]:
# Fit the TF-IDF vectorizer on the training split only; the test and validation
# splits are transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_data["cleaned_content"])
X_test, X_val = (
    vectorizer.transform(split["cleaned_content"])
    for split in (test_data, val_data)
)

Label Encoding

In [5]:
# Map the sentiment strings to integer class ids. The encoder is fitted on the
# training labels and then reused unchanged for the other two splits.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_data["sentiment"])
y_test, y_val = map(
    label_encoder.transform,
    (test_data["sentiment"], val_data["sentiment"]),
)

Training logistic regression model

In [6]:
# `multi_class` is intentionally not passed: the argument is deprecated in recent
# scikit-learn releases (it triggers a FutureWarning), and with the default solver
# a multinomial model is already what gets fitted when there are more than two classes.
# random_state is kept fixed for reproducibility.
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

Evaluating the logistic regression model

In [7]:
# Score the logistic-regression model on the test split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Test Accuracy: {accuracy:.4f}")

# Per-class precision / recall / F1, labelled with the original sentiment names.
lr_report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print("\nClassification Report:\n", lr_report)

Test Accuracy: 0.8720

Classification Report:
               precision    recall  f1-score   support

       anger       0.89      0.83      0.86       275
        fear       0.88      0.82      0.85       224
         joy       0.85      0.95      0.90       695
        love       0.79      0.62      0.69       159
     sadness       0.90      0.93      0.91       581
    surprise       0.90      0.53      0.67        66

    accuracy                           0.87      2000
   macro avg       0.87      0.78      0.81      2000
weighted avg       0.87      0.87      0.87      2000



Saving the model

In [18]:
# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
Path("./models").mkdir(parents=True, exist_ok=True)

# Persist the classifier together with the fitted vectorizer and label encoder:
# all three are needed to turn raw text into a predicted emotion later.
joblib.dump(model, "./models/logistic_regression_emotion.pkl")
joblib.dump(vectorizer, "./models/tfidf_vectorizer.pkl")
joblib.dump(label_encoder, "./models/label_encoder.pkl")

print("Model and vectorizer saved successfully!")

Model and vectorizer saved successfully!


Loading the model

In [8]:
# NOTE: joblib.load unpickles its input, which can execute arbitrary code.
# Only load artefacts produced by this notebook or another trusted source.
saved_model = joblib.load("./models/logistic_regression_emotion.pkl")
saved_vectorizer = joblib.load("./models/tfidf_vectorizer.pkl")
saved_label_encoder = joblib.load("./models/label_encoder.pkl")

sample_text = ["I am feeling very happy today!"]

# Run the raw text through the same cleaning the training text received
# (punctuation removal, stopword removal, lowercasing) so that inference
# features are built the same way as the training features.
cleaned_sample = [
    nt.TextCleaner(text).remove_puncts().remove_stopwords().text.lower()
    for text in sample_text
]

text_vector = saved_vectorizer.transform(cleaned_sample)
predicted_label = saved_model.predict(text_vector)

# Convert the integer class id back to its sentiment name.
predicted_emotion = saved_label_encoder.inverse_transform(predicted_label)

print("Predicted Emotion:", predicted_emotion[0])

Predicted Emotion: joy


Training naive bayes model

In [9]:
# Multinomial Naive Bayes with library-default settings, trained on the same
# TF-IDF features and encoded labels as the other models.
nb_model = MultinomialNB()
nb_model.fit(X=X_train, y=y_train)

Evaluating nb model

In [14]:
# Score the Naive Bayes model on the test split.
y_pred_nb = nb_model.predict(X_test)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

print(f"Test Accuracy: {accuracy_nb:.4f}\n")

# zero_division=0 reports 0.0 for a class the model never predicts, instead of
# emitting an UndefinedMetricWarning for each affected metric.
nb_report = classification_report(
    y_test,
    y_pred_nb,
    target_names=label_encoder.classes_,
    zero_division=0,
)
print("Classification Report:\n", nb_report)

Test Accuracy: 0.6995

Classification Report:
               precision    recall  f1-score   support

       anger       0.95      0.36      0.52       275
        fear       0.90      0.33      0.49       224
         joy       0.66      0.98      0.79       695
        love       1.00      0.07      0.13       159
     sadness       0.69      0.92      0.79       581
    surprise       0.00      0.00      0.00        66

    accuracy                           0.70      2000
   macro avg       0.70      0.44      0.45      2000
weighted avg       0.74      0.70      0.64      2000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving the nb model

In [19]:
# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
Path("./models").mkdir(parents=True, exist_ok=True)

joblib.dump(nb_model, "./models/naive_bayes_emotion.pkl")
print("Naive Bayes model saved successfully!")

Naive Bayes model saved successfully!


Training SVM model

In [11]:
# Support-vector classifier with a linear kernel, trained on the same TF-IDF
# features and encoded labels as the other models; seed fixed at 42.
svm_model = SVC(
    kernel="linear",
    random_state=42,
)
svm_model.fit(X=X_train, y=y_train)

Evaluating SVM model

In [15]:
# Keep the SVM results under their own names (as the Naive Bayes cell does with
# its `_nb` suffix) so the logistic-regression `y_pred` / `accuracy` values are
# not silently overwritten when this cell runs.
y_pred_svm = svm_model.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"Test Accuracy: {accuracy_svm:.4f}")
print("Classification Report:\n", classification_report(y_test, y_pred_svm, target_names=label_encoder.classes_))

Test Accuracy: 0.8795
Classification Report:
               precision    recall  f1-score   support

       anger       0.88      0.87      0.87       275
        fear       0.86      0.87      0.86       224
         joy       0.88      0.93      0.91       695
        love       0.77      0.71      0.74       159
     sadness       0.93      0.90      0.92       581
    surprise       0.73      0.61      0.66        66

    accuracy                           0.88      2000
   macro avg       0.84      0.81      0.83      2000
weighted avg       0.88      0.88      0.88      2000



Saving SVM model

In [24]:
# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
Path("./models").mkdir(parents=True, exist_ok=True)

joblib.dump(svm_model, "./models/svm_emotion.pkl")

# A second copy of the same SVM is written to the working directory under a
# generic name.
# NOTE(review): in the visible cells the models are only compared on the test
# split and val_data is never used; consider selecting the "best" model on the
# validation split and reporting test metrics once at the end.
joblib.dump(svm_model, "best_model_final.pkl")

print("SVM model saved successfully!")

SVM model saved successfully!
