In [28]:
!pip install pandas numpy seaborn neattext scikit-learn joblib



In [29]:
import pandas as pd
import numpy as np
import seaborn as sns
import neattext.functions as nfx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import joblib
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [30]:
# Read both CSV files from the working directory, in the same order as before
# (training first, then test).
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ("training.csv", "test.csv")
)

# Preview the first rows of the training frame.
train_df.head()

Unnamed: 0,text,label
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3


In [31]:
# Stack the two frames into one (with a renumbered index) and add a
# `clean_text` column: each text is coerced to str, passed through neattext's
# remove_punctuations, then through remove_stopwords.
combined_df = (
    pd.concat([train_df, test_df], ignore_index=True)
    .assign(
        clean_text=lambda frame: frame['text'].apply(
            lambda raw: nfx.remove_stopwords(nfx.remove_punctuations(str(raw)))
        )
    )
)

In [32]:
# Model inputs are the cleaned texts; targets come from the `label` column.
X, y = combined_df['clean_text'], combined_df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [33]:
# TF-IDF features over unigrams and bigrams.
tfidf = TfidfVectorizer(ngram_range=(1, 2))

# Fit on the training texts only, then reuse the fitted vectorizer
# to encode the held-out texts.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [34]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 40 trees. The seed is fixed (matching the seed used for
# the train/test split) so that the fitted forest, the metrics printed below
# and the model persisted later are reproducible across kernel restarts;
# without it every run trains a different forest.
rf = RandomForestClassifier(n_estimators=40, random_state=42)
rf.fit(X_train_tfidf, y_train)

# Evaluate on the held-out TF-IDF features.
predict1 = rf.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, predict1))
print(confusion_matrix(y_test, predict1))
print(classification_report(y_test, predict1))

Accuracy: 0.8827777777777778
[[1012   28    6   23   10    0]
 [  38 1113   32    4    8    4]
 [  12   74  212    3    1    0]
 [  24   32    1  398   10    0]
 [  30    8    1   16  346   19]
 [   2   15    1    0   20   97]]
              precision    recall  f1-score   support

           0       0.91      0.94      0.92      1079
           1       0.88      0.93      0.90      1199
           2       0.84      0.70      0.76       302
           3       0.90      0.86      0.88       465
           4       0.88      0.82      0.85       420
           5       0.81      0.72      0.76       135

    accuracy                           0.88      3600
   macro avg       0.87      0.83      0.85      3600
weighted avg       0.88      0.88      0.88      3600



In [35]:
from sklearn.svm import SVC

# Support-vector classifier with the solver capped at 1000 iterations;
# all other settings are the library defaults.
svm = SVC(max_iter=1000).fit(X_train_tfidf, y_train)

# Score the held-out split: accuracy first, then the confusion matrix and the
# per-class report, each on its own line.
predict2 = svm.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, predict2))
print(
    confusion_matrix(y_test, predict2),
    classification_report(y_test, predict2),
    sep="\n",
)



Accuracy: 0.8830555555555556
[[1002   23   10   24   18    2]
 [  32 1122   27    5   11    2]
 [   6   85  209    1    1    0]
 [  26   15    3  410   11    0]
 [  28    8    1   15  364    4]
 [  10   20    1    0   32   72]]
              precision    recall  f1-score   support

           0       0.91      0.93      0.92      1079
           1       0.88      0.94      0.91      1199
           2       0.83      0.69      0.76       302
           3       0.90      0.88      0.89       465
           4       0.83      0.87      0.85       420
           5       0.90      0.53      0.67       135

    accuracy                           0.88      3600
   macro avg       0.88      0.81      0.83      3600
weighted avg       0.88      0.88      0.88      3600



In [36]:
# Logistic regression baseline, allowed up to 1000 solver iterations.
lr = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Score the held-out split: accuracy first, then the confusion matrix and the
# per-class report, each on its own line.
predict3 = lr.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, predict3))
print(
    confusion_matrix(y_test, predict3),
    classification_report(y_test, predict3),
    sep="\n",
)

Accuracy: 0.8722222222222222
[[1037   24    2   10    6    0]
 [  16 1159   19    0    5    0]
 [  15  101  185    1    0    0]
 [  41   42    1  367   14    0]
 [  43   31    1   13  329    3]
 [  15   27    1    0   29   63]]
              precision    recall  f1-score   support

           0       0.89      0.96      0.92      1079
           1       0.84      0.97      0.90      1199
           2       0.89      0.61      0.72       302
           3       0.94      0.79      0.86       465
           4       0.86      0.78      0.82       420
           5       0.95      0.47      0.63       135

    accuracy                           0.87      3600
   macro avg       0.89      0.76      0.81      3600
weighted avg       0.88      0.87      0.87      3600



In [37]:
# Persist the random-forest classifier and the fitted TF-IDF vectorizer to the
# working directory so they can be reloaded for inference.
joblib.dump(value=rf, filename="emotion_model.pkl")
# Last expression of the cell: its return value (the written file list) is displayed.
joblib.dump(value=tfidf, filename="tfidf_vectorizer.pkl")

['tfidf_vectorizer.pkl']

In [38]:
# Deserialised artifacts keyed by file path, so repeated predictions do not
# re-read the pickles from disk on every call. Re-running this cell resets the
# cache; do that (or call `_ARTIFACT_CACHE.clear()`) after overwriting the files.
_ARTIFACT_CACHE = {}


def _load_artifact(path):
    """Return the object stored at `path`, loading it with joblib only on first use."""
    if path not in _ARTIFACT_CACHE:
        _ARTIFACT_CACHE[path] = joblib.load(path)
    return _ARTIFACT_CACHE[path]


def predict_emotion(text):
    """Predict the label for a single piece of text.

    The text is coerced to ``str``, stripped of punctuation and stopwords with
    neattext, vectorised with the saved vectorizer ("tfidf_vectorizer.pkl") and
    classified by the saved model ("emotion_model.pkl"). Both artifacts are
    read from the working directory on first use and then kept in memory.

    Parameters
    ----------
    text : Any
        Input to classify; converted with ``str(text)`` before cleaning.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the input, i.e. the
    predicted class label in whatever encoding the saved model was trained on.

    Raises
    ------
    Whatever ``joblib.load`` raises if either artifact file is missing or
    unreadable.
    """
    model = _load_artifact("emotion_model.pkl")
    tfidf = _load_artifact("tfidf_vectorizer.pkl")
    # Apply the same cleaning that was used on the training texts.
    cleaned = nfx.remove_stopwords(nfx.remove_punctuations(str(text)))
    # The vectorizer expects an iterable of documents, hence the one-item list.
    vector = tfidf.transform([cleaned])
    return model.predict(vector)[0]

In [39]:
# Interactive demo: keep classifying typed text until the user enters 'exit'.
while True:
    try:
        # Strip surrounding whitespace so inputs such as 'exit ' still quit.
        user_input = input("Enter text (or 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop cleanly
        # instead of leaving a traceback in the notebook.
        break
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing to classify; prompt again rather than predicting on empty text.
        continue
    emotion = predict_emotion(user_input)
    print("Predicted Emotion:", emotion)

Enter text (or 'exit'): i feel strong and good 
Predicted Emotion: 1
Enter text (or 'exit'): i feel like this was such a rude comment
Predicted Emotion: 3
Enter text (or 'exit'): i know a lot but i feel stupid
Predicted Emotion: 0
Enter text (or 'exit'): exit
