# Emotion Classifier : Text Based using LogisticRegression

### 1. Imports

In [89]:
import pandas as pd

In [90]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report

In [91]:
import sys
sys.path.append("../..")
from scripts import utils

### 2. Load and Train data

In [92]:
data = pd.read_csv("../../data/Emotion_classify_Data.csv")

In [93]:
labels = data['Emotion'].unique()

Mapping emotions to numeric labels: 
fear=0, anger=1, joy=2

In [94]:
emotion_map = {
    'fear' : 0,
    'anger' : 1,
    'joy' : 2
}

In [95]:
data['Emotion_encoded'] = data['Emotion'].map(emotion_map)

In [96]:
X = data.Comment
y = data.Emotion_encoded

In [97]:
vectorizer = TfidfVectorizer()

In [98]:
X_vector = vectorizer.fit_transform(X)

In [99]:
X_train, X_test, y_train, y_test = train_test_split(X_vector, y, test_size = 0.20, random_state = 42)

### 3. Model Development

In [100]:
model = LogisticRegression()

In [101]:
model.fit(X_train, y_train)

### 4. Prediction and Evaluation

In [102]:
pred = model.predict(X_test)

In [103]:
accuracy = sum(pred == y_test)/len(y_test)

In [104]:
print("Accuracy = ", accuracy)

Accuracy =  0.9166666666666666


In [105]:
print("Confusion Matrix :\n")
print(confusion_matrix(y_test, pred))

Confusion Matrix :

[[372  21  23]
 [ 10 359  23]
 [ 10  12 358]]


In [106]:
print("Classification Report :\n")
print(classification_report(y_test, pred))

Classification Report :

              precision    recall  f1-score   support

           0       0.95      0.89      0.92       416
           1       0.92      0.92      0.92       392
           2       0.89      0.94      0.91       380

    accuracy                           0.92      1188
   macro avg       0.92      0.92      0.92      1188
weighted avg       0.92      0.92      0.92      1188



### 5. Test

In [107]:
string = ["I finally got the internship - I'm so happy and excited",
         "I can't believe they ignored my work again - it's so frustrating",
          "I felt a chill down my spine when I heard someone behind me in the dark"
         ]

In [108]:
clean_text = [utils.clean(text) for text in string]

In [109]:
vector_text = vectorizer.transform(clean_text)

In [110]:
prediction = model.predict(vector_text)

In [111]:
for text, pred in zip(string, prediction) :
    print("Sentence : ", text)
    print("Prediction :", labels[pred])
    print()

Sentence :  I finally got the internship - I'm so happy and excited
Prediction : joy

Sentence :  I can't believe they ignored my work again - it's so frustrating
Prediction : fear

Sentence :  I felt a chill down my spine when I heard someone behind me in the dark
Prediction : fear



In [112]:
print(data['Emotion'].value_counts())

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64


### 6. Save model and vectorizer 

In [113]:
utils.save_file("1_model_lr.pkl", model)
utils.save_file("1_vectorizer_lr.pkl", vectorizer)