# Text-Based Emotion Classifier Using MultinomialNB

### 1. Import

In [2]:
import pandas as pd

In [3]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [4]:
import sys
sys.path.append("../..")
from scripts.utils import save_file, clean

### 2. Loading and Preparing Data

In [5]:
# Load the labelled comments dataset from its relative path in the repo.
DATA_PATH = "../../data/Emotion_classify_Data.csv"
df = pd.read_csv(DATA_PATH)

In [7]:
# Integer class id assigned to each emotion label.
emotion_map = {"fear": 0, "anger": 1, "joy": 2}

In [8]:
# Encode each emotion label as its integer class id.
df['Emotion_encoded'] = df['Emotion'].map(emotion_map)

# Series.map yields NaN for any label that is missing from emotion_map; stop
# here and name the offending labels rather than passing NaN targets downstream.
unmapped = df.loc[df['Emotion_encoded'].isna(), 'Emotion'].unique()
assert len(unmapped) == 0, f"Labels missing from emotion_map: {list(unmapped)}"

In [10]:
# Raw comment text is the model input; the encoded emotion id is the target.
X = df['Comment']
y = df['Emotion_encoded']

# Class names ordered by their encoded id, so labels[i] is the emotion whose
# code is i. df['Emotion'].unique() returns labels in order of first appearance
# in the data, which need not match the ids in emotion_map, so it must not be
# indexed by a predicted class id.
labels = pd.Series(emotion_map).sort_values().index.to_numpy()

In [11]:
# TF-IDF vectorizer created with its default settings; the same instance is
# fitted on the comments and later reused to transform the trial sentences.
vectorizer = TfidfVectorizer()

In [12]:
# Fit the vectorizer on every comment and build the TF-IDF feature matrix.
# NOTE(review): fitting happens on the full corpus before the train/test split,
# so the held-out rows contribute to the vocabulary and IDF weights. Consider
# splitting first and fitting on the training rows only; the reported metrics
# would need to be regenerated after such a change.
X_vector = vectorizer.fit_transform(X)

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_vector,
    y,
    test_size=0.20,
    random_state=42,
)

### 3. Model Development

In [14]:
# Multinomial Naive Bayes classifier, constructed with its default hyperparameters.
model = MultinomialNB()

In [15]:
# Train the classifier on the TF-IDF features and encoded labels of the training split.
model.fit(X_train, y_train)

### 4. Prediction and Evaluation

In [16]:
# Predicted class ids for the held-out test split; used by the evaluation cells below.
pred = model.predict(X_test)

In [17]:
# Confusion matrix on the held-out split: rows are true classes, columns are
# predicted classes, both identified by encoded class id.
test_confusion = confusion_matrix(y_test, pred)
print("Confusion Matrix :\n", test_confusion)

Confusion Matrix :
 [[369  26  21]
 [ 14 368  10]
 [ 17  30 333]]


In [18]:
# Per-class precision, recall and F1 on the held-out split; classes are shown
# by their encoded id.
test_report = classification_report(y_test, pred)
print("Classification Report :\n", test_report)

Classification Report :
               precision    recall  f1-score   support

           0       0.92      0.89      0.90       416
           1       0.87      0.94      0.90       392
           2       0.91      0.88      0.90       380

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



### 5. Trial with test inputs

In [19]:
# Hand-written sample sentences used to try out the trained model.
string = [
    "I finally got the internship - I'm so happy and excited",
    "I can't believe they ignored my work again - it's so frustrating",
    "I felt a chill down my spine when I heard someone behind me in the dark",
]

In [20]:
# Run every sample sentence through the project's clean() helper.
# NOTE(review): the comments used to fit the vectorizer were not passed through
# clean() in this notebook - confirm the preprocessing mismatch is intended.
clean_texts = list(map(clean, string))

In [21]:
# Transform (no refit) with the already-fitted vectorizer so the sample
# sentences are projected onto the same vocabulary the model was trained on.
vector_text = vectorizer.transform(clean_texts)

In [22]:
# Predicted class id for each sample sentence.
emo_pred = model.predict(vector_text)

In [None]:
# Invert emotion_map so a predicted class id is translated back to its emotion
# name from the same mapping that produced the training targets.
id_to_emotion = {code: name for name, code in emotion_map.items()}

# The loop variables deliberately avoid the name `pred`, which holds the
# test-set predictions used by the evaluation cells.
for sentence, emotion_id in zip(string, emo_pred):
    print("Sentence : ", sentence)
    print("Emotion : ", id_to_emotion[int(emotion_id)])