# Emotions Classifier

In [2]:
import spacy

# Load the "en_core_web_lg" spaCy pipeline once; later cells reuse `nlp`
# for lemmatization and for document vectors.
nlp = spacy.load("en_core_web_lg")

In [3]:
import pandas as pd

# Read the dataset from a path relative to the notebook's working directory.
df = pd.read_csv("./archive/text.csv")

# Preview the first rows to check the available columns.
df.head()

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


In [4]:
# Rows per label: shows how unevenly the classes are represented.
df.label.value_counts()

label
1    141067
0    121187
3     57317
4     47712
2     34554
5     14972
Name: count, dtype: int64

In [5]:
# Down-sample every class to the size of the rarest one so that all labels
# are equally represented. The size is derived from the data instead of being
# copied by hand from a previous output, so the cell keeps working if the CSV
# changes.
min_sample = int(df.label.value_counts().min())

# Draw `min_sample` rows per label with a fixed seed (reproducible), and
# concatenate the per-label samples in ascending label order.
df_balance = pd.concat(
    [
        df[df.label == label].sample(min_sample, random_state=350)
        for label in sorted(df.label.unique())
    ]
)
df_balance.label.value_counts()


label
0    14972
1    14972
2    14972
3    14972
4    14972
5    14972
Name: count, dtype: int64

## Preprocessed Text

In [6]:
def prepreocessed(text):
    """Return `text` reduced to the lemmas of its content tokens.

    The text is parsed with the notebook-level spaCy pipeline `nlp`; stop
    words and punctuation tokens are discarded and the lemmas of the
    remaining tokens are joined with single spaces.

    Parameters
    ----------
    text : str
        Raw input sentence.

    Returns
    -------
    str
        Space-separated lemmas (empty string if every token is filtered out).
    """
    doc = nlp(text)
    # The parentheses matter: `not a or b` parses as `(not a) or b`, which
    # would let punctuation through instead of removing it.
    kept_lemmas = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_punct)
    ]
    return " ".join(kept_lemmas)

print(prepreocessed("i ate pizza"))

eat pizza


In [7]:
# Run the spaCy-based cleaner over every row of the balanced frame; this adds
# the `preocessedText` column to `df_balance` in place.
df_balance["preocessedText"] = df_balance["text"].apply(prepreocessed)
df_balance.head()

Unnamed: 0.1,Unnamed: 0,text,label,preocessedText
157443,157443,i feel like im at least trying to be a little ...,0,feel like m try little low fat
239050,239050,im leaving the group because im not playing we...,0,m leave group m play feel unloved close
122976,122976,i definitely didn t think that what we had was...,0,definitely didn t think seemingly easily turn ...
227167,227167,i dont know whether to be happy that i can be ...,0,not know happy strong feel devastated m damn g...
192152,192152,i do love selling but i feel i have been beate...,0,love selling feel beat far don t interest anym...


In [8]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split, so each label keeps the same share in both
# partitions; the seed makes the split reproducible.
# NOTE(review): this splits the raw `text` column, not `preocessedText` --
# confirm that the unprocessed text is the intended model input.
X_train, X_test, y_train, y_test = train_test_split(
    df_balance["text"],
    df_balance["label"],
    test_size=0.2,
    random_state=42,
    stratify=df_balance["label"],
)



In [9]:
from sklearn.feature_extraction.text import CountVectorizer

from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

# NOTE(review): `v` is not referenced by any cell visible here; it is kept
# only in case a later cell relies on it.
v = CountVectorizer()

# Bag-of-words baseline: token counts fed into a multinomial naive Bayes model.
clf = Pipeline(
    steps=[
        ("vectorizer_bow", CountVectorizer()),
        ("Multi NB", MultinomialNB()),
    ]
)
clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out partition.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Spot check: the fitted pipeline accepts a list of raw strings and returns
# one predicted label per string.
text = ["i have interest in someone"]
predicted_emotion = clf.predict(text)
print("text is : ", predicted_emotion)

              precision    recall  f1-score   support

           0       0.91      0.86      0.88      2994
           1       0.86      0.80      0.83      2994
           2       0.84      0.90      0.87      2995
           3       0.90      0.88      0.89      2995
           4       0.83      0.84      0.84      2995
           5       0.85      0.90      0.87      2994

    accuracy                           0.86     17967
   macro avg       0.86      0.86      0.86     17967
weighted avg       0.86      0.86      0.86     17967

text is :  [2]


# Word Vector 

In [10]:
# Embed each preprocessed sentence with spaCy: `nlp(x).vector` is stored per
# row in the new `vectorText` column (added to `df_balance` in place).
df_balance['vectorText'] = df_balance['preocessedText'].apply( lambda x : nlp(x).vector )

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


In [11]:
# Preview the frame to confirm the `vectorText` column is present.
df_balance.head()   

Unnamed: 0.1,Unnamed: 0,text,label,preocessedText,vectorText
157443,157443,i feel like im at least trying to be a little ...,0,feel like m try little low fat,"[2.1222343, 0.53254575, -4.157814, -0.38740137..."
239050,239050,im leaving the group because im not playing we...,0,m leave group m play feel unloved close,"[-0.33729875, 0.23104003, -2.16777, -3.3596566..."
122976,122976,i definitely didn t think that what we had was...,0,definitely didn t think seemingly easily turn ...,"[0.015287304, 2.8443923, -3.207748, -1.948345,..."
227167,227167,i dont know whether to be happy that i can be ...,0,not know happy strong feel devastated m damn g...,"[0.55273134, 0.5485274, -2.8807786, -1.7979311..."
192152,192152,i do love selling but i feel i have been beate...,0,love selling feel beat far don t interest anym...,"[-0.9785776, 0.9377, -3.2451966, -2.4879098, 0..."


In [15]:
# Same stratified 80/20 split as for the text model, but on the per-row
# document vectors. This rebinds X_train / X_test / y_train / y_test, so the
# earlier text partitions are no longer reachable under those names.
X_train, X_test, y_train, y_test = train_test_split(
    df_balance["vectorText"].values,
    df_balance["label"],
    test_size=0.2,
    random_state=42,
    stratify=df_balance["label"],
)

X_train

array([array([ 0.8991022 ,  1.4231014 , -2.5065825 , -1.258206  ,  2.1953022 ,
               0.8945227 ,  1.0413512 ,  3.2808402 , -1.309209  , -0.98715055,
               2.3367066 ,  0.11758554, -3.141752  ,  1.3153912 ,  0.2602119 ,
               0.8889685 ,  1.359522  ,  1.0417418 , -1.2952564 , -0.8712814 ,
               0.02369042,  0.08144138, -2.6389844 , -0.24808316, -0.71262735,
              -1.6890942 , -1.2870609 , -1.7193729 ,  0.21374173,  0.4213646 ,
               2.0131474 ,  1.9738144 , -1.008734  , -2.1158025 ,  0.4987984 ,
               0.82632726,  0.8182083 , -0.15052003,  2.5175796 ,  1.7167488 ,
              -1.4976425 , -0.86071813, -0.02943973,  0.6942281 , -4.7862225 ,
               2.083457  ,  1.4660468 , -2.8296854 ,  0.335834  , -0.65661275,
               1.9719287 , -0.9638029 , -0.47751483, -0.76445615, -1.1204778 ,
               2.0164938 , -1.1448189 ,  1.1739436 ,  2.2024732 , -0.00917409,
               3.0531952 ,  0.6643777 , -3.8485544 ,

In [16]:
import numpy as np

# Each partition is an array whose elements are themselves vectors; stack the
# elements of each one into a single 2-D float matrix.
X_train_2d, X_test_2d = (np.stack(part) for part in (X_train, X_test))

X_test_2d

array([[-0.11343908, -0.4781787 , -1.6537163 , ...,  1.2174063 ,
        -2.0532637 ,  1.1136473 ],
       [ 1.3762064 ,  0.9063263 , -4.1467433 , ...,  0.7529774 ,
        -4.8182    ,  1.2856725 ],
       [ 0.43321005,  1.2577454 , -2.127867  , ...,  1.3693086 ,
        -3.8605027 ,  1.9878286 ],
       ...,
       [ 0.146781  ,  1.4250826 , -3.830343  , ...,  0.76732665,
        -3.5224478 ,  0.90967995],
       [ 0.8159659 ,  0.58670753, -0.9384367 , ...,  0.7971483 ,
        -3.074605  ,  1.7904917 ],
       [-0.26016334,  1.4839834 , -2.0349307 , ...,  2.1940534 ,
        -3.9280999 ,  3.4384003 ]], dtype=float32)