In [1]:
import pandas as pd
import spacy

In [2]:
df=pd.read_csv('Emotion_classify_Data.csv')

In [3]:
df

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear
...,...,...
5932,i begun to feel distressed for you,fear
5933,i left feeling annoyed and angry thinking that...,anger
5934,i were to ever get married i d have everything...,joy
5935,i feel reluctant in applying there because i w...,fear


In [5]:
df.Emotion.value_counts()

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64

In [6]:
def processdata(text):
    nlp=spacy.load('en_core_web_sm')
    doc=nlp(text)
    filter_token=[]
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        else:
            filter_token.append(token.lemma_)
    return " ".join(filter_token)

In [7]:
import sklearn
from sklearn.model_selection import train_test_split

In [9]:
df['emotion_number']=df.Emotion.map({
    'joy':0,
    'fear':1,
    'anger':2
})

In [10]:
df

Unnamed: 0,Comment,Emotion,emotion_number
0,i seriously hate one subject to death but now ...,fear,1
1,im so full of life i feel appalled,anger,2
2,i sit here to write i start to dig out my feel...,fear,1
3,ive been really angry with r and i feel like a...,joy,0
4,i feel suspicious if there is no one outside l...,fear,1
...,...,...,...
5932,i begun to feel distressed for you,fear,1
5933,i left feeling annoyed and angry thinking that...,anger,2
5934,i were to ever get married i d have everything...,joy,0
5935,i feel reluctant in applying there because i w...,fear,1


In [12]:
X_train,X_test,y_train,y_test=train_test_split(df['Comment'],df.emotion_number,test_size=0.2,stratify=df.emotion_number)

In [13]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(4749,)
(1188,)
(4749,)
(1188,)


In [14]:
print(y_train.value_counts())
print(y_test.value_counts())

emotion_number
2    1600
0    1600
1    1549
Name: count, dtype: int64
emotion_number
0    400
2    400
1    388
Name: count, dtype: int64


In [15]:
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
model_unprocessed=Pipeline([
    ('tf-idf',TfidfVectorizer()),
    ('multinomialNB',MultinomialNB())
])

In [16]:
model_unprocessed.fit(X_train,y_train)

In [18]:
from sklearn.metrics import classification_report
res=model_unprocessed.predict(X_test)
print(classification_report(y_test,res))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91       400
           1       0.90      0.88      0.89       388
           2       0.88      0.91      0.89       400

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



In [21]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test,res))

0.8981481481481481


In [23]:
processed_text_train=[processdata(text) for text in X_train]

In [24]:
processed_text_test=[processdata(text) for text in X_test]

In [25]:
processed_text_train

['realised wrong started feel hated saying things wanted talk pitied',
 'tired feeling overwhelmed everyday responsibilities brings point post',
 'kept feeling enraged',
 'feel strategy worthwhile',
 'feel distraught sad',
 'feel smart telling people like wally lamb s actually chick lit mention people respect',
 'nt want approach topic lightly time feel apprehensive putting',
 'feeling adventurous took stairs',
 'feel content achieved know don t write today ll tomorrow',
 'feel like s acceptable favourite terpene responsible flavour',
 'arcade simple purpose try feel absolutely comfortable physically emotionally practically absolutely',
 'm going honest feel distraught',
 'm feeling shaky feverish mad',
 'things personally feel wronged little memories stay',
 'shared trusted friend feeling respected friend',
 'escorting relative bike',
 'feel paranoid don t want feel like',
 'finally found afternoon wear feeling like vicious lurker',
 'prepare bunch dishes safety control home feel safe

In [26]:
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
model_processed=Pipeline([
    ('tf-idf',TfidfVectorizer()),
    ('multinomialNB',MultinomialNB())
])

In [27]:
model_processed.fit(processed_text_train,y_train)

In [28]:
res=model_processed.predict(processed_text_test)

In [29]:
print(classification_report(y_test,res))

              precision    recall  f1-score   support

           0       0.93      0.91      0.92       400
           1       0.90      0.90      0.90       388
           2       0.90      0.92      0.91       400

    accuracy                           0.91      1188
   macro avg       0.91      0.91      0.91      1188
weighted avg       0.91      0.91      0.91      1188



In [30]:
accuracy_score(y_test,res)

0.9099326599326599

In [34]:
import joblib
joblib.dump(model_processed,'model_processed_data.pkl')

['model_processed_data.pkl']