In [44]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report
from sklearn import tree

import spacy

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
df = pd.read_csv('/content/drive/MyDrive/DataScience/EmotionDataset/Emotion_classify_Data.csv')
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5937 entries, 0 to 5936
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Comment  5937 non-null   object
 1   Emotion  5937 non-null   object
dtypes: object(2)
memory usage: 92.9+ KB


In [6]:
df.describe()

Unnamed: 0,Comment,Emotion
count,5937,5937
unique,5934,3
top,i feel like a tortured artist when i talk to her,anger
freq,2,2000


In [7]:
print(f"{df['Comment'][0]} *->* {df['Emotion'][0]} ")

i seriously hate one subject to death but now i feel reluctant to drop it *->* fear 


# PreProcessing

In [8]:
nlp = spacy.load("en_core_web_sm")

doc = nlp(df['Comment'][0])

In [9]:
df['Comment'][0]

'i seriously hate one subject to death but now i feel reluctant to drop it'

In [10]:
doc

i seriously hate one subject to death but now i feel reluctant to drop it

In [11]:
for i in doc:
  print(i)

i
seriously
hate
one
subject
to
death
but
now
i
feel
reluctant
to
drop
it


In [12]:
for i in doc:
  print(i.lemma_)

I
seriously
hate
one
subject
to
death
but
now
I
feel
reluctant
to
drop
it


In [13]:
for i in doc:
  print(f'{i}  -  {i.lemma_}')

i  -  I
seriously  -  seriously
hate  -  hate
one  -  one
subject  -  subject
to  -  to
death  -  death
but  -  but
now  -  now
i  -  I
feel  -  feel
reluctant  -  reluctant
to  -  to
drop  -  drop
it  -  it


In [14]:
for i in doc:
  if not i.is_stop:
    print(i)

seriously
hate
subject
death
feel
reluctant
drop


In [15]:
for i in doc:
  if i.is_punct:
    print(i)

In [16]:
example =nlp("I live in USA, and I have'nt been on wolk, since 1990")

In [17]:
for i in example:
  if i.is_punct:
    print(i)

,
,


#Function to preprocess text

In [18]:
def preprocess(text):
  doc = nlp(text)
  filtered_tokens=[]
  for i in doc:
    if not i.is_punct and not i.is_stop:
      filtered_tokens.append(i.lemma_)
  return " ".join(filtered_tokens)






In [19]:

print(f"{df['Comment'][4]} -> {preprocess(df['Comment'][4])}")

i feel suspicious if there is no one outside like the rapture has happened or something -> feel suspicious outside like rapture happen


In [20]:
df['Processed_Comments'] = df['Comment'].apply(preprocess)

In [21]:
df

Unnamed: 0,Comment,Emotion,Processed_Comments
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen
...,...,...,...
5932,i begun to feel distressed for you,fear,begin feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,leave feel annoyed angry thinking center stupi...
5934,i were to ever get married i d have everything...,joy,marry d ready offer ve get club perfect good l...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant apply want able find company kn...


In [22]:
df["Emotion"]

0        fear
1       anger
2        fear
3         joy
4        fear
        ...  
5932     fear
5933    anger
5934      joy
5935     fear
5936    anger
Name: Emotion, Length: 5937, dtype: object

#Changing Emotion to numerical value


In [23]:
df['Emotion_val'] = df['Emotion'].map({'joy':1,'fear':2,'anger':3})

In [24]:
df

Unnamed: 0,Comment,Emotion,Processed_Comments,Emotion_val
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop,2
1,im so full of life i feel appalled,anger,m life feel appalled,3
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...,2
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place,1
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen,2
...,...,...,...,...
5932,i begun to feel distressed for you,fear,begin feel distressed,2
5933,i left feeling annoyed and angry thinking that...,anger,leave feel annoyed angry thinking center stupi...,3
5934,i were to ever get married i d have everything...,joy,marry d ready offer ve get club perfect good l...,1
5935,i feel reluctant in applying there because i w...,fear,feel reluctant apply want able find company kn...,2


In [25]:
X_train,X_test,y_train,y_test = train_test_split(df['Processed_Comments'], df['Emotion_val'], test_size=0.2, random_state=42, stratify=df['Emotion_val'])

In [26]:
X_train

738                     thrilled lot thing feel petrified
4034            know good feel look honor word help start
2400                                     m feel way trust
841     come quarter feel invigorated work m end sprin...
3988    m firm believer make woman feel terrific great...
                              ...                        
148       feel motivation satisfied read write understand
2826    feel slightly distressed pride resist wait per...
384                                  wake today feel piss
5631                            get home start feel weird
4915      feel strange bonding bed wardrode little decade
Name: Processed_Comments, Length: 4749, dtype: object

In [27]:
X_test

4155                    feel like pop face fist obnoxious
4100                          m sure ill feel bit nervous
2888                wake feel pretty energetic s positive
5168                    feel like get apprehensive reason
1030    hate feel pressure have carry conversation not...
                              ...                        
2740                   m feel wimpy whiny generally tired
4557    sit listen wind blow tree feel calm finally re...
34      not help feel little bit bitter great big happ...
3450                feel nervous try new lesson horse shy
5440    not help feel little envious treat body power ...
Name: Processed_Comments, Length: 1188, dtype: object

In [28]:
y_train

738     2
4034    1
2400    1
841     1
3988    1
       ..
148     1
2826    2
384     3
5631    2
4915    2
Name: Emotion_val, Length: 4749, dtype: int64

In [83]:
v = TfidfVectorizer()

X_train_v = v.fit_transform(X_train)
X_test_v = v.transform(X_test)

#Creating Models

NaiveBayes

In [43]:
NBmodel = MultinomialNB()
NBmodel.fit(X_train_v, y_train)

nb_pred = NBmodel.predict(X_test_v)
print(accuracy_score(y_test, nb_pred))

0.9031986531986532


In [51]:
print(classification_report(y_test, nb_pred))

              precision    recall  f1-score   support

           1       0.90      0.89      0.89       400
           2       0.91      0.90      0.91       388
           3       0.90      0.92      0.91       400

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



Decision Tree

In [34]:
DecisionTree = tree.DecisionTreeClassifier()
DecisionTree.fit(X_train_v, y_train)

dt_pred = DecisionTree.predict(X_test_v)
print(accuracy_score(y_test, dt_pred))

0.9292929292929293


In [52]:
print(classification_report(y_test, dt_pred))

              precision    recall  f1-score   support

           1       0.94      0.94      0.94       400
           2       0.91      0.93      0.92       388
           3       0.94      0.91      0.92       400

    accuracy                           0.93      1188
   macro avg       0.93      0.93      0.93      1188
weighted avg       0.93      0.93      0.93      1188



Random Forest

In [37]:
RandomForest = RandomForestClassifier()
RandomForest.fit(X_train_v, y_train)

rf_pred = RandomForest.predict(X_test_v)
print(accuracy_score(y_test, rf_pred))

0.9217171717171717


In [50]:
print(classification_report(rf_pred,y_test))

              precision    recall  f1-score   support

           1       0.94      0.92      0.93       411
           2       0.93      0.92      0.92       391
           3       0.90      0.93      0.91       386

    accuracy                           0.92      1188
   macro avg       0.92      0.92      0.92      1188
weighted avg       0.92      0.92      0.92      1188



SVM

In [42]:
svm = svm.SVC()
svm.fit(X_train_v, y_train)

svm_pred = svm.predict(X_test_v)
print(accuracy_score(y_test, svm_pred))

0.9183501683501684


In [49]:
print(classification_report(y_test, svm_pred))

              precision    recall  f1-score   support

           1       0.88      0.95      0.92       400
           2       0.94      0.89      0.92       388
           3       0.93      0.91      0.92       400

    accuracy                           0.92      1188
   macro avg       0.92      0.92      0.92      1188
weighted avg       0.92      0.92      0.92      1188



Gradiend Descent

In [47]:
sgd = SGDClassifier()
sgd.fit(X_train_v, y_train)

sgd_pred= sgd.predict(X_test_v)
print(accuracy_score(y_test, sgd_pred))

0.9276094276094277


In [48]:
print(classification_report(y_test, sgd_pred))

              precision    recall  f1-score   support

           1       0.92      0.94      0.93       400
           2       0.93      0.93      0.93       388
           3       0.93      0.92      0.92       400

    accuracy                           0.93      1188
   macro avg       0.93      0.93      0.93      1188
weighted avg       0.93      0.93      0.93      1188



#TESTING

In [86]:
text=df["Comment"][450]
text

'i was feeling apprehensive about my life as a student i felt like i couldnt succeed wouldnt succeed could never succeed'

In [87]:
preprocessed_text = [preprocess(text)]
preprocessed_text

['feel apprehensive life student feel like not succeed not succeed succeed']

In [89]:
text_v= v.transform(preprocessed_text)
text_v

<1x6128 sparse matrix of type '<class 'numpy.float64'>'
	with 7 stored elements in Compressed Sparse Row format>

In [96]:
test_pred = DecisionTree.predict(text_v)
test_pred

print(f"Predicted value {test_pred} - correct value {df['Emotion_val'][450]}")

Predicted value [2] - correct value 2
