In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

import spacy

In [4]:
df = pd.read_csv("/kaggle/input/emotion-dataset/Emotion_classify_Data.csv")
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [5]:
df['Emotion'].value_counts()

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64

In [7]:
print(f"{df['Comment'][0]} -> {df['Emotion'][0]}")

i seriously hate one subject to death but now i feel reluctant to drop it -> fear


In [8]:
nlp = spacy.load("en_core_web_sm")

In [9]:
txt = df['Comment'][4]
txt

'i feel suspicious if there is no one outside like the rapture has happened or something'

In [10]:
doc = nlp(txt)

In [11]:
for token in doc:
    print(token)

i
feel
suspicious
if
there
is
no
one
outside
like
the
rapture
has
happened
or
something


In [12]:
for token in doc:
    print(f"Word: {token} | -> {token.lemma_}")

Word: i | -> I
Word: feel | -> feel
Word: suspicious | -> suspicious
Word: if | -> if
Word: there | -> there
Word: is | -> be
Word: no | -> no
Word: one | -> one
Word: outside | -> outside
Word: like | -> like
Word: the | -> the
Word: rapture | -> rapture
Word: has | -> have
Word: happened | -> happen
Word: or | -> or
Word: something | -> something


In [13]:
for token in doc:
    if token.is_stop or token.is_left_punct:
        print(token)

i
if
there
is
no
one
the
has
or
something


In [14]:
def preprocess(text):
    doc = nlp(text)
    filtered_tokens = []
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        filtered_tokens.append(token.lemma_)
    return " ".join(filtered_tokens)

In [15]:
print(txt)
procces_txt = preprocess(txt)
print(procces_txt)

i feel suspicious if there is no one outside like the rapture has happened or something
feel suspicious outside like rapture happen


In [16]:
df['preprocessed_comment'] = df['Comment'].apply(preprocess)

In [17]:
df

Unnamed: 0,Comment,Emotion,preprocessed_comment
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen
...,...,...,...
5932,i begun to feel distressed for you,fear,begin feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,leave feel annoyed angry thinking center stupi...
5934,i were to ever get married i d have everything...,joy,marry d ready offer ve get club perfect good l...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant apply want able find company kn...


In [20]:
df['Emotion_num'] = df['Emotion'].map({'joy' : 0, 'fear': 1, 'anger': 2})

df.head(5)

Unnamed: 0,Comment,Emotion,preprocessed_comment,Emotion_num
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop,1
1,im so full of life i feel appalled,anger,m life feel appalled,2
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...,1
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place,0
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen,1


In [22]:
df['Emotion_num'] = df['Emotion'].map({'joy': 0, 'fear': 1, 'anger': 2})
df.head(5)

Unnamed: 0,Comment,Emotion,preprocessed_comment,Emotion_num
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop,1
1,im so full of life i feel appalled,anger,m life feel appalled,2
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...,1
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place,0
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen,1


In [31]:
X_train, X_test, Y_train, Y_test = train_test_split(df['preprocessed_comment'], df['Emotion_num'], 
                                                    test_size=0.2, random_state=42, stratify=df['Emotion_num'])

In [32]:
print("Shape of x_train: ", X_train.shape)
print("Shape of x_test: ", X_test.shape)

Shape of x_train:  (4749,)
Shape of x_test:  (1188,)


****Convert text column to numeric vector

In [33]:
v = TfidfVectorizer()
X_train_cv = v.fit_transform(X_train)
X_test_cv = v.fit_transform(X_test)
print(v.vocabulary_)

{'feel': 888, 'like': 1414, 'pop': 1844, 'face': 854, 'fist': 920, 'obnoxious': 1664, 'sure': 2378, 'ill': 1210, 'bit': 228, 'nervous': 1624, 'wake': 2641, 'pretty': 1885, 'energetic': 781, 'positive': 1851, 'get': 1016, 'apprehensive': 121, 'reason': 1972, 'hate': 1107, 'pressure': 1879, 'have': 1112, 'carry': 327, 'conversation': 494, 'not': 1649, 'end': 777, 'breathe': 281, 'receiver': 1978, 'steamy': 2308, 'glad': 1029, 'proud': 1914, 'answer': 103, 'complicated': 443, 'question': 1940, 'reluctant': 2009, 'change': 354, 'work': 2720, 'step': 2311, 'plan': 1815, 'take': 2403, 'divine': 692, 'help': 1135, 'possibility': 1852, 'away': 168, 'long': 1438, 'golden': 1043, 'day': 578, 'right': 2053, 'amuse': 87, 'sound': 2259, 'hear': 1122, 'aircleaner': 68, 'bed': 202, 'cat': 333, 'connected': 468, 'person': 1781, 'michael': 1534, 'think': 2453, 'weird': 2672, 'mom': 1558, 'inside': 1263, 'warm': 2648, 'compare': 433, 'outside': 1713, 'temp': 2427, 'survivor': 2385, 'man': 1481, 'skill':

Machine Learning Model

1.Naive Bayes

In [34]:
NB_model = MultinomialNB()
NB_model.fit(X_train_cv, Y_train)

In [36]:
RFC_model = RandomForestClassifier()

RFC_model.fit(X_train_cv, Y_train)

In [37]:
y_pred = RFC_model.predict(X_test_cv)

ValueError: X has 2760 features, but RandomForestClassifier is expecting 6126 features as input.