In [None]:
import pandas as pd

In [None]:
# Load the labelled comments and show the frame's (rows, columns).
csv_path = "/content/Emotion_classify_Data.csv"
df = pd.read_csv(csv_path)
df.shape

(5937, 2)

In [None]:
# Peek at the first five rows (five is the default for DataFrame.head).
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [None]:
# Count how many comments carry each emotion label.
df["Emotion"].value_counts()

Unnamed: 0_level_0,count
Emotion,Unnamed: 1_level_1
anger,2000
joy,2000
fear,1937


In [None]:
# Encode the string labels as integers so the classifiers get a numeric target.
emotion_to_id = {"joy": 0, "fear": 1, "anger": 2}
df["emotion_num"] = df["Emotion"].map(emotion_to_id)

df.head()

Unnamed: 0,Comment,Emotion,emotion_num
0,i seriously hate one subject to death but now ...,fear,1
1,im so full of life i feel appalled,anger,2
2,i sit here to write i start to dig out my feel...,fear,1
3,ive been really angry with r and i feel like a...,joy,0
4,i feel suspicious if there is no one outside l...,fear,1


In [None]:
# Train/test split: hold out 20% of the rows for evaluation.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    df.Comment,
    df.emotion_num,
    test_size=0.2,
    stratify=df.emotion_num,  # keep the class proportions the same in both splits
    random_state=42,  # fixed seed so Restart & Run All reproduces the same split
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

In [None]:
# Baseline: TF-IDF features fed into a multinomial naive Bayes classifier.
clf = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("mnnb", MultinomialNB()),
    ]
)

# Pipeline.fit returns the fitted pipeline, so fit and predict can be chained.
y_predict = clf.fit(x_train, y_train).predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.89      0.88      0.89       400
           1       0.86      0.87      0.87       388
           2       0.88      0.89      0.88       400

    accuracy                           0.88      1188
   macro avg       0.88      0.88      0.88      1188
weighted avg       0.88      0.88      0.88      1188



In [None]:
# TF-IDF features fed into a random forest classifier.
clf = Pipeline([
    ("tfidf", TfidfVectorizer()),
    # Random forests are stochastic (bootstrap samples, feature subsets);
    # pin the seed so the report below is reproducible across runs.
    ("rfc", RandomForestClassifier(random_state=42)),
])

clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.85      0.94      0.89       400
           1       0.91      0.89      0.90       388
           2       0.93      0.84      0.88       400

    accuracy                           0.89      1188
   macro avg       0.90      0.89      0.89      1188
weighted avg       0.90      0.89      0.89      1188



In [None]:
# TF-IDF features fed into a support vector classifier (default settings).
clf = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("svc", SVC()),
    ]
)

# Pipeline.fit returns the fitted pipeline, so fit and predict can be chained.
y_predict = clf.fit(x_train, y_train).predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.86      0.94      0.90       400
           1       0.92      0.88      0.90       388
           2       0.93      0.87      0.90       400

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



In [None]:
# Score the most recently fitted pipeline on the held-out split.
clf.score(X=x_test, y=y_test)

0.8981481481481481

In [None]:
# Text preprocessing: load the spaCy English pipeline once; preprocess() reuses it.
import spacy

nlp = spacy.load("en_core_web_sm")
def preprocess(text):
    """Lemmatize ``text`` and drop stop words and punctuation.

    Args:
        text: Raw comment string to clean.

    Returns:
        The lemmas of the remaining tokens, joined by single spaces.
    """
    doc = nlp(text)
    # Keep a token only if it is neither a stop word nor punctuation. The
    # previous `not is_stop or not is_punct` condition only dropped tokens that
    # were both at once, so in practice nothing was filtered out.
    kept_lemmas = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_punct)
    ]
    return " ".join(kept_lemmas)

In [None]:
# Run every comment through preprocess() and keep the result in a new column.
cleaned_comments = df["Comment"].apply(preprocess)
df["processed txt"] = cleaned_comments
df.head()

Unnamed: 0,Comment,Emotion,emotion_num,processed txt
0,i seriously hate one subject to death but now ...,fear,1,I seriously hate one subject to death but now ...
1,im so full of life i feel appalled,anger,2,I m so full of life I feel appalled
2,i sit here to write i start to dig out my feel...,fear,1,I sit here to write I start to dig out my feel...
3,ive been really angry with r and i feel like a...,joy,0,I ve be really angry with r and I feel like an...
4,i feel suspicious if there is no one outside l...,fear,1,I feel suspicious if there be no one outside l...


In [None]:
# Train/test split on the preprocessed text: hold out 20% of the rows.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    df["processed txt"],
    df.emotion_num,
    test_size=0.2,
    stratify=df.emotion_num,  # keep the class proportions the same in both splits
    random_state=42,  # fixed seed so Restart & Run All reproduces the same split
)

In [None]:
# TF-IDF + random forest, this time trained on the preprocessed text.
clf = Pipeline([
    ("tfidf", TfidfVectorizer()),
    # Random forests are stochastic (bootstrap samples, feature subsets);
    # pin the seed so the report below is reproducible across runs.
    ("rfc", RandomForestClassifier(random_state=42)),
])

clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.85      0.93      0.89       400
           1       0.92      0.89      0.90       388
           2       0.93      0.87      0.90       400

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



In [None]:
# Score the most recently fitted pipeline on the held-out split.
clf.score(X=x_test, y=y_test)

0.8956228956228957