# RandomForest : Text Based Emotion Classification

### 1. Imports

In [11]:
import sys
sys.path.append("..")
from scripts.file import save_file
from scripts.cleaner import clean

In [12]:
import pandas as pd 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report,confusion_matrix
from collections import Counter

### 2. Load and Train Data

In [13]:
df=pd.read_csv("../data/Emotion_classify_Data.csv")

In [14]:
emotion_map={
    'fear': 0,
    'anger':1,
    'joy':2
}

In [16]:
df['Emotion_encoded']=df['Emotion'].map(emotion_map)

In [18]:
X=df['Comment']
y=df['Emotion_encoded']
label=df['Emotion'].unique()

In [19]:
X = X.apply(clean)

In [20]:
vectorizer=TfidfVectorizer()

In [21]:
vectorized_X=vectorizer.fit_transform(X)

In [22]:
X_train,X_test,y_train,y_test=train_test_split(vectorized_X, y, test_size=0.2, random_state=42, stratify=y)

### 3. Model Development

In [24]:
model=RandomForestClassifier(class_weight='balanced')

In [25]:
model.fit(X_train,y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


### 4. Prediction and Evaluation

In [26]:
pred=model.predict(X_test)

In [27]:
print(classification_report(y_test,pred))

              precision    recall  f1-score   support

           0       0.93      0.90      0.91       388
           1       0.93      0.88      0.90       400
           2       0.86      0.94      0.90       400

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



In [28]:
print(confusion_matrix(y_test,pred))

[[348  16  24]
 [ 14 350  36]
 [ 14  12 374]]


In [None]:
print(df['Emotion'].value_counts())

### 5. Trial with Test Data

In [34]:
texts = [
    "Oh no! Why did you do  that",
    "That is so funny",
    "What am I gonna do now! I am scared"
]

In [35]:
text = [clean(t) for t in texts]

In [36]:
vectorized_text = vectorizer.transform(text)

In [37]:
pred = model.predict(vectorized_text)

In [38]:
for p,t in zip(pred, texts) :
    print("Sentence : ", t)
    print("Prediction : ", label[p])
    print()

Sentence :  Oh no! Why did you do  that
Prediction :  anger

Sentence :  That is so funny
Prediction :  joy

Sentence :  What am I gonna do now! I am scared
Prediction :  fear



### 6. Save model and vectorizer as pkl file

In [39]:
save_file('model.pkl',model,'wb')
save_file('vectorizer.pkl',vectorizer)