# Emotion analysis on the Emotions dataset for NLP

*The dataset is taken from: https://www.kaggle.com/datasets/praveengovi/emotions-dataset-for-nlp*

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
from preprocessing import filter_and_extract_lemma
from preprocessing import enumerate_encode, one_hot_encode
from tensorflow.data import Dataset

In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

In [4]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Input, Attention
from keras.layers import Dropout, Embedding, BatchNormalization
from keras.optimizers import Adam

## Preprocess the data

In [5]:
# Load the three dataset splits. Each file is read as ';'-separated rows with
# no header line, so the column names are supplied explicitly.
train, validation, test = (
    pd.read_csv(path, sep=';', names=['Text', 'Emotion'])
    for path in (
        'data/emotion_dataset/train.txt',
        'data/emotion_dataset/val.txt',
        'data/emotion_dataset/test.txt',
    )
)

In [6]:
# Clean and lemmatise the raw text of every split with the project helper
# `filter_and_extract_lemma` (imported from the local `preprocessing` module;
# its behaviour is not visible in this notebook).
#
# The previous calls passed a stray positional `True` to `Series.apply`. That
# slot is pandas' `convert_dtype` parameter (already True by default, and
# deprecated since pandas 2.1), so it never reached the helper. It is dropped
# here, which keeps the result identical and avoids the deprecation warning.
# NOTE(review): if the flag was meant for `filter_and_extract_lemma`, pass it
# explicitly with `args=(True,)` instead - confirm against the helper.
#
# Caution: this overwrites the 'Text' column in place, so re-running the cell
# applies the helper to already-processed text.
train['Text'] = train['Text'].apply(filter_and_extract_lemma)
validation['Text'] = validation['Text'].apply(filter_and_extract_lemma)
test['Text'] = test['Text'].apply(filter_and_extract_lemma)

In [7]:
# Build the TF-IDF vectoriser and fit it on the training text only.
# The transformed matrix is left as the cell's last expression so its
# dimensions are displayed; the value itself is not stored.
tokenizer = TfidfVectorizer()
tokenizer.fit_transform(raw_documents=train['Text'])

<16000x11830 sparse matrix of type '<class 'numpy.float64'>'
	with 123796 stored elements in Compressed Sparse Row format>

In [42]:
# Project every split onto the TF-IDF features of the already-fitted
# vectoriser (transform only - nothing is re-fitted here).
x_train, x_val, x_test = (
    tokenizer.transform(split['Text']) for split in (train, validation, test)
)

In [43]:
# Convert the emotion labels of each split with the project helper
# `enumerate_encode`.
# NOTE(review): the helper is invoked on each split independently; this
# presumes it maps a given emotion to the same code regardless of the data
# it is handed - verify against the `preprocessing` module.
y_train, y_val, y_test = map(
    enumerate_encode,
    (train['Emotion'], validation['Emotion'], test['Emotion']),
)

## Traditional Machine Learning methods

### K-Nearest Neighbours Classifier model

In [10]:
# k-nearest-neighbours baseline with the library's default settings:
# fit on the training features, then score on the held-out test split.
knn = KNeighborsClassifier().fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Per-class precision / recall / F1 for the test predictions.
report = classification_report(y_true=y_test, y_pred=y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.73      0.75      0.74       275
           1       0.78      0.74      0.76       224
           2       0.78      0.86      0.82       695
           3       0.70      0.54      0.61       159
           4       0.83      0.82      0.82       581
           5       0.69      0.41      0.51        66

    accuracy                           0.78      2000
   macro avg       0.75      0.69      0.71      2000
weighted avg       0.78      0.78      0.78      2000



### Random Forest Classifier model

In [11]:
# Random-forest baseline: fit on the training features, then score on the
# held-out test split.
#
# The forest is built with randomness (bootstrap samples, feature sub-sampling),
# so an unseeded estimator gives slightly different metrics on every run.
# Fixing `random_state` makes the report reproducible under Restart & Run All;
# expect the numbers to differ marginally from earlier unseeded runs.
rf = RandomForestClassifier(random_state=42)
rf.fit(x_train, y_train)
y_predict = rf.predict(x_test)

# Per-class precision / recall / F1 for the test predictions.
report = classification_report(y_test, y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.89      0.83      0.86       275
           1       0.83      0.86      0.85       224
           2       0.84      0.92      0.88       695
           3       0.77      0.64      0.70       159
           4       0.92      0.89      0.91       581
           5       0.64      0.58      0.61        66

    accuracy                           0.86      2000
   macro avg       0.82      0.79      0.80      2000
weighted avg       0.86      0.86      0.86      2000



### Support Vector Machine Classifier

In [12]:
# Support-vector classifier with the library's default settings:
# fit on the training features, then score on the held-out test split.
svc = SVC().fit(x_train, y_train)
y_predict = svc.predict(x_test)

# Per-class precision / recall / F1 for the test predictions.
report = classification_report(y_true=y_test, y_pred=y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.88      0.78      0.83       275
           1       0.88      0.79      0.83       224
           2       0.81      0.95      0.88       695
           3       0.82      0.53      0.64       159
           4       0.88      0.90      0.89       581
           5       0.81      0.52      0.63        66

    accuracy                           0.85      2000
   macro avg       0.85      0.74      0.78      2000
weighted avg       0.85      0.85      0.84      2000

