# Emotions Text Classifier project

### Project Prerequisites

In [2]:
import pandas as pd
import numpy as np
import spacy
import joblib
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from matplotlib import pyplot as plt
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTE
from transformers import RobertaTokenizerFast, TFRobertaForSequenceClassification, pipeline

  from .autonotebook import tqdm as notebook_tqdm
2023-02-15 16:01:33.080728: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-15 16:01:33.378472: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-02-15 16:01:33.384537: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-15 16:01:33.384564: I tensorflow/stream_executor/cuda/cu

In [3]:
plt.style.use("seaborn-darkgrid")

  plt.style.use("seaborn-darkgrid")


### Preparing the Data

In [4]:
processed_text_df = pd.read_csv("processed_text.csv")
processed_emotions_df = pd.read_csv("processed_emotions.csv")

In [5]:
DeepnoteChart(processed_emotions_df, """{"layer":[{"layer":[{"mark":{"clip":true,"type":"bar","tooltip":true},"encoding":{"x":{"axis":{"title":"Emotions"},"sort":null,"type":"nominal","field":"emotion","scale":{"type":"linear"}},"y":{"axis":{"title":"Number of Values"},"sort":null,"type":"quantitative","scale":{"type":"linear"},"aggregate":"count"},"color":{"sort":null,"type":"quantitative","scale":{"scheme":"purples"},"aggregate":"count"}}},{"mark":{"dx":0,"dy":-1,"fill":"black","type":"text","align":"center","baseline":"bottom"},"encoding":{"x":{"axis":{"title":"Emotions"},"sort":null,"type":"nominal","field":"emotion","scale":{"type":"linear"}},"y":{"axis":{"title":"Number of Values"},"sort":null,"type":"quantitative","scale":{"type":"linear"},"aggregate":"count"},"text":{"sort":null,"type":"quantitative","aggregate":"count"},"color":{"sort":null,"type":"quantitative","scale":{"scheme":"purples"},"aggregate":"count"}}}]}],"title":"Amount of Values per emotion before handling class imbalance","config":{"legend":{}},"$schema":"https://vega.github.io/schema/vega-lite/v5.json","encoding":{}}""")

<__main__.DeepnoteChart at 0x7fe77f195e50>

#### Checking for NaN's

In [6]:
print(f"TEXT NAN VALUES: \n{processed_text_df.isnull().sum()}")
print("-----------------------")
print(f"EMOTIONS NAN VALUES: \n{processed_emotions_df.isnull().sum()}")

TEXT NAN VALUES: 
processed_content    0
dtype: int64
-----------------------
EMOTIONS NAN VALUES: 
emotion    0
dtype: int64


### Handling class imbalance

In [7]:
processed_emotions_df.emotion.shape

(40000,)

In [8]:
processed_text_df.processed_content.shape

(40000,)

In [9]:
vectorizer = CountVectorizer()
y = processed_emotions_df.emotion
X = vectorizer.fit_transform(processed_text_df.processed_content)

In [10]:
smote = SMOTE(sampling_strategy="all")
X_sm,y_sm = smote.fit_resample(X,y)

In [11]:
smote_y = pd.DataFrame(y_sm)
smote_x = pd.DataFrame(X_sm)

In [12]:
print(f"TEXT NAN VALUES: \n{smote_x.isnull().sum()}")
print("-----------------------")
print(f"EMOTIONS NAN VALUES: \n{smote_y.isnull().sum()}")

TEXT NAN VALUES: 
0    0
dtype: int64
-----------------------
EMOTIONS NAN VALUES: 
emotion    0
dtype: int64


In [13]:
DeepnoteChart(smote_y, """{"layer":[{"layer":[{"mark":{"clip":true,"type":"bar","tooltip":true},"encoding":{"x":{"axis":{"title":"Emotion"},"sort":null,"type":"nominal","field":"emotion","scale":{"type":"linear"}},"y":{"axis":{"title":"Number of Values"},"sort":null,"type":"quantitative","scale":{"type":"linear"},"aggregate":"count"},"color":{"sort":null,"type":"quantitative","scale":{"scheme":"purples"},"aggregate":"count"}}},{"mark":{"dx":0,"dy":-1,"fill":"black","type":"text","align":"center","baseline":"bottom"},"encoding":{"x":{"axis":{"title":"Emotion"},"sort":null,"type":"nominal","field":"emotion","scale":{"type":"linear"}},"y":{"axis":{"title":"Number of Values"},"sort":null,"type":"quantitative","scale":{"type":"linear"},"aggregate":"count"},"text":{"sort":null,"type":"quantitative","aggregate":"count"},"color":{"sort":null,"type":"quantitative","scale":{"scheme":"purples"},"aggregate":"count"}}}]}],"title":"Amount of Values per emotion after handling class imbalance","config":{"legend":{}},"$schema":"https://vega.github.io/schema/vega-lite/v5.json","encoding":{}}""")

<__main__.DeepnoteChart at 0x7fe6cab4c910>

### Model Validation

#### Multinomial Model Pipeline

In [15]:
clf_multinomial_nb = MultinomialNB()
clf_multinomial_nb.fit(X_sm,y_sm)

In [16]:
joblib.dump(clf_multinomial_nb,"multinomial-model.joblib")

['multinomial-model.joblib']

#### SVM Model Pipeline

In [None]:
clf_svm = svm.SVC()
clf_svm.fit(X_sm,y_sm)

In [None]:
joblib.dump(clf_svm,"svm-model.joblib")

['svm-model.joblib']

#### KNN Model Pipeline

In [None]:
clf_knn = KNeighborsClassifier()
clf_knn.fit(X_sm,y_sm)

In [None]:
joblib.dump(clf_knn,"knn-model.joblib")

['knn-model.joblib']

#### GradientBoosting Model Pipeline

In [27]:
clf_gradient = GradientBoostingClassifier()
clf_gradient.fit(X_sm,y_sm)

In [None]:
joblib.dump(clf_gradient,"gradient-model.joblib")

['gradient-model.joblib']

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=94f647e5-3a0b-4004-b525-ea5a35e99fe1' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>