###Imports

In [4]:
!pip install neattext #The "neattext" library is not a built-in library in Python, so you need to install it in colab first before importing its functions.

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting neattext
  Downloading neattext-0.1.3-py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.7/114.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neattext
Successfully installed neattext-0.1.3


In [5]:
# data EDA pkgs
import pandas as pd
import numpy as np
# data viz pkg
import seaborn as sns
# text cleaning pkg
import neattext.functions as nfx

In [13]:
# ML pkgs
# Estimators
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

# Transformers
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [61]:
df=pd.read_csv("emotion_data.csv")

In [62]:
df.head()

Unnamed: 0,Emotion,Text
0,neutral,Why ?
1,joy,Sage Act upgrade on my to do list for tommorow.
2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...
3,joy,Such an eye ! The true hazel eye-and so brill...
4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...


In [63]:
df.describe()

Unnamed: 0,Emotion,Text
count,34792,34792
unique,8,31110
top,joy,Yes .
freq,11045,76


In [64]:
df.Emotion.value_counts()

joy         11045
sadness      6722
fear         5410
anger        4297
surprise     4062
neutral      2254
disgust       856
shame         146
Name: Emotion, dtype: int64

## Data cleaning

In [65]:
# User handles
df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)


In [72]:
# Remove hashtags
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_hashtags)

# Remove stop words
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_stopwords)

# Remove punctuations
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_punctuations)

# Remove numbers
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_numbers)



In [73]:
df.head()

Unnamed: 0,Emotion,Text,Clean_Text
0,neutral,Why ?,
1,joy,Sage Act upgrade on my to do list for tommorow.,Sage Act upgrade list tommorow
2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...,WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...
3,joy,Such an eye ! The true hazel eye-and so brill...,eye true hazel eyeand brilliant Regular featur...
4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...,ugh babe hugggzzz u babe naamazed nga ako e ba...


In [68]:
X = df['Clean_Text'] 
y = df['Emotion'] #target
#  Split Data
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

In [69]:
# Build Pipeline model using Logistic Regession
from sklearn.pipeline import Pipeline
emo_model = Pipeline(steps=[('cv',CountVectorizer()),('lr',LogisticRegression(max_iter=300))]) # default max iterations= 100


In [70]:
# Train and Fit Data
emo_model.fit(x_train,y_train)

In [71]:
emo_model.score(x_test,y_test)

0.6169764322667177

In [103]:
test_string="I feel lonely."

In [104]:
emo_model.predict([test_string]) 

array(['sadness'], dtype=object)

In [105]:
emo_model.classes_

array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'shame',
       'surprise'], dtype=object)

In [106]:
import joblib
pipeline_file = open("emotion_analyzer.pkl","wb")
joblib.dump(emo_model,pipeline_file)
pipeline_file.close()