# **Emotion Detection: A Text Classification Model**

In [82]:
import numpy as np
import joblib
import pandas as pd
# Pipeline is a class that lives in sklearn.pipeline. A bare `import Pipeline`
# looks for a top-level module of that name and cannot supply the callable
# that the pipeline section of this notebook instantiates.
from sklearn.pipeline import Pipeline

# Load the tweet/emotion CSV; the path is relative to the working directory.
data = pd.read_csv("./archive/tweet_emotions.csv")
data

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...
...,...,...,...
39995,1753918954,neutral,@JohnLloydTaylor
39996,1753919001,love,Happy Mothers Day All my love
39997,1753919005,love,Happy Mother's Day to all the mommies out ther...
39998,1753919043,happiness,@niariley WASSUP BEAUTIFUL!!! FOLLOW ME!! PEE...


In [83]:
# Flag rows that repeat an earlier row, then count the flags.
duplicate_rows = data.duplicated()
duplicate_rows.sum()

np.int64(0)

In [84]:
# Per-column count of missing values.
missing_mask = data.isnull()
missing_mask.sum()

tweet_id     0
sentiment    0
content      0
dtype: int64

In [85]:
# Model input is the tweet text column; the target is the sentiment label column.
x, y = data["content"], data["sentiment"]

In [86]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Convert every tweet into a sparse TF-IDF row (CountVectorizer stays imported
# as the raw-count alternative).
# NOTE(review): the vectorizer is fitted on the full corpus here, before the
# train/test split further down, so held-out tweets contribute to the
# vocabulary and IDF weights. Consider fitting on the training portion only.
vect = TfidfVectorizer()
x_vec = vect.fit_transform(x)


In [87]:
# Display the sparse matrix summary (dtype, stored elements, shape) of the vectorized tweets.
x_vec

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 475946 stored elements and shape (40000, 48212)>

In [88]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# No test_size is given, so the library's default proportions apply; the seed
# is fixed so the partition is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x_vec,
    y,
    random_state=42,
)

## Naive Bayes Multinomial Model

In [89]:
from sklearn.naive_bayes import MultinomialNB

# Baseline: multinomial Naive Bayes fitted on the TF-IDF training rows.
nb_classifier = MultinomialNB().fit(X_train, y_train)

# Predict the held-out rows and measure the share of exact label matches.
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Naive Bayes Accuracy : {accuracy}')

Naive Bayes Accuracy : 0.282


In [90]:
# Inspect the Naive Bayes predictions for the held-out split.
y_pred  

array(['neutral', 'worry', 'worry', ..., 'worry', 'worry', 'worry'],
      shape=(10000,), dtype='<U10')

In [91]:
# Ground-truth labels of the held-out split, for side-by-side comparison with the predictions.
y_test

32823      neutral
16298        empty
28505         love
6689       neutral
26893      sadness
           ...    
29415    happiness
11359      neutral
575          worry
17398      sadness
4189       neutral
Name: sentiment, Length: 10000, dtype: object

## Using Logistic Regression

In [92]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the same TF-IDF split; max_iter bounds the solver's
# iterations and random_state pins any solver randomness.
log_reg = LogisticRegression(
    max_iter=1000,
    random_state=42,
)
log_reg.fit(X=X_train, y=y_train)


In [93]:
# Predict labels for the held-out split with the fitted logistic regression.
log_reg_pred = log_reg.predict(X_test)

In [94]:

# Share of held-out tweets whose predicted label equals the true label.
log_reg_accuracy = accuracy_score(y_true=y_test, y_pred=log_reg_pred)
print(f'Logistic regression Accuracy : {log_reg_accuracy}')

Logistic regression Accuracy : 0.3513


In [96]:
from sklearn.ensemble import RandomForestClassifier

In [97]:
# Random forest trained on the same TF-IDF train/test split as the models above.
rf_clf = RandomForestClassifier(n_estimators=400, max_depth=30, random_state=42)

# Train the model
rf_clf.fit(X_train, y_train)

# Evaluate the model
y_pred = rf_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# Save the model locally
# Persist the classifier fitted in this cell. `pipeline` is only created in a
# later cell, so dumping it here raises NameError on a fresh kernel and, with
# leftover kernel state, writes the logistic-regression pipeline under the
# random-forest file name.
# NOTE: rf_clf was trained on TF-IDF features, so raw text must be transformed
# with the fitted `vect` before calling predict on the reloaded model.
joblib.dump(rf_clf, "model_rf.pkl")
print("Model saved successfully!")


Model Accuracy: 0.2892
Model saved successfully!


## Creating pipeline for easy integration with Streamlit

In [102]:
# Split the raw tweet text (x) instead of x_vec, because the pipeline built
# below does its own vectorizing. Same seed as the earlier split. This rebinds
# X_train / X_test / y_train / y_test.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
)

In [103]:
# Bundle the vectorizer and the classifier into a single estimator so raw text
# can be handed straight to fit/predict.
pipeline_steps = [
    ('tfidf', TfidfVectorizer(max_features=3000)),
    ('clf', LogisticRegression(max_iter=1000, random_state=42)),
]
pipeline = Pipeline(steps=pipeline_steps)

In [104]:
# Fit the vectorizer and classifier together on the training text.
pipeline.fit(X_train, y_train)

In [106]:
# Held-out accuracy of the text pipeline, printed to four decimal places.
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

Model Accuracy: 0.3509


In [107]:
# Serialize the fitted text pipeline to model.pkl in the working directory
# (the section heading says this artifact is intended for the Streamlit app).
joblib.dump(pipeline, "model.pkl")

['model.pkl']