In [1]:

import pandas as pd
import string
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib


nltk.download('stopwords')
from nltk.corpus import stopwords

df = pd.read_csv('emotions.csv')
print(df.head())


stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = ''.join([c for c in text if c not in string.punctuation])
    words = text.split()
    words = [w for w in words if w not in stop_words]
    return ' '.join(words)

df['clean_text'] = df['Text'].apply(clean_text)


tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['clean_text'])
y = df['Emotion']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


joblib.dump(model, 'emotion_model.pkl')
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


                                                Text  Emotion
0  i consulted my aunt a doctor partially because...     fear
1           i feel so strange with english right now     fear
2                     i feel ecstatic and privileged      joy
3  i cant find it in my heart to feel the least b...  sadness
4  i waited in line longer than usual i didnt fee...    anger
Accuracy: 0.86375


['tfidf_vectorizer.pkl']