In [3]:
import pandas as pd

In [4]:
# Read the raw labelled sentences from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='training.csv')

In [6]:
# Peek at the first five rows to confirm the columns that were loaded.
df.head(n=5)

Unnamed: 0,text,label
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3


In [7]:
# Integer class ids to keep, with the readable name each one is renamed to
# (0 = sadness, 1 = joy, 3 = anger). Rows with any other label are discarded.
label_mapping = {0: 'sad', 1: 'happy', 3: 'angry'}

# Build the reduced frame in one chain: select the wanted rows, derive the
# string 'emotion' column from the numeric label, then drop the numeric label.
filtered_df = (
    df.loc[df['label'].isin(label_mapping.keys())]
    .assign(emotion=lambda frame: frame['label'].map(label_mapping))
    .drop(columns=['label'])
)

# Save the reduced dataset without writing the pandas index as a column.
filtered_df.to_csv("filtered_emotion_dataset.csv", index=False)
print("Filtered dataset saved as 'filtered_emotion_dataset.csv'")


Filtered dataset saved as 'filtered_emotion_dataset.csv'


In [13]:
import neattext.functions as nfx
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [21]:
def clean_text(text):
    """Normalize one piece of text before vectorization.

    The text is passed through the neattext helpers below, in this order,
    and the result is then lower-cased and stripped of surrounding whitespace.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased, whitespace-trimmed string.
    """
    pipeline = (
        nfx.remove_userhandles,         # @mentions
        nfx.remove_hashtags,            # hashtags
        nfx.remove_urls,                # URLs
        nfx.remove_special_characters,
        nfx.remove_punctuations,
        nfx.remove_stopwords,
    )
    for scrub in pipeline:
        text = scrub(text)
    return text.lower().strip()

# Run every sentence through clean_text; the column is cast to str first so
# the cleaner always receives a string.
filtered_df['text'] = filtered_df['text'].astype(str).map(clean_text)

In [24]:
# Preview the first five rows after cleaning.
filtered_df.head(n=5)

Unnamed: 0,text,emotion
0,didnt feel humiliated,sad
1,feeling hopeless damned hopeful cares awake,sad
2,im grabbing minute post feel greedy wrong,angry
4,feeling grouchy,angry
5,ive feeling little burdened lately wasnt sure,sad


In [25]:
# Features are the cleaned sentences; targets are the mapped emotion names.
X, y = filtered_df['text'], filtered_df['emotion']

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions aligned between the two partitions, and the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [28]:
## Vectorize using TF-IDF

In [27]:
# Learn the vocabulary and IDF weights from the training split only, then
# project the test split onto that same vocabulary (no refit on test data).
vectorizer = TfidfVectorizer()
X_train_vectorized, X_test_vectorized = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

In [30]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

In [32]:
# The preceding vectorization cell already fitted a default TfidfVectorizer on
# X_train and produced X_train_vectorized / X_test_vectorized. Fitting a second,
# identically configured vectorizer on the same data only repeats that work, so
# the existing matrices are reused under the names the model cells expect.
# `vectorizer` is intentionally not reassigned: it remains the fitted instance
# from that cell.
X_train_vec = X_train_vectorized
X_test_vec = X_test_vectorized

In [34]:
## Naive Bayes

In [33]:
# Fit a multinomial Naive Bayes classifier on the TF-IDF training matrix
# (fit() returns the estimator itself), then predict labels for the test rows.
model = MultinomialNB().fit(X_train_vec, y_train)
y_pred = model.predict(X_test_vec)

In [35]:
# Per-class precision/recall/F1 first, then the single overall accuracy figure.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)
overall_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", overall_accuracy)

Classification Report:
               precision    recall  f1-score   support

       angry       0.99      0.37      0.53       432
       happy       0.85      0.97      0.90      1073
         sad       0.82      0.93      0.87       933

    accuracy                           0.85      2438
   macro avg       0.89      0.75      0.77      2438
weighted avg       0.86      0.85      0.83      2438

Accuracy: 0.8461853978671042


In [36]:
import joblib

# Persist the classifier together with the fitted vectorizer: new text has to
# be transformed with this same vocabulary before it can be given to the model.
joblib.dump(value=model, filename='emotion_model.pkl')
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']