In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
import joblib

In [4]:
df = pd.read_csv('emotion_dataset.csv')

# Display the first few rows of the dataset
print(df.head())

   Emotion                                               Text  \
0  neutral                                             Why ?    
1      joy    Sage Act upgrade on my to do list for tommorow.   
2  sadness  ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...   
3      joy   Such an eye ! The true hazel eye-and so brill...   
4      joy  @Iluvmiasantos ugh babe.. hugggzzz for u .!  b...   

                                          Clean_Text  
0                                                NaN  
1                     Sage Act upgrade list tommorow  
2  WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...  
3  eye  true hazel eyeand brilliant  Regular feat...  
4    ugh babe hugggzzz u  babe naamazed nga ako e...  


In [6]:
# Preprocess Data

stop_words = set(stopwords.words('english'))

def preprocess_text(Text):
    # Tokenize the text
    tokens = word_tokenize(Text)
    # Convert to lower case
    tokens = [word.lower() for word in tokens]
    # Remove punctuation and stop words
    tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    return ' '.join(tokens)

df['processed_text'] = df['Text'].apply(preprocess_text)

In [9]:
# Convert Labels to Numbers
label_encoder = LabelEncoder()
df['emotion_label'] = label_encoder.fit_transform(df['Emotion'])

# Step 5: Split Features and Labels
X = df['processed_text']
y = df['emotion_label']

In [10]:
# Feature Extraction
vectorizer = TfidfVectorizer()
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

In [12]:
# Split Dataset
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)


In [13]:
# Train Model
model = MultinomialNB()
model.fit(X_train, y_train)


In [14]:
# Save Model and Vectorizer
joblib.dump(model, 'emotion_classifier_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [15]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.5068256933467452
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.25      0.40       836
           1       0.00      0.00      0.00       202
           2       0.88      0.42      0.57      1104
           3       0.42      0.97      0.58      2214
           4       0.95      0.08      0.14       481
           5       0.64      0.40      0.49      1327
           6       0.00      0.00      0.00        23
           7       0.80      0.17      0.28       772

    accuracy                           0.51      6959
   macro avg       0.57      0.29      0.31      6959
weighted avg       0.66      0.51      0.46      6959



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
# Predict Emotions
def predict_emotion(text):
    # Load the saved model and vectorizer
    loaded_model = joblib.load('emotion_classifier_model.pkl')
    loaded_vectorizer = joblib.load('tfidf_vectorizer.pkl')
    loaded_label_encoder = joblib.load('label_encoder.pkl')
    
    processed_text = preprocess_text(text)
    vectorized_text = loaded_vectorizer.transform([processed_text])
    prediction = loaded_model.predict(vectorized_text)
    emotion = loaded_label_encoder.inverse_transform(prediction)
    return emotion[0]

# Test the prediction function
print(predict_emotion("I am so happy today!"))
print(predict_emotion("This is a terrible day."))

joy
sadness
