In [1]:
!pip install kaggle
!pip install -U scikit-learn
!pip install emoji

import os
from google.colab import files
import pandas as pd
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

files.upload()
os.environ['KAGGLE_CONFIG_DIR'] = "/root/.kaggle"
!kaggle datasets download -d bhavikjikadara/emotions-dataset
!unzip emotions-dataset.zip

df = pd.read_csv('emotions.csv')

# Deletion table built once at definition time: every ASCII punctuation mark
# plus the typographic single/double quotes, en/em dashes and ellipsis that
# phones and word processors substitute for their ASCII counterparts.
_PUNCTUATION_TABLE = str.maketrans(
    '', '', string.punctuation + '\u2018\u2019\u201c\u201d\u2013\u2014\u2026'
)


def clean_text(text):
    """Lower-case ``text`` and delete all punctuation characters.

    Besides ``string.punctuation`` this also removes curly quotes, en/em
    dashes and the ellipsis character, so that text typed with a curly
    apostrophe ("don\u2019t") is normalized exactly like its ASCII spelling
    ("don't") — both become "dont".

    Args:
        text: The string to normalize.

    Returns:
        The lower-cased string with punctuation removed; whitespace and all
        other characters are left untouched.

    Raises:
        AttributeError: If ``text`` is not a string (e.g. a NaN cell).
    """
    # A single C-level translate pass replaces the per-character Python loop.
    return text.lower().translate(_PUNCTUATION_TABLE)

# Normalize every message with clean_text, then discard the rows labelled 2.
# NOTE(review): label 2 has no entry in emotion_to_emoji below — presumably
# that is why it is excluded; confirm which emotion it encodes.
df = df.assign(text=df['text'].map(clean_text))
df = df.loc[df['label'] != 2]

# Features are the cleaned messages, targets are the integer emotion labels.
X, y = df['text'], df['label']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts: the vocabulary is learned from the training split only
# and then reused unchanged for the held-out split.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Logistic regression with the iteration cap raised to 1000.
classifier = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)

# Report plain accuracy on the held-out split.
y_pred = classifier.predict(X_test_vec)
print(
    "Accuracy:",
    accuracy_score(y_test, y_pred),
)

# Emoji shown for each predicted label. Label 2 is absent because those rows
# were removed above.
# NOTE(review): the sample sentences elsewhere in this notebook suggest
# 0=sadness, 1=joy, 3=anger, 4=fear, 5=surprise — confirm against the dataset.
emotion_to_emoji = {0: '😢', 1: '😊', 3: '😡', 4: '😱', 5: '😲'}

def classify_and_recommend_emoji(text):
    """Predict the emotion label of ``text`` and return its emoji.

    The text is normalized with ``clean_text``, vectorized with the
    module-level ``vectorizer`` and classified with the module-level
    ``classifier``; both are looked up at call time.

    Args:
        text: Raw input sentence.

    Returns:
        The emoji mapped to the predicted label in ``emotion_to_emoji``, or
        the string "No emoji found" when the label has no mapping.
    """
    features = vectorizer.transform([clean_text(text)])
    predicted_label = classifier.predict(features)[0]
    if predicted_label in emotion_to_emoji:
        return emotion_to_emoji[predicted_label]
    return "No emoji found"


Collecting emoji
  Downloading emoji-2.14.0-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.0-py3-none-any.whl (586 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m586.9/586.9 kB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.0


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/bhavikjikadara/emotions-dataset
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading emotions-dataset.zip to /content
 90% 13.0M/14.5M [00:01<00:00, 14.4MB/s]
100% 14.5M/14.5M [00:01<00:00, 8.65MB/s]
Archive:  emotions-dataset.zip
  inflating: emotions.csv            
Accuracy: 0.9196478790336294


In [2]:

# Hand-written smoke-test sentences, each paired with the emotion a human
# reader would expect. The second tuple element is ground truth written by the
# author, not something the model produced.
test_sentences = [
    ("That’s amazing! I’m so happy for you! You must be feeling on top of the world right now.", "Joy"),
    ("I’m really sorry to hear that. It’s tough when things don’t go the way you expect.", "Sadness"),
    ("I can understand why you're so angry. It’s so frustrating when people don’t listen, especially after you’ve explained everything.", "Anger"),
    ("That sounds terrifying! I’d be scared too if I were watching that.", "Fear"),
    ("Wow, this is truly amazing!, I didn't expect this at all!", "Surprise")
]

# Print the expected emotion next to the emoji the model recommends so the two
# can be compared by eye.
for sentence, expected in test_sentences:
    emoji = classify_and_recommend_emoji(sentence)
    print(f"Text: {sentence}")
    # `expected` is the hand-assigned label, so it must not be presented as a
    # model prediction.
    print(f"Expected Emotion: {expected}")
    print(f"Recommended Emoji: {emoji}\n")


Text: That’s amazing! I’m so happy for you! You must be feeling on top of the world right now.
Predicted Emotion: Joy
Recommended Emoji: 😊

Text: I’m really sorry to hear that. It’s tough when things don’t go the way you expect.
Predicted Emotion: Sadness
Recommended Emoji: 😢

Text: I can understand why you're so angry. It’s so frustrating when people don’t listen, especially after you’ve explained everything.
Predicted Emotion: Anger
Recommended Emoji: 😡

Text: That sounds terrifying! I’d be scared too if I were watching that.
Predicted Emotion: Fear
Recommended Emoji: 😱

Text: Wow, this is truly amazing!, I didn't expect this at all!
Predicted Emotion: Surprise
Recommended Emoji: 😲



In [3]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Score the held-out split again, this time with a fuller set of metrics.
y_pred = classifier.predict(X_test_vec)

# Overall fraction of correctly classified rows.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1, averaged with weights proportional to
# each class's support.
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Text table of precision, recall, F1 and support for every label.
class_report = classification_report(y_test, y_pred)
print("Classification Report:", class_report, sep="\n")


Accuracy: 0.9196478790336294
Precision: 0.9197259862234132
Recall: 0.9196478790336294
F1 Score: 0.9196808072058378
Confusion Matrix:
[[22598   363   587   486    66]
 [  367 27369   144   155   247]
 [  606   192 10438   367    22]
 [  442   186   402  7832   622]
 [   59   244    18   568  2071]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94     24100
           1       0.97      0.97      0.97     28282
           3       0.90      0.90      0.90     11625
           4       0.83      0.83      0.83      9484
           5       0.68      0.70      0.69      2960

    accuracy                           0.92     76451
   macro avg       0.86      0.87      0.86     76451
weighted avg       0.92      0.92      0.92     76451



In [4]:
import joblib

# Persist the fitted classifier to disk.
joblib.dump(value=classifier, filename='emotion_classifier.pkl')

# Persist the fitted vectorizer too: inference needs the same vocabulary the
# classifier was trained on.
joblib.dump(value=vectorizer, filename='vectorizer.pkl')


['vectorizer.pkl']

In [5]:
# Restore both fitted objects from disk. classify_and_recommend_emoji reads
# the module-level `vectorizer` and `classifier`, so rebinding them here makes
# it use the reloaded copies.
vectorizer = joblib.load('vectorizer.pkl')
classifier = joblib.load('emotion_classifier.pkl')

# Round-trip check: run one sentence through the reloaded pipeline.
sample_text = "I'm so excited for today!"
recommended_emoji = classify_and_recommend_emoji(
    sample_text
)
print(
    f"Recommended Emoji: {recommended_emoji}"
)

Recommended Emoji: 😊
