<a href="https://colab.research.google.com/github/CHINTALAVIVEK1/natural-language-processing/blob/main/nlp_usecase_emotion_detection_for_texts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Emotion Detection using Logistic Regression

import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1️⃣ Load Dataset
# You can download an emotion dataset from Kaggle: "Emotion Dataset for NLP"
# The file is expected to hold one sample per line in the form: text;emotion
# Column names are supplied explicitly through `names`, so every line of the
# file is read as a data row.
# NOTE(review): the path is an absolute Colab path to a .zip archive;
# presumably pandas infers the compression from the file extension -- confirm
# this still holds if the file is moved or renamed.
df = pd.read_csv("/content/train.txt.zip", sep=';', names=['text', 'emotion'])

# Preview the first rows as a quick sanity check that the columns parsed correctly.
print(df.head())

# 2️⃣ Text Cleaning
def clean_text(text):
    """Normalize a raw text sample for TF-IDF vectorization.

    Steps, in order: lowercase, drop URLs, drop @mentions and '#' signs,
    drop every character that is not an ASCII letter or whitespace, then
    trim leading/trailing whitespace.

    Args:
        text: The raw sample. Non-string values (e.g. NaN, None, numbers)
            are converted with ``str()`` first, so this never raises on
            missing data.

    Returns:
        The cleaned, lowercase string. Interior whitespace is left as-is,
        so removed tokens may leave consecutive spaces behind.
    """
    # Ensure the input is a string before applying lower()
    text = str(text).lower()
    # 'http\S+' already covers 'https...' links; 'www\S+' covers bare hosts.
    text = re.sub(r'http\S+|www\S+', '', text)
    # Remove whole @mentions (the previous pattern '\@w+' only matched '@'
    # followed by literal 'w' characters) and strip '#' from hashtags while
    # keeping the hashtag word itself.
    text = re.sub(r'@\w+|#', '', text)
    # Must run after the mention removal: it deletes '@', which would
    # otherwise leave the handle's letters behind as a spurious word.
    text = re.sub(r'[^A-Za-z\s]', '', text)
    return text.strip()

# Add a normalized copy of every sample; the raw 'text' column is kept as-is.
df['clean_text'] = df['text'].apply(clean_text)

# 3️⃣ Split Data
# Features are the cleaned strings, labels are the emotion names.
X = df['clean_text']
y = df['emotion']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4️⃣ TF-IDF Vectorization
# The vocabulary is capped at 5000 terms. The vectorizer is fitted on the
# training split only and then reused to transform the test split, so no
# information from the held-out rows reaches the fitted vocabulary/weights.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# 5️⃣ Train Model
# max_iter is set explicitly to 200 iterations for the solver.
model = LogisticRegression(max_iter=200)
model.fit(X_train_tfidf, y_train)

# 6️⃣ Evaluate
# Predict on the held-out split and print overall accuracy, per-class
# precision/recall/F1, and the confusion matrix.
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# 7️⃣ Test with Custom Sentences
# Hand-written sentences used as a quick qualitative check of the model.
test_sentences = [
    "I am so happy today!",
    "I hate when things go wrong.",
    "I'm feeling really sad right now.",
    "That was an amazing experience!",
    "I'm scared of losing everything.",
]

# Apply the same cleaning that was applied to the training text before
# vectorizing. Without it, contractions such as "I'm" are split on the
# apostrophe by the vectorizer's tokenizer instead of becoming "im", the
# form the model saw during training.
cleaned_sentences = [clean_text(sentence) for sentence in test_sentences]
test_vec = vectorizer.transform(cleaned_sentences)
preds = model.predict(test_vec)

# Report against the original (uncleaned) sentence for readability.
for text, emotion in zip(test_sentences, preds):
    print(f"Text: {text}  --->  Emotion: {emotion}")

                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger
Accuracy: 0.85

Classification Report:
               precision    recall  f1-score   support

       anger       0.90      0.79      0.84       427
        fear       0.85      0.73      0.78       397
         joy       0.80      0.96      0.88      1021
        love       0.89      0.60      0.71       296
     sadness       0.88      0.93      0.91       946
    surprise       0.88      0.44      0.59       113

    accuracy                           0.85      3200
   macro avg       0.87      0.74      0.78      3200
weighted avg       0.85      0.85      0.84      3200


Confusion Matrix:
 [[338  12  38   0  39   0