In [3]:
# Install dependencies
!pip install scikit-learn pandas joblib

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# Load dataset (replace with your file path / Kaggle dataset).
# The CSV must contain two columns named exactly 'Text' (model input) and
# 'Emotion' (target label); pandas column lookups are case-sensitive.
df = pd.read_csv("text_emotion_dataset.csv")

# Fail early with a readable message instead of a bare KeyError further down.
required_columns = ["Text", "Emotion"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Dataset is missing required column(s) {missing_columns}; "
        f"found columns: {list(df.columns)}"
    )

# TfidfVectorizer rejects NaN documents, so exclude incomplete rows from
# training. `df` itself is left untouched for any later inspection.
labelled = df.dropna(subset=required_columns)

# Split data: 80% train / 20% test, with a fixed seed for reproducibility.
# NOTE(review): the split is not stratified; if the label distribution is
# skewed, consider passing stratify=y so both splits keep the same class mix.
X = labelled['Text']
y = labelled['Emotion']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Vectorize text. The vocabulary is learned from the training split only and
# then re-used on the test split, so no test information leaks into training.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train model (max_iter raised above the library default to give the solver
# more iterations to converge on the TF-IDF features).
model = LogisticRegression(max_iter=300)
model.fit(X_train_tfidf, y_train)

# Evaluate on the held-out split: per-class precision / recall / F1.
y_pred = model.predict(X_test_tfidf)
print(classification_report(y_test, y_pred))

# Save model + vectorizer. Both files are needed at inference time: new text
# must be transformed with this exact vectorizer before calling the model.
# joblib files are pickles -- only load them from trusted sources.
joblib.dump(model, "text_emotion_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")
print("✅ Model and vectorizer saved!")

              precision    recall  f1-score   support

       anger       0.93      0.80      0.86       617
        fear       0.86      0.79      0.83       531
       happy       0.84      0.96      0.90      1381
        love       0.90      0.65      0.75       318
     sadness       0.88      0.94      0.91      1277
    surprise       0.88      0.54      0.67       168

    accuracy                           0.87      4292
   macro avg       0.88      0.78      0.82      4292
weighted avg       0.87      0.87      0.87      4292

✅ Model and vectorizer saved!
