In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Read the three labeled splits (train / validation / test) from CSV,
# in that order, into one DataFrame each.
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "training_labeled.csv",
        "validation_labeled.csv",
        "test_labeled.csv",
    )
)

# Pull the 'text' column (model input) and the 'label' column (target)
# out of each split's DataFrame.
X_train, y_train = train_df['text'], train_df['label']
X_val, y_val = val_df['text'], val_df['label']
X_test, y_test = test_df['text'], test_df['label']

# Turn raw text into TF-IDF features over unigrams and bigrams, with
# max_features=5000. The vectorizer is fitted on the training text only;
# validation and test text are transformed with that already-fitted
# vectorizer so no information from them reaches the fit.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec, X_test_vec = (
    vectorizer.transform(split_text) for split_text in (X_val, X_test)
)

# Fit a logistic-regression classifier on the training TF-IDF features.
# max_iter=1000 is passed to the estimator; fit() returns the estimator
# itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)

def _report_split(split_name, y_true, y_pred):
    """Print the accuracy and per-class classification report for one split.

    Parameters
    ----------
    split_name : str
        Label used in the printed headings (e.g. "Validation", "Test").
    y_true : array-like
        Ground-truth labels for the split.
    y_pred : array-like
        Labels predicted by the model for the same rows.
    """
    print(f"{split_name} Accuracy:", accuracy_score(y_true, y_pred))
    # Build the heading and the report as ONE string. Passing the report as
    # a second print() argument makes print() insert its separator space
    # right after the "\n", which pushes the report's header row one column
    # to the right of the data rows beneath it.
    print(f"{split_name} Report:\n{classification_report(y_true, y_pred)}")


# Evaluate on validation set
val_preds = model.predict(X_val_vec)
_report_split("Validation", y_val, val_preds)

# Evaluate on test set
test_preds = model.predict(X_test_vec)
_report_split("Test", y_test, test_preds)

# Persist the fitted classifier and the fitted vectorizer to separate
# files with joblib, then confirm on stdout.
for artifact, out_path in (
    (model, "emotion_model.pkl"),
    (vectorizer, "tfidf_vectorizer.pkl"),
):
    joblib.dump(artifact, out_path)
print("Model and vectorizer saved successfully.")


Validation Accuracy: 0.853
Validation Report:
               precision    recall  f1-score   support

       anger       0.94      0.79      0.85       275
        fear       0.88      0.73      0.79       212
         joy       0.83      0.95      0.89       704
        love       0.88      0.60      0.71       178
     sadness       0.84      0.94      0.89       550
    surprise       0.88      0.52      0.65        81

    accuracy                           0.85      2000
   macro avg       0.87      0.75      0.80      2000
weighted avg       0.86      0.85      0.85      2000

Test Accuracy: 0.8475
Test Report:
               precision    recall  f1-score   support

       anger       0.89      0.77      0.83       275
        fear       0.86      0.74      0.80       224
         joy       0.82      0.96      0.88       695
        love       0.85      0.56      0.67       159
     sadness       0.87      0.92      0.89       581
    surprise       0.82      0.42      0.56      