In [2]:


# 📦 Step 1: Import Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# 📥 Step 2: Load Datasets
# Read the train / validation / test splits, in that order, from CSV files
# given by relative paths (resolved against the current working directory).
train_df, val_df, test_df = map(pd.read_csv, ('train.csv', 'valid.csv', 'test.csv'))

# 🛠️ Step 3: Prepare Data
# For every split, the 'text' column is the model input (X_*) and the
# 'emotion' column is the target label (y_*).
X_train, y_train = train_df['text'], train_df['emotion']
X_val, y_val = val_df['text'], val_df['emotion']
X_test, y_test = test_df['text'], test_df['emotion']

# ✨ Step 4: Vectorize Text Data
# The TF-IDF vocabulary (capped at 5000 features) is learned from the
# training texts only; validation and test texts are merely projected onto
# that already-fitted vocabulary.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_val_tfidf, X_test_tfidf = vectorizer.transform(X_val), vectorizer.transform(X_test)

# 🧠 Step 5: Train the Model
# `fit` returns the estimator itself, so construction and training can be
# chained while `model` still ends up bound to the fitted classifier.
model = LogisticRegression(
    max_iter=1000,
    class_weight='balanced',
).fit(X_train_tfidf, y_train)

# 📊 Step 6: Validate the Model
# Predict on the validation features, then report accuracy and the
# per-class classification report.
y_val_pred = model.predict(X_val_tfidf)
print("\nValidation Performance:")
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Accuracy: {val_accuracy}")
val_report = classification_report(y_val, y_val_pred)
print(val_report)

# Confusion Matrix for Validation Set
# Rows/columns follow the label order the fitted model exposes in
# `model.classes_`, and the same labels are used for the axis ticks.
val_cm = confusion_matrix(y_val, y_val_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=val_cm,
    display_labels=model.classes_,
)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - Validation Set')
plt.show()

# 🧪 Step 7: Test the Model
# Same reporting as for validation, applied to the test split: predictions,
# overall accuracy, then the per-class classification report.
y_test_pred = model.predict(X_test_tfidf)
print("\nTest Performance:")
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Accuracy: {test_accuracy}")
test_report = classification_report(y_test, y_test_pred)
print(test_report)

# 💾 Step 8: Save the Model and Vectorizer
# Persist the classifier first, then the fitted vectorizer, each to its own
# file given by a relative path.
for artifact, filename in (
    (model, 'emotion_model.pkl'),
    (vectorizer, 'tfidf_vectorizer.pkl'),
):
    joblib.dump(artifact, filename)
print("\nModel and vectorizer have been saved successfully!")

     

ModuleNotFoundError: No module named 'pandas'