In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# 1. Load the dataset CSV (relative path, resolved against the working directory)
DATASET_PATH = "synthetic_emotion_dataset.csv"
df = pd.read_csv(DATASET_PATH)

In [3]:
# 2. Preprocess text and labels
# Drop rows with a missing text or label first: `.astype(str)` would otherwise
# turn NaN into the literal string "nan", producing bogus documents and a
# bogus "nan" emotion class. `df` itself is left unmodified.
df_clean = df.dropna(subset=['text', 'label'])
X = df_clean['text'].astype(str)
y = df_clean['label'].astype(str)

# 3. Encode emotion labels as integer codes (le.classes_ holds the label names,
#    le.inverse_transform maps codes back to names)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 4. Train/Test split: 20% held out for testing, stratified on the encoded
#    labels, with a fixed random_state so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

In [4]:
# 5. Convert raw text into TF-IDF features.
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
MAX_FEATURES = 5000  # upper bound on the vocabulary size kept by the vectorizer
vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [5]:
# 6. Train KNN model
N_NEIGHBORS = 3  # value passed as n_neighbors to the classifier
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
# Kept as the cell's last expression so the fitted estimator is displayed
knn.fit(X_train_vec, y_train)

In [6]:
# 7. Evaluate on the held-out test split
y_pred = knn.predict(X_test_vec)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# target_names=le.classes_ makes the report show label names instead of integer codes
per_class_report = classification_report(y_test, y_pred, target_names=le.classes_)
print("\nClassification Report:\n", per_class_report)


Accuracy: 0.9965

Classification Report:
               precision    recall  f1-score   support

       angry       1.00      0.98      0.99       333
        calm       1.00      1.00      1.00       333
       happy       1.00      1.00      1.00       333
     neutral       1.00      1.00      1.00       334
         sad       1.00      1.00      1.00       334
    surprise       0.98      1.00      0.99       333

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [7]:
def predict_emotion(text_list):
    """Predict the emotion label for each input text.

    Relies on the notebook-level fitted objects `vectorizer`, `knn` and `le`.

    Parameters
    ----------
    text_list : str or iterable
        A single text, or an iterable of texts. Every item is converted with
        `str()`. A bare string is treated as ONE document instead of being
        iterated character by character.

    Returns
    -------
    array-like
        One decoded emotion label per input text, in input order. Empty when
        no texts are supplied.
    """
    # Guard: iterating a plain string would yield one "document" per character
    if isinstance(text_list, str):
        text_list = [text_list]

    texts = [str(t) for t in text_list]
    if not texts:
        # scikit-learn estimators raise on a zero-sample batch, so short-circuit
        # with an empty slice of the known labels (keeps the label container type)
        return le.classes_[:0]

    vec = vectorizer.transform(texts)
    pred_encoded = knn.predict(vec)
    return le.inverse_transform(pred_encoded)


In [12]:
# Quick sanity check of the trained model on a few hand-written sentences
sample_inputs = [
    "I’m so happy",
    "Everything feels heavy and sad.",
    "Why would they do that to me!?",
    "i am so happy",
    "Breathing deeply and feeling still.",
    "Whoa, didn’t expect that at all!",
]

sample_outputs = predict_emotion(sample_inputs)

# Print each sentence next to its predicted emotion
for sentence, emotion in zip(sample_inputs, sample_outputs):
    print("'{}' → {}".format(sentence, emotion))

'I’m so happy' → neutral
'Everything feels heavy and sad.' → sad
'Why would they do that to me!?' → angry
'i am so happy' → neutral
'Breathing deeply and feeling still.' → calm
'Whoa, didn’t expect that at all!' → surprise


In [11]:
import pickle
import os

# Persist the three fitted components: the KNN model, the TF-IDF vectorizer
# and the LabelEncoder. The mapping is file name -> object.
artifacts = {
    "knn_emotion_model.pkl": knn,
    "tfidf_vectorizer.pkl": vectorizer,
    "label_encoder.pkl": le,
}
for pkl_path, component in artifacts.items():
    with open(pkl_path, "wb") as out_file:
        pickle.dump(component, out_file)

print("✅ All components saved as .pkl files.\n")

# List and print all saved .pkl files
print("📦 Saved model files:")
for pkl_path in artifacts:
    marker = "•" if os.path.exists(pkl_path) else "❌ Not found:"
    print(marker, pkl_path)


✅ All components saved as .pkl files.

📦 Saved model files:
• knn_emotion_model.pkl
• tfidf_vectorizer.pkl
• label_encoder.pkl


In [13]:
import pickle

# 1. Load the saved model components
# NOTE(security): pickle.load can execute arbitrary code while unpickling —
# only load .pkl files that you created yourself or otherwise trust.
def _load_pickle(path):
    """Open the file at `path` in binary mode and return the unpickled object."""
    with open(path, "rb") as in_file:
        return pickle.load(in_file)

knn_model = _load_pickle("knn_emotion_model.pkl")
# Rebinds the notebook-level name `vectorizer` to the copy read back from disk
vectorizer = _load_pickle("tfidf_vectorizer.pkl")
label_encoder = _load_pickle("label_encoder.pkl")

# 2. Define a prediction function
def predict_emotion(text_list):
    """Predict the emotion label for each input text using the reloaded components.

    Relies on the notebook-level objects `vectorizer`, `knn_model` and
    `label_encoder` loaded from the .pkl files.

    Parameters
    ----------
    text_list : str or iterable
        A single text, or an iterable of texts. Every item is converted with
        `str()`. A bare string is treated as ONE document instead of being
        iterated character by character.

    Returns
    -------
    array-like
        One decoded emotion label per input text, in input order. Empty when
        no texts are supplied.
    """
    # Guard: iterating a plain string would yield one "document" per character
    if isinstance(text_list, str):
        text_list = [text_list]

    texts = [str(t) for t in text_list]
    if not texts:
        # scikit-learn estimators raise on a zero-sample batch, so short-circuit
        # with an empty slice of the known labels (keeps the label container type)
        return label_encoder.classes_[:0]

    vec = vectorizer.transform(texts)
    pred = knn_model.predict(vec)
    return label_encoder.inverse_transform(pred)

# 3. Example usage: a few sentences run through the reloaded pipeline
sample_texts = [
    "I am so happy today!",
    "Why did you do that!",
    "Feeling down and alone.",
    "Just chilling out.",
    "Didn't expect this at all!",
]

predicted_emotions = predict_emotion(sample_texts)

# 4. Print results, one "'<text>' → <emotion>" line per sentence
for sentence, label in zip(sample_texts, predicted_emotions):
    print("'{}' → {}".format(sentence, label))


'I am so happy today!' → neutral
'Why did you do that!' → angry
'Feeling down and alone.' → sad
'Just chilling out.' → neutral
'Didn't expect this at all!' → surprise
