In [2]:
pip install nltk

Collecting nltk
  Using cached nltk-3.9.2-py3-none-any.whl.metadata (3.2 kB)
Collecting regex>=2021.8.3 (from nltk)
  Using cached regex-2025.11.3-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting tqdm (from nltk)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached nltk-3.9.2-py3-none-any.whl (1.5 MB)
Using cached regex-2025.11.3-cp313-cp313-win_amd64.whl (277 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm, regex, nltk
Successfully installed nltk-3.9.2 regex-2025.11.3 tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# -----------------------
# 📌 IMPORT LIBRARIES
# -----------------------

import os
import pandas as pd
import numpy as np
import re
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# Fetch the NLTK data packages used below; a package that is already present
# locally is reported as up-to-date and not downloaded again.
nltk.download("punkt")
# NLTK 3.9+ makes word_tokenize load the "punkt_tab" resource rather than the
# pickled "punkt" models, so a fresh environment needs it or tokenization
# raises LookupError.
nltk.download("punkt_tab")
nltk.download("stopwords")
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download("wordnet")

# -----------------------
# 📌 LOAD DATA
# -----------------------
# Read the labelled corpus, then print a preview of the first rows followed by
# the number of rows per emotion label.
df = pd.read_csv(filepath_or_buffer="combined_emotion.csv")
print(df.head(n=5))
print(df["emotion"].value_counts())

# -----------------------
# 📌 TEXT CLEANING FUNCTION
# -----------------------
# Module-level resources read by clean_text: a WordNet lemmatizer and the
# English stop-word list held as a set for fast membership tests.
lemmatizer = WordNetLemmatizer()
stop_words = {*stopwords.words("english")}

def clean_text(text):
    """Normalize one raw sentence before TF-IDF vectorization.

    Steps: lowercase the text, delete every character that is not an ASCII
    letter or whitespace, tokenize with ``nltk.word_tokenize``, drop tokens
    found in the module-level ``stop_words`` set, and lemmatize the remaining
    tokens with the module-level ``lemmatizer``.

    Args:
        text: The raw sentence. Any value that is not a ``str`` (for example
            the ``None``/``NaN`` that stands in for an empty CSV cell) is
            treated as having no content.

    Returns:
        The surviving lemmatized tokens joined by single spaces, or ``""``
        when ``text`` is not a string.
    """
    # Non-string values have no .lower(); map them to an empty document
    # instead of letting one bad row abort the whole column-wide apply.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    # Removed characters are deleted outright (not replaced by a space), so
    # "don't" becomes "dont".
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    tokens = nltk.word_tokenize(text)
    return " ".join(
        lemmatizer.lemmatize(tok) for tok in tokens if tok not in stop_words
    )

# Run clean_text over every raw sentence and keep the result in a separate
# column, leaving the original "sentence" column untouched.
df["clean_sentence"] = df["sentence"].apply(func=clean_text)

# -----------------------
# 📌 SPLIT DATA
# -----------------------
# Features are the cleaned sentences; targets are the emotion labels.
X, y = df["clean_sentence"], df["emotion"]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# -----------------------
# 📌 TF-IDF VECTORIZATION
# -----------------------
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(raw_documents=X_train)
X_test_vec = vectorizer.transform(raw_documents=X_test)

# -----------------------
# 📌 TRAIN SVM MODEL
# -----------------------
# Linear SVM over the TF-IDF features. The solver's random number generator is
# seeded with the same value as the train/test split so that repeated runs fit
# the same model.
model = LinearSVC(random_state=42)
model.fit(X_train_vec, y_train)

# -----------------------
# 📌 EVALUATION
# -----------------------
# Score the held-out split: overall accuracy, the per-class classification
# report, and the confusion matrix.
predictions = model.predict(X_test_vec)

print("\n🔍 Accuracy:", accuracy_score(y_test, predictions))
print("\n📌 Classification Report:\n", classification_report(y_test, predictions))
print("\n📌 Confusion Matrix:\n", confusion_matrix(y_test, predictions))

# -----------------------
# 📌 SAVE MODEL & VECTORIZER
# -----------------------
# Create the output directory first; exist_ok=True keeps re-runs from failing
# when it already exists.
os.makedirs("model", exist_ok=True)

# Persist both fitted objects: the classifier and the TF-IDF vectorizer that
# produced its input features.
joblib.dump(model, "model/emotion_svm_model.pkl")
joblib.dump(vectorizer, "model/tfidf.pkl")

print("\n✅ Model successfully saved!")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Priyanshu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Priyanshu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Priyanshu\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


                                            sentence emotion
0      i just feel really helpless and heavy hearted    fear
1  ive enjoyed being able to slouch about relax a...     sad
2  i gave up my internship with the dmrg and am f...    fear
3                         i dont know i feel so lost     sad
4  i am a kindergarten teacher and i am thoroughl...    fear
emotion
joy        143067
sad        121187
anger       59317
fear        49649
love        34554
suprise     14972
Name: count, dtype: int64

üîç Accuracy: 0.890053222945003

üìå Classification Report:
               precision    recall  f1-score   support

       anger       0.90      0.90      0.90     11810
        fear       0.84      0.84      0.84      9952
         joy       0.91      0.92      0.92     28781
        love       0.76      0.75      0.75      6929
         sad       0.94      0.94      0.94     24036
     suprise       0.72      0.69      0.70      3042

    accuracy                           0.89     