In [1]:
import pandas as pd
import re
import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import classification_report, accuracy_score

In [2]:
df = pd.read_csv("emotions.csv")

def clean_text(text):
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@\w+", "", text) 
    text = re.sub(r"#\w+", "", text)         
    text = re.sub(r"[^A-Za-z\s]", "", text) 
    return text.lower().strip()

df['text'] = df['text'].astype(str).apply(clean_text)

label_map = {
    0: 'sadness',
    1: 'joy',
    2: 'love',
    3: 'anger',
    4: 'fear',
    5: 'surprise'
}
df['label_name'] = df['label'].map(label_map)

df_sample = df.sample(20000, random_state=42)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    df_sample['text'], df_sample['label_name'], test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(max_features=10000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [4]:
models = {
    'linear': LinearSVC(),
    'rbf': SVC(kernel='rbf'),
    'poly': SVC(kernel='poly'),
    'sigmoid': SVC(kernel='sigmoid')
}

for name, model in models.items():
    print(f"\n--- Training {name.upper()} SVM ---")
    model.fit(X_train_tfidf, y_train)
    y_pred = model.predict(X_test_tfidf)

    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")

    model_filename = f"svm_{name}_model.joblib"
    joblib.dump(model, model_filename)
    print(f"Model saved to {model_filename}")


--- Training LINEAR SVM ---
              precision    recall  f1-score   support

       anger       0.90      0.90      0.90       548
        fear       0.83      0.84      0.83       448
         joy       0.88      0.93      0.91      1359
        love       0.79      0.74      0.77       321
     sadness       0.94      0.92      0.93      1175
    surprise       0.84      0.64      0.73       149

    accuracy                           0.89      4000
   macro avg       0.86      0.83      0.84      4000
weighted avg       0.89      0.89      0.89      4000

Accuracy: 0.8860
Model saved to svm_linear_model.joblib

--- Training RBF SVM ---
              precision    recall  f1-score   support

       anger       0.91      0.84      0.87       548
        fear       0.81      0.79      0.80       448
         joy       0.83      0.96      0.89      1359
        love       0.82      0.60      0.69       321
     sadness       0.92      0.92      0.92      1175
    surprise       0.