In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
# Load the labelled text dataset.
# The CSV's first column has no header (pandas would surface it as
# "Unnamed: 0") and, in the preview, merely repeats the row number --
# presumably an index written out by an earlier export. Only `label` and
# `text` are used by this notebook, so read just those two columns.
df = pd.read_csv('final_dataset.csv', usecols=['label', 'text'])
df.head()

Unnamed: 0,label,text
0,anger,grab minute post feel greedy wrong
1,anger,feel grouchy
2,anger,think easiest time year feel dissatisfy
3,anger,feel irritate reject without anyone anything s...
4,anger,already feel like fuck though usually eat morning


In [22]:
# Hold out a fixed share of the rows for evaluation.
# train_test_split is already imported in the notebook's first cell, so it is
# not imported again here.
TEST_SIZE = 0.2     # fraction of rows reserved for the test set
RANDOM_STATE = 42   # fixed seed so a re-run reproduces the same split

X = df['text']    # documents to classify
y = df['label']   # class label of each document

# NOTE(review): the split is not stratified. Passing stratify=y would keep the
# class proportions equal in both partitions, but it would also change every
# number reported further down -- decide deliberately before enabling it.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Sanity check: every row must end up in exactly one of the two partitions.
assert len(X_train) + len(X_test) == len(X)

print(f"Train: {len(X_train)}")
print(f"Test: {len(X_test)}")

Train: 8331
Test: 2083


In [23]:
# Convert the raw documents into TF-IDF feature matrices.
# TfidfVectorizer is already imported in the notebook's first cell.
MAX_FEATURES = 5000  # value passed as the vectorizer's max_features setting

vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)

# Fit on the training documents only, then apply the already-fitted
# vectorizer to the test documents, so nothing from the test split
# influences the features.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [24]:
# First candidate model: a support-vector classifier with a linear kernel,
# trained on the TF-IDF training matrix.
# SVC is already imported in the notebook's first cell.
svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train_tfidf, y_train)

print("Train Done")

Train Done


In [25]:
# Second candidate model: the same classifier with an RBF kernel, trained on
# the same TF-IDF training matrix. fit() hands back the estimator itself, so
# construction and training are written as a single expression.
svm_rbf = SVC(kernel='rbf').fit(X_train_tfidf, y_train)

print("Train Done")

Train Done


In [26]:
# Compare the two fitted classifiers on the held-out test features.
# accuracy_score is already imported in the notebook's first cell.
y_pred_linear = svm_linear.predict(X_test_tfidf)
y_pred_rbf = svm_rbf.predict(X_test_tfidf)

accuracy_linear = accuracy_score(y_test, y_pred_linear)
accuracy_rbf = accuracy_score(y_test, y_pred_rbf)

# Shown as percentages with two decimals.
print(f"accuracy linear: {accuracy_linear * 100:.2f}%")
print(f"accuracy rbf: {accuracy_rbf * 100:.2f}%")

accuracy linear: 85.41%
accuracy rbf: 84.64%


In [27]:
# Detailed evaluation of the linear-kernel model on the test split.
# accuracy_score and classification_report are already imported in the
# notebook's first cell.
y_pred = svm_linear.predict(X_test_tfidf)

print("Done\n")
print("Accuracy:", accuracy_score(y_test, y_pred))

# Print the heading and the report with separate calls: handing both to a
# single print() puts the default separator (one space) in front of the
# report, which pushes its header row one column out of line with the
# per-class rows beneath it.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Done

Accuracy: 0.8540566490638503

Classification Report:
               precision    recall  f1-score   support

       anger       0.86      0.87      0.87       444
        fear       0.88      0.88      0.88       387
         joy       0.84      0.86      0.85       515
        love       0.75      0.78      0.77       236
     sadness       0.89      0.84      0.87       501

    accuracy                           0.85      2083
   macro avg       0.85      0.85      0.85      2083
weighted avg       0.86      0.85      0.85      2083



In [28]:
# Persist the chosen classifier, the fitted vectorizer and the held-out rows.
# pickle is already imported in the notebook's first cell.
MODEL_PATH = 'svm_model.pkl'
VECTORIZER_PATH = 'vectorizer.pkl'
TEST_DATA_PATH = 'test_data.csv'

# The linear-kernel classifier is the one kept for later use.
best_model = svm_linear

# The classifier was trained on this vectorizer's output, so the two files
# belong together: new text has to go through the saved vectorizer before it
# can be passed to the saved model.
# NOTE(review): only unpickle these files from a trusted location --
# pickle.load can execute arbitrary code.
with open(MODEL_PATH, 'wb') as f:
    pickle.dump(best_model, f)

with open(VECTORIZER_PATH, 'wb') as f:
    pickle.dump(vectorizer, f)

# Save the test split (text plus true label) without the index column.
test_df = pd.DataFrame({
    'text': X_test,
    'label': y_test,
})
test_df.to_csv(TEST_DATA_PATH, index=False)

print("Done")

Done
