In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, precision_score, recall_score,
                             f1_score)
from sklearn.svm import SVC
import pickle

In [3]:
# Colab-only step: attach Google Drive at /content/drive so the files that the
# following cells read from and write to under that path are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load the CSV that the following cells split into features and labels.
news_df_balanced = pd.read_csv(
    '/content/drive/MyDrive/nlp/data/news_df_balanced.csv'
)

In [5]:
# Model input is the 'token' column; the target is the 'sentiment_class' column.
X, y = news_df_balanced['token'], news_df_balanced['sentiment_class']

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Restore the object stored in cv.pkl (used below to vectorize the text).
# NOTE: unpickling can execute arbitrary code contained in the file, so this
# must only ever point at a pickle from a trusted source.
with open('/content/drive/MyDrive/nlp/src/dumps/cv.pkl', 'rb') as cv_file:
    cv = pickle.load(cv_file)

Note on the security and maintainability limitations of loading pickled scikit-learn objects: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [8]:
# `cv` was loaded from cv.pkl. If it is already fitted, reuse its vocabulary
# instead of refitting: a refit silently replaces the persisted vocabulary, so
# a classifier trained on the refitted features is not guaranteed to line up
# with cv.pkl when both pickles are loaded together for prediction.
# Fitted scikit-learn text vectorizers expose `vocabulary_`; if the pickle held
# an unfitted vectorizer it is fitted here, on the training split only.
if hasattr(cv, 'vocabulary_'):
    X_train_cv = cv.transform(X_train)
else:
    X_train_cv = cv.fit_transform(X_train)

# The held-out split is only transformed, never fitted on.
X_test_cv = cv.transform(X_test)

In [None]:
# Fit a linear-kernel SVC on the vectorized training split (fit returns the
# estimator itself), then predict labels for the held-out split.
svc = SVC(kernel='linear').fit(X_train_cv, y_train)
y_pred_svc = svc.predict(X_test_cv)

In [None]:
# Persist the fitted classifier to Drive so it can be reloaded without retraining.
with open('/content/drive/MyDrive/nlp/src/dumps/svc.pkl', 'wb') as model_file:
    pickle.dump(svc, model_file)

In [None]:
# Share of held-out samples whose predicted class matches the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svc)
print("Accuracy:", accuracy)

In [None]:
# Micro-averaging pools all predictions before computing each metric.
# NOTE: with one label per sample this yields the same number for precision
# and recall (see the identical values recorded under this cell); per-class
# figures come from the classification report further down.
precision = precision_score(y_true=y_test, y_pred=y_pred_svc, average='micro')
print("Precision:", precision)

recall = recall_score(y_true=y_test, y_pred=y_pred_svc, average='micro')
print("Recall:", recall)

Precision: 0.7010740689077974
Recall: 0.7010740689077974


In [None]:
# Keep the result under its own name. Assigning it to `f1_score` rebinds the
# imported sklearn function to a float, so running this cell a second time
# (or calling f1_score anywhere later) fails with "object is not callable".
f1 = f1_score(y_test, y_pred_svc, average='micro')
print("F1 Score:", f1)

F1 Score: 0.7010740689077974


In [None]:
labels = ['positive', 'negative', 'neutral']

# confusion_matrix orders its rows/columns by *sorted* class value unless an
# explicit `labels` order is passed. When the targets are the class names
# themselves, sorted order is negative/neutral/positive, which does not match
# the display order above, so the order is pinned explicitly in that case.
class_values = set(pd.unique(y_test))
cm_order = labels if class_values <= set(labels) else None
# NOTE(review): when the targets are encoded (e.g. integers), cm_order is None
# and the display names are applied positionally to the sorted codes; this
# assumes the codes sort as positive, negative, neutral. Confirm against the
# step that produced 'sentiment_class'.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_svc, labels=cm_order)

# Rows are true classes, columns are predicted classes.
cm_df = pd.DataFrame(cm, index=labels, columns=labels)
print("Confusion Matrix:")
print(cm_df)

Confusion Matrix:
          positive  negative  neutral
positive      1804       275      371
negative       606      1239      421
neutral        244       226     1983


In [None]:
# target_names are matched positionally to the class order, which defaults to
# the *sorted* class values. When the targets are the class names themselves,
# pin the order to `labels` so each row is reported under the right name.
# NOTE(review): for encoded targets the default order is kept, which assumes
# the sorted codes correspond to the entries of `labels` in order. Confirm.
report_order = labels if set(pd.unique(y_test)) <= set(labels) else None
print(classification_report(y_test, y_pred_svc, labels=report_order, target_names=labels))

              precision    recall  f1-score   support

    positive       0.68      0.74      0.71      2450
    negative       0.71      0.55      0.62      2266
     neutral       0.71      0.81      0.76      2453

    accuracy                           0.70      7169
   macro avg       0.70      0.70      0.69      7169
weighted avg       0.70      0.70      0.70      7169

