In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, precision_score, recall_score,
                             f1_score)
from sklearn.naive_bayes import MultinomialNB
import pickle

In [2]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
news_df_balanced=pd.read_csv('/content/drive/MyDrive/nlp/data/news_df_balanced.csv')

In [4]:
X = news_df_balanced['token']
y = news_df_balanced['sentiment_class']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
with open('/content/drive/MyDrive/nlp/src/dumps/cv.pkl', 'rb') as f:
    cv = pickle.load(f)

In [7]:
X_train_cv = cv.fit_transform(X_train)
X_test_cv = cv.transform(X_test)

In [8]:
mnb = MultinomialNB()
mnb.fit(X_train_cv, y_train)
y_pred_mnb = mnb.predict(X_test_cv)

In [14]:
with open('/content/drive/MyDrive/nlp/src/dumps/mnb.pkl', 'wb') as f:
    pickle.dump(mnb, f)

In [9]:
accuracy = accuracy_score(y_test, y_pred_mnb)
print("Accuracy:", accuracy)

Accuracy: 0.6543450969451806


In [10]:
precision = precision_score(y_test, y_pred_mnb, average='micro')
recall = recall_score(y_test, y_pred_mnb, average='micro')
print("Precision:", precision)
print("Recall:", recall)

Precision: 0.6543450969451806
Recall: 0.6543450969451806


In [11]:
f1_score = f1_score(y_test, y_pred_mnb, average='micro')
print("F1 Score:", f1_score)

F1 Score: 0.6543450969451806


In [12]:
labels = ['positive', 'negative', 'neutral']
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_mnb)
cm_df = pd.DataFrame(cm, index=labels, columns=labels)
print("Confusion Matrix:")
print(cm_df)

Confusion Matrix:
          positive  negative  neutral
positive      1523       480      447
negative       512      1144      610
neutral        188       241     2024


In [13]:
print(classification_report(y_test, y_pred_mnb, target_names=labels))

              precision    recall  f1-score   support

    positive       0.69      0.62      0.65      2450
    negative       0.61      0.50      0.55      2266
     neutral       0.66      0.83      0.73      2453

    accuracy                           0.65      7169
   macro avg       0.65      0.65      0.65      7169
weighted avg       0.65      0.65      0.65      7169

