In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, precision_score, recall_score,
                             f1_score)
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import resample
import pickle

In [2]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
news_df_balanced=pd.read_csv('/content/drive/MyDrive/nlp/data/news_df_balanced.csv')

In [4]:
X = news_df_balanced['token']
y = news_df_balanced['sentiment_class']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
with open('/content/drive/MyDrive/nlp/src/dumps/cv.pkl', 'rb') as f:
    cv = pickle.load(f)

In [7]:
X_train_cv = cv.fit_transform(X_train)
X_test_cv = cv.transform(X_test)

In [8]:
rf = RandomForestClassifier()
rf.fit(X_train_cv, y_train)
y_pred_rf = rf.predict(X_test_cv)

In [14]:
with open('/content/drive/MyDrive/nlp/src/dumps/rf.pkl', 'wb') as f:
    pickle.dump(rf, f)

In [9]:
accuracy = accuracy_score(y_test, y_pred_rf)
print("Accuracy:", accuracy)

Accuracy: 0.6991212163481657


In [10]:
precision = precision_score(y_test, y_pred_rf, average='micro')
recall = recall_score(y_test, y_pred_rf, average='micro')
print("Precision:", precision)
print("Recall:", recall)

Precision: 0.6991212163481657
Recall: 0.6991212163481657


In [11]:
f1_score = f1_score(y_test, y_pred_rf, average='micro')
print("F1 Score:", f1_score)

F1 Score: 0.6991212163481657


In [12]:
labels = ['positive', 'negative', 'neutral']
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)
cm_df = pd.DataFrame(cm, index=labels, columns=labels)
print("Confusion Matrix:")
print(cm_df)

Confusion Matrix:
          positive  negative  neutral
positive      1796       287      367
negative       611      1224      431
neutral        239       222     1992


In [13]:
print(classification_report(y_test, y_pred_rf, target_names=labels))

              precision    recall  f1-score   support

    positive       0.68      0.73      0.70      2450
    negative       0.71      0.54      0.61      2266
     neutral       0.71      0.81      0.76      2453

    accuracy                           0.70      7169
   macro avg       0.70      0.70      0.69      7169
weighted avg       0.70      0.70      0.69      7169

