In [1]:
import pandas as pd
import numpy as np
import warnings
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
import pickle

warnings.simplefilter(action = 'ignore', category = FutureWarning)
k_folds = KFold(n_splits = 10)

In [2]:
dataset = pd.read_csv("News_dataset_preprocessed.csv")
dataset.drop(dataset.tail(39898).index, inplace = True)
reviews = [w for w in dataset["text"]]

In [3]:
pickle_in = open("tfidf_vectors.pickle", "rb")
word_vectors_TFIDF = pickle.load(pickle_in)

In [4]:
x = word_vectors_TFIDF[:-39898]

In [5]:
y = dataset["true"]

In [6]:
rfc = RandomForestClassifier(n_estimators = 100)

In [7]:
# 10-fold cross-validation - accuracy
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'accuracy')
print("CV accuracy scores: ", scores)
print("Average CV accuracy score: ", scores.mean())

CV accuracy scores:  [0.984 0.99  0.994 0.996 0.998 0.996 0.988 0.996 0.994 0.986]
Average CV accuracy score:  0.9922000000000001


In [8]:
# 10-fold cross-validation - precision
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'precision_weighted')
print("CV precision scores: ", scores)
print("Average CV precision score: ", scores.mean())

CV precision scores:  [0.98415285 0.99603463 0.99800855 0.99400743 1.         0.9900053
 0.992128   0.99603175 0.992      0.98619124]
Average CV precision score:  0.9928559739393121


In [9]:
# 10-fold cross-validation - recall
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'recall_weighted')
print("CV recall scores: ", scores)
print("Average CV recall score: ", scores.mean())

CV recall scores:  [0.988 0.992 0.998 0.992 0.998 0.992 0.988 0.996 0.994 0.984]
Average CV recall score:  0.9922000000000001


In [10]:
# 10-fold cross-validation - f1-score
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'f1_weighted')
print("CV f1 scores: ", scores)
print("Average CV f1 score: ", scores.mean())

CV f1 scores:  [0.99200646 0.992      0.99600103 0.99599962 0.99399969 0.99199781
 0.99400036 0.99399978 0.992      0.98399539]
Average CV f1 score:  0.9926000143689577
