In [1]:
import pandas as pd
import numpy as np
import warnings
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
import pickle

warnings.simplefilter(action = 'ignore', category = FutureWarning)
k_folds = KFold(n_splits = 10)

In [2]:
dataset = pd.read_csv("IMDB_dataset_untouched.csv")
reviews = [w for w in dataset["review"]]

In [3]:
pickle_in = open("tfidf_vectors_untouched.pickle", "rb")
word_vectors_TFIDF = pickle.load(pickle_in)

In [4]:
x = word_vectors_TFIDF

In [5]:
y = pd.get_dummies(dataset["sentiment"])
y = y.iloc[:,1].values

In [6]:
rfc = RandomForestClassifier(n_estimators = 100)

In [7]:
# 10-fold cross-validation - accuracy
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'accuracy')
print("CV accuracy scores: ", scores)
print("Average CV accuracy score: ", scores.mean())

CV accuracy scores:  [0.8586 0.865  0.8592 0.8646 0.8476 0.8572 0.86   0.8618 0.8568 0.8616]
Average CV accuracy score:  0.85924


In [8]:
# 10-fold cross-validation - precision
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'precision_weighted')
print("CV precision scores: ", scores)
print("Average CV precision score: ", scores.mean())

CV precision scores:  [0.85319912 0.86243599 0.85881334 0.86997958 0.85032092 0.85689793
 0.86604284 0.86243431 0.85687681 0.86301987]
Average CV precision score:  0.8600020702797279


In [9]:
# 10-fold cross-validation - recall
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'recall_weighted')
print("CV recall scores: ", scores)
print("Average CV recall score: ", scores.mean())

CV recall scores:  [0.8564 0.8632 0.8594 0.862  0.848  0.857  0.8606 0.858  0.858  0.8616]
Average CV recall score:  0.85842


In [10]:
# 10-fold cross-validation - f1-score
scores = cross_val_score(rfc, x, y, cv = k_folds, scoring = 'f1_weighted')
print("CV f1 scores: ", scores)
print("Average CV f1 score: ", scores.mean())

CV f1 scores:  [0.86019633 0.86301968 0.859466   0.86578788 0.85362784 0.86100926
 0.85940079 0.86199541 0.8530072  0.86079218]
Average CV f1 score:  0.8598302571977159
