In [None]:
import numpy as np
from scipy.sparse import load_npz
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

#### Load data

In [None]:
# Training split: the labels array (.npy) and the SciPy sparse feature matrix (.npz).
labels = np.load("../data/labels.npy")
prepared_data = load_npz("../data/prepared_data.npz")

In [None]:
# Test split: the labels array (.npy) and the SciPy sparse feature matrix (.npz).
test_labels = np.load("../data/test_labels.npy")
prepared_test_data = load_npz("../data/prepared_test_data.npz")

#### Train model

In [None]:
# Fit a random forest on the whole training split; the fixed seed keeps runs
# reproducible. `fit` returns the estimator itself, so chaining is equivalent.
forest_clf = RandomForestClassifier(random_state=42).fit(prepared_data, labels)
forest_clf  # echo the fitted estimator as the cell output

##### Results based on training set with cross validation

In [None]:
# 3-fold cross-validated accuracy on the training split (one score per fold).
# cross_val_score fits clones of the estimator, so forest_clf itself is untouched.
cross_val_score(
    estimator=forest_clf,
    X=prepared_data,
    y=labels,
    scoring="accuracy",
    cv=3,
)

In [None]:
# Out-of-fold predictions: every sample is predicted by a clone of the estimator
# that was fitted on the other folds. Calling forest_clf.predict(prepared_data)
# here would score the forest on the very samples it was fitted on, which yields
# a near-perfect confusion matrix and contradicts this section's
# "with cross validation" heading.
labels_predictions = cross_val_predict(forest_clf, prepared_data, labels, cv=3)

In [None]:
# Confusion matrix for the training-split predictions. Rows are true classes and
# columns are predicted classes (scikit-learn's confusion_matrix convention).
conf_matrix = confusion_matrix(labels, labels_predictions)
plt.matshow(conf_matrix, cmap=plt.cm.gray)
# Title and axis labels let the figure stand alone and be told apart from the
# test-set plot; the colorbar gives the gray levels a scale.
plt.title("Confusion matrix (training set)")
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.colorbar()
plt.show()

##### Results based on test set with cross validation

In [None]:
# 3-fold cross-validated accuracy computed on the test split alone.
# NOTE(review): this refits clones of forest_clf on test folds; it does not score
# the forest that was fitted on the training split — confirm that is the intent.
cross_val_score(
    estimator=forest_clf,
    X=prepared_test_data,
    y=test_labels,
    scoring="accuracy",
    cv=3,
)

In [None]:
# Out-of-fold predictions on the test split (3 folds).
# NOTE(review): each prediction comes from a clone refitted on the other test
# folds, not from the forest fitted on the training split — confirm intended.
test_labels_predictions = cross_val_predict(
    estimator=forest_clf,
    X=prepared_test_data,
    y=test_labels,
    cv=3,
)

In [None]:
# Confusion matrix for the test-split predictions. Rows are true classes and
# columns are predicted classes (scikit-learn's confusion_matrix convention).
conf_matrix = confusion_matrix(test_labels, test_labels_predictions)
plt.matshow(conf_matrix, cmap=plt.cm.gray)
# Title and axis labels let the figure stand alone and be told apart from the
# training-set plot; the colorbar gives the gray levels a scale.
plt.title("Confusion matrix (test set)")
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.colorbar()
plt.show()