In [1]:
import pickle

import numpy as np
import plotly.express as px
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Loading the data

In [2]:
# Read the pickled training split from disk.
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
with open("data/randomized/train_data", "rb") as f:
    train_data = pickle.load(f)

# Element 0 is bound to the features, element 1 to the targets.
train_features, train_targets = train_data[0], train_data[1]

In [3]:
# Read the pickled validation split from disk.
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
# NOTE(review): val_features / val_targets are not used by any cell visible in
# this notebook — confirm whether the validation split is still needed here.
with open("data/randomized/val_data", "rb") as f:
    val_data = pickle.load(f)

# Element 0 is bound to the features, element 1 to the targets.
val_features, val_targets = val_data[0], val_data[1]

In [4]:
# Read the pickled test split from disk.
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
with open("data/randomized/test_data", "rb") as f:
    test_data = pickle.load(f)

# Element 0 is bound to the features, element 1 to the targets.
test_features, test_targets = test_data[0], test_data[1]

# Training the model

In [5]:
def reshape_to_train(unshaped: list) -> np.ndarray:
    """Flatten every sample into one row of a 2-D feature matrix.

    Each element of ``unshaped`` is converted to a NumPy array and flattened
    to 1-D; the flattened samples are then stacked row-wise.

    Args:
        unshaped: Sequence of samples. Every sample must contain the same
            number of values once flattened.

    Returns:
        Array of shape ``(n_samples, n_values_per_sample)``. An empty input
        yields an empty array of shape ``(0, 0)``.

    Raises:
        ValueError: If the samples do not all flatten to the same length.
    """
    # Without this guard an empty input has no second axis to read a width
    # from and would fail with an unhelpful error further down.
    if len(unshaped) == 0:
        return np.empty((0, 0))

    flattened = [np.asarray(sample).reshape(-1) for sample in unshaped]

    # np.stack insists on equal shapes, so ragged samples are rejected with a
    # clear ValueError instead of producing an object array.
    return np.stack(flattened)

In [6]:
# Fixed seed: a random forest draws random bootstrap samples and feature
# subsets, so without it the fitted model (and every metric computed from it)
# changes on each Restart & Run All.
RANDOM_STATE = 42

rf_model = RandomForestClassifier(random_state=RANDOM_STATE)

# Flatten each training sample into one row of a 2-D feature matrix.
shaped_train_features = reshape_to_train(unshaped=train_features)

# Kept as the last expression so the notebook displays the fitted estimator.
rf_model.fit(X=shaped_train_features, y=np.array(train_targets))

RandomForestClassifier()

# Testing the model

In [7]:
# Flatten the test samples with the same helper used for the training
# samples, then let the fitted forest classify them.
shaped_test_features = reshape_to_train(unshaped=test_features)
predictions = rf_model.predict(X=shaped_test_features)

# Metrics

In [8]:
# Fraction of test samples classified correctly, rounded to four decimals
# before being reported as a percentage.
acc = accuracy_score(y_true=test_targets, y_pred=predictions)
accuracy = round(acc, 4)

print(f"The accuracy is {round(accuracy*100, 4)}%")

# Per-class precision, recall, F1 and support.
print(classification_report(y_true=test_targets, y_pred=predictions))

# scikit-learn's confusion matrix has the true classes on the rows and the
# predicted classes on the columns; label the axes so the figure can be read
# without looking at the code.
conf_matrix = confusion_matrix(y_true=test_targets, y_pred=predictions)
px.imshow(
    conf_matrix,
    color_continuous_scale="RdBu_r",
    labels={"x": "Predicted label", "y": "True label", "color": "Samples"},
    title="Confusion matrix on the test set",
)

The accuracy is 99.56%
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       128
           1       0.99      1.00      1.00       136
           2       0.99      1.00      1.00       137
           3       0.99      0.98      0.99       112
           4       1.00      1.00      1.00       150
           5       0.99      0.98      0.99       112
           6       1.00      1.00      1.00       166
           7       1.00      1.00      1.00       133
           8       0.99      1.00      1.00       143
           9       0.99      0.99      0.99       133

    accuracy                           1.00      1350
   macro avg       1.00      0.99      1.00      1350
weighted avg       1.00      1.00      1.00      1350



In [10]:
%%timeit
# Timed statement: one full prediction pass over shaped_test_features; the
# returned predictions are discarded, only the wall-clock time is reported.
rf_model.predict(shaped_test_features)

27.3 ms ± 5.78 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Saving the model

In [9]:
# Serialize the fitted forest to disk with pickle.
# NOTE(review): the file is named "decision_tree" although the stored object
# is a RandomForestClassifier — confirm what depends on this path before
# renaming it.
with open("models/decision_tree", "wb") as f:
    pickle.dump(obj=rf_model, file=f)