In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from utils import find_optimal_hyperparameters, fit_and_evaluate, load_model_from_json
from dataloader import load_and_split_data

In [2]:
# Load the preprocessed CSV and obtain the train/test partitions used by every
# cell below.
# NOTE(review): the two positional strings are presumably the target column and
# one of its class labels — confirm against dataloader.load_and_split_data.
# test_size / random_state are forwarded to the helper (presumably a 20%
# hold-out with a fixed seed — TODO confirm).
X_train, X_test, y_train, y_test = load_and_split_data(
    "data/preprocessed_dataset.csv",
    "increase_stock",
    "low_bike_demand",
    test_size=0.2,
    random_state=0,
)

In [3]:
# Baseline: a random forest built with only the seed specified, so every other
# hyperparameter keeps its scikit-learn default.
model = RandomForestClassifier(random_state=0)

# The return value is discarded; with verbose=True the helper prints the
# metrics report shown in this cell's output.
_ = fit_and_evaluate(
    model,
    X_train,
    y_train,
    X_test,
    y_test,
    verbose=True,
)

Evaluating RandomForestClassifier
Accuracy: 0.8656
Precision: 0.6531
Recall: 0.5517
F1: 0.5981
ROC AUC: 0.9144
Confusion Matrix: 
[[245  17]
 [ 26  32]]



In [8]:
# Candidate values for each random-forest hyperparameter handed to the search.
# The lists multiply out to 2 * 5 * 2 * 3 * 3 * 3 = 540 combinations.
# NOTE(review): whether the helper evaluates all of them or samples a subset is
# not visible from this notebook — check utils.find_optimal_hyperparameters.
param_space = {
    "bootstrap": [True, False],
    "max_depth": [20, 40, 80, 100, None],
    "max_features": [None, "sqrt"],
    "min_samples_leaf": [1, 2, 4],
    "min_samples_split": [2, 5, 10],
    "n_estimators": [50, 100, 200],
}

# Search on the training split only, with cv=5 and recall as the scoring
# metric. Per this cell's output, the helper prints the winning parameters and
# saves them to 'output/best_params/rf_best.json'.
# extra_args carries random_state=0 for the estimator
# (presumably forwarded to its constructor — TODO confirm).
best_params = find_optimal_hyperparameters(
    RandomForestClassifier,
    param_space,
    X_train,
    y_train,
    cv=5,
    scoring="recall",
    n_jobs=-1,
    save_dir="output/best_params",
    save_file="rf_best.json",
    extra_args={"random_state": 0},
)

Best parameters found:  {'bootstrap': True, 'max_depth': 20, 'max_features': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 100}
Saving best parameters to 'output/best_params/rf_best.json'


In [9]:

# Rebuild the classifier from the JSON parameter file written by the search
# cell, then evaluate it on the same train/test split as the baseline.
# NOTE(review): random_state is not passed at this call site. Verify that
# load_model_from_json restores the seed; otherwise the metrics printed below
# may not reproduce exactly on a re-run.
model = load_model_from_json(
    RandomForestClassifier,
    "output/best_params/rf_best.json",
)

# Return value is discarded; verbose=True prints the metrics report.
_ = fit_and_evaluate(
    model,
    X_train,
    y_train,
    X_test,
    y_test,
    verbose=True,
)

Evaluating RandomForestClassifier
Accuracy: 0.8750
Precision: 0.6731
Recall: 0.6034
F1: 0.6364
ROC AUC: 0.9051
Confusion Matrix: 
[[245  17]
 [ 23  35]]

