In [None]:
from catboost import CatBoostClassifier
from libs.dataloader import load_and_split_data
from libs.utils import find_optimal_hyperparameters, load_model_from_json, fit_and_evaluate

### Load and split the dataset

In [2]:
# Read the normalized CSV and split it into train/test features and labels.
# 'increase_stock' is the label column; test_size=0.2 and a fixed random_state
# are forwarded to the project helper so the split is repeatable.
# NOTE(review): `class_zero` presumably names the label value mapped to class 0 —
# confirm against libs.dataloader.
X_train, X_test, y_train, y_test = load_and_split_data(
    'data/normalized_dataset.csv',
    target_column='increase_stock',
    class_zero='low_bike_demand',
    test_size=0.2,
    random_state=0,
)

### Create, fit and evaluate the initial (baseline) model

In [3]:
# Columns handed to CatBoost as categorical features (reused by the
# hyperparameter-search cell further down).
cat_features = [
    'hour_of_day', 'day_of_week', 'month', 'weekday',
    'summertime', 'snowdepth', 'day', 'rain',
]

# Baseline classifier: only the categorical columns and a fixed seed are set;
# no other hyperparameter is overridden.
model = CatBoostClassifier(random_seed=0, cat_features=cat_features)

# Fit on the training split and report metrics on the held-out split.
results = fit_and_evaluate(model, X_train, y_train, X_test, y_test, verbose=True)

Evaluating CatBoostClassifier
Accuracy: 0.8719
Precision: 0.6889
Recall: 0.5345
F1: 0.6019
ROC AUC: 0.9251
Confusion Matrix: 
[[248  14]
 [ 27  31]]



### Find optimal hyperparameters

In [None]:
# Candidate values per hyperparameter; the grid spans 3 x 3 x 3 = 27 combinations.
param_grid = dict(
    iterations=[500, 1000, 1500],
    learning_rate=[0.01, 0.05, 0.1],
    depth=[6, 8, 10],
)

# Search the grid with cv=5, scoring by accuracy, and persist the winning
# parameters to output/best_params/catboost_best_params.json.
# `extra_args` carries the settings shared with the baseline model
# (categorical columns and seed).
# NOTE(review): presumably an exhaustive grid search — confirm in libs.utils.
best_params = find_optimal_hyperparameters(
    CatBoostClassifier,
    param_grid,
    X_train,
    y_train,
    cv=5,
    scoring='accuracy',
    save_dir='output/best_params',
    save_file='catboost_best_params.json',
    extra_args=dict(cat_features=cat_features, random_seed=0),
    verbose_training=False,
)

  _data = np.array(data, dtype=dtype, copy=copy,


Best parameters found:  {'depth': 6, 'iterations': 1000, 'learning_rate': 0.01}
Saving best parameters to 'test/catboost_best_params.json'


### Use optimal hyperparameters to train and evaluate

In [4]:
# Rebuild a CatBoostClassifier from the hyperparameters stored by the search cell.
opt_model = load_model_from_json(CatBoostClassifier, 'output/best_params/catboost_best_params.json')

# The search log lists only depth / iterations / learning_rate as the saved
# parameters, and nothing else is passed to the loader here. Re-apply the
# categorical columns and the seed explicitly so the tuned model is trained under
# the same setup as the baseline model and as the search itself (its `extra_args`).
# NOTE(review): redundant but harmless if load_model_from_json already restores them.
opt_model.set_params(cat_features=cat_features, random_seed=0)

# Fit on the training split and report metrics on the held-out split, exactly as
# done for the baseline so the two result sets are directly comparable.
opt_results = fit_and_evaluate(opt_model, X_train, y_train, X_test, y_test, verbose=True)

Evaluating CatBoostClassifier
Accuracy: 0.8781
Precision: 0.7021
Recall: 0.5690
F1: 0.6286
ROC AUC: 0.9247
Confusion Matrix: 
[[248  14]
 [ 25  33]]

