In [None]:
from catboost import CatBoostClassifier
from dataloader import load_and_split_data
from utils import find_optimal_hyperparameters, load_model_from_json, fit_and_evaluate

### Load and split the dataset

In [7]:
X_train, X_test, y_train, y_test = load_and_split_data('preprocessed_dataset.csv',
                                                       target_column='increase_stock',
                                                       class_zero='low_bike_demand',
                                                       test_size=0.2,
                                                       convert_cat_target=True,
                                                       random_state=0)

### Load, fit and evaluate the initial model

In [8]:
cat_features = ['hour_of_day', 'day_of_week', 'month', 'weekday', 'summertime', 'snowdepth', 'day', 'rain']
model = CatBoostClassifier(cat_features=cat_features, random_seed=0)

results = fit_and_evaluate(model, X_train, y_train, X_test, y_test)

print(f"Accuracy: {results['accuracy']:.4f}")
print(f"Precision: {results['precision']:.4f}")
print(f"Recall: {results['recall']:.4f}")
print(f"F1: {results['f1']:.4f}")
print(f"Confusion matrix: \n{results['confusion_matrix']}")

Accuracy: 0.8719
Precision: 0.6889
Recall: 0.5345
F1: 0.6019
Confusion matrix: 
[[248  14]
 [ 27  31]]


### Find optimal hyperparameters

In [None]:
param_grid = {
    'iterations': [500, 1000, 1500],
    'learning_rate': [0.01, 0.05, 0.1],
    'depth': [6, 8, 10]
}

best_params = find_optimal_hyperparameters(CatBoostClassifier,
                                           param_grid,
                                           X_train,
                                           y_train,
                                           cv=5,
                                           scoring='accuracy',
                                           save_dir='output/best_params',
                                           save_file='catboost_best_params.json',
                                           extra_args={'cat_features': cat_features, 'random_seed': 0},
                                           verbose=False)

  _data = np.array(data, dtype=dtype, copy=copy,


Best parameters found:  {'depth': 6, 'iterations': 1000, 'learning_rate': 0.01}
Saving best parameters to 'test/catboost_best_params.json'


In [11]:
### Use optimal hyperparameters to train and evaluate

In [None]:
opt_model = load_model_from_json(CatBoostClassifier, 'output/best_params/catboost_best_params.json')

opt_results = fit_and_evaluate(opt_model, X_train, y_train, X_test, y_test)

print(f"Accuracy: {opt_results['accuracy']:.4f}")
print(f"Precision: {opt_results['precision']:.4f}")
print(f"Recall: {opt_results['recall']:.4f}")
print(f"F1: {opt_results['f1']:.4f}")
print(f"Confusion matrix: \n{opt_results['confusion_matrix']}")

Accuracy: 0.8781
Precision: 0.7021
Recall: 0.5690
F1: 0.6286
Confusion matrix: 
[[248  14]
 [ 25  33]]
