In [None]:
from xgboost import XGBClassifier
from nb_utils import load_and_split_data, find_optimal_hyperparameters, load_model_from_json, fit_and_evaluate

### Load and split the dataset

In [None]:
# Columns handed to the loader through its `cat_features` argument.
# NOTE(review): the list also contains the target column 'increase_stock';
# presumably the loader casts every listed column to a categorical dtype --
# confirm in nb_utils.load_and_split_data.
cat_features = [
    'hour_of_day',
    'day_of_week',
    'month',
    'weekday',
    'summertime',
    'snowdepth',
    'day',
    'rain',
    'increase_stock',
]

# Produce the train/test partitions reused by every later cell.
# test_size=0.2 and random_state=0 are forwarded to the loader (presumably a
# seeded 80/20 split -- verify against nb_utils).
X_train, X_test, y_train, y_test = load_and_split_data(
    'app/data/training_data_preprocessed.csv',
    target_column='increase_stock',
    class_zero='low_bike_demand',
    test_size=0.2,
    cat_features=cat_features,
    random_state=0,
)

### Create, fit and evaluate the initial model

In [3]:
# Baseline classifier: XGBoost defaults except for a fixed seed and
# categorical-feature support being switched on.
model = XGBClassifier(enable_categorical=True, random_state=0)

# Fit on the training partition and evaluate on the test partition;
# verbose=True asks the helper to report the metrics as cell output.
results = fit_and_evaluate(model, X_train, y_train, X_test, y_test, verbose=True)

Evaluating XGBClassifier
Accuracy: 0.8656
Precision: 0.6471
Recall: 0.5690
F1: 0.6055
ROC AUC: 0.9118
Confusion Matrix: 
[[244  18]
 [ 25  33]]



### Find optimal hyperparameters

In [None]:
# Candidate values for each XGBClassifier hyperparameter.
# The full Cartesian product is 3 * 4 * 3 * 3 * 3 * 3 * 2 = 1944 combinations
# (relevant for runtime if the helper searches the grid exhaustively --
# TODO confirm in nb_utils).
param_grid = {
    'max_depth': [7, 10, 12],
    'learning_rate': [0.1, 0.2, 0.3, 0.4],
    'n_estimators': [10, 20, 30],
    'min_child_weight': [0.1, 1, 3],
    'gamma': [0, 0.1, 0.3],
    'reg_alpha': [0.1, 1, 2],
    'reg_lambda': [0, 0.1],
}

# Search the grid on the training partition only, using 5-fold CV scored by
# accuracy. The winning parameters are written to
# app/model/best_params/xgb_best_params.json, which the next section reads.
# extra_args carries the same seed / categorical settings as the baseline.
best_params = find_optimal_hyperparameters(
    XGBClassifier,
    param_grid,
    X_train,
    y_train,
    cv=5,
    scoring='accuracy',
    save_dir='app/model/best_params',
    save_file='xgb_best_params.json',
    extra_args={'random_state': 0, 'enable_categorical': True},
    verbose_training=False,
)

Best parameters found:  {'gamma': 0.1, 'learning_rate': 0.4, 'max_depth': 10, 'min_child_weight': 1, 'n_estimators': 20, 'reg_alpha': 1, 'reg_lambda': 0}
Saving best parameters to 'test/xgb_best_params.json'


### Use optimal hyperparameters to train and evaluate

In [None]:
# Build an XGBClassifier from the parameter file written by the
# hyperparameter search.
# NOTE(review): unlike the baseline, random_state / enable_categorical are not
# passed explicitly here -- presumably they are stored in the JSON file;
# confirm in nb_utils.load_model_from_json.
opt_model = load_model_from_json(
    XGBClassifier,
    'app/model/best_params/xgb_best_params.json',
)

# Same fit/evaluate procedure and data partitions as the baseline, so the two
# sets of metrics are directly comparable.
opt_results = fit_and_evaluate(opt_model, X_train, y_train, X_test, y_test, verbose=True)

Evaluating XGBClassifier
Accuracy: 0.8750
Precision: 0.6667
Recall: 0.6207
F1: 0.6429
ROC AUC: 0.9132
Confusion Matrix: 
[[244  18]
 [ 22  36]]

