In [None]:
from sklearn.neighbors import KNeighborsClassifier
from dataloader import load_and_split_data
from utils import find_optimal_hyperparameters, load_model_from_json, fit_and_evaluate

### Load and split the dataset

In [2]:
# Read the normalized dataset and split it into train and test partitions.
# The label column is 'increase_stock'; random_state is fixed, presumably so
# the split is reproducible between runs (confirm in dataloader).
X_train, X_test, y_train, y_test = load_and_split_data(
    "data/normalized_dataset.csv",
    target_column='increase_stock',
    class_zero='low_bike_demand',
    test_size=0.2,
    random_state=0,
)

### Create, fit and evaluate the initial model

In [None]:
# Baseline model: n_neighbors=5 is an arbitrarily chosen starting value,
# not a tuned one.
params = dict(n_neighbors=5)
knn = KNeighborsClassifier(**params)

# Fit on the training split and report metrics on the held-out test split.
results = fit_and_evaluate(
    knn,
    X_train,
    y_train,
    X_test,
    y_test,
    verbose=True,
)

Evaluating KNeighborsClassifier
Accuracy: 0.8781
Precision: 0.6557
Recall: 0.6897
F1: 0.6723
ROC AUC: 0.8819
Confusion Matrix: 
[[241  21]
 [ 18  40]]



### Find optimal hyperparameters

In [None]:
# Search space for KNeighborsClassifier.
# "minkowski" is deliberately not listed under "metric": with scikit-learn's
# default p=2 it is the same distance as "euclidean", so listing both only
# re-evaluated candidates that were already covered (a quarter of the grid).
# NOTE(review): "algorithm" picks the neighbor-search strategy; it presumably
# affects speed rather than predictions -- consider dropping it from the grid.
param_grid = {"n_neighbors": range(1, 31),
              "weights": ["uniform", "distance"],
              "metric": ["euclidean", "manhattan", "chebyshev"],
              "algorithm": ["ball_tree", "kd_tree", "brute"]
              }

# Search the grid on the training split only and persist the winning
# parameters to output/best_params/knn_best_params.json.
# NOTE(review): candidates are ranked by accuracy; if the classes are
# imbalanced, a metric such as 'f1' or 'roc_auc' may be a better selection
# criterion -- confirm against the project's goal.
best_params = find_optimal_hyperparameters(KNeighborsClassifier,
                                           param_grid,
                                           X_train,
                                           y_train,
                                           cv=5,
                                           scoring='accuracy',
                                           save_dir="output/best_params",
                                           save_file='knn_best_params.json')

Best parameters found:  {'algorithm': 'ball_tree', 'metric': 'manhattan', 'n_neighbors': 23, 'weights': 'uniform'}
Saving best parameters to 'output/best_params/knn_best_params.json'


### Use optimal hyperparameters to train and evaluate

In [None]:
# Build a KNeighborsClassifier from the parameters stored in the JSON file
# (presumably the one written by the hyperparameter search; verify in utils).
opt_model = load_model_from_json(
    KNeighborsClassifier,
    'output/best_params/knn_best_params.json',
)

# Same train/test protocol as the baseline so the metrics are comparable.
results = fit_and_evaluate(
    opt_model,
    X_train,
    y_train,
    X_test,
    y_test,
    verbose=True,
)

Evaluating KNeighborsClassifier
Accuracy: 0.8656
Precision: 0.6531
Recall: 0.5517
F1: 0.5981
ROC AUC: 0.9053
Confusion Matrix: 
[[245  17]
 [ 26  32]]

