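# KNN Hyperparameter Sweep 20210922 (adapted from the LightGBM sweep notebook)
An Optuna sweep over `sklearn.neighbors.KNeighborsClassifier`, integrating some enhancements introduced in the XGBoost version and implementing the frequent-serialization approach (the study is dumped to disk after every 5 trials).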

# Setup

In [1]:
# two manual flags (ex-config)
# colab: set to True when running on Google Colab; it gates the pip installs and
#   the Google Drive mount / datapath selection further down
colab = False
# gpu_available: not read by any of the visible cells in this notebook
gpu_available = False

In [2]:
# basic imports
from pathlib import Path
import os
import math
from datetime import datetime
import random
import multiprocessing
import pickle

In [3]:
# render matplotlib figures inline in the cell output
%matplotlib inline
# turn off jedi-based tab completion
%config Completer.use_jedi = False
# notebook name exposed to Weights & Biases; its stem is reused for the wandb run name in wandb_kwargs
# NOTE(review): says "lightgbm" although this notebook sweeps KNeighborsClassifier -- confirm it matches the real filename
os.environ['WANDB_NOTEBOOK_NAME'] = f"sweep_lightgbm_{datetime.now().strftime('%Y%m%d')}.ipynb"

In [4]:
# handle Google Colab-specific library installation/updating
# (the whole block is skipped when the `colab` flag is False)
if colab:
    # much of the below inspired by or cribbed from the May 2021 Kaggle Tabular Playground winner, at 
    # https://colab.research.google.com/gist/academicsuspect/0aac7bd6e506f5f70295bfc9a3dc2250/tabular-may-baseline.ipynb?authuser=1#scrollTo=LJoVKJb5wN0L
    
    # Kaggle API for downloading the datasets
    !pip install --upgrade -q kaggle

    # weights and biases
    !pip install -qqqU wandb
    
    # Optuna for parameter search
    !pip install -q optuna

    # !pip install --upgrade xgboost

    # upgrade sklearn
    !pip install --upgrade scikit-learn

    # !pip install category_encoders
    # !pip install catboost
    # NOTE(review): lightgbm is installed although the visible cells only fit a KNeighborsClassifier -- confirm it is still needed
    !pip install --upgrade -q lightgbm

    # LightGBM, GPU-compatible build
    # !git clone --recursive https://github.com/Microsoft/LightGBM
    # ! cd LightGBM && rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=1 ../../LightGBM && make -j4 && cd ../python-package && python3 setup.py install --precompile --gpu;
    
    # # this part is from https://github.com/rapidsai/gputreeshap/issues/24
    # !pip install cmake --upgrade
    # # !pip install sklearn --upgrade
    # !git clone --recursive https://github.com/dmlc/xgboost
    # %cd /content/xgboost
    # !mkdir build
    # %cd build
    # !cmake .. -DUSE_CUDA=ON
    # !make -j4
    # %cd /content/xgboost/python-package
    # !python setup.py install --use-cuda --use-nccl
    # !/opt/bin/nvidia-smi
    # !pip install shap
    

Now, non-stdlib imports

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# general ML tooling
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.metrics import log_loss, roc_auc_score
import wandb
from optuna.integration.wandb import WeightsAndBiasesCallback
# from wandb.xgboost import wandb_callback
# from wandb.lightgbm import wandb_callback
# from sklearn.impute import KNNImputer, StandardImputer
# import timm

import seaborn as sns

# from catboost import CatBoostClassifier
# from xgboost import XGBClassifier
# from lightgbm import LGBMClassifier
from sklearn.neighbors import KNeighborsClassifier
# from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MaxAbsScaler, StandardScaler, MinMaxScaler, RobustScaler, PolynomialFeatures
# from sklearn.feature_selection import SelectKBest, f_regression
from joblib import dump, load
# feature engineering tools
# from sklearn.feature_selection import mutual_info_regression
# import featuretools as ft


from optuna.samplers import TPESampler
import optuna
# import catboost
from sklearn.utils import resample
import sklearn.metrics

Now, datapath setup

In [6]:
# # This is the code for reading the train.csv and converting it to a .feather file
# df = pd.read_csv(datapath/'train.csv', index_col='id', low_memory=False)
# df.index.name = None
# df.to_feather(path='./dataset_df.feather')

In [7]:
# Choose the dataset directory for the current environment.
if not colab:
    # running on the local machine
    datapath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/sep2021/')
else:
    # running on Colab: Google Drive has to be mounted before the path is usable
    from google.colab import drive
    drive.mount('/content/drive')

    datapath = Path('/content/drive/MyDrive/kaggle/tabular_playgrounds/sep2021/')
    


In [8]:

# n_trials = int(1000)
# single seed shared by seed_everything, the train/validation split and the Optuna TPE sampler
SEED = 42

In [9]:
# Seed every random source this notebook touches
def seed_everything(seed):
    """Seed Python's `random`, NumPy's global RNG, and export PYTHONHASHSEED.

    Parameters
    ----------
    seed : int
        Value passed to `random.seed` and `np.random.seed`, and written
        (as a string) to the PYTHONHASHSEED environment variable.

    Notes
    -----
    NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    setting it here presumably only affects child processes -- confirm.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
seed_everything(SEED)

## Ex-Model Config

In [10]:
# meta-config for preprocessing and cross-validation, but NOT for model parameters
# in the sweep version, this includes both ex-model parameters and defaults for model parameters
# This dict is handed to wandb as the run config (see wandb_kwargs['config']).
exmodel_config = {
    # model config
    "library": 'sklearn',
#     "model": XGBClassifier,
#     "n_estimators": 100, 
#     "max_depth": 3,
#     "learning_rate": 0.1,
#     "test_size": 0.2,
#     "reg_lambda": None, 
    # scaler / imputer are stored as descriptive strings; no visible cell evaluates them
    # (they appear to describe preprocessing already applied to the loaded feature file -- confirm)
    "scaler": "sklearn.preprocessing.StandardScaler()", # TODO: experiment with others (but imputation may be slow)
    "scale_b4_impute": False,
    "imputer": "sklearn.impute.SimpleImputer(strategy='median', add_indicator=True)",
    "knn_imputer_n_neighbors": None, # None if a different imputer is used
#     "feature_selector": SelectKBest,
#     "k_best": 80,
#     "feature_selection_scoring": f_regression,
    'random_state': SEED,
    'optuna': True,
    # intended total number of Optuna trials for the sweep
    'optuna_trials': 100,
#     'subsample': 1,
#     'cross_val_strategy': None, # None for holdout, or the relevant sklearn class
#     'kfolds': 1, # if 1, that means just doing holdout
#     'test_size': 0.2,
    # these are XGBoost default (my choice) params 
#     "tree_method": "auto", # set to 'gpu_hist' to try GPU if available
#     "booster": 'gbtree', # dart may be marginally better, but will opt for this quicker approach as a default
#     "n_estimators": 200, 
#     "max_depth": 3,
#     "learning_rate": 0.1,
#     "n_jobs": -1,
#     "verbosity": 1,
#     "subsample": 1,
#     'features_created': False,
#     'feature_creator': None,
}

# keyword arguments given to WeightsAndBiasesCallback(wandb_kwargs=...)
# (presumably forwarded to wandb.init -- confirm against the Optuna integration docs)
wandb_kwargs = {
    # wandb config
    # run name = notebook stem + current time (HHMMSS)
    'name': f"{os.environ['WANDB_NOTEBOOK_NAME'][:-6]}_{datetime.now().strftime('%H%M%S')}", # just removes the .ipynb extension, leaving the notebook filename's stem
    'project': '202109_Kaggle_tabular_playground',
    'tags': ['sweep', 'knn'],
    'notes': "Trying an Optuna run on sklearn.neighbors.KNeighborsClassifier",
    # the same dict object as exmodel_config (not a copy), so keys added to it in later cells are part of it too
    'config': exmodel_config,
}

## Data Setup

**TODO** Write some conditional logic here to automate it -- possibly as part of a sklearn.*pipeline

In [11]:
# pre-engineered feature matrix; going by the filename it holds NaN counts + summary statistics,
# median-imputed with indicator columns and standard-scaled -- TODO confirm against the notebook that produced it
X_source = 'X_NaNcounts_SummaryStats_imputed-Median-wIndicators-StandardScaled.feather'
X_train = pd.read_feather(datapath/X_source) 
# target values, stored separately as a joblib dump
y_train = load(datapath/'y.joblib')    
# X.index.name = 'id'
# y.index.name = 'id'
# plain numpy arrays are what the rest of the notebook works with
X = np.array(X_train)
y = np.array(y_train)

In [12]:
# number of rows in the full feature matrix
X.shape[0]

957919

In [13]:
# keep only the first 50,000 rows (file order, no shuffling)
# presumably to keep each KNN trial's runtime manageable -- confirm
X_small = X[:50000]
y_small = y[:50000]

In [18]:
# how many labels in the subsample equal 0 (quick class-balance check)
len(np.where(y_small == 0)[0])

25158

In [19]:
# from here on X / y refer to the 50,000-row subsample;
# re-run the loading cell to get the full arrays back under these names
X = X_small
y = y_small

In [20]:
# drop the notebook-level DataFrame / label objects; only the numpy arrays are used below
del X_train, y_train

In [21]:
# record data provenance in the config dict that is logged to wandb
exmodel_config.update({
    'feature_count': X.shape[1],
    'feature_generator': "Summary statistics",
    'X_source': X_source,
})

# Experiment setup

In [22]:
# wandb_kwargs = {
#     # wandb config:
#     'name': f"{os.environ['WANDB_NOTEBOOK_NAME'][:-6]}_{datetime.now().strftime('%H%M%S')}", # just removes the .ipynb extension, leaving the notebook filename's stem
#     'project': '202109_Kaggle_tabular_playground',
#     'tags': ['sweep'],
#     'notes': "Sweep for CatBoost using Optuna",
#     'config': exmodel_config,
# }

In [23]:
# originally from https://www.kaggle.com/satorushibata/optimize-catboost-hyperparameter-with-optuna-gpu
def objective(trial):
    """Optuna objective: fit one KNeighborsClassifier and return its validation ROC AUC.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample `n_neighbors`, `weights`, `algorithm` and `metric`.

    Returns
    -------
    float
        ROC AUC on the held-out 33% of the notebook-level `X` / `y` arrays.
        The study is created with direction="maximize", so higher is better.
    """
    # fixed seed -> every trial is scored on the same train/validation partition
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33, random_state=int(SEED), shuffle=True)

    # experimental parameters
    # NOTE(review): not every algorithm accepts every metric (e.g. 'seuclidean' with
    # 'kd_tree' does not look supported by sklearn) -- confirm; an invalid pair would
    # raise inside fit() and abort the sweep.
    # NOTE(review): 'minkowski' and 'euclidean' produce identical scores in the logged
    # trials (p is left at its default), so one of them looks redundant.
    params = {
        'n_neighbors': trial.suggest_int('n_neighbors', 2, 7),
        'weights': trial.suggest_categorical('weights', ['uniform', 'distance']),
        'algorithm': trial.suggest_categorical('algorithm', ['ball_tree', 'kd_tree', 'brute']),
        'metric': trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'seuclidean'])
    }

    # instantiate the model, with some parameters locked in, and experimental ones passed via splat
    model = KNeighborsClassifier(
        n_jobs=-1,
        **params
    )

    model.fit(X_train, y_train)
    # second probability column, i.e. class 1 when the labels are 0/1
    preds = model.predict_proba(X_valid)[:,1]

    # Evaluation
    valid_auc = roc_auc_score(y_valid, preds)
    # label corrected: this notebook fits KNeighborsClassifier, not XGBoost
    print('ROC AUC Score of KNeighborsClassifier =', valid_auc)
    # NOTE(review): the WeightsAndBiasesCallback passed to study.optimize logs to wandb as
    # well -- confirm the two log calls do not conflict on the wandb step counter
    wandb.log({'valid_auc': valid_auc,
              })

    return valid_auc

In [24]:
# callback object passed to study.optimize(callbacks=[...]) below; configured from wandb_kwargs
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs)

  wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs)
[34m[1mwandb[0m: Currently logged in as: [33mhushifang[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [25]:
# Fresh in-memory study. The objective returns ROC AUC, hence "maximize";
# the TPE sampler is seeded so the sequence of suggestions is repeatable.
study = optuna.create_study(
    study_name='knn_20210922',
    sampler=TPESampler(seed=int(SEED)),
    direction="maximize",
)

# study = load(datapath/f'optuna_lightgbm_study_5trials_20210922.joblib')


[32m[I 2021-09-22 15:49:33,294][0m A new study created in memory with name: knn_20210922[0m


In [26]:

# Run the sweep in small batches and serialize the study after every batch, so the
# copy on disk is never more than one batch behind if the run is interrupted.
TRIALS_PER_BATCH = 5
# Derive the batch count from the configured total. The previous hard-coded
# range(1, 20) ran only 19 batches (95 trials) although
# exmodel_config['optuna_trials'] -- the value logged to wandb -- is 100.
n_batches = math.ceil(exmodel_config['optuna_trials'] / TRIALS_PER_BATCH)

for x in range(1, n_batches + 1):
    study.optimize(objective, n_trials = TRIALS_PER_BATCH, n_jobs=1, callbacks = [wandbc]) #n_jobs = multiprocessing.cpu_count())
    trials_done = x * TRIALS_PER_BATCH
    print(f"{trials_done} trials complete")
    # full study first, then just the best parameters found so far
    dump(study, filename=datapath/f'optuna_knn_study_{trials_done}trials_20210922.joblib')
    dump(study.best_trial.params, filename=datapath/f'optuna_knn_study_best-thru-{trials_done}trials_20210922.joblib')

[32m[I 2021-09-22 15:50:06,884][0m Trial 0 finished with value: 0.6407968637226262 and parameters: {'n_neighbors': 4, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 0 with value: 0.6407968637226262.[0m


ROC AUC Score of XGBoost = 0.6407968637226262


[32m[I 2021-09-22 15:50:18,261][0m Trial 1 finished with value: 0.7657963659854825 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657963659854825


[32m[I 2021-09-22 15:50:47,387][0m Trial 2 finished with value: 0.7396590902428812 and parameters: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'ball_tree', 'metric': 'chebyshev'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7396590902428812


[32m[I 2021-09-22 15:51:30,784][0m Trial 3 finished with value: 0.7657207687130323 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'seuclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657207687130323


[32m[I 2021-09-22 15:52:34,836][0m Trial 4 finished with value: 0.7301788543306091 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7301788543306091
5 trials complete


[32m[I 2021-09-22 15:53:16,239][0m Trial 5 finished with value: 0.6685746443574438 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'manhattan'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.6685746443574438


[32m[I 2021-09-22 15:53:54,580][0m Trial 6 finished with value: 0.6109403858423074 and parameters: {'n_neighbors': 2, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.6109403858423074


[32m[I 2021-09-22 15:54:34,513][0m Trial 7 finished with value: 0.703662306557857 and parameters: {'n_neighbors': 2, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.703662306557857


[32m[I 2021-09-22 15:55:36,271][0m Trial 8 finished with value: 0.7657963659854825 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657963659854825


[32m[I 2021-09-22 15:56:13,725][0m Trial 9 finished with value: 0.718609341962697 and parameters: {'n_neighbors': 2, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.718609341962697
10 trials complete


[32m[I 2021-09-22 15:56:24,800][0m Trial 10 finished with value: 0.7547450439008568 and parameters: {'n_neighbors': 5, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7547450439008568


[32m[I 2021-09-22 15:57:28,732][0m Trial 11 finished with value: 0.7615498930757761 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7615498930757761


[32m[I 2021-09-22 15:58:35,073][0m Trial 12 finished with value: 0.7615498930757761 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7615498930757761


[32m[I 2021-09-22 15:59:41,022][0m Trial 13 finished with value: 0.7615498930757761 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7615498930757761


[32m[I 2021-09-22 16:00:25,549][0m Trial 14 finished with value: 0.7657207687130323 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'seuclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657207687130323
15 trials complete


[32m[I 2021-09-22 16:01:30,235][0m Trial 15 finished with value: 0.7555723528270609 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7555723528270609


[32m[I 2021-09-22 16:01:41,297][0m Trial 16 finished with value: 0.7615498930757761 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7615498930757761


[32m[I 2021-09-22 16:02:45,209][0m Trial 17 finished with value: 0.7657963659854825 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657963659854825


[32m[I 2021-09-22 16:03:49,786][0m Trial 18 finished with value: 0.7555723528270609 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7555723528270609


[32m[I 2021-09-22 16:04:00,985][0m Trial 19 finished with value: 0.7657963659854825 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657963659854825
20 trials complete


[32m[I 2021-09-22 16:04:12,538][0m Trial 20 finished with value: 0.7422722800406258 and parameters: {'n_neighbors': 4, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7422722800406258


[32m[I 2021-09-22 16:05:17,220][0m Trial 21 finished with value: 0.7657963659854825 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657963659854825


[32m[I 2021-09-22 16:05:28,250][0m Trial 22 finished with value: 0.7615498930757761 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7615498930757761


[32m[I 2021-09-22 16:05:39,405][0m Trial 23 finished with value: 0.7657963659854825 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657963659854825


[32m[I 2021-09-22 16:05:50,464][0m Trial 24 finished with value: 0.7615498930757761 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7615498930757761
25 trials complete


[32m[I 2021-09-22 16:06:34,107][0m Trial 25 finished with value: 0.7657207687130323 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'seuclidean'}. Best is trial 1 with value: 0.7657963659854825.[0m


ROC AUC Score of XGBoost = 0.7657207687130323


[32m[I 2021-09-22 16:07:13,566][0m Trial 26 finished with value: 0.7704870040941985 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7704870040941985


[32m[I 2021-09-22 16:07:53,745][0m Trial 27 finished with value: 0.7698911153897812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7698911153897812


[32m[I 2021-09-22 16:08:33,808][0m Trial 28 finished with value: 0.7638726394751678 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7638726394751678


[32m[I 2021-09-22 16:09:04,320][0m Trial 29 finished with value: 0.7528050812416482 and parameters: {'n_neighbors': 4, 'weights': 'distance', 'algorithm': 'ball_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7528050812416482
30 trials complete


[32m[I 2021-09-22 16:09:42,838][0m Trial 30 finished with value: 0.7690717408711812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7690717408711812


[32m[I 2021-09-22 16:10:21,429][0m Trial 31 finished with value: 0.7690717408711812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7690717408711812


[32m[I 2021-09-22 16:10:59,237][0m Trial 32 finished with value: 0.7690717408711812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7690717408711812


[32m[I 2021-09-22 16:11:36,654][0m Trial 33 finished with value: 0.7633321373438317 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7633321373438317


[32m[I 2021-09-22 16:12:07,247][0m Trial 34 finished with value: 0.7673714340549859 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'ball_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7673714340549859
35 trials complete


[32m[I 2021-09-22 16:12:44,939][0m Trial 35 finished with value: 0.7633321373438317 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7633321373438317


[32m[I 2021-09-22 16:13:23,585][0m Trial 36 finished with value: 0.7690717408711812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7690717408711812


[32m[I 2021-09-22 16:13:53,456][0m Trial 37 finished with value: 0.7396590902428812 and parameters: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'ball_tree', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7396590902428812


[32m[I 2021-09-22 16:14:33,817][0m Trial 38 finished with value: 0.7638726394751678 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7638726394751678


[32m[I 2021-09-22 16:15:38,416][0m Trial 39 finished with value: 0.662152791976245 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'manhattan'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.662152791976245
40 trials complete


[32m[I 2021-09-22 16:16:18,653][0m Trial 40 finished with value: 0.7698911153897812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7698911153897812


[32m[I 2021-09-22 16:16:58,606][0m Trial 41 finished with value: 0.7698911153897812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7698911153897812


[32m[I 2021-09-22 16:17:37,370][0m Trial 42 finished with value: 0.7698911153897812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7698911153897812


[32m[I 2021-09-22 16:18:15,082][0m Trial 43 finished with value: 0.7638726394751678 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7638726394751678


[32m[I 2021-09-22 16:18:55,254][0m Trial 44 finished with value: 0.7698911153897812 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7698911153897812
45 trials complete


[32m[I 2021-09-22 16:19:35,618][0m Trial 45 finished with value: 0.662152791976245 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'manhattan'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.662152791976245


[32m[I 2021-09-22 16:20:15,395][0m Trial 46 finished with value: 0.7638726394751678 and parameters: {'n_neighbors': 5, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 26 with value: 0.7704870040941985.[0m


ROC AUC Score of XGBoost = 0.7638726394751678


[32m[I 2021-09-22 16:20:54,184][0m Trial 47 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:21:36,937][0m Trial 48 finished with value: 0.7662687497582027 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'seuclidean'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7662687497582027


[32m[I 2021-09-22 16:22:16,713][0m Trial 49 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226
50 trials complete


[32m[I 2021-09-22 16:22:53,745][0m Trial 50 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:23:30,256][0m Trial 51 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:24:07,484][0m Trial 52 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:24:45,369][0m Trial 53 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:25:23,597][0m Trial 54 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226
55 trials complete


[32m[I 2021-09-22 16:25:59,093][0m Trial 55 finished with value: 0.6685746443574438 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'manhattan'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.6685746443574438


[32m[I 2021-09-22 16:26:38,194][0m Trial 56 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:27:19,600][0m Trial 57 finished with value: 0.7662687497582027 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'seuclidean'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7662687497582027


[32m[I 2021-09-22 16:27:58,128][0m Trial 58 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:28:39,291][0m Trial 59 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226
60 trials complete


[32m[I 2021-09-22 16:29:06,628][0m Trial 60 finished with value: 0.7710482385387372 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'ball_tree', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7710482385387372


[32m[I 2021-09-22 16:29:47,186][0m Trial 61 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:30:28,335][0m Trial 62 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:31:09,684][0m Trial 63 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:31:49,974][0m Trial 64 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226
65 trials complete


[32m[I 2021-09-22 16:32:32,318][0m Trial 65 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:33:11,946][0m Trial 66 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:33:22,804][0m Trial 67 finished with value: 0.7664007474447057 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'euclidean'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7664007474447057


[32m[I 2021-09-22 16:34:04,908][0m Trial 68 finished with value: 0.7305191522567416 and parameters: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'seuclidean'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7305191522567416


[32m[I 2021-09-22 16:34:46,561][0m Trial 69 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226
70 trials complete


[32m[I 2021-09-22 16:35:25,285][0m Trial 70 finished with value: 0.6685746443574438 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'manhattan'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.6685746443574438


[32m[I 2021-09-22 16:36:06,207][0m Trial 71 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:36:42,873][0m Trial 72 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:37:23,467][0m Trial 73 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:38:02,302][0m Trial 74 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226
75 trials complete


[32m[I 2021-09-22 16:38:32,656][0m Trial 75 finished with value: 0.7710482385387372 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'ball_tree', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7710482385387372


[32m[I 2021-09-22 16:39:13,301][0m Trial 76 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:39:53,512][0m Trial 77 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:40:33,503][0m Trial 78 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


[32m[I 2021-09-22 16:40:44,528][0m Trial 79 finished with value: 0.7664007474447057 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'minkowski'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7664007474447057
80 trials complete


[32m[I 2021-09-22 16:40:51,854][0m Trial 80 finished with value: 0.707104135029747 and parameters: {'n_neighbors': 2, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'euclidean'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.707104135029747


[32m[I 2021-09-22 16:41:33,384][0m Trial 81 finished with value: 0.7729908900805226 and parameters: {'n_neighbors': 7, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'chebyshev'}. Best is trial 47 with value: 0.7729908900805226.[0m


ROC AUC Score of XGBoost = 0.7729908900805226


KeyboardInterrupt: 

In [27]:
# final snapshot taken after the sweep was interrupted: the whole study object,
# then only the best trial's parameters (the "82" in the filenames is hard-coded)
dump(study, filename=datapath/'optuna_knn_82trials-complete_20210922.joblib')
dump(study.best_trial.params, filename=datapath/'optuna_knn_all-82trials-best_20210922.joblib')
# pickle.dump(study.best_trial.params, open('CatBoost_Hyperparameter.pickle', 'wb'))
# print('CatBoost Hyperparameter:', study.best_trial.params)

['/media/sf/easystore/kaggle_data/tabular_playgrounds/sep2021/optuna_knn_all-82trials-best_20210922.joblib']

In [28]:
# hyperparameters of the best-scoring trial in the study
study.best_trial.params

{'n_neighbors': 7,
 'weights': 'distance',
 'algorithm': 'brute',
 'metric': 'chebyshev'}