In [1]:
import optuna
import os
from pathlib import Path
import kaggle
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
from sklearn.preprocessing import FunctionTransformer, PowerTransformer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
if iskaggle:
    path = Path('../input/playground-series-s3e23')
    orig_path = Path('../input/software-defect-prediction')
else:
    path = Path('../input/playground-series-s3e23')
    orig_path = Path('../input/software-defect-prediction')
    if not path.exists():
        kaggle.api.competition_download_cli(str(path))
        zipfile.ZipFile(f'{path}.zip').extractall(path)
    if not orig_path.exists():
        kaggle.api.competition_download_cli(str(path))
        zipfile.ZipFile(f'{path}.zip').extractall(path)

SEED = 42
SPLITS = 5
VERSION = 3
OUTPUT = f'xg_boost_clf_submisson_v{VERSION}.csv'
N_TRIALS = 30

np.random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm




In [2]:
train = pd.read_csv(path/'train.csv', index_col = 'id')
test = pd.read_csv(path/'test.csv', index_col = 'id')
orig_train = pd.read_csv(orig_path/'jm1.csv')

In [3]:
for object_features in list(orig_train.loc[:, orig_train.dtypes == 'O']):
    orig_train[object_features] = orig_train[object_features].replace({'?' : np.nan}).astype('float64')

In [4]:
X = pd.concat([train, orig_train])
y = X.pop('defects')

In [5]:
partial_pipeline = make_pipeline(SimpleImputer(), FunctionTransformer(func=np.log1p, validate=False), PowerTransformer())

In [6]:
def objective(trial):
    params = {
        'objective': 'binary:logistic',
        'tree_method':'gpu_hist',  # this parameter means using the GPU when training our model to speedup the training process
        'n_jobs': trial.suggest_categorical('n_jobs', [-1]),
        'random_state': trial.suggest_categorical('random_state', [SEED]),
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.008,0.01,0.012,0.014,0.016,0.018, 0.02]),
        'n_estimators': trial.suggest_int('n_estimators', 100, 600, 100),
        'max_depth': trial.suggest_categorical('max_depth', [5,7,9,11,13,15,17]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
    }

    pipeline = make_pipeline(partial_pipeline, XGBClassifier(**params))
    scores = cross_val_score(pipeline, X, y, cv=SPLITS, scoring='roc_auc', n_jobs=-1)
    return scores.mean()

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

[I 2023-10-16 12:54:44,050] A new study created in memory with name: no-name-b7def1c3-bc83-4cd4-8a7c-a39da47d6834
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 0. Best value: 0.785081:   3%|█████▍                                                                                                                                                              | 1/30 [00:13<06:31, 13.50s/it]

[I 2023-10-16 12:54:57,550] Trial 0 finished with value: 0.7850809604840705 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 5.438983569563615, 'alpha': 0.137440665763447, 'colsample_bytree': 0.6, 'subsample': 0.4, 'learning_rate': 0.018, 'n_estimators': 300, 'max_depth': 7, 'min_child_weight': 202}. Best is trial 0 with value: 0.7850809604840705.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 0. Best value: 0.785081:   7%|██████████▉                                                                                                                                                         | 2/30 [00:29<06:59, 14.98s/it]

[I 2023-10-16 12:55:13,569] Trial 1 finished with value: 0.7845479817887575 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.01437494206988546, 'alpha': 0.13466253531825673, 'colsample_bytree': 0.4, 'subsample': 0.7, 'learning_rate': 0.014, 'n_estimators': 400, 'max_depth': 11, 'min_child_weight': 277}. Best is trial 0 with value: 0.7850809604840705.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 0. Best value: 0.785081:  10%|████████████████▍                                                                                                                                                   | 3/30 [00:46<07:09, 15.91s/it]

[I 2023-10-16 12:55:30,594] Trial 2 finished with value: 0.7827509388202891 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 3.884527889735897, 'alpha': 0.40330686813250755, 'colsample_bytree': 0.3, 'subsample': 0.6, 'learning_rate': 0.008, 'n_estimators': 600, 'max_depth': 11, 'min_child_weight': 290}. Best is trial 0 with value: 0.7850809604840705.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 0. Best value: 0.785081:  13%|█████████████████████▊                                                                                                                                              | 4/30 [01:07<07:50, 18.08s/it]

[I 2023-10-16 12:55:52,003] Trial 3 finished with value: 0.7850111866354365 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.3459367035974128, 'alpha': 0.5782100311458863, 'colsample_bytree': 0.4, 'subsample': 0.7, 'learning_rate': 0.01, 'n_estimators': 600, 'max_depth': 17, 'min_child_weight': 222}. Best is trial 0 with value: 0.7850809604840705.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  17%|███████████████████████████▌                                                                                                                                         | 5/30 [01:27<07:43, 18.54s/it]

[I 2023-10-16 12:56:11,358] Trial 4 finished with value: 0.7858900480977999 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.007402196441479776, 'alpha': 0.0017377704825740942, 'colsample_bytree': 1.0, 'subsample': 0.4, 'learning_rate': 0.012, 'n_estimators': 600, 'max_depth': 9, 'min_child_weight': 145}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  20%|█████████████████████████████████                                                                                                                                    | 6/30 [02:00<09:27, 23.66s/it]

[I 2023-10-16 12:56:44,945] Trial 5 finished with value: 0.7846276989142604 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.0015799069360915052, 'alpha': 0.003091443252793618, 'colsample_bytree': 0.7, 'subsample': 1.0, 'learning_rate': 0.014, 'n_estimators': 600, 'max_depth': 13, 'min_child_weight': 55}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  23%|██████████████████████████████████████▌                                                                                                                              | 7/30 [02:16<08:06, 21.17s/it]

[I 2023-10-16 12:57:00,993] Trial 6 finished with value: 0.7845796325664022 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.49523940800994554, 'alpha': 0.003166661199633655, 'colsample_bytree': 0.5, 'subsample': 1.0, 'learning_rate': 0.018, 'n_estimators': 200, 'max_depth': 17, 'min_child_weight': 61}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  27%|████████████████████████████████████████████                                                                                                                         | 8/30 [02:47<08:51, 24.16s/it]

[I 2023-10-16 12:57:31,559] Trial 7 finished with value: 0.7845617472798116 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.0015119411142833316, 'alpha': 0.09831144392858449, 'colsample_bytree': 0.8, 'subsample': 0.7, 'learning_rate': 0.012, 'n_estimators': 300, 'max_depth': 17, 'min_child_weight': 28}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  30%|█████████████████████████████████████████████████▌                                                                                                                   | 9/30 [03:00<07:12, 20.62s/it]

[I 2023-10-16 12:57:44,381] Trial 8 finished with value: 0.7855924810127913 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.16935640645723335, 'alpha': 0.009660236645597214, 'colsample_bytree': 0.6, 'subsample': 0.8, 'learning_rate': 0.016, 'n_estimators': 300, 'max_depth': 17, 'min_child_weight': 221}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  33%|██████████████████████████████████████████████████████▋                                                                                                             | 10/30 [03:10<05:50, 17.51s/it]

[I 2023-10-16 12:57:54,947] Trial 9 finished with value: 0.7832071850784631 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 1.431367121702333, 'alpha': 7.697233837104666, 'colsample_bytree': 0.4, 'subsample': 1.0, 'learning_rate': 0.014, 'n_estimators': 200, 'max_depth': 11, 'min_child_weight': 38}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  37%|████████████████████████████████████████████████████████████▏                                                                                                       | 11/30 [03:27<05:29, 17.33s/it]

[I 2023-10-16 12:58:11,858] Trial 10 finished with value: 0.7858840090533408 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.028884986125457507, 'alpha': 0.0010874523536648034, 'colsample_bytree': 1.0, 'subsample': 0.4, 'learning_rate': 0.012, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 128}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 4. Best value: 0.78589:  40%|█████████████████████████████████████████████████████████████████▌                                                                                                  | 12/30 [03:44<05:10, 17.27s/it]

[I 2023-10-16 12:58:28,986] Trial 11 finished with value: 0.7858747216021852 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.035669734805492466, 'alpha': 0.0011002368054701785, 'colsample_bytree': 1.0, 'subsample': 0.4, 'learning_rate': 0.012, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 122}. Best is trial 4 with value: 0.7858900480977999.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  43%|██████████████████████████████████████████████████████████████████████▏                                                                                           | 13/30 [04:02<04:54, 17.30s/it]

[I 2023-10-16 12:58:46,358] Trial 12 finished with value: 0.7859664483461207 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.013088818186035384, 'alpha': 0.0010647806712852173, 'colsample_bytree': 1.0, 'subsample': 0.4, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 125}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  47%|███████████████████████████████████████████████████████████████████████████▌                                                                                      | 14/30 [04:29<05:25, 20.33s/it]

[I 2023-10-16 12:59:13,683] Trial 13 finished with value: 0.7859246460276051 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.006349623339329833, 'alpha': 0.011596452675230833, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 166}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  50%|█████████████████████████████████████████████████████████████████████████████████                                                                                 | 15/30 [04:50<05:05, 20.35s/it]

[I 2023-10-16 12:59:34,070] Trial 14 finished with value: 0.7859097999598801 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.0051347859717406465, 'alpha': 0.011438005324356668, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 400, 'max_depth': 15, 'min_child_weight': 91}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  53%|██████████████████████████████████████████████████████████████████████████████████████▍                                                                           | 16/30 [05:02<04:11, 17.93s/it]

[I 2023-10-16 12:59:46,394] Trial 15 finished with value: 0.7859626911452826 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.07111155594852363, 'alpha': 0.009111254542728184, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 5, 'min_child_weight': 182}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  57%|███████████████████████████████████████████████████████████████████████████████████████████▊                                                                      | 17/30 [05:06<03:00, 13.85s/it]

[I 2023-10-16 12:59:50,757] Trial 16 finished with value: 0.7843771364629679 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.07636507064538102, 'alpha': 0.006306690266234157, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 100, 'max_depth': 5, 'min_child_weight': 181}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  60%|█████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                | 18/30 [05:18<02:38, 13.19s/it]

[I 2023-10-16 13:00:02,403] Trial 17 finished with value: 0.7855881972186431 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.07251157160375733, 'alpha': 0.023688683445327218, 'colsample_bytree': 0.3, 'subsample': 0.8, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 5, 'min_child_weight': 95}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                           | 19/30 [05:28<02:15, 12.34s/it]

[I 2023-10-16 13:00:12,779] Trial 18 finished with value: 0.7856174129950096 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.18166054716698007, 'alpha': 0.0031395682020389955, 'colsample_bytree': 0.7, 'subsample': 0.6, 'learning_rate': 0.02, 'n_estimators': 400, 'max_depth': 5, 'min_child_weight': 256}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                      | 20/30 [05:57<02:53, 17.35s/it]

[I 2023-10-16 13:00:41,802] Trial 19 finished with value: 0.7858776392611324 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.029101722909860735, 'alpha': 0.02101093596786237, 'colsample_bytree': 0.5, 'subsample': 0.5, 'learning_rate': 0.016, 'n_estimators': 500, 'max_depth': 15, 'min_child_weight': 97}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                | 21/30 [06:10<02:22, 15.83s/it]

[I 2023-10-16 13:00:54,091] Trial 20 finished with value: 0.7853367476875672 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.014685333368988031, 'alpha': 0.003181090258577211, 'colsample_bytree': 0.8, 'subsample': 0.5, 'learning_rate': 0.01, 'n_estimators': 400, 'max_depth': 7, 'min_child_weight': 191}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                           | 22/30 [06:26<02:08, 16.02s/it]

[I 2023-10-16 13:01:10,563] Trial 21 finished with value: 0.7859012003634916 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.004170746815918707, 'alpha': 0.006935387169173446, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 174}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                     | 23/30 [06:43<01:53, 16.26s/it]

[I 2023-10-16 13:01:27,381] Trial 22 finished with value: 0.7859127666920213 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.003130930704724847, 'alpha': 0.02930086472280288, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 156}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                | 24/30 [07:05<01:47, 17.90s/it]

[I 2023-10-16 13:01:49,096] Trial 23 finished with value: 0.7858534104580135 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.009856353845108284, 'alpha': 0.0010866240482764362, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 600, 'max_depth': 13, 'min_child_weight': 150}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 12. Best value: 0.785966:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                           | 25/30 [07:15<01:18, 15.73s/it]

[I 2023-10-16 13:01:59,776] Trial 24 finished with value: 0.784460470271072 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.023105195225725808, 'alpha': 0.005893117232720621, 'colsample_bytree': 1.0, 'subsample': 0.4, 'learning_rate': 0.008, 'n_estimators': 400, 'max_depth': 5, 'min_child_weight': 238}. Best is trial 12 with value: 0.7859664483461207.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 25. Best value: 0.786023:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                     | 26/30 [07:33<01:04, 16.21s/it]

[I 2023-10-16 13:02:17,100] Trial 25 finished with value: 0.7860226244731053 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.05542671160432112, 'alpha': 0.012214385888954854, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 118}. Best is trial 25 with value: 0.7860226244731053.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 25. Best value: 0.786023:  90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                | 27/30 [07:51<00:50, 16.89s/it]

[I 2023-10-16 13:02:35,588] Trial 26 finished with value: 0.785919230854179 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.07729305533557676, 'alpha': 0.0018760757105770738, 'colsample_bytree': 0.9, 'subsample': 0.8, 'learning_rate': 0.02, 'n_estimators': 500, 'max_depth': 9, 'min_child_weight': 123}. Best is trial 25 with value: 0.7860226244731053.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 27. Best value: 0.786382:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏          | 28/30 [08:01<00:29, 14.84s/it]

[I 2023-10-16 13:02:45,641] Trial 27 finished with value: 0.7863815140733784 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.04066835180694219, 'alpha': 0.04259428467448357, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.02, 'n_estimators': 400, 'max_depth': 5, 'min_child_weight': 80}. Best is trial 27 with value: 0.7863815140733784.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 27. Best value: 0.786382:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌     | 29/30 [08:22<00:16, 16.75s/it]

[I 2023-10-16 13:03:06,838] Trial 28 finished with value: 0.7831916086716529 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.04397174941233527, 'alpha': 0.050020547902576595, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.02, 'n_estimators': 400, 'max_depth': 9, 'min_child_weight': 3}. Best is trial 27 with value: 0.7863815140733784.


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
Best trial: 27. Best value: 0.786382: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [08:33<00:00, 17.13s/it]

[I 2023-10-16 13:03:18,045] Trial 29 finished with value: 0.7860802348337792 and parameters: {'n_jobs': -1, 'random_state': 42, 'lambda': 0.017727432912097645, 'alpha': 0.04405732508023331, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.018, 'n_estimators': 300, 'max_depth': 7, 'min_child_weight': 75}. Best is trial 27 with value: 0.7863815140733784.





In [7]:
study.best_params, study.best_value

({'n_jobs': -1,
  'random_state': 42,
  'lambda': 0.04066835180694219,
  'alpha': 0.04259428467448357,
  'colsample_bytree': 1.0,
  'subsample': 0.6,
  'learning_rate': 0.02,
  'n_estimators': 400,
  'max_depth': 5,
  'min_child_weight': 80},
 0.7863815140733784)

In [8]:
pipeline = make_pipeline(partial_pipeline, XGBClassifier(**study.best_params))
pipeline.fit(X, y)

  if is_sparse(data):


In [9]:
submission = test.copy()
submission['defects'] = pipeline.predict_proba(submission)[:, 1]
submission.defects.to_csv(OUTPUT)