In [1]:
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [3]:
# Pull the label column out, then keep only the feature columns ('id' is removed too).
# Note: this rebinds `train`, so running the cell a second time raises KeyError.
target = train['target']
train = train.drop(columns=['id', 'target'])

In [4]:
# Start from the sample submission, overwrite its 'id' column with the test ids,
# then remove 'id' from the test features.
sub = pd.read_csv('sample_submission.csv').assign(id=test['id'])
test = test.drop(columns='id')

In [10]:
import optuna
from sklearn.model_selection import train_test_split
import warnings
# Blanket filter: hides every warning (deprecation notices included) from here on.
warnings.filterwarnings("ignore")

def objective(trial, data=train, target=target):
    """Optuna objective: fit one CatBoost classifier and return its hold-out ROC AUC.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``learning_rate``, ``depth``, ``l2_leaf_reg``,
        ``bagging_temperature``, ``border_count`` and ``min_data_in_leaf``.
    data : feature table, optional
        Defaults to the notebook-level ``train`` object as it was bound when
        this ``def`` was executed.
    target : labels for ``data``, optional
        Defaults to the notebook-level ``target``; also used to stratify the split.

    Returns
    -------
    float
        ROC AUC of the positive-class probabilities on a 25% stratified hold-out.

    Notes
    -----
    The same hold-out is passed as ``eval_set`` (driving early stopping and
    best-iteration selection) and is then used to compute the returned score,
    so the value is an optimistic estimate of generalisation performance.
    """
    # Fixed seed: every trial is scored on the identical split.
    train_x, test_x, train_y, test_y = train_test_split(
        data,
        target,
        test_size=0.25,
        stratify=target,
        shuffle=True,
        random_state=2021,
    )

    param = {
        'iterations': 2000,
        'eval_metric': 'AUC',
        'random_state': 2021,
        'od_type': 'Iter',
        # suggest_float(..., log=True) is the supported spelling of the
        # deprecated suggest_loguniform and samples from the same distribution.
        'learning_rate': trial.suggest_float('learning_rate', 4E-2, 1.0, log=True),
        'depth': trial.suggest_int('depth', 5, 16),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 50),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 1E-3, 50),
        'border_count': trial.suggest_categorical('border_count', [5, 100, 200, 225, 254]),
        'grow_policy': 'Depthwise',
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 500),
        # Column positions 0..18 are passed to CatBoost as categorical features.
        'cat_features': list(range(19)),
        'early_stopping_rounds': 50,
        'verbose': False,
    }

    model = CatBoostClassifier(**param)
    model.fit(train_x, train_y, eval_set=(test_x, test_y), use_best_model=True)

    # Probability of the positive class (second column of predict_proba).
    preds = model.predict_proba(test_x)[:, 1]
    return roc_auc_score(test_y, preds)

# Run the hyper-parameter search (maximising the objective's ROC AUC).
# The sampler is seeded with the same 2021 seed used for the split and for CatBoost,
# so the sequence of proposed trials no longer changes from one kernel run to the next.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=2021),
)
study.optimize(objective, n_trials=100)
print('Best trial:', study.best_params)
    

[32m[I 2021-03-06 13:56:32,923][0m A new study created in memory with name: no-name-51435d47-85b4-4a76-b9eb-5f5ce0448574[0m
[32m[I 2021-03-06 13:56:59,708][0m Trial 0 finished with value: 0.8898600503977709 and parameters: {'learning_rate': 0.2835358536849435, 'depth': 13, 'l2_leaf_reg': 9.83216000375306, 'bagging_temperature': 4.06255003921493, 'border_count': 225, 'min_data_in_leaf': 385}. Best is trial 0 with value: 0.8898600503977709.[0m
[32m[I 2021-03-06 13:59:35,248][0m Trial 1 finished with value: 0.8945588170408715 and parameters: {'learning_rate': 0.054943602378284224, 'depth': 11, 'l2_leaf_reg': 21.85335873290447, 'bagging_temperature': 3.192763399205135, 'border_count': 100, 'min_data_in_leaf': 100}. Best is trial 1 with value: 0.8945588170408715.[0m
[32m[I 2021-03-06 14:04:21,668][0m Trial 2 finished with value: 0.8917145598117848 and parameters: {'learning_rate': 0.06741957468005244, 'depth': 13, 'l2_leaf_reg': 4.272929167346739, 'bagging_temperature': 11.897976

[32m[I 2021-03-06 15:19:19,177][0m Trial 25 finished with value: 0.894385863990661 and parameters: {'learning_rate': 0.08710602182269733, 'depth': 14, 'l2_leaf_reg': 17.94566984793561, 'bagging_temperature': 47.797209405938126, 'border_count': 200, 'min_data_in_leaf': 488}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2021-03-06 15:23:34,823][0m Trial 26 finished with value: 0.8951864406408259 and parameters: {'learning_rate': 0.052315760421902786, 'depth': 10, 'l2_leaf_reg': 38.58968824517141, 'bagging_temperature': 35.31671571742632, 'border_count': 200, 'min_data_in_leaf': 497}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2021-03-06 15:28:41,200][0m Trial 27 finished with value: 0.8951691615877018 and parameters: {'learning_rate': 0.051763766264766, 'depth': 15, 'l2_leaf_reg': 23.95711578622072, 'bagging_temperature': 46.084410417245266, 'border_count': 200, 'min_data_in_leaf': 452}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 20

[32m[I 2021-03-06 16:59:09,620][0m Trial 50 finished with value: 0.8869151911888424 and parameters: {'learning_rate': 0.3375877281260559, 'depth': 15, 'l2_leaf_reg': 28.190058100792466, 'bagging_temperature': 27.130630636318127, 'border_count': 5, 'min_data_in_leaf': 370}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2021-03-06 17:04:34,366][0m Trial 51 finished with value: 0.8952338085285981 and parameters: {'learning_rate': 0.04480209128078856, 'depth': 12, 'l2_leaf_reg': 33.87162010810913, 'bagging_temperature': 42.084972995148654, 'border_count': 200, 'min_data_in_leaf': 463}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2021-03-06 17:08:54,373][0m Trial 52 finished with value: 0.8951714678961927 and parameters: {'learning_rate': 0.04720240939347395, 'depth': 11, 'l2_leaf_reg': 20.324965494537103, 'bagging_temperature': 38.20015409859215, 'border_count': 200, 'min_data_in_leaf': 481}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2

[32m[I 2021-03-06 19:39:50,640][0m Trial 75 finished with value: 0.8946750610148018 and parameters: {'learning_rate': 0.06394697738813257, 'depth': 12, 'l2_leaf_reg': 46.04045044176159, 'bagging_temperature': 45.82587612078778, 'border_count': 225, 'min_data_in_leaf': 396}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2021-03-06 19:44:45,873][0m Trial 76 finished with value: 0.8952725172597085 and parameters: {'learning_rate': 0.04004524537422323, 'depth': 11, 'l2_leaf_reg': 41.401826996209614, 'bagging_temperature': 48.643168289483405, 'border_count': 200, 'min_data_in_leaf': 459}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I 2021-03-06 19:49:38,820][0m Trial 77 finished with value: 0.8949739005266595 and parameters: {'learning_rate': 0.05447401340361339, 'depth': 14, 'l2_leaf_reg': 35.97963707705111, 'bagging_temperature': 31.425794087612854, 'border_count': 225, 'min_data_in_leaf': 425}. Best is trial 18 with value: 0.8955251749070442.[0m
[32m[I

Best trial: {'learning_rate': 0.04487756498826348, 'depth': 15, 'l2_leaf_reg': 36.71763813738534, 'bagging_temperature': 49.14896686365306, 'border_count': 200, 'min_data_in_leaf': 442}
