In [1]:
!pip install catboost[gpu]



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import roc_curve, auc, roc_auc_score
from tqdm import tqdm
import optuna
from optuna import Trial
from optuna.logging import set_verbosity, INFO
import catboost as cb
import gc

In [3]:
train = pd.read_csv('/kaggle/input/playground-series-s4e7/train.csv')
test = pd.read_csv('/kaggle/input/playground-series-s4e7/test.csv')

In [4]:
# id 열 삭제
train = train.drop(['id'], axis = 1)

In [5]:
# gender, vehicle_age, vehicle_damage 데이터 타입 변경 -> 라벨인코더로 통일
label_encoder = LabelEncoder()
cat=['Gender', 'Vehicle_Age', 'Vehicle_Damage']
for column in cat:
    train[column] = label_encoder.fit_transform(train[column])
    
for column in cat:
    test[column] = label_encoder.fit_transform(test[column])

In [6]:
# min_max_scaler
numerical = ['Region_Code', 'Annual_Premium', 'Policy_Sales_Channel', 'Vintage']
scaler = MinMaxScaler()
train[numerical] = scaler.fit_transform(train[numerical])
train.head()

test[numerical] = scaler.fit_transform(test[numerical])

In [7]:
target = "Response"

In [8]:
X = train.drop(target, axis=1)
y = train[target]

In [9]:
# Stratified K-Fold 설정
n_splits = 2
skfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [10]:
def objective(trial):
    params = {
        'objective': 'Logloss',
        'eval_metric': 'AUC',
        'task_type': 'GPU',
        'devices': '0',  
        'iterations': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 100.0, log=True),
        'random_strength': trial.suggest_float('random_strength', 0.0, 10.0),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 10.0),
        'border_count': trial.suggest_int('border_count', 1, 255),
        'verbose': 0
    }

    skfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
    oof_preds = []
    oof_aucs = []

    for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]
        
        train_pool = cb.Pool(data=X_train, label=y_train)
        eval_pool = cb.Pool(data=X_test, label=y_test)
        
        model = cb.CatBoostClassifier(**params)
        
        model.fit(train_pool, eval_set=eval_pool, early_stopping_rounds=50, verbose=0)
        
        y_pred = model.predict_proba(X_test)[:, 1]
        roc_auc = roc_auc_score(y_test, y_pred)
        
        oof_preds.append(y_pred)
        oof_aucs.append(roc_auc)
    
    return np.mean(oof_aucs)


In [11]:
# 진행 상황을 출력하는 콜백 함수 정의
def logging_callback(study, trial):
    print(f"Trial {trial.number} finished with value: {trial.value} and parameters: {trial.params}")
    
# Optuna 스터디 생성 및 최적화
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50, callbacks=[logging_callback])

# 최적의 결과 출력
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2024-07-24 04:12:10,708] A new study created in memory with name: no-name-1b93fde4-156f-4c9b-9472-8a56e8d7e1ed
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:14:13,876] Trial 0 finished with value: 0.8732562867923626 and parameters: {'learning_rate': 0.2677681984456135, 'depth': 8, 'l2_leaf_reg': 1.3436801823885703e-07, 'random_strength': 6.23308974404795, 'bagging_temperature': 1.142432250976999, 'border_count': 101}. Best is trial 0 with value: 0.8732562867923626.


Trial 0 finished with value: 0.8732562867923626 and parameters: {'learning_rate': 0.2677681984456135, 'depth': 8, 'l2_leaf_reg': 1.3436801823885703e-07, 'random_strength': 6.23308974404795, 'bagging_temperature': 1.142432250976999, 'border_count': 101}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:17:01,569] Trial 1 finished with value: 0.8648641349682555 and parameters: {'learning_rate': 0.03855701232728146, 'depth': 4, 'l2_leaf_reg': 3.340705844220733e-05, 'random_strength': 2.188388189823275, 'bagging_temperature': 5.523954887332133, 'border_count': 123}. Best is trial 0 with value: 0.8732562867923626.


Trial 1 finished with value: 0.8648641349682555 and parameters: {'learning_rate': 0.03855701232728146, 'depth': 4, 'l2_leaf_reg': 3.340705844220733e-05, 'random_strength': 2.188388189823275, 'bagging_temperature': 5.523954887332133, 'border_count': 123}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:19:17,241] Trial 2 finished with value: 0.8769983375183559 and parameters: {'learning_rate': 0.22699365018441214, 'depth': 10, 'l2_leaf_reg': 0.0017546732202033967, 'random_strength': 8.978746541278188, 'bagging_temperature': 1.3378771565305747, 'border_count': 192}. Best is trial 2 with value: 0.8769983375183559.


Trial 2 finished with value: 0.8769983375183559 and parameters: {'learning_rate': 0.22699365018441214, 'depth': 10, 'l2_leaf_reg': 0.0017546732202033967, 'random_strength': 8.978746541278188, 'bagging_temperature': 1.3378771565305747, 'border_count': 192}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:22:17,413] Trial 3 finished with value: 0.8764689416855961 and parameters: {'learning_rate': 0.08370698795881353, 'depth': 5, 'l2_leaf_reg': 42.225603870567035, 'random_strength': 1.3536060410541473, 'bagging_temperature': 1.5754283929201585, 'border_count': 244}. Best is trial 2 with value: 0.8769983375183559.


Trial 3 finished with value: 0.8764689416855961 and parameters: {'learning_rate': 0.08370698795881353, 'depth': 5, 'l2_leaf_reg': 42.225603870567035, 'random_strength': 1.3536060410541473, 'bagging_temperature': 1.5754283929201585, 'border_count': 244}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:25:03,529] Trial 4 finished with value: 0.8659472245811048 and parameters: {'learning_rate': 0.1501971030005774, 'depth': 4, 'l2_leaf_reg': 0.0002135616125191044, 'random_strength': 1.43175496821919, 'bagging_temperature': 8.959917785212973, 'border_count': 43}. Best is trial 2 with value: 0.8769983375183559.


Trial 4 finished with value: 0.8659472245811048 and parameters: {'learning_rate': 0.1501971030005774, 'depth': 4, 'l2_leaf_reg': 0.0002135616125191044, 'random_strength': 1.43175496821919, 'bagging_temperature': 8.959917785212973, 'border_count': 43}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:29:38,638] Trial 5 finished with value: 0.8663075696722512 and parameters: {'learning_rate': 0.050258021253166564, 'depth': 9, 'l2_leaf_reg': 0.0001170437541331045, 'random_strength': 7.940554073656498, 'bagging_temperature': 7.493834236008933, 'border_count': 60}. Best is trial 2 with value: 0.8769983375183559.


Trial 5 finished with value: 0.8663075696722512 and parameters: {'learning_rate': 0.050258021253166564, 'depth': 9, 'l2_leaf_reg': 0.0001170437541331045, 'random_strength': 7.940554073656498, 'bagging_temperature': 7.493834236008933, 'border_count': 60}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:33:29,738] Trial 6 finished with value: 0.8698537786437048 and parameters: {'learning_rate': 0.026441506925943353, 'depth': 8, 'l2_leaf_reg': 0.026774115185553157, 'random_strength': 2.9979171911842286, 'bagging_temperature': 3.6773656619594286, 'border_count': 116}. Best is trial 2 with value: 0.8769983375183559.


Trial 6 finished with value: 0.8698537786437048 and parameters: {'learning_rate': 0.026441506925943353, 'depth': 8, 'l2_leaf_reg': 0.026774115185553157, 'random_strength': 2.9979171911842286, 'bagging_temperature': 3.6773656619594286, 'border_count': 116}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:38:43,874] Trial 7 finished with value: 0.8675751133747268 and parameters: {'learning_rate': 0.1158422816717131, 'depth': 10, 'l2_leaf_reg': 2.1944670616087079e-07, 'random_strength': 9.41971075702903, 'bagging_temperature': 7.73474766967511, 'border_count': 58}. Best is trial 2 with value: 0.8769983375183559.


Trial 7 finished with value: 0.8675751133747268 and parameters: {'learning_rate': 0.1158422816717131, 'depth': 10, 'l2_leaf_reg': 2.1944670616087079e-07, 'random_strength': 9.41971075702903, 'bagging_temperature': 7.73474766967511, 'border_count': 58}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:42:13,756] Trial 8 finished with value: 0.8781408554051682 and parameters: {'learning_rate': 0.122249134013768, 'depth': 7, 'l2_leaf_reg': 0.005585390324686624, 'random_strength': 4.911165143264711, 'bagging_temperature': 2.0881193726929137, 'border_count': 251}. Best is trial 8 with value: 0.8781408554051682.


Trial 8 finished with value: 0.8781408554051682 and parameters: {'learning_rate': 0.122249134013768, 'depth': 7, 'l2_leaf_reg': 0.005585390324686624, 'random_strength': 4.911165143264711, 'bagging_temperature': 2.0881193726929137, 'border_count': 251}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:45:02,988] Trial 9 finished with value: 0.8737497147906995 and parameters: {'learning_rate': 0.266032852292106, 'depth': 4, 'l2_leaf_reg': 0.5977753944662416, 'random_strength': 2.547930521280468, 'bagging_temperature': 4.226268368558483, 'border_count': 198}. Best is trial 8 with value: 0.8781408554051682.


Trial 9 finished with value: 0.8737497147906995 and parameters: {'learning_rate': 0.266032852292106, 'depth': 4, 'l2_leaf_reg': 0.5977753944662416, 'random_strength': 2.547930521280468, 'bagging_temperature': 4.226268368558483, 'border_count': 198}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:48:15,485] Trial 10 finished with value: 0.8798317844001096 and parameters: {'learning_rate': 0.19574552985670457, 'depth': 6, 'l2_leaf_reg': 0.8703543329753842, 'random_strength': 4.956084588870899, 'bagging_temperature': 0.04330122784989854, 'border_count': 248}. Best is trial 10 with value: 0.8798317844001096.


Trial 10 finished with value: 0.8798317844001096 and parameters: {'learning_rate': 0.19574552985670457, 'depth': 6, 'l2_leaf_reg': 0.8703543329753842, 'random_strength': 4.956084588870899, 'bagging_temperature': 0.04330122784989854, 'border_count': 248}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:51:28,320] Trial 11 finished with value: 0.8796437426868307 and parameters: {'learning_rate': 0.17095988527929043, 'depth': 6, 'l2_leaf_reg': 1.3143216171526217, 'random_strength': 4.829660269450826, 'bagging_temperature': 0.2647284513415602, 'border_count': 255}. Best is trial 10 with value: 0.8798317844001096.


Trial 11 finished with value: 0.8796437426868307 and parameters: {'learning_rate': 0.17095988527929043, 'depth': 6, 'l2_leaf_reg': 1.3143216171526217, 'random_strength': 4.829660269450826, 'bagging_temperature': 0.2647284513415602, 'border_count': 255}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:54:40,916] Trial 12 finished with value: 0.8783007801609213 and parameters: {'learning_rate': 0.19206561471332778, 'depth': 6, 'l2_leaf_reg': 66.01052368360907, 'random_strength': 4.7701510416829365, 'bagging_temperature': 0.04666733456164973, 'border_count': 192}. Best is trial 10 with value: 0.8798317844001096.


Trial 12 finished with value: 0.8783007801609213 and parameters: {'learning_rate': 0.19206561471332778, 'depth': 6, 'l2_leaf_reg': 66.01052368360907, 'random_strength': 4.7701510416829365, 'bagging_temperature': 0.04666733456164973, 'border_count': 192}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 04:57:53,252] Trial 13 finished with value: 0.8793630843033975 and parameters: {'learning_rate': 0.19810825620713915, 'depth': 6, 'l2_leaf_reg': 0.5303046107346729, 'random_strength': 6.478149842563803, 'bagging_temperature': 0.031260035546692766, 'border_count': 221}. Best is trial 10 with value: 0.8798317844001096.


Trial 13 finished with value: 0.8793630843033975 and parameters: {'learning_rate': 0.19810825620713915, 'depth': 6, 'l2_leaf_reg': 0.5303046107346729, 'random_strength': 6.478149842563803, 'bagging_temperature': 0.031260035546692766, 'border_count': 221}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:01:26,812] Trial 14 finished with value: 0.8753048773863386 and parameters: {'learning_rate': 0.18218687839298023, 'depth': 7, 'l2_leaf_reg': 0.7351517754210861, 'random_strength': 3.9489115801448653, 'bagging_temperature': 2.8343570236655076, 'border_count': 158}. Best is trial 10 with value: 0.8798317844001096.


Trial 14 finished with value: 0.8753048773863386 and parameters: {'learning_rate': 0.18218687839298023, 'depth': 7, 'l2_leaf_reg': 0.7351517754210861, 'random_strength': 3.9489115801448653, 'bagging_temperature': 2.8343570236655076, 'border_count': 158}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:04:43,464] Trial 15 finished with value: 0.8723046580572149 and parameters: {'learning_rate': 0.2373648292467096, 'depth': 6, 'l2_leaf_reg': 3.6734675977724205, 'random_strength': 6.266936436470162, 'bagging_temperature': 5.673907444130313, 'border_count': 161}. Best is trial 10 with value: 0.8798317844001096.


Trial 15 finished with value: 0.8723046580572149 and parameters: {'learning_rate': 0.2373648292467096, 'depth': 6, 'l2_leaf_reg': 3.6734675977724205, 'random_strength': 6.266936436470162, 'bagging_temperature': 5.673907444130313, 'border_count': 161}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:05:27,800] Trial 16 finished with value: 0.8433117751305492 and parameters: {'learning_rate': 0.2978775071698929, 'depth': 5, 'l2_leaf_reg': 0.04080477992163408, 'random_strength': 0.38551630540254056, 'bagging_temperature': 2.9697763592044617, 'border_count': 2}. Best is trial 10 with value: 0.8798317844001096.


Trial 16 finished with value: 0.8433117751305492 and parameters: {'learning_rate': 0.2978775071698929, 'depth': 5, 'l2_leaf_reg': 0.04080477992163408, 'random_strength': 0.38551630540254056, 'bagging_temperature': 2.9697763592044617, 'border_count': 2}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:08:27,371] Trial 17 finished with value: 0.8785668673576621 and parameters: {'learning_rate': 0.15437104366691917, 'depth': 5, 'l2_leaf_reg': 3.6993362862967487e-06, 'random_strength': 7.325170857822318, 'bagging_temperature': 0.06805646424703654, 'border_count': 222}. Best is trial 10 with value: 0.8798317844001096.


Trial 17 finished with value: 0.8785668673576621 and parameters: {'learning_rate': 0.15437104366691917, 'depth': 5, 'l2_leaf_reg': 3.6993362862967487e-06, 'random_strength': 7.325170857822318, 'bagging_temperature': 0.06805646424703654, 'border_count': 222}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:12:22,189] Trial 18 finished with value: 0.8740414066005395 and parameters: {'learning_rate': 0.10993623895294032, 'depth': 8, 'l2_leaf_reg': 10.869219288478577, 'random_strength': 3.788861154203447, 'bagging_temperature': 4.370270236795182, 'border_count': 251}. Best is trial 10 with value: 0.8798317844001096.


Trial 18 finished with value: 0.8740414066005395 and parameters: {'learning_rate': 0.10993623895294032, 'depth': 8, 'l2_leaf_reg': 10.869219288478577, 'random_strength': 3.788861154203447, 'bagging_temperature': 4.370270236795182, 'border_count': 251}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:15:38,138] Trial 19 finished with value: 0.8758509035509852 and parameters: {'learning_rate': 0.21950589858767225, 'depth': 6, 'l2_leaf_reg': 0.04424270637426119, 'random_strength': 5.544587720783836, 'bagging_temperature': 2.2533247526855127, 'border_count': 158}. Best is trial 10 with value: 0.8798317844001096.


Trial 19 finished with value: 0.8758509035509852 and parameters: {'learning_rate': 0.21950589858767225, 'depth': 6, 'l2_leaf_reg': 0.04424270637426119, 'random_strength': 5.544587720783836, 'bagging_temperature': 2.2533247526855127, 'border_count': 158}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:19:06,539] Trial 20 finished with value: 0.8791091955708137 and parameters: {'learning_rate': 0.16840670336754515, 'depth': 7, 'l2_leaf_reg': 4.360068004639056, 'random_strength': 4.40024315471532, 'bagging_temperature': 1.0136738838536954, 'border_count': 224}. Best is trial 10 with value: 0.8798317844001096.


Trial 20 finished with value: 0.8791091955708137 and parameters: {'learning_rate': 0.16840670336754515, 'depth': 7, 'l2_leaf_reg': 4.360068004639056, 'random_strength': 4.40024315471532, 'bagging_temperature': 1.0136738838536954, 'border_count': 224}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:22:20,361] Trial 21 finished with value: 0.879362152573923 and parameters: {'learning_rate': 0.2085591226551165, 'depth': 6, 'l2_leaf_reg': 0.27125657837460326, 'random_strength': 7.122512011972171, 'bagging_temperature': 0.06384823157821451, 'border_count': 219}. Best is trial 10 with value: 0.8798317844001096.


Trial 21 finished with value: 0.879362152573923 and parameters: {'learning_rate': 0.2085591226551165, 'depth': 6, 'l2_leaf_reg': 0.27125657837460326, 'random_strength': 7.122512011972171, 'bagging_temperature': 0.06384823157821451, 'border_count': 219}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:25:20,758] Trial 22 finished with value: 0.8788481165165576 and parameters: {'learning_rate': 0.19656787070184903, 'depth': 5, 'l2_leaf_reg': 0.16782576131772675, 'random_strength': 5.701369047409609, 'bagging_temperature': 0.7139522866329595, 'border_count': 229}. Best is trial 10 with value: 0.8798317844001096.


Trial 22 finished with value: 0.8788481165165576 and parameters: {'learning_rate': 0.19656787070184903, 'depth': 5, 'l2_leaf_reg': 0.16782576131772675, 'random_strength': 5.701369047409609, 'bagging_temperature': 0.7139522866329595, 'border_count': 229}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:28:36,604] Trial 23 finished with value: 0.8778685281711863 and parameters: {'learning_rate': 0.24780202601181586, 'depth': 6, 'l2_leaf_reg': 3.445322675309567, 'random_strength': 8.28263497284615, 'bagging_temperature': 2.101746091708509, 'border_count': 206}. Best is trial 10 with value: 0.8798317844001096.


Trial 23 finished with value: 0.8778685281711863 and parameters: {'learning_rate': 0.24780202601181586, 'depth': 6, 'l2_leaf_reg': 3.445322675309567, 'random_strength': 8.28263497284615, 'bagging_temperature': 2.101746091708509, 'border_count': 206}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:32:04,328] Trial 24 finished with value: 0.877734160327085 and parameters: {'learning_rate': 0.13788282715756595, 'depth': 7, 'l2_leaf_reg': 0.004329067772315648, 'random_strength': 6.726221076504789, 'bagging_temperature': 0.8240751464998415, 'border_count': 181}. Best is trial 10 with value: 0.8798317844001096.


Trial 24 finished with value: 0.877734160327085 and parameters: {'learning_rate': 0.13788282715756595, 'depth': 7, 'l2_leaf_reg': 0.004329067772315648, 'random_strength': 6.726221076504789, 'bagging_temperature': 0.8240751464998415, 'border_count': 181}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:35:05,775] Trial 25 finished with value: 0.8769013018249108 and parameters: {'learning_rate': 0.17414513328548387, 'depth': 5, 'l2_leaf_reg': 1.1178387384496877, 'random_strength': 3.527097347420548, 'bagging_temperature': 2.909046204298165, 'border_count': 254}. Best is trial 10 with value: 0.8798317844001096.


Trial 25 finished with value: 0.8769013018249108 and parameters: {'learning_rate': 0.17414513328548387, 'depth': 5, 'l2_leaf_reg': 1.1178387384496877, 'random_strength': 3.527097347420548, 'bagging_temperature': 2.909046204298165, 'border_count': 254}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:38:17,861] Trial 26 finished with value: 0.8775244847289172 and parameters: {'learning_rate': 0.07753869184802774, 'depth': 6, 'l2_leaf_reg': 0.07249544743848743, 'random_strength': 5.4375602351934695, 'bagging_temperature': 0.5683934069203505, 'border_count': 232}. Best is trial 10 with value: 0.8798317844001096.


Trial 26 finished with value: 0.8775244847289172 and parameters: {'learning_rate': 0.07753869184802774, 'depth': 6, 'l2_leaf_reg': 0.07249544743848743, 'random_strength': 5.4375602351934695, 'bagging_temperature': 0.5683934069203505, 'border_count': 232}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:41:52,276] Trial 27 finished with value: 0.8724476109490091 and parameters: {'learning_rate': 0.2027643030738152, 'depth': 7, 'l2_leaf_reg': 18.113529535867404, 'random_strength': 6.229238173958507, 'bagging_temperature': 6.491716147064708, 'border_count': 209}. Best is trial 10 with value: 0.8798317844001096.


Trial 27 finished with value: 0.8724476109490091 and parameters: {'learning_rate': 0.2027643030738152, 'depth': 7, 'l2_leaf_reg': 18.113529535867404, 'random_strength': 6.229238173958507, 'bagging_temperature': 6.491716147064708, 'border_count': 209}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:45:06,535] Trial 28 finished with value: 0.8784197140546146 and parameters: {'learning_rate': 0.1657891667510542, 'depth': 6, 'l2_leaf_reg': 0.007104882081187784, 'random_strength': 7.871208727637394, 'bagging_temperature': 1.7614678158100516, 'border_count': 236}. Best is trial 10 with value: 0.8798317844001096.


Trial 28 finished with value: 0.8784197140546146 and parameters: {'learning_rate': 0.1657891667510542, 'depth': 6, 'l2_leaf_reg': 0.007104882081187784, 'random_strength': 7.871208727637394, 'bagging_temperature': 1.7614678158100516, 'border_count': 236}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:49:07,330] Trial 29 finished with value: 0.8715906419091651 and parameters: {'learning_rate': 0.2519103315974266, 'depth': 8, 'l2_leaf_reg': 0.813116701938623, 'random_strength': 6.118367688669148, 'bagging_temperature': 9.508524361148645, 'border_count': 175}. Best is trial 10 with value: 0.8798317844001096.


Trial 29 finished with value: 0.8715906419091651 and parameters: {'learning_rate': 0.2519103315974266, 'depth': 8, 'l2_leaf_reg': 0.813116701938623, 'random_strength': 6.118367688669148, 'bagging_temperature': 9.508524361148645, 'border_count': 175}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:50:50,087] Trial 30 finished with value: 0.8725137092602274 and parameters: {'learning_rate': 0.28934766787827026, 'depth': 9, 'l2_leaf_reg': 1.1628388309090461e-08, 'random_strength': 4.3800328706005764, 'bagging_temperature': 1.239573764873179, 'border_count': 95}. Best is trial 10 with value: 0.8798317844001096.


Trial 30 finished with value: 0.8725137092602274 and parameters: {'learning_rate': 0.28934766787827026, 'depth': 9, 'l2_leaf_reg': 1.1628388309090461e-08, 'random_strength': 4.3800328706005764, 'bagging_temperature': 1.239573764873179, 'border_count': 95}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:54:02,724] Trial 31 finished with value: 0.8791241838139559 and parameters: {'learning_rate': 0.2101907210832074, 'depth': 6, 'l2_leaf_reg': 0.20151166794168238, 'random_strength': 7.087462023168435, 'bagging_temperature': 0.16241135317514252, 'border_count': 215}. Best is trial 10 with value: 0.8798317844001096.


Trial 31 finished with value: 0.8791241838139559 and parameters: {'learning_rate': 0.2101907210832074, 'depth': 6, 'l2_leaf_reg': 0.20151166794168238, 'random_strength': 7.087462023168435, 'bagging_temperature': 0.16241135317514252, 'border_count': 215}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 05:57:30,842] Trial 32 finished with value: 0.8797071158515313 and parameters: {'learning_rate': 0.22549642644115572, 'depth': 7, 'l2_leaf_reg': 0.2622506873977085, 'random_strength': 6.973423334983087, 'bagging_temperature': 0.5928643056406313, 'border_count': 236}. Best is trial 10 with value: 0.8798317844001096.


Trial 32 finished with value: 0.8797071158515313 and parameters: {'learning_rate': 0.22549642644115572, 'depth': 7, 'l2_leaf_reg': 0.2622506873977085, 'random_strength': 6.973423334983087, 'bagging_temperature': 0.5928643056406313, 'border_count': 236}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:00:58,560] Trial 33 finished with value: 0.8796167441806553 and parameters: {'learning_rate': 0.18701442081978756, 'depth': 7, 'l2_leaf_reg': 11.099256418318843, 'random_strength': 5.273357013691269, 'bagging_temperature': 0.7127068862981005, 'border_count': 238}. Best is trial 10 with value: 0.8798317844001096.


Trial 33 finished with value: 0.8796167441806553 and parameters: {'learning_rate': 0.18701442081978756, 'depth': 7, 'l2_leaf_reg': 11.099256418318843, 'random_strength': 5.273357013691269, 'bagging_temperature': 0.7127068862981005, 'border_count': 238}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:04:28,917] Trial 34 finished with value: 0.8793893334542058 and parameters: {'learning_rate': 0.23350082166612504, 'depth': 7, 'l2_leaf_reg': 88.27238210199582, 'random_strength': 5.433146169986882, 'bagging_temperature': 1.4217488628954393, 'border_count': 242}. Best is trial 10 with value: 0.8798317844001096.


Trial 34 finished with value: 0.8793893334542058 and parameters: {'learning_rate': 0.23350082166612504, 'depth': 7, 'l2_leaf_reg': 88.27238210199582, 'random_strength': 5.433146169986882, 'bagging_temperature': 1.4217488628954393, 'border_count': 242}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:08:50,608] Trial 35 finished with value: 0.8795888312618344 and parameters: {'learning_rate': 0.13745136514904077, 'depth': 9, 'l2_leaf_reg': 8.95079519956635, 'random_strength': 3.359104559717377, 'bagging_temperature': 0.7753701185255274, 'border_count': 240}. Best is trial 10 with value: 0.8798317844001096.


Trial 35 finished with value: 0.8795888312618344 and parameters: {'learning_rate': 0.13745136514904077, 'depth': 9, 'l2_leaf_reg': 8.95079519956635, 'random_strength': 3.359104559717377, 'bagging_temperature': 0.7753701185255274, 'border_count': 240}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:12:41,637] Trial 36 finished with value: 0.8757557685472007 and parameters: {'learning_rate': 0.1819469571174539, 'depth': 8, 'l2_leaf_reg': 2.409530429587727, 'random_strength': 8.54476411440304, 'bagging_temperature': 1.570850920421295, 'border_count': 145}. Best is trial 10 with value: 0.8798317844001096.


Trial 36 finished with value: 0.8757557685472007 and parameters: {'learning_rate': 0.1819469571174539, 'depth': 8, 'l2_leaf_reg': 2.409530429587727, 'random_strength': 8.54476411440304, 'bagging_temperature': 1.570850920421295, 'border_count': 145}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:16:08,048] Trial 37 finished with value: 0.8781736059903577 and parameters: {'learning_rate': 0.27305719886179397, 'depth': 8, 'l2_leaf_reg': 0.0007557594036480716, 'random_strength': 4.332894527663166, 'bagging_temperature': 2.449310352464639, 'border_count': 255}. Best is trial 10 with value: 0.8798317844001096.


Trial 37 finished with value: 0.8781736059903577 and parameters: {'learning_rate': 0.27305719886179397, 'depth': 8, 'l2_leaf_reg': 0.0007557594036480716, 'random_strength': 4.332894527663166, 'bagging_temperature': 2.449310352464639, 'border_count': 255}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:19:36,936] Trial 38 finished with value: 0.878777883045482 and parameters: {'learning_rate': 0.2217944149807466, 'depth': 7, 'l2_leaf_reg': 28.305616600581494, 'random_strength': 4.969180616735011, 'bagging_temperature': 0.7275584708204124, 'border_count': 202}. Best is trial 10 with value: 0.8798317844001096.


Trial 38 finished with value: 0.878777883045482 and parameters: {'learning_rate': 0.2217944149807466, 'depth': 7, 'l2_leaf_reg': 28.305616600581494, 'random_strength': 4.969180616735011, 'bagging_temperature': 0.7275584708204124, 'border_count': 202}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:23:08,634] Trial 39 finished with value: 0.8716517337888561 and parameters: {'learning_rate': 0.14256923698221508, 'depth': 7, 'l2_leaf_reg': 0.0008011722357353118, 'random_strength': 2.001730150513316, 'bagging_temperature': 3.748665778254179, 'border_count': 98}. Best is trial 10 with value: 0.8798317844001096.


Trial 39 finished with value: 0.8716517337888561 and parameters: {'learning_rate': 0.14256923698221508, 'depth': 7, 'l2_leaf_reg': 0.0008011722357353118, 'random_strength': 2.001730150513316, 'bagging_temperature': 3.748665778254179, 'border_count': 98}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:25:57,567] Trial 40 finished with value: 0.8747767611828343 and parameters: {'learning_rate': 0.09925277180820097, 'depth': 4, 'l2_leaf_reg': 0.012876862703190339, 'random_strength': 5.604943719899232, 'bagging_temperature': 1.6647266908262575, 'border_count': 182}. Best is trial 10 with value: 0.8798317844001096.


Trial 40 finished with value: 0.8747767611828343 and parameters: {'learning_rate': 0.09925277180820097, 'depth': 4, 'l2_leaf_reg': 0.012876862703190339, 'random_strength': 5.604943719899232, 'bagging_temperature': 1.6647266908262575, 'border_count': 182}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:30:17,361] Trial 41 finished with value: 0.8796622382868683 and parameters: {'learning_rate': 0.13158516207955068, 'depth': 9, 'l2_leaf_reg': 11.06612823915914, 'random_strength': 3.348339458931086, 'bagging_temperature': 0.5891375183682124, 'border_count': 239}. Best is trial 10 with value: 0.8798317844001096.


Trial 41 finished with value: 0.8796622382868683 and parameters: {'learning_rate': 0.13158516207955068, 'depth': 9, 'l2_leaf_reg': 11.06612823915914, 'random_strength': 3.348339458931086, 'bagging_temperature': 0.5891375183682124, 'border_count': 239}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:35:09,143] Trial 42 finished with value: 0.8798078719295699 and parameters: {'learning_rate': 0.1275008976781418, 'depth': 10, 'l2_leaf_reg': 21.67125093103892, 'random_strength': 2.830627739845416, 'bagging_temperature': 0.4613860066910449, 'border_count': 241}. Best is trial 10 with value: 0.8798317844001096.


Trial 42 finished with value: 0.8798078719295699 and parameters: {'learning_rate': 0.1275008976781418, 'depth': 10, 'l2_leaf_reg': 21.67125093103892, 'random_strength': 2.830627739845416, 'bagging_temperature': 0.4613860066910449, 'border_count': 241}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:40:03,012] Trial 43 finished with value: 0.8789695066234723 and parameters: {'learning_rate': 0.08983696161248902, 'depth': 10, 'l2_leaf_reg': 1.7452963250799014, 'random_strength': 2.6867756935136833, 'bagging_temperature': 1.1823682252340943, 'border_count': 241}. Best is trial 10 with value: 0.8798317844001096.


Trial 43 finished with value: 0.8789695066234723 and parameters: {'learning_rate': 0.08983696161248902, 'depth': 10, 'l2_leaf_reg': 1.7452963250799014, 'random_strength': 2.6867756935136833, 'bagging_temperature': 1.1823682252340943, 'border_count': 241}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:44:55,262] Trial 44 finished with value: 0.8795968595869614 and parameters: {'learning_rate': 0.12261820853057845, 'depth': 10, 'l2_leaf_reg': 32.41366700113763, 'random_strength': 1.8767419964369947, 'bagging_temperature': 0.4818126449591794, 'border_count': 229}. Best is trial 10 with value: 0.8798317844001096.


Trial 44 finished with value: 0.8795968595869614 and parameters: {'learning_rate': 0.12261820853057845, 'depth': 10, 'l2_leaf_reg': 32.41366700113763, 'random_strength': 1.8767419964369947, 'bagging_temperature': 0.4818126449591794, 'border_count': 229}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:49:10,775] Trial 45 finished with value: 0.8783763353277408 and parameters: {'learning_rate': 0.05173940038145855, 'depth': 9, 'l2_leaf_reg': 0.11123419346258268, 'random_strength': 2.8119122830444074, 'bagging_temperature': 0.41244893598397314, 'border_count': 255}. Best is trial 10 with value: 0.8798317844001096.


Trial 45 finished with value: 0.8783763353277408 and parameters: {'learning_rate': 0.05173940038145855, 'depth': 9, 'l2_leaf_reg': 0.11123419346258268, 'random_strength': 2.8119122830444074, 'bagging_temperature': 0.41244893598397314, 'border_count': 255}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:54:30,912] Trial 46 finished with value: 0.8712826276044243 and parameters: {'learning_rate': 0.1618574994347664, 'depth': 10, 'l2_leaf_reg': 89.57953214401661, 'random_strength': 0.961014988707916, 'bagging_temperature': 8.093647046622495, 'border_count': 211}. Best is trial 10 with value: 0.8798317844001096.


Trial 46 finished with value: 0.8712826276044243 and parameters: {'learning_rate': 0.1618574994347664, 'depth': 10, 'l2_leaf_reg': 89.57953214401661, 'random_strength': 0.961014988707916, 'bagging_temperature': 8.093647046622495, 'border_count': 211}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 06:59:00,551] Trial 47 finished with value: 0.8775842122897004 and parameters: {'learning_rate': 0.14814593862005043, 'depth': 9, 'l2_leaf_reg': 0.4105577535462637, 'random_strength': 3.1654751328538877, 'bagging_temperature': 1.8698819976602319, 'border_count': 197}. Best is trial 10 with value: 0.8798317844001096.


Trial 47 finished with value: 0.8775842122897004 and parameters: {'learning_rate': 0.14814593862005043, 'depth': 9, 'l2_leaf_reg': 0.4105577535462637, 'random_strength': 3.1654751328538877, 'bagging_temperature': 1.8698819976602319, 'border_count': 197}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 07:03:21,146] Trial 48 finished with value: 0.8733444627233584 and parameters: {'learning_rate': 0.13024281516149386, 'depth': 9, 'l2_leaf_reg': 9.41610060986508e-06, 'random_strength': 9.628204721570178, 'bagging_temperature': 2.4854806944829795, 'border_count': 117}. Best is trial 10 with value: 0.8798317844001096.


Trial 48 finished with value: 0.8733444627233584 and parameters: {'learning_rate': 0.13024281516149386, 'depth': 9, 'l2_leaf_reg': 9.41610060986508e-06, 'random_strength': 9.628204721570178, 'bagging_temperature': 2.4854806944829795, 'border_count': 117}


Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2024-07-24 07:08:16,244] Trial 49 finished with value: 0.8791858780873909 and parameters: {'learning_rate': 0.10858516727435769, 'depth': 10, 'l2_leaf_reg': 5.98897811301219, 'random_strength': 3.8671935750434185, 'bagging_temperature': 1.1887647841727615, 'border_count': 245}. Best is trial 10 with value: 0.8798317844001096.


Trial 49 finished with value: 0.8791858780873909 and parameters: {'learning_rate': 0.10858516727435769, 'depth': 10, 'l2_leaf_reg': 5.98897811301219, 'random_strength': 3.8671935750434185, 'bagging_temperature': 1.1887647841727615, 'border_count': 245}
Best trial:
  Value:  0.8798317844001096
  Params: 
    learning_rate: 0.19574552985670457
    depth: 6
    l2_leaf_reg: 0.8703543329753842
    random_strength: 4.956084588870899
    bagging_temperature: 0.04330122784989854
    border_count: 248


In [12]:
# 최적의 파라미터로 최종 모델 학습 및 평가
best_params = trial.params
model = cb.CatBoostClassifier(**best_params, task_type='GPU', devices='0', iterations=1000, verbose=0)

oof_preds = []
oof_aucs = []

skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]
    
    train_pool = cb.Pool(data=X_train, label=y_train)
    eval_pool = cb.Pool(data=X_test, label=y_test)
    
    model.fit(train_pool, eval_set=eval_pool, early_stopping_rounds=50, verbose=0)
    
    y_pred = model.predict_proba(X_test)[:, 1]
    roc_auc = roc_auc_score(y_test, y_pred)
    
    oof_preds.append(y_pred)
    oof_aucs.append(roc_auc)
    
    print(f"\n---- Fold {fold}: ROC-AUC Score: {roc_auc:.6f}\n")
    
    del X_train, y_train, X_test, y_test
    gc.collect()
    
auc_mean = np.mean(oof_aucs)
auc_std = np.std(oof_aucs)
print(f"\n---> ROC-AUC Score: {auc_mean:.6f} ± {auc_std:.6f}\n")


---- Fold 0: ROC-AUC Score: 0.879823


---- Fold 1: ROC-AUC Score: 0.879746


---- Fold 2: ROC-AUC Score: 0.879940


---- Fold 3: ROC-AUC Score: 0.879751


---- Fold 4: ROC-AUC Score: 0.880494


---> ROC-AUC Score: 0.879951 ± 0.000281



In [13]:
# Test set을 활용한 최종 예측
model.fit(X, y)
test_pred = model.predict_proba(test)[:, 1]

In [14]:
# 결과를 파일로 저장
submission = pd.DataFrame({'Id': test.index, 'Response': test_pred})
submission.to_csv('submission.csv', index=False)

## 전처리 없는 버전

In [16]:
from sklearn.feature_selection import mutual_info_classif
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, ConfusionMatrixDisplay, classification_report
import catboost as cb

In [17]:
train = pd.read_csv("/kaggle/input/playground-series-s4e7/train.csv", index_col = "id", engine="pyarrow")
test = pd.read_csv("/kaggle/input/playground-series-s4e7/test.csv", index_col = "id", engine="pyarrow")

In [18]:
train["Region_Code"] = train["Region_Code"].astype(np.int8)
test["Region_Code"] = test["Region_Code"].astype(np.int8)

train["Policy_Sales_Channel"] = train["Policy_Sales_Channel"].astype(np.int16)
test["Policy_Sales_Channel"] = test["Policy_Sales_Channel"].astype(np.int16)

In [19]:
target="Response"

In [20]:
X = train.drop(target, axis=1)
y = train[target]

In [21]:
skfold = StratifiedKFold(2, shuffle=True, random_state=42)

In [23]:
def objective(trial):
    params = {
        'objective': 'Logloss',
        'eval_metric': 'AUC',
        'class_name' : [0, 1],
        'task_type': 'GPU', 
        'iterations': 3000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 100.0, log=True),
        'random_strength': trial.suggest_float('random_strength', 0.0, 10.0),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 10.0),
        'border_count': trial.suggest_int('border_count', 1, 255),
        'max_leaves': trial.suggest_int('max_leaves', 1, 64),
        'fold_permutation_block': trial.suggest_int('fold_permutation_block', 1, 256),
        'verbose': False,
        'verbose': False,
        'allow_writing_files' : False,
        'random_seed' : 42
    }

    skfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
    oof_preds = []
    oof_aucs = []

    for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]
        
        train_pool = cb.Pool(data=X_train, label=y_train)
        eval_pool = cb.Pool(data=X_test, label=y_test)
        
        model = cb.CatBoostClassifier(**params)
        
        model.fit(train_pool, eval_set=eval_pool, early_stopping_rounds=50, verbose=0)
        
        y_pred = model.predict_proba(X_test)[:, 1]
        roc_auc = roc_auc_score(y_test, y_pred)
        
        oof_preds.append(y_pred)
        oof_aucs.append(roc_auc)
    
    return np.mean(oof_aucs)

In [24]:
# 진행 상황을 출력하는 콜백 함수 정의
def logging_callback(study, trial):
    print(f"Trial {trial.number} finished with value: {trial.value} and parameters: {trial.params}")
    
# Optuna 스터디 생성 및 최적화
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50, callbacks=[logging_callback])

# 최적의 결과 출력
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2024-07-24 07:20:35,348] A new study created in memory with name: no-name-c797c7c0-df61-4916-b6bf-9f45e6884865
[W 2024-07-24 07:20:39,428] Trial 0 failed with parameters: {'learning_rate': 0.19067707754980925, 'depth': 4, 'l2_leaf_reg': 5.902351172975457e-05, 'random_strength': 8.961194266140799, 'bagging_temperature': 6.178061556213699, 'border_count': 1, 'max_leaves': 3, 'fold_permutation_block': 116} because of the following error: CatBoostError('Bad value for num_feature[non_default_doc_idx=0,feature_idx=0]="Male": Cannot convert \'b\'Male\'\' to float').
Traceback (most recent call last):
  File "_catboost.pyx", line 2383, in _catboost.get_float_feature
  File "_catboost.pyx", line 1188, in _catboost._FloatOrNan
  File "_catboost.pyx", line 983, in _catboost._FloatOrNanFromString
TypeError: Cannot convert 'b'Male'' to float

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/opt

CatBoostError: Bad value for num_feature[non_default_doc_idx=0,feature_idx=0]="Male": Cannot convert 'b'Male'' to float

In [None]:
# Test set을 활용한 최종 예측
model.fit(X, y)
test_pred = model.predict_proba(test)[:, 1]

In [None]:
# 결과를 파일로 저장
submission = pd.DataFrame({'Id': test.index, 'Response': test_pred})
submission.to_csv('submission2.csv', index=False)