In [21]:
import optuna
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score


In [28]:
df = pd.read_csv('df_att.csv')
df.head()

X = df.drop('Heart Disease', axis=1)
y = df['Heart Disease']

cat_features = list(X.select_dtypes(exclude='number').columns)


skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42
)


def objective(trial):
    params = {
    "iterations": trial.suggest_int("iterations", 300, 1500),
    "depth": trial.suggest_int('depth', 4, 10),
    "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
    "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 10, log=True),
    "border_count": trial.suggest_int("border_count", 32, 255),
    "random_state": 42,
    "cat_features": cat_features,
    "verbose": 0
    }
    
    scores = []
    
    for train_idx, valid_idx in skf.split(X, y):
    
        X_train_fold = X.iloc[train_idx]
        X_valid_fold = X.iloc[valid_idx]
        y_train_fold = y.iloc[train_idx]
        y_valid_fold = y.iloc[valid_idx]
        
        model = CatBoostClassifier(**params)

        model.fit(X_train_fold,
                  y_train_fold,
                  eval_set=(X_valid_fold, y_valid_fold),
                  use_best_model=False)
        
        probs = model.predict_proba(X_valid_fold)[:,1]

        score = roc_auc_score(y_valid_fold, probs)

        scores.append(score)
                  
    return np.mean(scores)



In [30]:
study = optuna.create_study(direction='maximize')

study.optimize(objective, n_trials=30, show_progress_bar=True)

[32m[I 2026-02-17 10:47:21,705][0m A new study created in memory with name: no-name-61b47684-4630-4f94-92a9-3ef88d2ca5cc[0m
Best trial: 0. Best value: 0.954431:   3%|▎         | 1/30 [09:35<4:38:07, 575.41s/it]

[32m[I 2026-02-17 10:56:57,118][0m Trial 0 finished with value: 0.9544307359421605 and parameters: {'iterations': 441, 'depth': 7, 'learning_rate': 0.023075476101409025, 'l2_leaf_reg': 0.012660019013583133, 'border_count': 49}. Best is trial 0 with value: 0.9544307359421605.[0m


Best trial: 1. Best value: 0.954681:   7%|▋         | 2/30 [23:54<5:46:17, 742.05s/it]

[32m[I 2026-02-17 11:11:15,819][0m Trial 1 finished with value: 0.9546806704133578 and parameters: {'iterations': 427, 'depth': 10, 'learning_rate': 0.09828262971980745, 'l2_leaf_reg': 2.302887091547248, 'border_count': 146}. Best is trial 1 with value: 0.9546806704133578.[0m


Best trial: 2. Best value: 0.955351:  10%|█         | 3/30 [40:29<6:25:55, 857.61s/it]

[32m[I 2026-02-17 11:27:50,939][0m Trial 2 finished with value: 0.9553508052071823 and parameters: {'iterations': 875, 'depth': 5, 'learning_rate': 0.19374854077541165, 'l2_leaf_reg': 2.9485358843513194, 'border_count': 229}. Best is trial 2 with value: 0.9553508052071823.[0m


Best trial: 2. Best value: 0.955351:  13%|█▎        | 4/30 [52:57<5:52:57, 814.52s/it]

[32m[I 2026-02-17 11:40:19,394][0m Trial 3 finished with value: 0.9552257312508109 and parameters: {'iterations': 571, 'depth': 6, 'learning_rate': 0.22655609038328736, 'l2_leaf_reg': 5.706653146132107, 'border_count': 130}. Best is trial 2 with value: 0.9553508052071823.[0m


Best trial: 2. Best value: 0.955351:  17%|█▋        | 5/30 [1:15:39<7:01:34, 1011.78s/it]

[32m[I 2026-02-17 12:03:00,948][0m Trial 4 finished with value: 0.9521645188109048 and parameters: {'iterations': 632, 'depth': 10, 'learning_rate': 0.12662363949780267, 'l2_leaf_reg': 0.03415174523894514, 'border_count': 129}. Best is trial 2 with value: 0.9553508052071823.[0m


Best trial: 2. Best value: 0.955351:  20%|██        | 6/30 [1:39:02<7:37:55, 1144.83s/it]

[32m[I 2026-02-17 12:26:24,046][0m Trial 5 finished with value: 0.9551696601209596 and parameters: {'iterations': 1020, 'depth': 7, 'learning_rate': 0.09229272741264054, 'l2_leaf_reg': 0.44067812874105716, 'border_count': 95}. Best is trial 2 with value: 0.9553508052071823.[0m


Best trial: 2. Best value: 0.955351:  23%|██▎       | 7/30 [1:52:27<6:36:19, 1033.88s/it]

[32m[I 2026-02-17 12:39:49,508][0m Trial 6 finished with value: 0.954896615475403 and parameters: {'iterations': 718, 'depth': 6, 'learning_rate': 0.022237963894071667, 'l2_leaf_reg': 0.0048881397074794185, 'border_count': 68}. Best is trial 2 with value: 0.9553508052071823.[0m


Best trial: 2. Best value: 0.955351:  27%|██▋       | 8/30 [2:06:51<5:59:15, 979.80s/it] 

[32m[I 2026-02-17 12:54:13,513][0m Trial 7 finished with value: 0.9552632435774596 and parameters: {'iterations': 700, 'depth': 6, 'learning_rate': 0.15284303602514596, 'l2_leaf_reg': 0.14700100322511525, 'border_count': 138}. Best is trial 2 with value: 0.9553508052071823.[0m


Best trial: 8. Best value: 0.955417:  30%|███       | 9/30 [2:15:39<4:53:29, 838.54s/it]

[32m[I 2026-02-17 13:03:01,447][0m Trial 8 finished with value: 0.9554170151268974 and parameters: {'iterations': 435, 'depth': 6, 'learning_rate': 0.12316455950174297, 'l2_leaf_reg': 0.35961749111476665, 'border_count': 173}. Best is trial 8 with value: 0.9554170151268974.[0m


Best trial: 8. Best value: 0.955417:  33%|███▎      | 10/30 [2:47:35<6:30:20, 1171.01s/it]

[32m[I 2026-02-17 13:34:56,913][0m Trial 9 finished with value: 0.9543171647324463 and parameters: {'iterations': 1377, 'depth': 7, 'learning_rate': 0.12258435078381502, 'l2_leaf_reg': 0.00652910876291468, 'border_count': 159}. Best is trial 8 with value: 0.9554170151268974.[0m


Best trial: 8. Best value: 0.955417:  37%|███▋      | 11/30 [3:04:33<5:56:02, 1124.35s/it]

[32m[I 2026-02-17 13:51:55,454][0m Trial 10 finished with value: 0.9544074590899655 and parameters: {'iterations': 1267, 'depth': 4, 'learning_rate': 0.010010086413369333, 'l2_leaf_reg': 0.4144806383903114, 'border_count': 218}. Best is trial 8 with value: 0.9554170151268974.[0m


Best trial: 8. Best value: 0.955417:  40%|████      | 12/30 [3:19:53<5:18:38, 1062.16s/it]

[32m[I 2026-02-17 14:07:15,387][0m Trial 11 finished with value: 0.9553017805702282 and parameters: {'iterations': 997, 'depth': 4, 'learning_rate': 0.2931900136853088, 'l2_leaf_reg': 1.766053591674499, 'border_count': 245}. Best is trial 8 with value: 0.9554170151268974.[0m


Best trial: 8. Best value: 0.955417:  43%|████▎     | 13/30 [3:34:19<4:44:07, 1002.81s/it]

[32m[I 2026-02-17 14:21:41,612][0m Trial 12 finished with value: 0.9554100354266947 and parameters: {'iterations': 865, 'depth': 5, 'learning_rate': 0.054596788997670134, 'l2_leaf_reg': 8.797332975027846, 'border_count': 201}. Best is trial 8 with value: 0.9554170151268974.[0m


Training has stopped (degenerate solution on iteration 160, probably too small l2-regularization, try to increase it)
Training has stopped (degenerate solution on iteration 217, probably too small l2-regularization, try to increase it)
Training has stopped (degenerate solution on iteration 206, probably too small l2-regularization, try to increase it)
Best trial: 8. Best value: 0.955417:  47%|████▋     | 14/30 [3:41:05<3:39:20, 822.53s/it] 

[32m[I 2026-02-17 14:28:27,566][0m Trial 13 finished with value: 0.937639016368775 and parameters: {'iterations': 334, 'depth': 9, 'learning_rate': 0.04882754592289551, 'l2_leaf_reg': 0.0012139549351252808, 'border_count': 189}. Best is trial 8 with value: 0.9554170151268974.[0m


Best trial: 14. Best value: 0.955489:  50%|█████     | 15/30 [4:01:55<3:57:49, 951.27s/it]

[32m[I 2026-02-17 14:49:17,199][0m Trial 14 finished with value: 0.9554885032086222 and parameters: {'iterations': 1202, 'depth': 5, 'learning_rate': 0.0560115410010361, 'l2_leaf_reg': 0.605032507417015, 'border_count': 192}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  53%|█████▎    | 16/30 [4:31:48<4:41:03, 1204.53s/it]

[32m[I 2026-02-17 15:19:09,877][0m Trial 15 finished with value: 0.9552783379865046 and parameters: {'iterations': 1191, 'depth': 8, 'learning_rate': 0.032282704449284244, 'l2_leaf_reg': 0.5185707229311102, 'border_count': 177}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  57%|█████▋    | 17/30 [4:58:08<4:45:29, 1317.66s/it]

[32m[I 2026-02-17 15:45:30,634][0m Trial 16 finished with value: 0.9554555719338171 and parameters: {'iterations': 1495, 'depth': 5, 'learning_rate': 0.07337890860489066, 'l2_leaf_reg': 0.11467025394364068, 'border_count': 99}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  60%|██████    | 18/30 [5:22:54<4:33:39, 1368.26s/it]

[32m[I 2026-02-17 16:10:16,673][0m Trial 17 finished with value: 0.9554740456341065 and parameters: {'iterations': 1418, 'depth': 5, 'learning_rate': 0.07270229668101531, 'l2_leaf_reg': 0.06604074170302145, 'border_count': 103}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  63%|██████▎   | 19/30 [5:39:19<3:49:43, 1253.06s/it]

[32m[I 2026-02-17 16:26:41,372][0m Trial 18 finished with value: 0.9553597638826966 and parameters: {'iterations': 1163, 'depth': 4, 'learning_rate': 0.037548374142747816, 'l2_leaf_reg': 0.060243923567796835, 'border_count': 102}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  67%|██████▋   | 20/30 [6:04:51<3:42:47, 1336.72s/it]

[32m[I 2026-02-17 16:52:13,090][0m Trial 19 finished with value: 0.955417243539886 and parameters: {'iterations': 1467, 'depth': 5, 'learning_rate': 0.07178305289481672, 'l2_leaf_reg': 0.026226232744903342, 'border_count': 74}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  70%|███████   | 21/30 [6:37:06<3:47:28, 1516.46s/it]

[32m[I 2026-02-17 17:24:28,609][0m Trial 20 finished with value: 0.9550686483120794 and parameters: {'iterations': 1319, 'depth': 8, 'learning_rate': 0.01365336549055483, 'l2_leaf_reg': 0.8545833472850476, 'border_count': 205}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  73%|███████▎  | 22/30 [7:03:23<3:24:35, 1534.38s/it]

[32m[I 2026-02-17 17:50:44,783][0m Trial 21 finished with value: 0.9554565477509882 and parameters: {'iterations': 1483, 'depth': 5, 'learning_rate': 0.07688006021345933, 'l2_leaf_reg': 0.1358626441118039, 'border_count': 107}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  77%|███████▋  | 23/30 [7:28:00<2:57:01, 1517.29s/it]

[32m[I 2026-02-17 18:15:22,193][0m Trial 22 finished with value: 0.9554832705452802 and parameters: {'iterations': 1402, 'depth': 5, 'learning_rate': 0.06759397434704542, 'l2_leaf_reg': 0.14312080279206596, 'border_count': 116}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  80%|████████  | 24/30 [7:44:01<2:15:01, 1350.23s/it]

[32m[I 2026-02-17 18:31:22,734][0m Trial 23 finished with value: 0.9553938508289225 and parameters: {'iterations': 1120, 'depth': 4, 'learning_rate': 0.04218389667438227, 'l2_leaf_reg': 0.20313361646129147, 'border_count': 118}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 14. Best value: 0.955489:  83%|████████▎ | 25/30 [8:10:55<1:59:08, 1429.63s/it]

[32m[I 2026-02-17 18:58:17,581][0m Trial 24 finished with value: 0.9553959647387702 and parameters: {'iterations': 1379, 'depth': 6, 'learning_rate': 0.028967254320679897, 'l2_leaf_reg': 0.054986445466700735, 'border_count': 79}. Best is trial 14 with value: 0.9554885032086222.[0m


Best trial: 25. Best value: 0.955495:  87%|████████▋ | 26/30 [8:32:36<1:32:44, 1391.04s/it]

[32m[I 2026-02-17 19:19:58,582][0m Trial 25 finished with value: 0.9554952344394854 and parameters: {'iterations': 1233, 'depth': 5, 'learning_rate': 0.05563228358640898, 'l2_leaf_reg': 1.2388924496891942, 'border_count': 157}. Best is trial 25 with value: 0.9554952344394854.[0m


Best trial: 25. Best value: 0.955495:  90%|█████████ | 27/30 [8:50:56<1:05:11, 1303.68s/it]

[32m[I 2026-02-17 19:38:18,457][0m Trial 26 finished with value: 0.9554779877422085 and parameters: {'iterations': 1251, 'depth': 4, 'learning_rate': 0.052344595767393975, 'l2_leaf_reg': 1.070197932555292, 'border_count': 159}. Best is trial 25 with value: 0.9554952344394854.[0m


Best trial: 25. Best value: 0.955495:  93%|█████████▎| 28/30 [9:12:58<43:38, 1309.03s/it]  

[32m[I 2026-02-17 20:00:19,976][0m Trial 27 finished with value: 0.955448777300964 and parameters: {'iterations': 1082, 'depth': 6, 'learning_rate': 0.06098318978340682, 'l2_leaf_reg': 0.9712030715169957, 'border_count': 159}. Best is trial 25 with value: 0.9554952344394854.[0m


Best trial: 25. Best value: 0.955495:  97%|█████████▋| 29/30 [9:35:14<21:57, 1317.10s/it]

[32m[I 2026-02-17 20:22:35,891][0m Trial 28 finished with value: 0.9554660586253905 and parameters: {'iterations': 1290, 'depth': 5, 'learning_rate': 0.040702525672882336, 'l2_leaf_reg': 0.29951267009249116, 'border_count': 178}. Best is trial 25 with value: 0.9554952344394854.[0m


Best trial: 25. Best value: 0.955495: 100%|██████████| 30/30 [9:56:35<00:00, 1193.18s/it]

[32m[I 2026-02-17 20:43:57,133][0m Trial 29 finished with value: 0.9551822455028642 and parameters: {'iterations': 979, 'depth': 7, 'learning_rate': 0.02268785798960501, 'l2_leaf_reg': 4.0254686806132645, 'border_count': 149}. Best is trial 25 with value: 0.9554952344394854.[0m



