In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from skopt import BayesSearchCV
from skopt.space import Integer, Real

from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import numpy as np

In [2]:
# Load the breast-cancer dataset as a feature matrix X and a target vector y.
data = load_breast_cancer()
X, y = data.data, data.target

# Split BEFORE scaling: fitting the scaler on the full dataset would let
# test-set statistics (mean / variance) leak into the training features.
# Note that X itself is left unscaled; only the split arrays are standardized.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling parameters from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [3]:
class CustomEarlyStopper:
    """Verbose early-stopping callback for a scikit-optimize style search.

    The instance is called with an optimization result after every search
    step. It logs progress, remembers the best score seen so far together
    with the parameters that produced it, and returns True (stop) once
    ``no_improvement_rounds`` consecutive calls brought no improvement.

    The callback reads ``result.func_vals``, ``result.x_iters`` and
    ``result.space.dimensions`` (each dimension must expose ``.name``).
    """

    def __init__(self, no_improvement_rounds=3, trend_window=3, maximize=True):
        """
        no_improvement_rounds: patience, i.e. how many consecutive calls
            without improvement are tolerated before stopping
        trend_window: number of most recent evaluations used for the trend report
        maximize: True if a larger score is better (``func_vals`` are then
            sign-flipped before comparison), False if a smaller score is better
        """
        self.no_improvement_rounds = no_improvement_rounds
        self.trend_window = trend_window
        self.maximize = maximize

        self.best_score = -np.inf if maximize else np.inf
        self.rounds_without_improvement = 0
        self.iteration = 1
        # Parameters that produced ``best_score`` (empty until the first call).
        self.current_params_dict = {}
        # Number of entries of ``result.func_vals`` already processed.
        self._n_seen = 0

    def _as_scores(self, raw_values):
        """Convert raw objective values into scores on the user-facing scale."""
        values = np.asarray(raw_values, dtype=float)
        return -values if self.maximize else values

    def __call__(self, result):
        """Process the newest evaluations; return True to stop the search."""
        scores = self._as_scores(result.func_vals)
        total = len(scores)
        if total == 0:
            # Nothing has been evaluated yet, so there is nothing to judge.
            return False

        print(f"Iteration <{self.iteration}> \n")

        # A history that did not grow belongs to a different optimizer run
        # (e.g. the next search sub-space), so start reading it from the top.
        if total <= self._n_seen:
            self._n_seen = 0
        first_new = self._n_seen
        self._n_seen = total

        # A single step may evaluate several candidates at once. Judge the
        # step by its best new candidate instead of only the last one, so a
        # good point in the middle of a batch is not silently ignored.
        batch = scores[first_new:]
        pick = np.argmax(batch) if self.maximize else np.argmin(batch)
        best_idx = first_new + int(pick)
        current_score = scores[best_idx]

        improved = (current_score > self.best_score) if self.maximize else (current_score < self.best_score)

        if improved:
            current_params = result.x_iters[best_idx]
            param_names = [dim.name for dim in result.space.dimensions]
            self.current_params_dict = dict(zip(param_names, current_params))

            print(
                f"New best score: {current_score:.5f} "
                f"(previous: {self.best_score:.5f})"
                f"\nParams: {self.current_params_dict}"
            )

            self.best_score = current_score
            self.rounds_without_improvement = 0

        else:
            self.rounds_without_improvement += 1
            print(
                f"No improvement for {self.rounds_without_improvement} rounds | "
                f"current: {current_score:.5f}, best: {self.best_score:.5f}"
            )

        # Trend report over the most recent evaluations. "slope" is the plain
        # difference between the newest and the oldest score in the window.
        if total >= self.trend_window:
            recent = scores[-self.trend_window:]
            slope = recent[-1] - recent[0]
            std = np.std(recent)
            print(
                f"Recent trend | window={self.trend_window}, "
                f"slope={slope:.5f}, std={std:.5f}"
            )

        print("-" * 50)
        print()

        # Early stopping
        if self.rounds_without_improvement >= self.no_improvement_rounds:
            print(
                f"Early stopping triggered. "
                f"Best score: {self.best_score:.5f}"
                f"\nParams: {self.current_params_dict}"
            )
            return True

        self.iteration += 1

        return False

In [4]:
# Hyper-parameter search space handed to BayesSearchCV below.
# Integer / Real entries are sampled from the given closed interval;
# plain lists enumerate the allowed discrete values.
param_dist = dict(
    n_estimators=Integer(32, 128),
    max_depth=Integer(2, 10),
    min_child_weight=Integer(1, 5),
    subsample=Real(0.1, 0.5),
    colsample_bytree=Real(0.1, 0.9),
    gamma=Real(0, 0.5),
    reg_alpha=[0, 0.01, 0.1, 1],
    reg_lambda=[1, 1.5, 2, 3],
    learning_rate=Real(0.001, 0.1),
)

# 3-fold stratified cross-validation with a fixed shuffle seed.
cv = StratifiedKFold(
    n_splits=3,
    shuffle=True,
    random_state=42,
)

# Base estimator with library defaults; the search overrides the tuned parameters.
model = XGBClassifier()

In [5]:
# Bayesian hyper-parameter search over `param_dist`, scored by ROC AUC.
# random_state is fixed so that the sampled candidates (and therefore the
# log below and the selected model) are reproducible across kernel restarts.
opt = BayesSearchCV(
    model,
    param_dist,
    n_iter=20,
    cv=cv,
    n_jobs=-1,
    scoring='roc_auc',
    random_state=42,
)

# Stop the search once 3 consecutive steps bring no improvement.
early_stop = CustomEarlyStopper(no_improvement_rounds=3)
opt.fit(X_train, y_train, callback=early_stop)

# Continue with the best estimator found by the search.
model = opt.best_estimator_

Iteration <1> 

New best score: 0.98377 (previous: -inf)
Params: {'colsample_bytree': 0.714238960363044, 'gamma': 0.16812643556300919, 'learning_rate': 0.04057742788099126, 'max_depth': 6, 'min_child_weight': 2, 'n_estimators': 48, 'reg_alpha': 0.01, 'reg_lambda': 1.5, 'subsample': 0.4313818826282304}
--------------------------------------------------

Iteration <2> 

New best score: 0.98539 (previous: 0.98377)
Params: {'colsample_bytree': 0.331702685709823, 'gamma': 0.19618519935822737, 'learning_rate': 0.0034907999885650538, 'max_depth': 8, 'min_child_weight': 1, 'n_estimators': 71, 'reg_alpha': 0.1, 'reg_lambda': 1.5, 'subsample': 0.41675432177458704}
--------------------------------------------------

Iteration <3> 

No improvement for 1 rounds | current: 0.97717, best: 0.98539
Recent trend | window=3, slope=-0.00660, std=0.00356
--------------------------------------------------

Iteration <4> 

New best score: 0.98628 (previous: 0.98539)
Params: {'colsample_bytree': 0.81806906009

In [6]:
# Score the tuned model on the held-out test split.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Точність на тесті:", test_accuracy)

Точність на тесті: 0.9736842105263158
