In [1]:
import optuna
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
data = load_breast_cancer()
X, y = data.data, data.target

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# optuna

In [3]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score

In [4]:
def objective(trial):
    params = {
    'n_estimators': trial.suggest_int("n_estimators", 10, 300),
    'max_depth': trial.suggest_int("max_depth", 2, 32, log=True),
    'min_samples_split': trial.suggest_float("min_samples_split", 0.1, 1.0),
    'min_samples_leaf': trial.suggest_int("min_samples_leaf", 1, 10)
    }
    
    clf = RandomForestClassifier(**params, random_state=42, n_jobs=-1)

    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    score = cross_val_score(clf, X_train, y_train, cv=cv, scoring="roc_auc")
    return score.mean()

In [5]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("Найкращі параметри:", study.best_params)

[I 2025-04-11 14:08:12,160] A new study created in memory with name: no-name-629cdb5a-42c8-4008-baaa-449fac1521f5
[I 2025-04-11 14:08:12,576] Trial 0 finished with value: 0.5 and parameters: {'n_estimators': 198, 'max_depth': 16, 'min_samples_split': 0.7256204777820796, 'min_samples_leaf': 8}. Best is trial 0 with value: 0.5.
[I 2025-04-11 14:08:12,975] Trial 1 finished with value: 0.9825807322582772 and parameters: {'n_estimators': 198, 'max_depth': 13, 'min_samples_split': 0.26517653030594773, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9825807322582772.
[I 2025-04-11 14:08:13,529] Trial 2 finished with value: 0.5 and parameters: {'n_estimators': 282, 'max_depth': 20, 'min_samples_split': 0.8005228285509849, 'min_samples_leaf': 7}. Best is trial 1 with value: 0.9825807322582772.
[I 2025-04-11 14:08:14,067] Trial 3 finished with value: 0.9840713694873148 and parameters: {'n_estimators': 280, 'max_depth': 4, 'min_samples_split': 0.16966076454190593, 'min_samples_leaf': 5}. Be

Найкращі параметри: {'n_estimators': 66, 'max_depth': 10, 'min_samples_split': 0.153791452225133, 'min_samples_leaf': 3}


In [6]:
best_clf = RandomForestClassifier(**study.best_params, random_state=42)
best_clf.fit(X_train, y_train)
y_pred = best_clf.predict(X_test)

print("Точність на тесті:", accuracy_score(y_test, y_pred))

Точність на тесті: 0.9649122807017544


# Bayes

In [None]:
from skopt import BayesSearchCV
from skopt.space import Integer, Real

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

In [13]:
param_dist = {
    'n_estimators': Integer(10, 300),
    'max_depth': Integer(2, 32),
    'min_samples_split': Real(0.1, 1.0),
    'min_samples_leaf': Integer(1, 10)
}

cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
model = RandomForestClassifier()

In [14]:
opt = BayesSearchCV(
    model,
    param_dist,
    n_iter=20,
    cv=cv,
    n_jobs=-1,
    scoring='roc_auc',
)

opt.fit(X_train, y_train)
model = opt.best_estimator_

In [15]:
y_pred = model.predict(X_test)
print("Точність на тесті:", accuracy_score(y_test, y_pred))

Точність на тесті: 0.9649122807017544
