In [2]:
import optuna
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Fetch the breast cancer features and labels as an (X, y) pair
data, target = load_breast_cancer(return_X_y=True)

# Hold out 20% of the samples for validation; the fixed random_state keeps the split repeatable
X_train, X_valid, y_train, y_valid = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=42,
)

# Define the objective function; it returns the metric to be maximized (validation ROC AUC)
def objective(trial):
    """Train one XGBoost booster with trial-sampled settings and score it.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        ROC AUC of the booster's predictions on the validation split
        (module-level ``X_valid`` / ``y_valid``).
    """
    # Settings that stay the same for every trial.
    fixed_settings = {
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'verbosity': 0,
        'booster': 'gbtree',
    }

    # Settings sampled per trial. The suggest calls are kept in this order
    # on purpose so the sampler sees the same sequence of parameters.
    sampled_settings = {
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 9, log=True),
        'eta': trial.suggest_float('eta', 0.01, 0.1),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
    }
    booster_params = {**fixed_settings, **sampled_settings}

    # No num_boost_round is passed, so xgb.train's default applies.
    train_matrix = xgb.DMatrix(X_train, label=y_train)
    booster = xgb.train(booster_params, train_matrix)

    # Score the predicted probabilities against the held-out labels.
    valid_matrix = xgb.DMatrix(X_valid)
    return roc_auc_score(y_valid, booster.predict(valid_matrix))

# Build an in-memory study that looks for the highest objective value
study = optuna.create_study(direction='maximize')

# Evaluate 100 hyperparameter configurations with the objective above
study.optimize(objective, n_trials=100)

# Report the winning trial's hyperparameters and its score
best_trial = study.best_trial
print("Best Parameters: ", best_trial.params)
print("Best Score: ", best_trial.value)

[I 2024-06-05 17:59:51,890] A new study created in memory with name: no-name-a76267a0-1dea-4707-819b-30be69bb68b5
[I 2024-06-05 17:59:51,904] Trial 0 finished with value: 0.9859154929577464 and parameters: {'lambda': 2.379196915771754, 'alpha': 1.8137974516250246, 'max_depth': 5, 'eta': 0.06080702239279447, 'gamma': 0.41114545354167015, 'colsample_bytree': 0.8990296077939444, 'min_child_weight': 4}. Best is trial 0 with value: 0.9859154929577464.
[I 2024-06-05 17:59:51,914] Trial 1 finished with value: 0.9927939731411726 and parameters: {'lambda': 0.002006795250125112, 'alpha': 3.7305155174227553, 'max_depth': 6, 'eta': 0.06591160012935034, 'gamma': 0.2650773927554121, 'colsample_bytree': 0.5632708938155921, 'min_child_weight': 8}. Best is trial 1 with value: 0.9927939731411726.
[I 2024-06-05 17:59:51,925] Trial 2 finished with value: 0.9867343596462497 and parameters: {'lambda': 0.0012471122632641992, 'alpha': 4.942710192159769, 'max_depth': 7, 'eta': 0.07309453797811422, 'gamma': 0.6

Best Parameters:  {'lambda': 0.00863409494854274, 'alpha': 0.4845941400093232, 'max_depth': 9, 'eta': 0.039158151866819466, 'gamma': 0.5032348647739832, 'colsample_bytree': 0.17105796693188846, 'min_child_weight': 10}
Best Score:  0.99737962659679
