# 🧪 Optuna

### ⚙️ Importing Libraries & Environment Setup

In [1]:
import numpy as np
import optuna
import pandas as pd
import xgboost as xgb
from colorama import Fore, Style
from numpy.typing import NDArray
from sklearn.datasets import load_iris
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split

In [2]:
%matplotlib inline

pd.set_option("display.width", 150)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", None)

### 🧰 Utility Functions

In [3]:
def evaluate_classification(
    y_pred: NDArray[np.int64], y_true: NDArray[np.float64]
) -> pd.DataFrame:
    """Evaluate multiple classification model predictions on the provided dataset.

    Args:
        y_pred (NDArray[np.float64]): Predicted outputs.
        y_true (NDArray[np.float64]): True target values of shape (n_samples,).

    Returns:
        pd.DataFrame: DataFrame containing classification metrics — Accuracy, Precision,
            Recall, and F1-score. Кows correspond to the metric names.
    """
    metrics = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, average="weighted"),
        "Recall": recall_score(y_true, y_pred, average="weighted"),
        "F1-score": f1_score(y_true, y_pred, average="weighted"),
    }

    return pd.DataFrame.from_dict(metrics, orient="index", columns=["score"]).T

### 📥 Loading the Dataset

In [4]:
# Generate dataset
X, y = load_iris(return_X_y=True)

# Split data into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

### 🏋️‍♂️ Model Training

In [5]:
class Objective:
    """Objective function class for Optuna hyperparameter optimization.

    This class wraps training and evaluation logic to be used in Optuna's optimization
    loop. It supports randomized cross-validation and logs the cross-validated accuracy
    for each trial.
    """

    def __init__(
        self,
        X: NDArray[np.float64],
        y: NDArray[np.float64],
        random_state: int | None = None,
    ) -> None:
        """Initializes the Objective class.

        Args:
            X (NDArray[np.float64]): Feature matrix.
            y (NDArray[np.float64]): Target vector.
            random_state (int | None): Random seed for reproducibility. Default is None.
        """
        self.X = X
        self.y = y
        self.random_state = random_state

    def __call__(self, trial: optuna.trial.Trial) -> float:
        """Objective function for Optuna study to optimize hyperparameters.

        Parameters:
            trial (Trial): A single trial object that suggests hyperparameter values.

        Returns:
            float: Cross-validated accuracy score (mean of folds).
        """
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 300),
            "max_depth": trial.suggest_int("max_depth", 3, 12),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "eval_metric": "logloss",
            "random_state": self.random_state,
        }

        model = xgb.XGBClassifier(**params)

        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=self.random_state)
        scores = cross_val_score(model, self.X, self.y, cv=cv, scoring="accuracy")
        score = np.median(scores)

        trial.set_user_attr("cv_accuracy", score)
        return score


study = optuna.create_study(
    direction="maximize", study_name="xgb", sampler=optuna.samplers.TPESampler(seed=42)
)

objective = Objective(X_train, y_train, random_state=42)
study.optimize(objective, n_trials=50)

[I 2025-08-01 13:46:25,899] A new study created in memory with name: xgb
[I 2025-08-01 13:46:26,231] Trial 0 finished with value: 0.9583333333333334 and parameters: {'n_estimators': 118, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182}. Best is trial 0 with value: 0.9583333333333334.
[I 2025-08-01 13:46:26,365] Trial 1 finished with value: 0.9583333333333334 and parameters: {'n_estimators': 55, 'max_depth': 3, 'learning_rate': 0.19030368381735815, 'subsample': 0.8005575058716043, 'colsample_bytree': 0.8540362888980227}. Best is trial 0 with value: 0.9583333333333334.
[I 2025-08-01 13:46:26,437] Trial 2 finished with value: 0.9583333333333334 and parameters: {'n_estimators': 15, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.6061695553391381, 'colsample_bytree': 0.5909124836035503}. Best is trial 0 with value: 0.9583333333333334.
[I 2025-08-01 13:46:26,629] Trial 3 finished with value: 0

In [6]:
model = xgb.XGBClassifier(**study.best_params, eval_metric="logloss", random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_val)

### 📊 Comparing Algorithm Versions

In [7]:
print(f"{Fore.GREEN}Best hyperparameters found:{Style.RESET_ALL}", study.best_params)
print(f"{Fore.YELLOW}Best CV accuracy:{Style.RESET_ALL}", study.best_value)

[32mBest hyperparameters found:[0m {'n_estimators': 118, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182}
[33mBest CV accuracy:[0m 0.9583333333333334


In [8]:
evaluate_classification(y_pred, y_val)

Unnamed: 0,Accuracy,Precision,Recall,F1-score
score,1.0,1.0,1.0,1.0


In [9]:
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()
optuna.visualization.plot_parallel_coordinate(study).show()