<a href="https://colab.research.google.com/github/Imrantipu/-serviceReviewServer/blob/main/optuna_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.4/383.4 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [2]:
# Libraries for tuning, data handling and preprocessing
import optuna
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# scikit-learn's built-in `load_diabetes` is a regression dataset, so the
# Pima Indians Diabetes classification data is read from an external CSV
# hosted on GitHub instead. Column names are supplied explicitly via `names`.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

# Read the CSV into a DataFrame
df = pd.read_csv(url, names=columns)

# Preview the first rows
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
import numpy as np

# In these columns a zero is not a valid value, so it is re-coded as missing
cols_with_missing_vals = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
df[cols_with_missing_vals] = df[cols_with_missing_vals].replace(0, np.nan)

# Fill every gap with the mean of its own column
# NOTE(review): the means are computed on the whole dataset, i.e. before the
# train/test split that follows -- confirm this is acceptable for the analysis.
df = df.fillna(df.mean())

# Per-column count of values that are still missing (all zeros expected)
print(df.isna().sum())


Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [4]:
# Separate the label column from the predictors
y = df['Outcome']
X = df.drop(columns='Outcome')

# Hold out 30% of the rows as the test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features; the scaler is fitted on the training rows only and
# then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Report the shape of each split
print(f'Training set shape: {X_train.shape}')
print(f'Test set shape: {X_test.shape}')

Training set shape: (537, 8)
Test set shape: (231, 8)


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Define the objective function
def objective(trial):
    """Score one Optuna trial of a random forest classifier.

    Asks the trial for `n_estimators` (50-200) and `max_depth` (3-20), builds
    a RandomForestClassifier with them and returns the mean accuracy of a
    3-fold cross-validation on `X_train` / `y_train`.
    """
    # Hyperparameters proposed for this trial
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
    }

    # The forest's own seed is fixed
    forest = RandomForestClassifier(random_state=42, **params)

    # One accuracy per fold, averaged into the value Optuna maximizes
    fold_scores = cross_val_score(forest, X_train, y_train, cv=3, scoring='accuracy')
    return fold_scores.mean()

In [6]:
# Create a study that maximizes accuracy. The TPE sampler is seeded so that
# repeated runs propose the same sequence of trials; for plain random search,
# optuna.samplers.RandomSampler(seed=42) can be used instead.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)  # Run 50 trials to find the best hyperparameters

[I 2025-01-31 08:23:23,823] A new study created in memory with name: no-name-f053c8a6-fa04-477e-badb-6e4acb970fad
[I 2025-01-31 08:23:26,019] Trial 0 finished with value: 0.7635009310986964 and parameters: {'n_estimators': 115, 'max_depth': 9}. Best is trial 0 with value: 0.7635009310986964.
[I 2025-01-31 08:23:26,829] Trial 1 finished with value: 0.7560521415270017 and parameters: {'n_estimators': 101, 'max_depth': 3}. Best is trial 0 with value: 0.7635009310986964.
[I 2025-01-31 08:23:27,753] Trial 2 finished with value: 0.7541899441340782 and parameters: {'n_estimators': 179, 'max_depth': 3}. Best is trial 0 with value: 0.7635009310986964.
[I 2025-01-31 08:23:28,693] Trial 3 finished with value: 0.7765363128491621 and parameters: {'n_estimators': 168, 'max_depth': 16}. Best is trial 3 with value: 0.7765363128491621.
[I 2025-01-31 08:23:29,366] Trial 4 finished with value: 0.7597765363128491 and parameters: {'n_estimators': 121, 'max_depth': 9}. Best is trial 3 with value: 0.77653631

In [7]:
# Report the score and the hyperparameters of the study's best trial
print(
    f'Best trial accuracy: {study.best_trial.value}',
    f'Best hyperparameters: {study.best_trial.params}',
    sep='\n',
)

Best trial accuracy: 0.7821229050279329
Best hyperparameters: {'n_estimators': 119, 'max_depth': 19}


In [8]:
from sklearn.metrics import accuracy_score

# Rebuild a random forest from the best trial's hyperparameters and fit it on
# the training split
best_model = RandomForestClassifier(random_state=42, **study.best_trial.params)
best_model.fit(X_train, y_train)

# Predict the test split and compare against the true labels
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

# Show the test accuracy rounded to two decimals
print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')


Test Accuracy with best hyperparameters: 0.74


# Samplers in Optuna

In [9]:
# Candidate values per hyperparameter for the grid search
search_space = {
    'n_estimators': list(range(50, 201, 50)),  # 50, 100, 150, 200
    'max_depth': list(range(5, 21, 5)),        # 5, 10, 15, 20
}

In [10]:
# Exhaustive search with GridSampler: every combination in `search_space` is
# evaluated and the sampler stops the study once the grid is exhausted, so no
# n_trials is passed. The grid is visited in shuffled order; the seed makes
# that order the same on every run.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.GridSampler(search_space, seed=42),
)
study.optimize(objective)

[I 2025-01-31 08:41:47,858] A new study created in memory with name: no-name-644009cf-03ab-4bff-8cee-16d31ddbb3cf
[I 2025-01-31 08:41:48,388] Trial 0 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 100, 'max_depth': 5}. Best is trial 0 with value: 0.7690875232774674.
[I 2025-01-31 08:41:49,227] Trial 1 finished with value: 0.7672253258845437 and parameters: {'n_estimators': 150, 'max_depth': 10}. Best is trial 0 with value: 0.7690875232774674.
[I 2025-01-31 08:41:49,525] Trial 2 finished with value: 0.7728119180633147 and parameters: {'n_estimators': 50, 'max_depth': 15}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-01-31 08:41:50,107] Trial 3 finished with value: 0.7653631284916201 and parameters: {'n_estimators': 100, 'max_depth': 15}. Best is trial 2 with value: 0.7728119180633147.
[I 2025-01-31 08:41:50,668] Trial 4 finished with value: 0.7690875232774674 and parameters: {'n_estimators': 100, 'max_depth': 20}. Best is trial 2 with value: 0.772811

In [11]:
# Report the score and the hyperparameters of the study's best trial
print('\n'.join([
    f'Best trial accuracy: {study.best_trial.value}',
    f'Best hyperparameters: {study.best_trial.params}',
]))

Best trial accuracy: 0.7746741154562384
Best hyperparameters: {'n_estimators': 50, 'max_depth': 5}


In [12]:
from sklearn.metrics import accuracy_score

# Random forest configured with the hyperparameters of the study's best trial,
# fitted on the training split (fit() returns the estimator itself)
best_model = RandomForestClassifier(
    random_state=42,
    **study.best_trial.params,
).fit(X_train, y_train)

# Predictions for the test split
y_pred = best_model.predict(X_test)

# Share of test rows that were classified correctly
test_accuracy = accuracy_score(y_test, y_pred)

# Show the test accuracy rounded to two decimals
print(f'Test Accuracy with best hyperparameters: {test_accuracy:.2f}')

Test Accuracy with best hyperparameters: 0.74


# Optuna visualization

In [13]:
# For visualizations
from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_slice, plot_contour, plot_param_importances

In [14]:
# 1. Optimization history of the current `study`
history_fig = plot_optimization_history(study)
history_fig.show()

In [15]:
# 2. Parallel-coordinate plot of the current `study`
parallel_fig = plot_parallel_coordinate(study)
parallel_fig.show()

In [16]:
# 3. Slice plot of the current `study`
slice_fig = plot_slice(study)
slice_fig.show()

In [17]:
# 4. Contour plot of the current `study`
contour_fig = plot_contour(study)
contour_fig.show()

In [18]:
# 5. Hyperparameter importances for the current `study`
importance_fig = plot_param_importances(study)
importance_fig.show()

# Optimizing Multiple ML Models

In [19]:
# Importing the required libraries
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC

In [20]:
# Define the objective function for Optuna
def objective(trial):
    """Score one Optuna trial that both picks and configures a classifier.

    The trial first chooses the classifier family ('SVM', 'RandomForest' or
    'GradientBoosting') and then suggests the hyperparameters of that family
    only. The return value is the mean accuracy of a 3-fold cross-validation
    on `X_train` / `y_train`.
    """

    def build_svm():
        # Suggestions are requested in keyword order: C, kernel, gamma
        return SVC(
            C=trial.suggest_float('C', 0.1, 100, log=True),
            kernel=trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
            gamma=trial.suggest_categorical('gamma', ['scale', 'auto']),
            random_state=42,
        )

    def build_random_forest():
        return RandomForestClassifier(
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            max_depth=trial.suggest_int('max_depth', 3, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
            min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 10),
            bootstrap=trial.suggest_categorical('bootstrap', [True, False]),
            random_state=42,
        )

    def build_gradient_boosting():
        return GradientBoostingClassifier(
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            max_depth=trial.suggest_int('max_depth', 3, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
            min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 10),
            random_state=42,
        )

    # Maps each selectable classifier name to the function that builds it;
    # the key order is also the order of the categorical choices
    builders = {
        'SVM': build_svm,
        'RandomForest': build_random_forest,
        'GradientBoosting': build_gradient_boosting,
    }

    # Choose the algorithm to tune, then build it with trial-suggested settings
    classifier_name = trial.suggest_categorical('classifier', list(builders))
    model = builders[classifier_name]()

    # Mean accuracy over the 3 cross-validation folds
    fold_scores = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy')
    return fold_scores.mean()

In [21]:
# Create a study with the TPE sampler (the sampler Optuna uses by default for a
# single-objective study, made explicit and seeded here so runs are
# reproducible) and run 100 trials across the three classifier families.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

[I 2025-01-31 08:58:17,053] A new study created in memory with name: no-name-d2eb390c-194a-4c04-b3de-fd936d1ae858
[I 2025-01-31 08:58:18,189] Trial 0 finished with value: 0.750465549348231 and parameters: {'classifier': 'GradientBoosting', 'n_estimators': 73, 'learning_rate': 0.19941175183688192, 'max_depth': 8, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.750465549348231.
[I 2025-01-31 08:58:18,224] Trial 1 finished with value: 0.7374301675977654 and parameters: {'classifier': 'SVM', 'C': 2.695816639577965, 'kernel': 'poly', 'gamma': 'scale'}. Best is trial 0 with value: 0.750465549348231.
[I 2025-01-31 08:58:18,255] Trial 2 finished with value: 0.7374301675977654 and parameters: {'classifier': 'SVM', 'C': 0.38248374654782724, 'kernel': 'poly', 'gamma': 'scale'}. Best is trial 0 with value: 0.750465549348231.
[I 2025-01-31 08:58:18,770] Trial 3 finished with value: 0.7728119180633147 and parameters: {'classifier': 'RandomForest', 'n_estimators': 117, '

In [22]:
# Fetch the best trial once, then report its parameters and its score
best_trial = study.best_trial
for label, shown in (
    ("Best trial parameters:", best_trial.params),
    ("Best trial accuracy:", best_trial.value),
):
    print(label, shown)

Best trial parameters: {'classifier': 'SVM', 'C': 0.1204798367776354, 'kernel': 'linear', 'gamma': 'auto'}
Best trial accuracy: 0.7895716945996275


In [23]:
# Show the trials of the study as a DataFrame (value, timing, parameters, state)
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_C,params_bootstrap,params_classifier,params_gamma,params_kernel,params_learning_rate,params_max_depth,params_min_samples_leaf,params_min_samples_split,params_n_estimators,state
0,0,0.750466,2025-01-31 08:58:17.056356,2025-01-31 08:58:18.189099,0 days 00:00:01.132743,,,GradientBoosting,,,0.199412,8.0,2.0,3.0,73.0,COMPLETE
1,1,0.737430,2025-01-31 08:58:18.190909,2025-01-31 08:58:18.224697,0 days 00:00:00.033788,2.695817,,SVM,scale,poly,,,,,,COMPLETE
2,2,0.737430,2025-01-31 08:58:18.226440,2025-01-31 08:58:18.254871,0 days 00:00:00.028431,0.382484,,SVM,scale,poly,,,,,,COMPLETE
3,3,0.772812,2025-01-31 08:58:18.256749,2025-01-31 08:58:18.769956,0 days 00:00:00.513207,,False,RandomForest,,,,15.0,8.0,7.0,117.0,COMPLETE
4,4,0.785847,2025-01-31 08:58:18.771661,2025-01-31 08:58:18.939934,0 days 00:00:00.168273,31.709567,,SVM,scale,linear,,,,,,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,0.785847,2025-01-31 08:59:06.666620,2025-01-31 08:59:06.701282,0 days 00:00:00.034662,0.223402,,SVM,auto,linear,,,,,,COMPLETE
96,96,0.743017,2025-01-31 08:59:06.702972,2025-01-31 08:59:06.743665,0 days 00:00:00.040693,0.119055,,SVM,auto,rbf,,,,,,COMPLETE
97,97,0.746741,2025-01-31 08:59:06.745252,2025-01-31 08:59:09.858883,0 days 00:00:03.113631,,,GradientBoosting,,,0.020902,16.0,3.0,3.0,146.0,COMPLETE
98,98,0.789572,2025-01-31 08:59:09.860608,2025-01-31 08:59:09.897229,0 days 00:00:00.036621,0.147951,,SVM,auto,linear,,,,,,COMPLETE


In [24]:
# Number of trials that were run for each classifier family
classifier_counts = study.trials_dataframe()['params_classifier'].value_counts()
classifier_counts

Unnamed: 0_level_0,count
params_classifier,Unnamed: 1_level_1
SVM,79
GradientBoosting,12
RandomForest,9


In [25]:
# Mean objective value (cross-validated accuracy) per classifier family
mean_value_by_classifier = (
    study.trials_dataframe()
    .groupby('params_classifier')['value']
    .mean()
)
mean_value_by_classifier

Unnamed: 0_level_0,value
params_classifier,Unnamed: 1_level_1
GradientBoosting,0.747207
RandomForest,0.761639
SVM,0.77491


In [26]:
# Optimization history of the multi-classifier `study`
multi_history_fig = plot_optimization_history(study)
multi_history_fig.show()

In [27]:
# Slice plot of the multi-classifier `study`
multi_slice_fig = plot_slice(study)
multi_slice_fig.show()

In [28]:
# Hyperparameter importances for the multi-classifier `study`
multi_importance_fig = plot_param_importances(study)
multi_importance_fig.show()

In [30]:
!pip install optuna-integration[xgboost]
import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
import numpy as np

# Load the Iris dataset
X, y = load_iris(return_X_y=True)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the objective function for XGBoost
def objective(trial):
    # Hyperparameter search space
    param = {
        'verbosity': 0,
        'objective': 'multi:softprob',
        'num_class': 3,
        'eval_metric': 'mlogloss',  # Ensure that the eval_metric is specified here
        'booster': 'gbtree',
        'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),
        'eta': trial.suggest_float('eta', 0.01, 0.3),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'n_estimators': 300,
    }

    # Create DMatrix for XGBoost
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Define a pruning callback based on evaluation metrics
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "eval-mlogloss")  # Match the metric name in the evals list

    # Train the model
    bst = xgb.train(
        param,
        dtrain,
        num_boost_round=300,
        evals=[(dtrain, "train"), (dtest, "eval")],  # Ensure the eval datasets and names are specified
        early_stopping_rounds=30,
        callbacks=[pruning_callback]
    )

    # Predict on the test set
    preds = bst.predict(dtest)
    best_preds = [int(np.argmax(line)) for line in preds]

    # Return accuracy as the objective value
    accuracy = accuracy_score(y_test, best_preds)
    return accuracy

# Create a study with pruning
study = optuna.create_study(direction='maximize', pruner=optuna.pruners.SuccessiveHalvingPruner())
study.optimize(objective, n_trials=50)

# Output the best trial
print(f"Best trial: {study.best_trial.params}")
print(f"Best accuracy: {study.best_value}")


Collecting optuna-integration[xgboost]
  Downloading optuna_integration-4.2.1-py3-none-any.whl.metadata (12 kB)
Downloading optuna_integration-4.2.1-py3-none-any.whl (97 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/97.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.6/97.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: optuna-integration
Successfully installed optuna-integration-4.2.1


[I 2025-01-31 09:12:20,197] A new study created in memory with name: no-name-97f055a5-5a4c-4561-9588-f394cb7ea03d


[0]	train-mlogloss:0.85916	eval-mlogloss:0.84934
[1]	train-mlogloss:0.62438	eval-mlogloss:0.60287
[2]	train-mlogloss:0.46796	eval-mlogloss:0.43674
[3]	train-mlogloss:0.35864	eval-mlogloss:0.32125
[4]	train-mlogloss:0.28547	eval-mlogloss:0.24397
[5]	train-mlogloss:0.22998	eval-mlogloss:0.18061
[6]	train-mlogloss:0.18965	eval-mlogloss:0.14548
[7]	train-mlogloss:0.16375	eval-mlogloss:0.12161
[8]	train-mlogloss:0.14570	eval-mlogloss:0.09886
[9]	train-mlogloss:0.13546	eval-mlogloss:0.08635
[10]	train-mlogloss:0.13243	eval-mlogloss:0.08434
[11]	train-mlogloss:0.12775	eval-mlogloss:0.07512
[12]	train-mlogloss:0.12650	eval-mlogloss:0.07591
[13]	train-mlogloss:0.12349	eval-mlogloss:0.06958
[14]	train-mlogloss:0.12235	eval-mlogloss:0.07168
[15]	train-mlogloss:0.12011	eval-mlogloss:0.06578
[16]	train-mlogloss:0.11742	eval-mlogloss:0.06224
[17]	train-mlogloss:0.11721	eval-mlogloss:0.06190
[18]	train-mlogloss:0.11586	eval-mlogloss:0.05990
[19]	train-mlogloss:0.11538	eval-mlogloss:0.05966
[20]	train

[I 2025-01-31 09:12:20,642] Trial 0 finished with value: 1.0 and parameters: {'lambda': 5.462384910216173e-06, 'alpha': 1.058782621859015e-06, 'eta': 0.2901641528553862, 'gamma': 4.0183289378553344e-08, 'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.6360174671160264, 'colsample_bytree': 0.6587588732736163}. Best is trial 0 with value: 1.0.


[0]	train-mlogloss:1.08860	eval-mlogloss:1.08820
[1]	train-mlogloss:1.07578	eval-mlogloss:1.07410
[2]	train-mlogloss:1.06124	eval-mlogloss:1.05896
[3]	train-mlogloss:1.04648	eval-mlogloss:1.04390
[4]	train-mlogloss:1.03259	eval-mlogloss:1.02988
[5]	train-mlogloss:1.01837	eval-mlogloss:1.01504
[6]	train-mlogloss:1.00500	eval-mlogloss:1.00116
[7]	train-mlogloss:0.99359	eval-mlogloss:0.99061
[8]	train-mlogloss:0.98075	eval-mlogloss:0.97689
[9]	train-mlogloss:0.96790	eval-mlogloss:0.96353
[10]	train-mlogloss:0.95566	eval-mlogloss:0.95103
[11]	train-mlogloss:0.94349	eval-mlogloss:0.93792
[12]	train-mlogloss:0.93550	eval-mlogloss:0.92973
[13]	train-mlogloss:0.92356	eval-mlogloss:0.91683
[14]	train-mlogloss:0.91615	eval-mlogloss:0.90886
[15]	train-mlogloss:0.90546	eval-mlogloss:0.89797
[16]	train-mlogloss:0.89554	eval-mlogloss:0.88761
[17]	train-mlogloss:0.88664	eval-mlogloss:0.87874
[18]	train-mlogloss:0.87547	eval-mlogloss:0.86723
[19]	train-mlogloss:0.86654	eval-mlogloss:0.85828
[20]	train

[I 2025-01-31 09:12:22,206] Trial 1 finished with value: 1.0 and parameters: {'lambda': 1.2156463151106723e-07, 'alpha': 8.458197364290172e-05, 'eta': 0.011494167298865321, 'gamma': 3.956554239090171e-06, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.4456894601270849, 'colsample_bytree': 0.6868494063896915}. Best is trial 0 with value: 1.0.


[0]	train-mlogloss:1.01644	eval-mlogloss:1.02477


[I 2025-01-31 09:12:22,237] Trial 2 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.92997	eval-mlogloss:0.92599


[I 2025-01-31 09:12:22,267] Trial 3 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.00188	eval-mlogloss:1.00554
[1]	train-mlogloss:0.89492	eval-mlogloss:0.89232


[I 2025-01-31 09:12:22,294] Trial 4 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.80845	eval-mlogloss:0.78673


[I 2025-01-31 09:12:22,315] Trial 5 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.99946	eval-mlogloss:1.00241


[I 2025-01-31 09:12:22,339] Trial 6 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.03425	eval-mlogloss:1.02980
[1]	train-mlogloss:0.94366	eval-mlogloss:0.93494


[I 2025-01-31 09:12:22,364] Trial 7 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.08782	eval-mlogloss:1.08699
[1]	train-mlogloss:1.07139	eval-mlogloss:1.06990
[2]	train-mlogloss:1.05529	eval-mlogloss:1.05331
[3]	train-mlogloss:1.03923	eval-mlogloss:1.03691


[I 2025-01-31 09:12:22,394] Trial 8 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:0.99872	eval-mlogloss:0.99087
[1]	train-mlogloss:0.92183	eval-mlogloss:0.90795


[I 2025-01-31 09:12:22,419] Trial 9 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.75148	eval-mlogloss:0.73339
[1]	train-mlogloss:0.54001	eval-mlogloss:0.52578


[I 2025-01-31 09:12:22,588] Trial 10 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.07934	eval-mlogloss:1.07916


[I 2025-01-31 09:12:22,657] Trial 11 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.86261	eval-mlogloss:0.85512
[1]	train-mlogloss:0.63034	eval-mlogloss:0.61070


[I 2025-01-31 09:12:22,736] Trial 12 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.92063	eval-mlogloss:0.90912


[I 2025-01-31 09:12:22,803] Trial 13 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.85670	eval-mlogloss:0.84049


[I 2025-01-31 09:12:22,873] Trial 14 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.02580	eval-mlogloss:1.02035


[I 2025-01-31 09:12:22,939] Trial 15 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.92683	eval-mlogloss:0.91452


[I 2025-01-31 09:12:23,007] Trial 16 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.94292	eval-mlogloss:0.94254


[I 2025-01-31 09:12:23,069] Trial 17 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.00532	eval-mlogloss:1.00304
[1]	train-mlogloss:0.88232	eval-mlogloss:0.87422


[I 2025-01-31 09:12:23,168] Trial 18 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.95955	eval-mlogloss:0.97552
[1]	train-mlogloss:0.71796	eval-mlogloss:0.72014


[I 2025-01-31 09:12:23,244] Trial 19 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.06309	eval-mlogloss:1.06476
[1]	train-mlogloss:1.01627	eval-mlogloss:1.01217
[2]	train-mlogloss:0.96837	eval-mlogloss:0.96337
[3]	train-mlogloss:0.91453	eval-mlogloss:0.90846


[I 2025-01-31 09:12:23,334] Trial 20 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.08924	eval-mlogloss:1.08852
[1]	train-mlogloss:1.07494	eval-mlogloss:1.07364
[2]	train-mlogloss:1.06088	eval-mlogloss:1.05916
[3]	train-mlogloss:1.04682	eval-mlogloss:1.04480
[4]	train-mlogloss:1.03336	eval-mlogloss:1.03078
[5]	train-mlogloss:1.01972	eval-mlogloss:1.01613
[6]	train-mlogloss:1.00688	eval-mlogloss:1.00280
[7]	train-mlogloss:0.99411	eval-mlogloss:0.98977
[8]	train-mlogloss:0.98149	eval-mlogloss:0.97629
[9]	train-mlogloss:0.96921	eval-mlogloss:0.96345
[10]	train-mlogloss:0.95715	eval-mlogloss:0.95106
[11]	train-mlogloss:0.94520	eval-mlogloss:0.93842
[12]	train-mlogloss:0.93785	eval-mlogloss:0.93058
[13]	train-mlogloss:0.92641	eval-mlogloss:0.91823
[14]	train-mlogloss:0.91900	eval-mlogloss:0.91069
[15]	train-mlogloss:0.90920	eval-mlogloss:0.90007
[16]	train-mlogloss:0.89948	eval-mlogloss:0.88975
[17]	train-mlogloss:0.89084	eval-mlogloss:0.88134
[18]	train-mlogloss:0.87991	eval-mlogloss:0.87016
[19]	train-mlogloss:0.87117	eval-mlogloss:0.86092
[20]	train

[I 2025-01-31 09:12:25,192] Trial 21 finished with value: 1.0 and parameters: {'lambda': 2.1145853379913167e-06, 'alpha': 2.712328075328611e-07, 'eta': 0.0112867777557854, 'gamma': 0.006057724816652862, 'max_depth': 5, 'min_child_weight': 10, 'subsample': 0.8243941985590085, 'colsample_bytree': 0.5615992739085217}. Best is trial 0 with value: 1.0.


[0]	train-mlogloss:1.06876	eval-mlogloss:1.06764
[1]	train-mlogloss:1.02530	eval-mlogloss:1.02234
[2]	train-mlogloss:0.98447	eval-mlogloss:0.97941
[3]	train-mlogloss:0.94596	eval-mlogloss:0.94016
[4]	train-mlogloss:0.90925	eval-mlogloss:0.90201


[I 2025-01-31 09:12:25,294] Trial 22 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.02976	eval-mlogloss:1.03141
[1]	train-mlogloss:0.94379	eval-mlogloss:0.94131


[I 2025-01-31 09:12:25,381] Trial 23 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.98653	eval-mlogloss:0.98498


[I 2025-01-31 09:12:25,454] Trial 24 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.08888	eval-mlogloss:1.08797
[1]	train-mlogloss:1.07358	eval-mlogloss:1.07198
[2]	train-mlogloss:1.05862	eval-mlogloss:1.05637
[3]	train-mlogloss:1.04336	eval-mlogloss:1.04079


[I 2025-01-31 09:12:25,549] Trial 25 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.05486	eval-mlogloss:1.05424
[1]	train-mlogloss:1.00039	eval-mlogloss:0.99692


[I 2025-01-31 09:12:25,630] Trial 26 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.78471	eval-mlogloss:0.76362


[I 2025-01-31 09:12:25,700] Trial 27 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.05386	eval-mlogloss:1.05101
[1]	train-mlogloss:0.94791	eval-mlogloss:0.94004


[I 2025-01-31 09:12:25,782] Trial 28 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.93840	eval-mlogloss:0.94123


[I 2025-01-31 09:12:25,857] Trial 29 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.99427	eval-mlogloss:1.00066


[I 2025-01-31 09:12:25,930] Trial 30 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.08103	eval-mlogloss:1.07924
[1]	train-mlogloss:1.05359	eval-mlogloss:1.05055
[2]	train-mlogloss:1.02715	eval-mlogloss:1.02295
[3]	train-mlogloss:1.00056	eval-mlogloss:0.99580
[4]	train-mlogloss:0.97615	eval-mlogloss:0.97040


[I 2025-01-31 09:12:26,005] Trial 31 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.08774	eval-mlogloss:1.08712
[1]	train-mlogloss:1.07131	eval-mlogloss:1.07001
[2]	train-mlogloss:1.05500	eval-mlogloss:1.05295
[3]	train-mlogloss:1.03891	eval-mlogloss:1.03652


[I 2025-01-31 09:12:26,079] Trial 32 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.05384	eval-mlogloss:1.04819
[1]	train-mlogloss:0.98123	eval-mlogloss:0.97230


[I 2025-01-31 09:12:26,142] Trial 33 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.07030	eval-mlogloss:1.06742


[I 2025-01-31 09:12:26,199] Trial 34 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.03606	eval-mlogloss:1.04443


[I 2025-01-31 09:12:26,253] Trial 35 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.90136	eval-mlogloss:0.88863
[1]	train-mlogloss:0.75125	eval-mlogloss:0.72625


[I 2025-01-31 09:12:26,316] Trial 36 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.08871	eval-mlogloss:1.08831
[1]	train-mlogloss:1.07455	eval-mlogloss:1.07358
[2]	train-mlogloss:1.06056	eval-mlogloss:1.05871
[3]	train-mlogloss:1.04648	eval-mlogloss:1.04434
[4]	train-mlogloss:1.03305	eval-mlogloss:1.03031
[5]	train-mlogloss:1.01941	eval-mlogloss:1.01590
[6]	train-mlogloss:1.00637	eval-mlogloss:1.00257
[7]	train-mlogloss:0.99379	eval-mlogloss:0.98971
[8]	train-mlogloss:0.98139	eval-mlogloss:0.97669
[9]	train-mlogloss:0.96911	eval-mlogloss:0.96394
[10]	train-mlogloss:0.95706	eval-mlogloss:0.95153
[11]	train-mlogloss:0.94541	eval-mlogloss:0.93913
[12]	train-mlogloss:0.93741	eval-mlogloss:0.93101
[13]	train-mlogloss:0.92600	eval-mlogloss:0.91878
[14]	train-mlogloss:0.91769	eval-mlogloss:0.91076
[15]	train-mlogloss:0.90731	eval-mlogloss:0.90024
[16]	train-mlogloss:0.89739	eval-mlogloss:0.89001
[17]	train-mlogloss:0.88878	eval-mlogloss:0.88185
[18]	train-mlogloss:0.87921	eval-mlogloss:0.87231
[19]	train-mlogloss:0.87039	eval-mlogloss:0.86345
[20]	train

[I 2025-01-31 09:12:27,953] Trial 37 pruned. Trial was pruned at iteration 256.


[0]	train-mlogloss:1.06752	eval-mlogloss:1.06779
[1]	train-mlogloss:1.02757	eval-mlogloss:1.02614
[2]	train-mlogloss:0.98919	eval-mlogloss:0.98557
[3]	train-mlogloss:0.95288	eval-mlogloss:0.94836


[I 2025-01-31 09:12:28,262] Trial 38 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.02252	eval-mlogloss:1.03457


[I 2025-01-31 09:12:28,441] Trial 39 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.03182	eval-mlogloss:1.03282


[I 2025-01-31 09:12:28,591] Trial 40 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.08422	eval-mlogloss:1.08385
[1]	train-mlogloss:1.06459	eval-mlogloss:1.06351
[2]	train-mlogloss:1.04512	eval-mlogloss:1.04277
[3]	train-mlogloss:1.02569	eval-mlogloss:1.02286


[I 2025-01-31 09:12:28,753] Trial 41 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.07661	eval-mlogloss:1.07517


[I 2025-01-31 09:12:28,907] Trial 42 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.08717	eval-mlogloss:1.08671
[1]	train-mlogloss:1.07266	eval-mlogloss:1.07099
[2]	train-mlogloss:1.05611	eval-mlogloss:1.05375
[3]	train-mlogloss:1.03937	eval-mlogloss:1.03667
[4]	train-mlogloss:1.02360	eval-mlogloss:1.02075


[I 2025-01-31 09:12:29,163] Trial 43 pruned. Trial was pruned at iteration 4.


[0]	train-mlogloss:1.06638	eval-mlogloss:1.06527


[I 2025-01-31 09:12:29,251] Trial 44 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.92349	eval-mlogloss:0.92247


[I 2025-01-31 09:12:29,333] Trial 45 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.09012	eval-mlogloss:1.09022
[1]	train-mlogloss:1.08207	eval-mlogloss:1.08068
[2]	train-mlogloss:1.07260	eval-mlogloss:1.07098
[3]	train-mlogloss:1.06425	eval-mlogloss:1.06243
[4]	train-mlogloss:1.05458	eval-mlogloss:1.05325
[5]	train-mlogloss:1.04312	eval-mlogloss:1.04105
[6]	train-mlogloss:1.03364	eval-mlogloss:1.03150
[7]	train-mlogloss:1.02548	eval-mlogloss:1.02396
[8]	train-mlogloss:1.01864	eval-mlogloss:1.01743
[9]	train-mlogloss:1.00895	eval-mlogloss:1.00772
[10]	train-mlogloss:1.00080	eval-mlogloss:0.99987
[11]	train-mlogloss:0.99126	eval-mlogloss:0.98946
[12]	train-mlogloss:0.98355	eval-mlogloss:0.98146
[13]	train-mlogloss:0.97694	eval-mlogloss:0.97477
[14]	train-mlogloss:0.97223	eval-mlogloss:0.97008
[15]	train-mlogloss:0.96408	eval-mlogloss:0.96198
[16]	train-mlogloss:0.95647	eval-mlogloss:0.95386
[17]	train-mlogloss:0.94971	eval-mlogloss:0.94629
[18]	train-mlogloss:0.94058	eval-mlogloss:0.93668
[19]	train-mlogloss:0.93385	eval-mlogloss:0.92989
[20]	train

[I 2025-01-31 09:12:31,604] Trial 46 finished with value: 0.9666666666666667 and parameters: {'lambda': 1.5350534757795744e-05, 'alpha': 1.0284291012976719e-05, 'eta': 0.011690259858287864, 'gamma': 3.2736744742794236e-08, 'max_depth': 5, 'min_child_weight': 8, 'subsample': 0.40092545599762885, 'colsample_bytree': 0.59876792766147}. Best is trial 0 with value: 1.0.


[0]	train-mlogloss:1.05602	eval-mlogloss:1.05886


[I 2025-01-31 09:12:31,951] Trial 47 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:1.05318	eval-mlogloss:1.05062
[1]	train-mlogloss:1.00685	eval-mlogloss:0.99747


[I 2025-01-31 09:12:32,102] Trial 48 pruned. Trial was pruned at iteration 1.


[0]	train-mlogloss:0.89373	eval-mlogloss:0.88462


[I 2025-01-31 09:12:32,267] Trial 49 pruned. Trial was pruned at iteration 1.


Best trial: {'lambda': 5.462384910216173e-06, 'alpha': 1.058782621859015e-06, 'eta': 0.2901641528553862, 'gamma': 4.0183289378553344e-08, 'max_depth': 7, 'min_child_weight': 3, 'subsample': 0.6360174671160264, 'colsample_bytree': 0.6587588732736163}
Best accuracy: 1.0


In [31]:
! pip install optuna-integration[xgboost]



In [32]:
from optuna.visualization import plot_intermediate_values

# Intermediate values reported during the trials of the current `study`
intermediate_fig = plot_intermediate_values(study)
intermediate_fig.show()