In [1]:
# Optuna
import optuna
from optuna.samplers import TPESampler

# sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn import datasets

from functools import partial # to solve scoping problem when supplying more params' to objective function


In [2]:
def objective(trial, n_folds, X, y):
    """Objective function for tuning logistic regression hyperparameters"""
    params = {
        'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False]),
        'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
        'C': trial.suggest_uniform('C', 0.05, 2.5),
        'solver': trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear']),
        'max_iter': trial.suggest_categorical('max_iter', range(10, 500))
    }
    # Perform n_fold cross validation with hyperparameters
    clf = LogisticRegression(**params, random_state=42)
    scores = cross_val_score(clf, X, y, cv=n_folds, scoring='f1_macro')
    
    print(scores)
    # Extract the best score
    max_score = max(scores)

    # Loss must be minimized
    loss = 1 - max_score
    print(loss)
    # Dictionary with information for evaluation
    return loss

In [3]:
n_folds = 5
X, y = datasets.make_classification(n_samples=100000, n_features=20,
                                    n_informative=2, n_redundant=2)

train_samples = 100  # Samples used for training the models

X_train = X[:train_samples]
X_test = X[train_samples:]
y_train = y[:train_samples]
y_test = y[train_samples:]

In [4]:
study = optuna.create_study(direction='minimize', sampler=TPESampler())
study.optimize(partial(objective, n_folds=n_folds, X=X_train, y=y_train), n_trials=16)

[I 2025-10-26 11:07:42,068] A new study created in memory with name: no-name-72d6806c-b2f1-41b3-bdf1-4201f1f9b8d4
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,100] Trial 0 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': False, 'tol': 4.763169406218029e-05, 'C': 1.8812145981258217, 'solver': 'newton-cg', 'max_iter': 288}. Best is trial 0 with value: 0.0.
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,143] Trial 1 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': True, 'tol': 7.762847158794961e-05, 'C': 2.3188624145623984, 'solver': 'newton-cg', 'max_iter': 165}. Best is trial 0 with value: 0.0.
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,166] Trial 2 finished with value: 0.0 and parameters: 

[0.8989899  0.9488491  0.7979798  0.84962406 1.        ]
0.0
[0.8989899 0.9488491 0.7979798 0.8       1.       ]
0.0
[0.9488491  1.         0.89583333 0.84962406 1.        ]
0.0
[0.8989899  0.9488491  0.7979798  0.84962406 1.        ]
0.0
[0.9488491  1.         0.89583333 0.84962406 1.        ]
0.0
[0.9488491  1.         0.84654731 0.84962406 1.        ]
0.0
[0.8989899 0.9488491 0.7979798 0.8       1.       ]
0.0
[0.8989899  1.         0.84654731 0.84962406 1.        ]
0.0


  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,296] Trial 8 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': True, 'tol': 6.225574722303851e-05, 'C': 0.9137585172884933, 'solver': 'liblinear', 'max_iter': 294}. Best is trial 0 with value: 0.0.
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,331] Trial 9 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': False, 'tol': 1.9499046408518437e-05, 'C': 0.4158889920868151, 'solver': 'newton-cg', 'max_iter': 190}. Best is trial 0 with value: 0.0.
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,362] Trial 10 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': False, 'tol': 4.573135536299256e-05, 'C': 1.8703551548954611, 'solver': 'lb

[0.9488491  1.         0.89583333 0.84962406 1.        ]
0.0
[0.9488491  1.         0.89583333 0.84962406 1.        ]
0.0
[0.8989899  0.9488491  0.7979798  0.84962406 1.        ]
0.0
[0.9488491 0.9488491 0.7979798 0.8       1.       ]
0.0
[0.8989899 0.9488491 0.7979798 0.8       1.       ]
0.0
[0.9488491  0.9488491  0.84654731 0.84962406 1.        ]
0.0
[0.8989899  0.9488491  0.7979798  0.84962406 1.        ]
0.0


  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[I 2025-10-26 11:07:42,521] Trial 15 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': True, 'tol': 9.987405270696255e-05, 'C': 1.5919532847376474, 'solver': 'lbfgs', 'max_iter': 177}. Best is trial 0 with value: 0.0.


[0.9488491  0.9488491  0.84654731 0.84962406 1.        ]
0.0


In [5]:
study.best_trial.params

{'warm_start': False,
 'fit_intercept': False,
 'tol': 4.763169406218029e-05,
 'C': 1.8812145981258217,
 'solver': 'newton-cg',
 'max_iter': 288}

In [6]:
study.best_trial.value

0.0

In [7]:
partial(objective, n_folds=n_folds, X=X_train, y=y_train)

functools.partial(<function objective at 0x0000026ADEF81EA0>, n_folds=5, X=array([[-1.06518928, -2.08468676,  0.08196603, ..., -1.24689281,
         0.99075048,  0.34344265],
       [ 0.83839831, -0.80924217, -0.90469563, ..., -0.41562574,
         0.90171995,  0.81643004],
       [ 0.4031738 , -1.11779127, -1.92991456, ...,  1.58885128,
        -0.07782365, -0.86773426],
       ...,
       [ 1.2265641 , -2.32215988,  1.45278634, ..., -0.1248291 ,
         0.98966186,  0.9052977 ],
       [-0.66479956,  0.11201609, -0.63007104, ..., -0.34679023,
         1.75136396,  0.88416528],
       [ 0.61472591, -1.9781159 ,  0.55999545, ..., -0.4399797 ,
         0.4413743 ,  0.37225194]]), y=array([1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0

In [8]:
params = {'warm_start': False, 'fit_intercept': True, 'tol': 8.193190808600091e-05, 'C': 0.4314980151540406, 'solver': 'newton-cg', 'max_iter': 331}
# Perform n_fold cross validation with hyperparameters
clf = LogisticRegression(**params, random_state=42)
scores = cross_val_score(clf, X, y, cv=n_folds, scoring='f1_macro')

# Extract the best score
max_score = max(scores)

# Loss must be minimized
loss = 1 - max_score

In [9]:
print(loss)

0.09530309639760193
