# Imports

In [1]:
import optuna
from optuna.samplers import TPESampler

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn import datasets

from functools import partial

  from .autonotebook import tqdm as notebook_tqdm


# Functions

In [2]:
def objective(trial, n_folds, X, y):
    """Objective function for tuning logistic regression hyperparameters"""
    params = {
        'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False]),
        'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
        'C': trial.suggest_uniform('C', 0.05, 2.5),
        'solver': trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear']),
        'max_iter': trial.suggest_categorical('max_iter', range(10, 500))
    }
    # Perform n_fold cross validation with hyperparameters
    clf = LogisticRegression(**params, random_state=42)
    scores = cross_val_score(clf, X, y, cv=n_folds, scoring='f1_macro')
    
    print(scores)
    # Extract the best score
    max_score = max(scores)

    # Loss must be minimized
    loss = 1 - max_score
    print(loss)
    # Dictionary with information for evaluation
    return loss

# Get Data

In [3]:
n_folds = 5
X, y = datasets.make_classification(n_samples=100000, n_features=20,
                                    n_informative=2, n_redundant=2)

train_samples = 100  # Samples used for training the models

X_train = X[:train_samples]
X_test = X[train_samples:]
y_train = y[:train_samples]
y_test = y[train_samples:]

# Tuning

In [4]:
study = optuna.create_study(direction='minimize', sampler=TPESampler())
study.optimize(partial(objective, n_folds = n_folds, X = X_train, y=y_train), n_trials=16)

[32m[I 2023-03-09 18:49:15,069][0m A new study created in memory with name: no-name-6a12155a-dad9-4205-9f5a-4de71b9b0a50[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,113][0m Trial 0 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': True, 'tol': 2.360984548279897e-05, 'C': 2.458204404462609, 'solver': 'lbfgs', 'max_iter': 55}. Best is trial 0 with value: 0.0.[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,148][0m Trial 1 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': True, 'tol': 6.676237508562107e-05, 'C': 2.1982334308557343, 'solver': 'newton-cg', 'max_iter': 162}. Best is trial 0 with value: 0.0.[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,167][0m Trial 2 fi

[0.8        1.         0.79166667 0.8        0.84962406]
0.0
[0.8        1.         0.79166667 0.8        0.84962406]
0.0
[0.84962406 0.94987469 0.7979798  0.74424552 0.8989899 ]
0.05012531328320802
[0.84962406 0.94987469 0.7979798  0.74424552 0.8989899 ]
0.05012531328320802
[0.9        0.94987469 0.7979798  0.8989899  0.8989899 ]
0.05012531328320802
[0.9        0.94987469 0.7979798  0.79166667 0.8989899 ]
0.05012531328320802
[0.9        0.94987469 0.79166667 0.84962406 0.8       ]
0.05012531328320802


  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,329][0m Trial 7 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': True, 'tol': 1.2004360653295908e-05, 'C': 1.4359356268289973, 'solver': 'lbfgs', 'max_iter': 249}. Best is trial 0 with value: 0.0.[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,372][0m Trial 8 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': True, 'tol': 2.3225397849463716e-05, 'C': 0.6737071412856319, 'solver': 'newton-cg', 'max_iter': 384}. Best is trial 0 with value: 0.0.[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,423][0m Trial 9 finished with value: 0.0 and parameters: {'warm_start': False, 'fit_intercept': True, 'tol': 5.9736390093253325e-05, 'C': 2

[0.9        1.         0.79166667 0.8        0.84962406]
0.0
[0.9        1.         0.79166667 0.84962406 0.8       ]
0.0
[0.8        1.         0.79166667 0.8        0.84962406]
0.0
[0.8        1.         0.79166667 0.8        0.84962406]
0.0
[0.8        1.         0.79166667 0.8        0.84962406]
0.0


  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,560][0m Trial 12 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': True, 'tol': 7.101750971772029e-05, 'C': 1.9991317247654243, 'solver': 'newton-cg', 'max_iter': 55}. Best is trial 0 with value: 0.0.[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,600][0m Trial 13 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': True, 'tol': 4.442633298290131e-05, 'C': 2.137260816006286, 'solver': 'lbfgs', 'max_iter': 294}. Best is trial 0 with value: 0.0.[0m
  'tol': trial.suggest_uniform('tol', 0.00001, 0.0001),
  'C': trial.suggest_uniform('C', 0.05, 2.5),
[32m[I 2023-03-09 18:49:15,634][0m Trial 14 finished with value: 0.0 and parameters: {'warm_start': True, 'fit_intercept': True, 'tol': 1.2063069377946486e-05, 'C': 1.812

[0.8        1.         0.79166667 0.8        0.84962406]
0.0
[0.8        1.         0.79166667 0.8        0.84962406]
0.0
[0.84962406 1.         0.79166667 0.84962406 0.84962406]
0.0
[0.8        1.         0.79166667 0.8        0.84962406]
0.0


In [5]:
study.best_trial.params

{'warm_start': True,
 'fit_intercept': True,
 'tol': 2.360984548279897e-05,
 'C': 2.458204404462609,
 'solver': 'lbfgs',
 'max_iter': 55}

In [6]:
study.best_trial.value

0.0

In [7]:
partial(objective, n_folds = n_folds, X = X_train, y=y_train)

functools.partial(<function objective at 0x000001F6FF5520E0>, n_folds=5, X=array([[-0.79495433, -0.03103261, -1.51848492, ..., -2.05238958,
        -0.93417305,  0.96676889],
       [ 0.81339979, -0.379493  , -1.64373414, ..., -0.37461693,
        -0.82853333,  0.22766465],
       [-0.51919548,  0.79748686, -1.47907965, ..., -2.07552727,
        -0.56723337,  1.24265196],
       ...,
       [-0.90713403,  0.23365974,  1.30110706, ..., -2.76733113,
         1.01773536, -1.62274866],
       [ 0.24784173, -1.28416825,  0.46717554, ..., -0.02056132,
         1.16366216,  0.28370374],
       [ 0.18339589,  0.07319064,  0.27763402, ...,  0.61831341,
         2.45383373,  1.8809781 ]]), y=array([0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 1, 1, 1

In [8]:
params = {'warm_start': False, 
          'fit_intercept': True, 
          'tol': 8.193190808600091e-05, 
          'C': 0.4314980151540406, 
          'solver': 'newton-cg', 
          'max_iter': 331}
# Perform n_fold cross validation with hyperparameters
clf = LogisticRegression(**params, random_state=42)
scores = cross_val_score(clf, X, y, cv=n_folds, scoring='f1_macro')

# Extract the best score
max_score = max(scores)

# Loss must be minimized
loss = 1 - max_score

In [9]:
print(loss)

0.08800360462764556
