In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from scipy.stats import uniform
from scipy.stats import loguniform
!pip install scikit-optimize
from skopt import BayesSearchCV
!pip install optuna
import optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


#Creating Dataset

In [3]:
x,y=make_classification(n_samples=1000, n_features=10,n_informative=3, n_classes=3, random_state=15)

#Splitting Dataset

In [4]:
X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.20,random_state=42)

#Model and training/testing

In [5]:
lgs=LogisticRegression(multi_class='ovr')
lgs.fit(X_train,y_train)
y_pred=lgs.predict(X_test)



In [6]:
score=accuracy_score(y_pred,y_test)
print("Accuracy : ",score)
print(classification_report(y_pred,y_test))
print("Confusion Matrix")
print(confusion_matrix(y_pred,y_test))

Accuracy :  0.79
              precision    recall  f1-score   support

           0       0.81      0.86      0.84        59
           1       0.72      0.82      0.77        61
           2       0.84      0.71      0.77        80

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200

Confusion Matrix
[[51  2  6]
 [ 6 50  5]
 [ 6 17 57]]


#Manual Iterative CV (Brute Force Method)

In [7]:
C_values = [0.01, 0.1, 1, 10, 100]
solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
best_score = 0
best_params = {}

for C in C_values:
    for solver in solvers:
        model = LogisticRegression(C=C, solver=solver, multi_class='ovr', max_iter=1000)
        scores = cross_val_score(model, X_train, y_train, cv=5)
        avg_score = scores.mean()
        if avg_score > best_score:
            best_score = avg_score
            best_params = {'C': C, 'solver': solver}

print("Best Score (Manual):", best_score)
print("Best Parameters (Manual):", best_params)



Best Score (Manual): 0.7875
Best Parameters (Manual): {'C': 0.1, 'solver': 'liblinear'}




#GridSearch CV

In [8]:
grid_params = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2','elasticnet'],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
}

In [9]:
grid = GridSearchCV(estimator=LogisticRegression(multi_class='ovr', max_iter=1000),param_grid=grid_params,cv=5,scoring='accuracy',n_jobs=-1)

In [10]:
grid.fit(X_train, y_train)
print("Best Params (GridSearch):", grid.best_params_)
print("Best Score (GridSearch):", grid.best_score_)

Best Params (GridSearch): {'C': 0.1, 'penalty': 'l1', 'solver': 'saga'}
Best Score (GridSearch): 0.79375


200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.12/dist-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/sklearn/linear_model/_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [11]:
y_pred = grid.best_estimator_.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

Test Accuracy: 0.795


#RandomizedSearch CV

In [12]:
param_dist = {
    'C': loguniform(1e-3, 1e3),
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'saga']
}

In [13]:
random_search = RandomizedSearchCV(
    LogisticRegression(multi_class='ovr', max_iter=1000),
    param_distributions=param_dist,
    n_iter=20,
    scoring='accuracy',
    cv=5,
    random_state=42,
    n_jobs=-1
)

In [14]:
random_search.fit(X_train, y_train)
print("Best Params (RandomizedSearch):", random_search.best_params_)
print("Best Score (RandomizedSearch):", random_search.best_score_)

Best Params (RandomizedSearch): {'C': np.float64(0.06724850206557247), 'penalty': 'l1', 'solver': 'saga'}
Best Score (RandomizedSearch): 0.795




#Bayesian Optimization

In [18]:
bayes_param = {
    'C': (1e-3, 1e3, 'log-uniform'),
    'solver': ['liblinear', 'saga'],
    'penalty': ['l1', 'l2']
}

In [20]:
bayes_search = BayesSearchCV(
    LogisticRegression(multi_class='ovr', max_iter=1000),
    search_spaces=bayes_param,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42
)

In [21]:
bayes_search.fit(X_train, y_train)
print("Best Params (Bayesian):", bayes_search.best_params_)
print("Best Score (Bayesian):", bayes_search.best_score_)

Best Params (Bayesian): OrderedDict({'C': 0.019616003991477306, 'penalty': 'l1', 'solver': 'liblinear'})
Best Score (Bayesian): 0.79375




#Automated Hyperparameter Tuning

In [23]:
def objective(trial):
    C = trial.suggest_loguniform('C', 1e-3, 1e3)
    solver = trial.suggest_categorical('solver', ['liblinear', 'saga'])
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])

    model = LogisticRegression(C=C, solver=solver, penalty=penalty, multi_class='ovr', max_iter=1000)

    score = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    return score

In [24]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

[I 2025-10-11 15:11:56,864] A new study created in memory with name: no-name-28a29543-d82c-44b7-9486-82089da1c346
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
[I 2025-10-11 15:11:57,102] Trial 0 finished with value: 0.7875 and parameters: {'C': 433.9586368254958, 'solver': 'saga', 'penalty': 'l2'}. Best is trial 0 with value: 0.7875.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
[I 2025-10-11 15:11:57,239] Trial 1 finished with value: 0.785 and parameters: {'C': 0.007996482837074203, 'solver': 'saga', 'penalty': 'l2'}. Best is trial 0 with value: 0.7875.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
[I 2025-10-11 15:11:57,301] Trial 2 finished with value: 0.78875 and parameters: {'C': 0.0406825207222924, 'solver': 'liblinear', 'penalty': 'l2'}. Best is trial 2 with value: 0.78875.
  C = trial.suggest_loguniform('C', 1e-3, 1e3)
[I 2025-10-11 15:11:57,395] Trial 3 finished with value: 0.7862499999999999 and parameters: {'C': 7.231943963446587, 'solver': 'liblinear', 'penalty': 'l2'}.

In [25]:
print("Best Params (Optuna):", study.best_params)
print("Best Score (Optuna):", study.best_value)

Best Params (Optuna): {'C': 0.0709982568788453, 'solver': 'liblinear', 'penalty': 'l1'}
Best Score (Optuna): 0.795
