# Hyperparameter Tuning: Grid Search

In [4]:
# scikit-learn pieces used by the cells below: the exhaustive search helper,
# the classifier, the example dataset, the feature scaler, and the pipeline wrapper.
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import warnings 
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so any warning raised while fitting
# (e.g. non-convergence notices) is hidden too — consider narrowing it.
warnings.filterwarnings('ignore')

In [23]:
# Step 1 - data: features X and labels y of the breast-cancer dataset.
X, y = load_breast_cancer(return_X_y=True)

# Step 2 - estimator: standardise the features, then fit a logistic regression.
# (To experiment with an L1 penalty instead, give the model solver='liblinear'
# and put 'l1' in the penalty list of the grid below.)
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=500)),
])

# Step 3 - search space: keys of the form '<step>__<param>' address a
# parameter of the named pipeline step.
param_grid = {'model__C': [0.01, 0.1, 1, 10], 'model__penalty': ['l2']}

# Step 4 - exhaustive search over every combination in the grid, 5-fold CV.
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

# Step 5 - run the search, then report the winning setting and its mean CV score.
grid.fit(X, y)

print("Best Parameters:", grid.best_params_)
print("Best CV Score:", grid.best_score_)

Best Parameters: {'model__C': 1, 'model__penalty': 'l2'}
Best CV Score: 0.9806862288464524


# Hyperparameter Tuning: Random Search

In [17]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# Step 1 - data: features X and labels y of the breast-cancer dataset.
X, y = load_breast_cancer(return_X_y=True)

# Step 2 - estimator: standardise the features, then fit a logistic regression.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=500)),
])

# Step 3 - candidate values to sample from: 100 log-spaced values of C
# between 1e-3 and 1e3, with the penalty fixed to L2.
param_dist = {
    'model__C': np.logspace(-3, 3, 100),
    'model__penalty': ['l2'],
}

# Step 4 - randomized search: evaluate 10 randomly drawn settings.
# random_state pins the draw so the same candidates are tried on every run.
# NOTE(review): this cell uses 10 folds while the grid-search and baseline
# cells use 5, so the scores are not directly comparable — confirm intended.
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=10,
    cv=10,
    random_state=42,
)

# Step 5 - run the search, then report the winning setting and its mean CV score.
random_search.fit(X, y)

print("Best Parameters:", random_search.best_params_)
print("Best CV Score:", random_search.best_score_)

Best Parameters: {'model__penalty': 'l2', 'model__C': 1.629750834620645}
Best CV Score: 0.9806704260651629


# Baseline: Without Hyperparameter Tuning

In [11]:
from sklearn.model_selection import cross_val_score

# Step 1 - data: features X and labels y of the breast-cancer dataset.
X, y = load_breast_cancer(return_X_y=True)

# Step 2 - estimator: the same scaler + logistic-regression pipeline as in the
# tuning cells, with the model's regularisation settings left at their defaults.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=500)),
])

# Step 3 - score the untuned pipeline with 5-fold cross-validation;
# `scores` holds one value per fold.
scores = cross_val_score(estimator=pipeline, X=X, y=y, cv=5)

# Step 4 - report the per-fold scores together with their mean and spread.
print("CV Scores for Each Fold:", scores)
print("Average CV Score:", scores.mean())
print("Standard Deviation:", scores.std())

CV Scores for Each Fold: [0.98245614 0.98245614 0.97368421 0.97368421 0.99115044]
Average CV Score: 0.9806862288464524
Standard Deviation: 0.006539441283506109
