## Types of Hyper parameter Tuning
1. RandomizedSearchCV
2. GridSearchCV
3. Halving Of Both

In [1]:
import seaborn as sns
import pandas as pd
import numpy as np 
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split , RandomizedSearchCV , GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

### Load the dataset


In [2]:
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


### Encode the Categorical Columns

In [3]:
df = pd.get_dummies(df, drop_first=True).astype(int)
df.head()

Unnamed: 0,total_bill,tip,size,sex_Female,smoker_No,day_Fri,day_Sat,day_Sun,time_Dinner
0,16,1,2,1,1,0,0,1,1
1,10,1,3,0,1,0,0,1,1
2,21,3,3,0,1,0,0,1,1
3,23,3,2,0,1,0,0,1,1
4,24,3,4,1,1,0,0,1,1


### Separate Features and Label

In [4]:
X = df.drop('sex_Female' , axis=1)
y = df['sex_Female']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### Building the Pipeline

In [5]:
pipeline = Pipeline([
    ('scalar' , StandardScaler()),
    ('clf' , LogisticRegression(max_iter=5000))
])

## GridSearchCV

In [None]:
# -- GRID SEARCH --
param_grid = {
    "clf__C": [0.01, 0.1, 1, 10],
    "clf__solver": ["lbfgs", "liblinear"],
    "clf__penalty": ["l2"]  
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring="accuracy", n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print("=== GridSearchCV ===")
print("Best Params:", grid_search.best_params_)
print("Best CV Score:", grid_search.best_score_)
y_pred_grid = grid_search.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred_grid))

## RandomizedSearchCV

In [None]:
#RandomizedSearchCV
param_dist = {
    'clf__C' : np.logspace(3, -3 ,20),
    'clf__solver' : ['lbfgs','liblinear'],
    'clf__penalty' : ['l2']
}
randomized_search = RandomizedSearchCV(pipeline , param_dist , n_iter=15 , scoring='accuracy' , n_jobs=-1, verbose=1)
randomized_search.fit(X_train, y_train)

print('RandomizedSearchCV')
print("Best params", randomized_search.best_params_)
print("Best CV Score", randomized_search.best_score_)
y_pred  = randomized_search.predict(X_test)
print('accuracy_score', accuracy_score(y_test, y_pred))