<a href="https://colab.research.google.com/github/Harshitsri170/Hyperparameter_Tuning-Grid-Search-and-randomized-search-/blob/main/Hyperparameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Load the breast-cancer dataset and separate the feature matrix from the labels
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the samples for final evaluation; the fixed seed keeps the
# split identical across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [2]:
# Candidate estimators, keyed by the display name used in the results table.
# Every estimator is seeded so repeated runs give the same fits.
models = {
    'RandomForest': RandomForestClassifier(random_state=42),
    'SVC': SVC(random_state=42),
    'LogisticRegression': LogisticRegression(max_iter=1000, random_state=42),
}

# Per-model search spaces. The 'classifier__' prefix addresses a parameter of
# the pipeline step named 'classifier'.
rf_grid = {
    'classifier__n_estimators': [50, 100, 150],
    'classifier__max_depth': [None, 10, 20],
}

svc_grid = {
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf'],
}

logreg_grid = {
    'classifier__C': [0.1, 1, 10],
    'classifier__penalty': ['l2'],
    'classifier__solver': ['lbfgs', 'liblinear'],
}

# Keys mirror those of `models` so a grid can be looked up by model name.
param_grids = {
    'RandomForest': rf_grid,
    'SVC': svc_grid,
    'LogisticRegression': logreg_grid,
}


In [3]:
# Tune each candidate model with an exhaustive grid search, then score the
# winning configuration on the held-out test split.
results = []

for name, model in models.items():
    # Scaling is a pipeline step, so the search fits it together with the
    # classifier instead of on data prepared outside the search.
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', model)
    ])

    # 5-fold cross-validation over this model's grid, selecting by F1.
    grid_search = GridSearchCV(pipe, param_grids[name], cv=5, scoring='f1', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Predict on the test split with the best pipeline found by the search.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    results.append({
        'Model': name,
        'Best Params': grid_search.best_params_,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1 Score': f1
    })

df_results = pd.DataFrame(results)

# Lift the column-width cap for this print only: with the default setting the
# 'Best Params' dicts are cut off with "...", hiding the chosen hyperparameters.
with pd.option_context('display.max_colwidth', None):
    print(df_results.sort_values(by='F1 Score', ascending=False))


                Model                                        Best Params  \
2  LogisticRegression  {'classifier__C': 0.1, 'classifier__penalty': ...   
1                 SVC  {'classifier__C': 1, 'classifier__kernel': 'rbf'}   
0        RandomForest  {'classifier__max_depth': None, 'classifier__n...   

   Accuracy  Precision    Recall  F1 Score  
2  0.991228   0.986111  1.000000  0.993007  
1  0.982456   0.972603  1.000000  0.986111  
0  0.964912   0.958904  0.985915  0.972222  


In [4]:
# We can also use RandomizedSearchCV, which samples candidates from
# distributions instead of enumerating a fixed grid.
from scipy.stats import randint, uniform

# Search spaces keyed by model name; the 'classifier__' prefix addresses a
# parameter of the pipeline step named 'classifier'.
param_dists = {
    'RandomForest': {
        # randint's upper bound is exclusive: draws integers in 50..199.
        'classifier__n_estimators': randint(50, 200),
        'classifier__max_depth': [None, 10, 20]
    },
    'SVC': {
        # scipy's uniform takes (loc, scale) and spans [loc, loc + scale];
        # scale=9.9 keeps C within [0.1, 10], matching the grid-search range.
        'classifier__C': uniform(loc=0.1, scale=9.9),
        'classifier__kernel': ['linear', 'rbf']
    },
    'LogisticRegression': {
        # Same (loc, scale) parameterization: C is drawn from [0.1, 10].
        'classifier__C': uniform(loc=0.1, scale=9.9),
        'classifier__solver': ['lbfgs', 'liblinear']
    }
}

