<a href="https://colab.research.google.com/github/NishWasHere/Celebal-Assignment/blob/main/Week6_ModelEval_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.datasets import load_breast_cancer  # For demonstration


In [2]:
# Step 2: Load Data (using breast cancer dataset for classification demo)
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Step 3: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Scale the data (important for some models like SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
# Step 5: Define Models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC()
}


In [4]:
# Step 6: Train and Evaluate Models
def evaluate_model(name, model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"--- {name} ---")
    print(f"Accuracy : {accuracy_score(y_test, y_pred):.4f}")
    print(f"Precision: {precision_score(y_test, y_pred):.4f}")
    print(f"Recall   : {recall_score(y_test, y_pred):.4f}")
    print(f"F1-Score : {f1_score(y_test, y_pred):.4f}")
    print()

for name, model in models.items():
    if name == 'SVM':
        evaluate_model(name, model, X_train_scaled, X_test_scaled, y_train, y_test)
    else:
        evaluate_model(name, model, X_train, X_test, y_train, y_test)


--- Logistic Regression ---
Accuracy : 0.9561
Precision: 0.9459
Recall   : 0.9859
F1-Score : 0.9655

--- Random Forest ---
Accuracy : 0.9561
Precision: 0.9583
Recall   : 0.9718
F1-Score : 0.9650

--- SVM ---
Accuracy : 0.9825
Precision: 0.9726
Recall   : 1.0000
F1-Score : 0.9861



In [5]:
# Step 7: Hyperparameter Tuning - GridSearchCV (for Random Forest)
param_grid_rf = {
    'n_estimators': [50, 100],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5],
}

grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, cv=3, scoring='accuracy')
grid_rf.fit(X_train, y_train)

print("Best Random Forest Params (GridSearchCV):", grid_rf.best_params_)
best_rf = grid_rf.best_estimator_
evaluate_model("Random Forest (Tuned)", best_rf, X_train, X_test, y_train, y_test)


Best Random Forest Params (GridSearchCV): {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
--- Random Forest (Tuned) ---
Accuracy : 0.9649
Precision: 0.9589
Recall   : 0.9859
F1-Score : 0.9722



In [6]:
# Step 8: Hyperparameter Tuning - RandomizedSearchCV (for SVM)
param_dist_svm = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear']
}

rand_svm = RandomizedSearchCV(SVC(), param_dist_svm, n_iter=5, cv=3, scoring='accuracy', random_state=42)
rand_svm.fit(X_train_scaled, y_train)

print("Best SVM Params (RandomizedSearchCV):", rand_svm.best_params_)
best_svm = rand_svm.best_estimator_
evaluate_model("SVM (Tuned)", best_svm, X_train_scaled, X_test_scaled, y_train, y_test)


Best SVM Params (RandomizedSearchCV): {'kernel': 'rbf', 'gamma': 'scale', 'C': 1}
--- SVM (Tuned) ---
Accuracy : 0.9825
Precision: 0.9726
Recall   : 1.0000
F1-Score : 0.9861

