<a href="https://colab.research.google.com/github/Shruteecodes04/celebal-assignments/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Load scikit-learn's bundled breast-cancer dataset into pandas objects:
# X holds one named column per feature, y holds the class labels.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
# across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features. The scaler is fitted on the training rows only and the
# same fitted scaler is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the baseline (untuned) models.
# The random forest is given a fixed seed: without one, every run draws
# different bootstrap samples / feature subsets, so the baseline metrics
# would change on each "Restart & Run All".
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC()
}

# Evaluate models before tuning
print("----- Initial Model Evaluation -----")
initial_results = {}

for name, model in models.items():
    # Logistic regression and the SVM are trained and scored on the
    # standardized features; the random forest uses the unscaled features.
    if name in ("SVM", "Logistic Regression"):
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    # Test-set metrics for this model, keyed by the column label shown in the
    # summary table.
    initial_results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred)
    }

# One row per model, one column per metric.
df_initial = pd.DataFrame(initial_results).T
print(df_initial)

# Hyperparameter tuning
# Each search below uses 5-fold cross-validation on the training partition.
# NOTE(review): the searches that use X_train_scaled receive features that were
# standardized with statistics from the whole training set, so every CV
# validation fold has already influenced the scaling. Wrapping the scaler and
# estimator in a Pipeline would avoid that — confirm whether it matters here.
print("\n----- Hyperparameter Tuning with GridSearchCV and RandomizedSearchCV -----")

# Logistic Regression - GridSearchCV (exhaustive over the regularization strength C)
lr_param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l2'],
    'solver': ['lbfgs']
}
lr_grid = GridSearchCV(LogisticRegression(max_iter=1000), lr_param_grid, cv=5)
lr_grid.fit(X_train_scaled, y_train)

# Random Forest - RandomizedSearchCV (10 sampled combinations, unscaled features)
rf_param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
# The search's own random_state only fixes WHICH parameter combinations are
# sampled. The forest itself needs its own seed, otherwise the CV scores (and
# therefore the selected best_estimator_) can differ from run to run.
rf_random = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    rf_param_dist,
    n_iter=10,
    cv=5,
    random_state=42,
)
rf_random.fit(X_train, y_train)

# SVM - GridSearchCV (exhaustive over C and kernel)
svm_param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf']
}
svm_grid = GridSearchCV(SVC(), svm_param_grid, cv=5)
svm_grid.fit(X_train_scaled, y_train)

# Score the best estimator found by each search on the held-out test set.
print("\n----- Evaluation After Tuning -----")
best_models = {
    "Logistic Regression (Tuned)": lr_grid.best_estimator_,
    "Random Forest (Tuned)": rf_random.best_estimator_,
    "SVM (Tuned)": svm_grid.best_estimator_
}

tuned_results = {}

for name, model in best_models.items():
    # Models fitted on standardized features must be scored on the
    # standardized test set; the random forest is scored on the raw features.
    y_pred = model.predict(
        X_test_scaled if ("SVM" in name or "Logistic" in name) else X_test
    )

    # Same four metrics, in the same column order, as the pre-tuning table.
    tuned_results[name] = {
        label: scorer(y_test, y_pred)
        for label, scorer in (
            ("Accuracy", accuracy_score),
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1 Score", f1_score),
        )
    }

# One row per tuned model, one column per metric.
df_tuned = pd.DataFrame(tuned_results).transpose()
print(df_tuned)

# The winner is the row with the highest test-set F1 score.
best_model_name = df_tuned['F1 Score'].idxmax()
print(f"\n✅ Best Model: {best_model_name}")

----- Initial Model Evaluation -----
                     Accuracy  Precision    Recall  F1 Score
Logistic Regression  0.973684   0.972222  0.985915  0.979021
Random Forest        0.964912   0.958904  0.985915  0.972222
SVM                  0.982456   0.972603  1.000000  0.986111

----- Hyperparameter Tuning with GridSearchCV and RandomizedSearchCV -----

----- Evaluation After Tuning -----
                             Accuracy  Precision    Recall  F1 Score
Logistic Regression (Tuned)  0.973684   0.985714  0.971831  0.978723
Random Forest (Tuned)        0.964912   0.958904  0.985915  0.972222
SVM (Tuned)                  0.982456   0.972603  1.000000  0.986111

✅ Best Model: SVM (Tuned)
