<a href="https://colab.research.google.com/github/Tanushree-233/ML_Model_Comparison_and_Tuning/blob/main/ML_Model_Comparison_And_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Using a built-in dataset for binary classification
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

In [3]:
# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [4]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}

In [6]:
def evaluate_model(name, model, X_test, y_test):
    y_pred = model.predict(X_test)
    return {
        'Model': name,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred)
    }

In [7]:
results = []
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    result = evaluate_model(name, model, X_test_scaled, y_test)
    results.append(result)

results_df = pd.DataFrame(results)
print("🔹 Initial Model Performance:\n")
print(results_df)

🔹 Initial Model Performance:

                 Model  Accuracy  Precision    Recall  F1 Score
0  Logistic Regression  0.973684   0.972222  0.985915  0.979021
1        Decision Tree  0.938596   0.957143  0.943662  0.950355
2        Random Forest  0.964912   0.958904  0.985915  0.972222
3                  SVM  0.982456   0.972603  1.000000  0.986111


In [8]:
param_grid_rf = {
    'n_estimators': [50, 100, 150],
    'max_depth': [4, 6, 8, None]
}
grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, cv=5, scoring='f1')
grid_rf.fit(X_train_scaled, y_train)
best_rf = grid_rf.best_estimator_

In [9]:
param_dist_svc = {
    'C': np.logspace(-3, 2, 10),
    'gamma': ['scale', 'auto'],
    'kernel': ['rbf', 'linear']
}
rand_svc = RandomizedSearchCV(SVC(), param_distributions=param_dist_svc,
                              n_iter=10, cv=5, scoring='f1', random_state=42)
rand_svc.fit(X_train_scaled, y_train)
best_svc = rand_svc.best_estimator_

In [10]:
tuned_results = []
tuned_results.append(evaluate_model("Tuned Random Forest", best_rf, X_test_scaled, y_test))
tuned_results.append(evaluate_model("Tuned SVM", best_svc, X_test_scaled, y_test))

tuned_df = pd.DataFrame(tuned_results)
print("\n🔹 Tuned Model Performance:\n")
print(tuned_df)


🔹 Tuned Model Performance:

                 Model  Accuracy  Precision    Recall  F1 Score
0  Tuned Random Forest  0.964912   0.958904  0.985915  0.972222
1            Tuned SVM  0.982456   0.972603  1.000000  0.986111


In [11]:
final_df = pd.concat([results_df, tuned_df], ignore_index=True)
print("\n✅ Final Model Comparison (Sorted by F1 Score):\n")
print(final_df.sort_values(by='F1 Score', ascending=False))


✅ Final Model Comparison (Sorted by F1 Score):

                 Model  Accuracy  Precision    Recall  F1 Score
5            Tuned SVM  0.982456   0.972603  1.000000  0.986111
3                  SVM  0.982456   0.972603  1.000000  0.986111
0  Logistic Regression  0.973684   0.972222  0.985915  0.979021
2        Random Forest  0.964912   0.958904  0.985915  0.972222
4  Tuned Random Forest  0.964912   0.958904  0.985915  0.972222
1        Decision Tree  0.938596   0.957143  0.943662  0.950355
