In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
# Load scikit-learn's bundled breast cancer dataset and wrap it in pandas
# objects: X is the feature table (columns named from data.feature_names),
# y is the target vector.
data = load_breast_cancer()
y = pd.Series(data.target)
X = pd.DataFrame(data=data.data, columns=data.feature_names)


In [3]:
# Hold out 20% of the rows as a test set. The split is seeded so it is
# repeatable, and stratified on y so both parts keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [4]:
# Seed shared by every stochastic estimator below so that a fresh
# "Restart & Run All" reproduces the same scores and leaderboard.
RANDOM_STATE = 42

# Candidate models and the hyper-parameter grid to search for each one.
#   'model'  -> an unfitted estimator instance
#   'params' -> {parameter name: list of values to try}; an empty dict means
#               the estimator is evaluated with its default settings only.
model_params = {
    'Logistic Regression': {
        # max_iter raised above the default to give the solver room to converge.
        'model': LogisticRegression(max_iter=1000),
        'params': {
            'C': [0.1, 1, 10]
        }
    },
    'Decision Tree': {
        # Seeded: tree construction is randomized without random_state.
        'model': DecisionTreeClassifier(random_state=RANDOM_STATE),
        'params': {
            'max_depth': [3, 5, 10],
            'criterion': ['gini', 'entropy']
        }
    },
    'Random Forest': {
        # Seeded: bootstrap sampling and feature sub-sampling are random.
        'model': RandomForestClassifier(random_state=RANDOM_STATE),
        'params': {
            'n_estimators': [50, 100],
            'max_depth': [5, 10]
        }
    },
    'SVM': {
        # probability=True enables predict_proba; the internal calibration
        # it relies on is randomized, hence the seed.
        'model': SVC(probability=True, random_state=RANDOM_STATE),
        'params': {
            'C': [0.1, 1],
            'kernel': ['linear', 'rbf']
        }
    },
    'Naive Bayes': {
        'model': GaussianNB(),
        'params': {}
    }
}

In [5]:
# Tune every candidate with a 5-fold grid search (selected on F1) over a
# scaler + classifier pipeline, then score the refit best pipeline on the
# held-out test set. One summary row per model is collected in `results`.
results = []

for name, mp in model_params.items():
    print(f"Tuning {name}...")

    # Grid keys must be prefixed with the pipeline step name ('clf').
    param_grid = {f'clf__{param}': values for param, values in mp['params'].items()}

    pipe = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('clf', mp['model']),
    ])
    grid = GridSearchCV(
        estimator=pipe,
        param_grid=param_grid,
        scoring='f1',
        cv=5,
        n_jobs=-1,
    )
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_

    # Hard labels feed F1; the second predict_proba column feeds ROC AUC.
    y_pred = best_model.predict(X_test)
    y_proba = best_model.predict_proba(X_test)[:, 1]

    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)

    row = {
        'Model': name,
        'Best Params': grid.best_params_,
        'F1-Score': f1,
        'AUC-ROC': auc,
    }
    results.append(row)

Tuning Logistic Regression...
Tuning Decision Tree...
Tuning Random Forest...
Tuning SVM...
Tuning Naive Bayes...


In [7]:
# Build the final leaderboard from the per-model result rows.
# Each metric is ranked separately (higher score = better = rank 1); the
# overall rank is the rank of the sum of the two per-metric ranks.
leaderboard = pd.DataFrame(results)

# method='min' gives tied models the same integer rank (competition ranking:
# 1, 1, 3, ...). The default method='average' produces fractional ranks on
# ties (e.g. 1.5), which astype(int) would silently truncate -- a three-way
# tie for first place would then show up as rank 2 with no rank 1 at all.
leaderboard['F1-Rank'] = (
    leaderboard['F1-Score'].rank(ascending=False, method='min').astype(int)
)
leaderboard['AUC-Rank'] = (
    leaderboard['AUC-ROC'].rank(ascending=False, method='min').astype(int)
)
leaderboard['Overall Rank'] = (
    (leaderboard['F1-Rank'] + leaderboard['AUC-Rank']).rank(method='min').astype(int)
)

# Stable sort keeps tied models in their original (insertion) order, so the
# printed table is deterministic across runs.
leaderboard = leaderboard.sort_values('Overall Rank', kind='stable')
print("\n Final Leaderboard:")
print(leaderboard[['Model', 'F1-Score', 'AUC-ROC', 'Overall Rank']])


 Final Leaderboard:
                 Model  F1-Score   AUC-ROC  Overall Rank
0  Logistic Regression  0.979310  0.995701             1
3                  SVM  0.986111  0.993717             1
2        Random Forest  0.958333  0.992560             3
4          Naive Bayes  0.944444  0.986772             4
1        Decision Tree  0.928571  0.909226             5
