In [1]:
# Import Libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

# Load Dataset
# scikit-learn's bundled breast-cancer data: wrap the feature matrix in a
# DataFrame labelled with the feature names, and the class labels in a Series.
data = load_breast_cancer()
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])
y = pd.Series(data=data["target"])

# Split and Scale Data
# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Multiple Models and Evaluate
# Baseline comparison: each classifier is fitted with its library defaults on
# the standardized training split and scored once on the held-out test split.
# Random Forest and Decision Tree draw random numbers while fitting, so they
# are seeded (same seed as the split and the searches below) to keep this
# table identical across Restart & Run All.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(),
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

# One record per model; the DataFrame is built once after the loop.
results = []
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Precision/recall/F1 are called with their defaults, i.e. binary averaging
    # with pos_label=1.
    # NOTE(review): per the scikit-learn dataset docs, label 1 is "benign" in
    # load_breast_cancer — confirm that is the intended positive class.
    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred)
    })

# Rank the baselines by F1, best first.
df_results = pd.DataFrame(results)
print("\n🔍 Model Evaluation Results:\n", df_results.sort_values(by="F1 Score", ascending=False))

# Hyperparameter Tuning - GridSearchCV for Random Forest
# Exhaustive search over 3 * 4 * 3 = 36 parameter combinations, each scored
# by F1 with 5-fold cross-validation on the training split.
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[4, 6, 8, None],
    min_samples_split=[2, 5, 10],
)
grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    scoring='f1',
    cv=5,
    n_jobs=-1,  # use every available core
)
grid_rf.fit(X_train_scaled, y_train)

# Score the winning configuration on the held-out test split.
best_rf = grid_rf.best_estimator_
y_pred_rf = best_rf.predict(X_test_scaled)
print("\n✅ Best Random Forest Params:", grid_rf.best_params_)
print("F1 Score (Tuned RF):", f1_score(y_test, y_pred_rf))

# Hyperparameter Tuning - RandomizedSearchCV for SVM
# Try n_iter=10 candidates drawn from the 6 * 2 * 2 = 24 (C, gamma, kernel)
# combinations, each scored by F1 with 5-fold cross-validation; the seed
# fixes which candidates are drawn.
param_dist_svc = dict(
    C=np.logspace(start=-3, stop=2, num=6),  # 1e-3 ... 1e2, one value per decade
    gamma=['scale', 'auto'],
    kernel=['rbf', 'linear'],
)
rand_svc = RandomizedSearchCV(
    estimator=SVC(),
    param_distributions=param_dist_svc,
    n_iter=10,
    scoring='f1',
    cv=5,
    n_jobs=-1,  # use every available core
    random_state=42,
)
rand_svc.fit(X_train_scaled, y_train)

# Score the winning configuration on the held-out test split.
best_svc = rand_svc.best_estimator_
y_pred_svc = best_svc.predict(X_test_scaled)
print("\n✅ Best SVM Params:", rand_svc.best_params_)
print("F1 Score (Tuned SVM):", f1_score(y_test, y_pred_svc))

# Final Comparison
# Side-by-side test-set F1 of the two tuned models, best first.
final_results = pd.DataFrame({
    "Model": ["Tuned Random Forest", "Tuned SVM"],
    "F1 Score": [
        f1_score(y_test, y_pred_rf),
        f1_score(y_test, y_pred_svc),
    ],
})
print(
    "\n🏁 Final Tuned Model Comparison:\n",
    final_results.sort_values(by="F1 Score", ascending=False),
)



🔍 Model Evaluation Results:
                  Model  Accuracy  Precision    Recall  F1 Score
2                  SVM  0.982456   0.972603  1.000000  0.986111
0  Logistic Regression  0.973684   0.972222  0.985915  0.979021
1        Random Forest  0.956140   0.958333  0.971831  0.965035
3                  KNN  0.947368   0.957746  0.957746  0.957746
4        Decision Tree  0.938596   0.944444  0.957746  0.951049

✅ Best Random Forest Params: {'max_depth': 8, 'min_samples_split': 2, 'n_estimators': 200}
F1 Score (Tuned RF): 0.9722222222222222

✅ Best SVM Params: {'kernel': 'linear', 'gamma': 'auto', 'C': np.float64(0.1)}
F1 Score (Tuned SVM): 0.9861111111111112

🏁 Final Tuned Model Comparison:
                  Model  F1 Score
1            Tuned SVM  0.986111
0  Tuned Random Forest  0.972222
