In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

# Dummy dataset
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Support Vector Machine": SVC(probability=True),
    "Decision Tree": DecisionTreeClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
}

# Initialize a DataFrame to store results
results = pd.DataFrame(columns=["Classifier", "Accuracy", "Precision", "Recall", "F1 Score", "ROC-AUC"])

# Loop through classifiers and evaluate
# Initialize a list to collect results
results_list = []

# Loop through classifiers and evaluate
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # y_proba = clf.predict_proba(X_test)[:, 1] if hasattr(clf, "predict_proba") else None
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # roc_auc = roc_auc_score(y_test, y_proba) if y_proba is not None else np.nan
    
    # Collect results as a dictionary
    results_list.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        # "ROC-AUC": roc_auc,
    })

# Convert the list of dictionaries into a DataFrame
results = pd.DataFrame(results_list)

# Display results
print(results.sort_values(by="Accuracy", ascending=False))

# Optional: Detailed classification report for each classifier
for name, clf in classifiers.items():
    y_pred = clf.predict(X_test)
    print(f"\n{name} Classification Report:\n")
    print(classification_report(y_test, y_pred))


               Classifier  Accuracy  Precision    Recall  F1 Score
1           Random Forest     0.900   0.939394  0.869159  0.902913
3           Decision Tree     0.865   0.908163  0.831776  0.868293
0     Logistic Regression     0.855   0.914894  0.803738  0.855721
2  Support Vector Machine     0.845   0.895833  0.803738  0.847291
4     K-Nearest Neighbors     0.810   0.879121  0.747664  0.808081

Logistic Regression Classification Report:

              precision    recall  f1-score   support

           0       0.80      0.91      0.85        93
           1       0.91      0.80      0.86       107

    accuracy                           0.85       200
   macro avg       0.86      0.86      0.85       200
weighted avg       0.86      0.85      0.86       200


Random Forest Classification Report:

              precision    recall  f1-score   support

           0       0.86      0.94      0.90        93
           1       0.94      0.87      0.90       107

    accuracy           