# In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Generalized function for model training and evaluation
def train_and_evaluate_model(model, param_grid=None, model_name="Model"):
    """Fit ``model`` on the scaled training data and report how it performs.

    The function reads the module-level ``X_train_scaled``, ``X_test_scaled``,
    ``y_train`` and ``y_test``. In order, it:

    1. fits ``model`` and prints train/test accuracy,
    2. plots the test-set confusion matrix,
    3. prints the test-set classification report,
    4. plots the ROC curve (binary problems with ``predict_proba`` only),
    5. prints 5-fold shuffled cross-validation accuracy on the training set,
    6. optionally runs a grid search and prints the best parameters and score.

    Args:
        model: An unfitted scikit-learn style classifier. It is fitted in place.
        param_grid: Optional parameter grid passed to ``GridSearchCV``. When
            falsy (``None`` or empty), the grid search is skipped.
        model_name: Label used in the printed banner and plot titles.

    Returns:
        None. All results are printed or shown as figures.
    """
    print(f"\n{'='*20} {model_name} {'='*20}\n")

    # Training the model
    model.fit(X_train_scaled, y_train)

    # Predictions and accuracy
    y_train_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)

    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print(f"Training Accuracy: {train_accuracy:.2f}")
    print(f"Testing Accuracy: {test_accuracy:.2f}")

    # Confusion Matrix
    conf_matrix = confusion_matrix(y_test, y_test_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", annot_kws={"size": 16})
    plt.title(f"Confusion Matrix - {model_name}")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

    # Classification Report
    class_report = classification_report(y_test, y_test_pred)
    print(f"Classification Report:\n{class_report}")

    # ROC Curve and AUC
    if hasattr(model, "predict_proba"):
        classes = getattr(model, "classes_", None)
        if classes is not None and len(classes) != 2:
            # A single ROC curve from probability column 1 is only defined for
            # two classes; roc_curve would raise on a multiclass target.
            print("ROC Curve skipped: only available for binary classification.")
        else:
            # Column 1 of predict_proba belongs to classes_[1]; naming it as the
            # positive label keeps roc_curve working for labels other than
            # {0, 1} / {-1, 1} (e.g. strings), where it cannot infer the label.
            positive_label = classes[1] if classes is not None else None
            y_proba = model.predict_proba(X_test_scaled)[:, 1]
            fpr, tpr, _ = roc_curve(y_test, y_proba, pos_label=positive_label)
            roc_auc = auc(fpr, tpr)
            plt.figure(figsize=(6, 4))
            plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
            plt.plot([0, 1], [0, 1], "k--")  # chance-level diagonal
            plt.title(f"ROC Curve - {model_name}")
            plt.xlabel("False Positive Rate")
            plt.ylabel("True Positive Rate")
            plt.legend()
            plt.show()

    # Cross-validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cross_val_scores = cross_val_score(model, X_train_scaled, y_train, cv=kf, scoring='accuracy')
    print(f"Cross-Validation Scores: {cross_val_scores}")
    print(f"Mean Cross-Validation Accuracy: {np.mean(cross_val_scores):.2f}")

    # Grid Search (if parameter grid is provided)
    if param_grid:
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
        grid_search.fit(X_train_scaled, y_train)
        print(f"Best Parameters: {grid_search.best_params_}")
        # Report the score that made these parameters the winner, so the
        # search result can be compared with the untuned CV accuracy above.
        print(f"Best Grid Search CV Accuracy: {grid_search.best_score_:.2f}")

# Candidate classifiers, keyed by display name. Each value is a pair of
# (unfitted estimator, hyperparameter grid to search for that estimator).
models = {
    "Logistic Regression": (
        LogisticRegression(random_state=42),
        dict(
            C=[0.001, 0.01, 0.1, 1, 10, 100],
            max_iter=[100, 200, 300],
        ),
    ),
    "Decision Tree": (
        DecisionTreeClassifier(random_state=42),
        dict(
            max_depth=[3, 5, 10, None],
            min_samples_split=[2, 5, 10],
        ),
    ),
    "Random Forest": (
        RandomForestClassifier(random_state=42),
        dict(
            n_estimators=[10, 50, 100],
            max_depth=[3, 5, 10, None],
        ),
    ),
}

# Train and report on every configured model, in definition order.
for model_name in models:
    model, param_grid = models[model_name]
    train_and_evaluate_model(model, param_grid=param_grid, model_name=model_name)
