# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset from the current working directory.
data = pd.read_csv("heart.csv")
print("Data loaded successfully")

print("Initial Data Shape:", data.shape)

# Report the per-column count of missing entries before any imputation.
print("\nMissing Values:")
print(data.isnull().sum())

# Impute each numeric column with that column's own median.
numeric_columns = data.select_dtypes(include=[np.number]).columns
for col in numeric_columns:
    column_median = data[col].median()
    data[col] = data[col].fillna(column_median)

# Integer-encode every object-dtype column. Values are stringified first, so
# a missing entry becomes its own category. The single encoder is refit per
# column, so after the loop `le` only remembers the last column's classes.
le = LabelEncoder()
object_columns = data.select_dtypes(include=[object]).columns
for col in object_columns:
    as_text = data[col].astype(str)
    data[col] = le.fit_transform(as_text)

print("\nData Preprocessing Complete")

# Synthetic target: 'Readmission' is drawn at random from {0, 1} (seeded for
# repeatability), so it is independent of every feature column.
# NOTE(review): with a random label, scores near chance are the expected
# outcome -- confirm this placeholder target is intentional.
np.random.seed(42)
data['Readmission'] = np.random.choice([0, 1], size=len(data))

X = data.drop(columns=["Readmission"])
y = data["Readmission"]

# Split BEFORE scaling: the scaler's mean/std must be learned from the
# training rows only, otherwise test-set statistics leak into training.
# The same random_state and row count give the same train/test partition as
# splitting an already-scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling; kept so any code that
# still refers to `X_scaled` continues to work.
X_scaled = scaler.transform(X)
print("\nData Split into Training and Testing Sets")

# Candidate estimators, keyed by the display name used in the log output.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "XGBoost": XGBClassifier(),
}

# Hyper-parameter grid for each candidate; keys must match `models`.
param_grids = {
    "Logistic Regression": {
        "C": [0.1, 1, 10],
    },
    "Random Forest": {
        "n_estimators": [100, 200],
        "max_depth": [10, 20],
    },
    "XGBoost": {
        "learning_rate": [0.01, 0.1],
        "n_estimators": [100, 200],
    },
}

# Best refit estimator per candidate, filled in by the search loop below.
best_models = {}

for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    # 5-fold cross-validated grid search, selecting on ROC-AUC.
    # `fit` returns the search object itself, so chaining is safe.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grids[model_name],
        scoring="roc_auc",
        cv=5,
    ).fit(X_train, y_train)
    best_models[model_name] = grid_search.best_estimator_
    print(f"Best Parameters for {model_name}: {grid_search.best_params_}")

# Evaluate every tuned model on the held-out test set.
for model_name, model in best_models.items():
    # Hard class labels drive the confusion matrix and classification report.
    y_pred = model.predict(X_test)
    # ROC-AUC measures how well the model *ranks* samples, so it needs a
    # continuous score. Passing 0/1 predictions collapses the curve to a
    # single operating point and disagrees with the "roc_auc" scorer used
    # during the grid search. Column 1 is the probability of class 1.
    y_score = model.predict_proba(X_test)[:, 1]
    roc_auc = roc_auc_score(y_test, y_score)
    cm = confusion_matrix(y_test, y_pred)
    print(f"\n{model_name} Evaluation:")
    print("ROC-AUC Score:", roc_auc)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Heatmap of the confusion matrix (rows: actual, columns: predicted).
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
    plt.title(f"Confusion Matrix for {model_name}")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

print("\nTask Completed Successfully!")
