In [2]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import joblib

# Load the dataset
data = pd.read_csv("heart.csv")  # replace with your file path

# Separate features and target
X = data.drop('target', axis=1)
y = data['target']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define models to try
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(probability=True)
}

best_model = None
best_score = 0
best_name = ""

# Train and evaluate each model
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    roc = roc_auc_score(y_test, model.predict_proba(X_test_scaled)[:,1])
    print(f"{name} Accuracy: {acc:.4f}, ROC-AUC: {roc:.4f}")

    if acc > best_score:
        best_score = acc
        best_model = model
        best_name = name

print(f"\nBest Model: {best_name} with Accuracy: {best_score:.4f}")

# Classification report
y_pred_best = best_model.predict(X_test_scaled)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_best))

# Save the model and scaler
joblib.dump(best_model, "best_heart_disease_model.pkl")
joblib.dump(scaler, "scaler.pkl")

print("Model and scaler saved successfully!")


Logistic Regression Accuracy: 0.7951, ROC-AUC: 0.8787
Random Forest Accuracy: 0.9854, ROC-AUC: 1.0000
SVM Accuracy: 0.8878, ROC-AUC: 0.9632

Best Model: Random Forest with Accuracy: 0.9854

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205

Model and scaler saved successfully!
