In [7]:
#required libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle

In [2]:
train_data = pd.read_csv("/kaggle/input/heart-disease-prediction/train_heart.csv")
test_data = pd.read_csv("/kaggle/input/heart-disease-prediction/test_heart.csv")

train_data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [3]:
X = train_data.drop(columns=['target'])
y = train_data['target']

X_test = test_data.drop(columns=['target'])
y_test = test_data['target']

In [8]:
models_params = {
    'LogisticRegression': {
        'model': LogisticRegression(max_iter=1000, solver='liblinear'),
        'params': {'C': [0.01, 0.1, 1, 10], 'penalty': ['l1', 'l2']}
    },
    'RandomForest': {
        'model': RandomForestClassifier(random_state=42),
        'params': {'n_estimators': [50, 100], 'max_depth': [None, 10, 20], 'min_samples_split': [2, 5]}
    },
    'XGBoost': {
        'model': XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
        'params': {'n_estimators': [50, 100], 'max_depth': [3, 6], 'learning_rate': [0.01, 0.1]}
    }
}

In [10]:
# Train models with GridSearchCV
for model_name, mp in models_params.items():
    print(f"Training {model_name}...")
    grid = GridSearchCV(mp['model'], mp['params'], cv=5, scoring='roc_auc', n_jobs=-1)
    grid.fit(X, y)
    best_model = grid.best_estimator_
    
    y_pred = best_model.predict(X_test)
    y_prob = best_model.predict_proba(X_test)[:, 1]
    
    acc = accuracy_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_prob)
    
    print(f"Best params for {model_name}: {grid.best_params_}")
    print(f"Accuracy: {acc}")
    print(f"ROC AUC: {roc}")
    print(classification_report(y_test, y_pred))

Training LogisticRegression...
Best params for LogisticRegression: {'C': 1, 'penalty': 'l1'}
Accuracy: 0.8646864686468647
ROC AUC: 0.9245937637241985
              precision    recall  f1-score   support

           0       0.89      0.80      0.84       138
           1       0.85      0.92      0.88       165

    accuracy                           0.86       303
   macro avg       0.87      0.86      0.86       303
weighted avg       0.87      0.86      0.86       303

Training RandomForest...
Best params for RandomForest: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 1.0
ROC AUC: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       138
           1       1.00      1.00      1.00       165

    accuracy                           1.00       303
   macro avg       1.00      1.00      1.00       303
weighted avg       1.00      1.00      1.00       303

Training XGBoost...
Best params for XGBoost: {'lea

In [12]:
# Save model
with open(f'{model_name}_best_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)
    
    results[model_name] = {
        'accuracy': acc,
        'roc_auc': roc,
        'best_params': grid.best_params_,
        'model': best_model
    }

In [14]:
results[model_name] = {
    'accuracy': acc,
    'roc_auc': roc,
    'best_params': grid.best_params_,
    'model': best_model
}

print("\nModel performance summary:")
for model in results:
    metrics = results[model]
    print(f"{model}: Accuracy={metrics.get('accuracy', 'N/A')}, ROC AUC={metrics.get('roc_auc', 'N/A')}")



Model performance summary:
LogisticRegression: Accuracy=N/A, ROC AUC=N/A
RandomForest: Accuracy=N/A, ROC AUC=N/A
LightGBM: Accuracy=N/A, ROC AUC=N/A
XGBoost: Accuracy=1.0, ROC AUC=1.0
