<a href="https://colab.research.google.com/github/SungjooHwang/ICTclass/blob/main/Ex03_2_Classification_HeartData_AutoML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This notebook uses PyCaret, a low-code machine learning library that simplifies model training and comparison.
# PyCaret is suitable for beginners and quick prototyping, and it works well in Google Colab.
# It automatically handles preprocessing, model training, cross-validation, and comparison of many algorithms.
# It is best suited to tabular data and to classification or regression tasks.

# PyCaret is especially useful when:
# - You want to compare many models quickly without writing separate code for each
# - You prefer automation of preprocessing and hyperparameter tuning
# - You are working in a limited environment like Google Colab or Jupyter

# If you need full control or custom model architecture (e.g., deep learning, text/image data), consider:
# - scikit-learn for customizable ML pipelines
# - XGBoost or LightGBM for high-performance gradient boosting
# - PyTorch or TensorFlow for deep learning tasks
# - Auto-sklearn or Google AutoML for advanced AutoML capabilities

In [None]:
!pip install pycaret



In [None]:
from pycaret.classification import *     # PyCaret's classification module
import pandas as pd

In [None]:
# Read the heart-disease table from the CSV file expected next to this notebook.
DATA_FILE = 'Ex03_2_heartData.csv'
df = pd.read_csv(DATA_FILE)

In [None]:
# Initialise the PyCaret classification experiment.
# (The old `silent` argument is intentionally not passed.)
TARGET_COLUMN = 'HeartDisease'  # column to predict; must exist in df
RANDOM_SEED = 42                # fixed seed so the run is reproducible

# The whole dataframe is handed to setup(); verbose=False suppresses the
# step-by-step output that setup() would otherwise print.
clf_setup = setup(df, target=TARGET_COLUMN, session_id=RANDOM_SEED, verbose=False)

In [None]:
# Train every available classifier, score each one with cross-validation,
# and keep the top-ranked model. The ranking metric is spelled out here;
# 'Accuracy' is the metric PyCaret uses by default for classification.
best_model = compare_models(sort='Accuracy')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.7516,0.8084,0.7761,0.7809,0.7735,0.4985,0.5068,0.196
et,Extra Trees Classifier,0.7486,0.8098,0.7827,0.7713,0.7725,0.4908,0.4981,0.155
lda,Linear Discriminant Analysis,0.7351,0.7975,0.7647,0.7636,0.7579,0.4645,0.4732,0.025
ridge,Ridge Classifier,0.7317,0.8011,0.7706,0.753,0.7578,0.4563,0.4621,0.026
nb,Naive Bayes,0.7316,0.8056,0.7588,0.7581,0.7515,0.4596,0.4661,0.026
lr,Logistic Regression,0.7285,0.8093,0.789,0.7405,0.7597,0.4473,0.4552,0.792
xgboost,Extreme Gradient Boosting,0.7283,0.7959,0.7522,0.7662,0.7528,0.4499,0.4592,0.062
lightgbm,Light Gradient Boosting Machine,0.728,0.7949,0.746,0.7659,0.7509,0.4507,0.4581,0.349
gbc,Gradient Boosting Classifier,0.7211,0.7752,0.7507,0.7509,0.7467,0.4346,0.4406,0.24
knn,K Neighbors Classifier,0.7185,0.7814,0.6971,0.7696,0.7278,0.4378,0.4427,0.043


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

In [None]:
# Show which estimator (and with which hyperparameters) won the comparison.
print("Best model selected by PyCaret:", best_model, sep="\n")

Best model selected by PyCaret:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       monotonic_cst=None, n_estimators=100, n_jobs=-1,
                       oob_score=False, random_state=42, verbose=0,
                       warm_start=False)


In [None]:
# Open PyCaret's interactive evaluation widget for the selected model; the
# "Plot Type" buttons switch between the available diagnostic plots
# (confusion matrix, ROC, feature importance, etc.).
evaluate_model(estimator=best_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [None]:
# 1) Honest performance estimate.
#    Calling predict_model() without `data` scores the model on the hold-out
#    split that setup() kept aside, i.e. on rows the model was not trained on.
#    The metrics printed by this call are the ones to report.
holdout_predictions = predict_model(best_model)

# 2) Predictions for every row of the dataset.
#    df contains the rows the model was trained on, so metrics computed on it
#    are optimistic (noticeably higher than the cross-validated scores shown by
#    compare_models). verbose=False hides that misleading score grid; the
#    returned frame still carries `prediction_label` and `prediction_score`.
predictions = predict_model(best_model, data=df, verbose=False)
predictions.head()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.8993,0.9667,0.9153,0.9038,0.9095,0.796,0.7961


Unnamed: 0,Age,Cholesterol,ExerciseDuration,METs,RestingHeartRate,RestingSystolic,RestingDiastolic,MaxHeartRate,PeakExSystolic,PeakExDiastolic,InducedSTDep,HeartDisease,prediction_label,prediction_score
0,0.708333,0.371508,0.486486,0.6875,0.253165,0.490741,0.583333,0.609023,0.679487,0.615385,0.410714,False,0,0.75
1,0.791667,0.519553,0.432432,0.6875,0.303797,0.62963,0.666667,0.293233,0.487179,0.615385,0.267857,True,1,0.92
2,0.791667,0.360335,0.378378,0.5,0.481013,0.259259,0.5,0.451128,0.358974,0.519231,0.464286,True,1,0.86
3,0.166667,0.418994,0.621622,0.9375,0.556962,0.351852,0.466667,0.887218,0.711538,0.403846,0.625,False,0,0.6
4,0.25,0.290503,0.297297,0.4375,0.392405,0.351852,0.6,0.774436,0.487179,0.461538,0.25,False,0,0.89


In [None]:
# Persist the selected model to disk. PyCaret stores the preprocessing
# (transformation) pipeline together with the estimator under the given name.
save_model(best_model, model_name='best_heart_model_PyCaret')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['Age', 'Cholesterol',
                                              'ExerciseDuration', 'METs',
                                              'RestingHeartRate',
                                              'RestingSystolic',
                                              'RestingDiastolic', 'MaxHeartRate',
                                              'PeakExSystolic',
                                              'PeakExDiastolic',
                                              'InducedSTDep'],
                                     transformer=SimpleImputer(add_indicator=False,
                                                               copy=True,
                                                               fill_value=None,
                                                               kee...
               