# Build a ML Classification Model in 12 Lines with PyCaret

PyCaret is the core auto machine learning library that will be used for classification  
Pandas will be used for loading CSV data into a dataframe  
Shap assists with interpreting PyCaret model results  

In [1]:
# !pip install pycaret pandas shap
import pandas as pd
from pycaret.classification import *

# Load Data
source: https://www.youtube.com/watch?v=sL-4rWuEiVw

In [2]:
# Relative path: heart.csv must be in the kernel's working directory.
csv_path = 'heart.csv'
df = pd.read_csv(csv_path)
# Preview the first five rows to sanity-check the load.
df.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# Check the column types pandas inferred: every column was read as a number
# (int64, with oldpeak as float64), so integer-coded columns cannot be told
# apart from measurements by dtype alone.
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

# Train and Evaluate Model

In [4]:
# Columns that setup() should treat as categories rather than numeric values;
# they are stored as int64 in the dataframe, so they are declared explicitly.
cat_features = [
    'sex', 'cp', 'fbs',
    'restecg', 'exang', 'thal',
]

__setup()__: PyCaret's `setup` function initializes the ML experiment and sets up the training pipelines.  
A number of additional parameters can be set for the experiment within the function.

In [5]:
# Initialise the experiment on the full dataframe, predicting 'target'.
# session_id pins the random seed. Without it PyCaret draws a new seed on every
# run, so the train/hold-out split and all scores below would change on each
# Restart & Run All. 7247 is the seed reported in the recorded setup summary,
# which keeps the code consistent with the outputs shown in this notebook.
experiment = setup(
    df,
    target='target',
    categorical_features=cat_features,
    session_id=7247,
)

Unnamed: 0,Description,Value
0,session_id,7247
1,Target,target
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(303, 14)"
5,Missing Values,False
6,Numeric Features,5
7,Categorical Features,8
8,Ordinal Features,False
9,High Cardinality Features,False


In [6]:
# Train and cross-validate the candidate estimators, then keep the top-ranked
# one (the results table is ordered by Accuracy, highest first).
# NOTE(review): ridge shows AUC 0.0 in the table — presumably because the
# estimator exposes no probability output; confirm before comparing on AUC.
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.8297,0.0,0.8936,0.8314,0.8578,0.645,0.6578,0.012
lda,Linear Discriminant Analysis,0.8249,0.889,0.8936,0.8248,0.8544,0.6341,0.6474,0.013
lr,Logistic Regression,0.816,0.891,0.8705,0.8298,0.844,0.6184,0.6335,0.393
lightgbm,Light Gradient Boosting Machine,0.8115,0.881,0.8705,0.8229,0.8407,0.6091,0.6256,0.102
rf,Random Forest Classifier,0.7877,0.8849,0.8218,0.8236,0.814,0.5643,0.5817,0.14
ada,Ada Boost Classifier,0.7877,0.8167,0.8545,0.8079,0.8248,0.5524,0.5671,0.058
et,Extra Trees Classifier,0.7874,0.8893,0.8378,0.8087,0.8177,0.5617,0.5753,0.121
gbc,Gradient Boosting Classifier,0.7829,0.8612,0.8301,0.8102,0.8134,0.5525,0.5671,0.055
xgboost,Extreme Gradient Boosting,0.7788,0.8604,0.8308,0.8035,0.8079,0.5447,0.5642,12.204
dt,Decision Tree Classifier,0.7039,0.6948,0.7494,0.7431,0.7423,0.3917,0.397,0.012


# Test Model

In [7]:
# With no data argument, predict_model scores the estimator on the hold-out
# split that setup() reserved (90 rows in the recorded run) and returns those
# rows with the predicted 'Label' column appended.
predict_model(best_model)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Ridge Classifier,0.8901,0.8963,0.9762,0.82,0.8913,0.7819,0.7941


Unnamed: 0,age,trestbps,chol,thalach,oldpeak,sex_0,cp_0,cp_1,cp_2,cp_3,...,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3,target,Label
0,54.0,124.0,266.0,109.0,2.2,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0
1,56.0,120.0,193.0,162.0,1.9,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1
2,59.0,170.0,288.0,159.0,0.2,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,1
3,55.0,128.0,205.0,130.0,2.0,1.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0
4,58.0,128.0,259.0,130.0,3.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,45.0,138.0,236.0,152.0,0.2,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1
87,66.0,160.0,246.0,120.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0,0
88,44.0,120.0,220.0,170.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1
89,38.0,138.0,175.0,173.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1,1


In [8]:
# Usage demo only: these five rows come from the same dataframe that was given
# to setup(), so the model may already have been trained on them. Do not read
# the result as an unbiased evaluation.
predict_model(best_model, data=df.tail())

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,Label
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0,1
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0,1
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0,0
302,57,0,1,130,236,0,0,174,0,0.0,1,1,2,0,1


# Save and Load Model

In [9]:
# Persist the preprocessing pipeline together with the fitted estimator.
# NOTE(review): the file name says "ridge", but best_model is whatever
# compare_models() ranked first — keep the two in sync if the winner changes.
save_model(model=best_model, model_name='ridge-model')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=['sex', 'cp', 'fbs',
                                                             'restecg', 'exang',
                                                             'thal'],
                                       display_types=True, features_todrop=[],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[], target='target',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=Non...
                 ('fix_perfect', Remove_100(target='target')),
                 ('clean_names', Clean_Colum_Names()),
                 ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
               

In [11]:
# Reload the saved pipeline + estimator from disk; the name must match the one
# passed to save_model(). The artifact is a serialised Python object, so only
# load files that come from a trusted source.
model = load_model(model_name='ridge-model')

Transformation Pipeline and Model Successfully Loaded


In [12]:
# The loaded object is the whole Pipeline (preprocessing + estimator), so raw
# dataframe rows can be passed directly; the result is a plain array of
# predicted labels rather than a dataframe.
model.predict(df.tail())

array([1, 1, 0, 0, 1])