# Import Needed Libraries

##### LazyPredict is a Python library that helps you to partially automate the process of selecting the best algorithm to train your dataset. By supplying your data, LazyPredict would use more than 60 ML algorithms to train a model. And the end result would be presented to you

In [1]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from lazypredict.Supervised import LazyClassifier

# EDA

In [3]:
df = pd.read_csv('/kaggle/input/heart-failure-prediction/heart.csv')
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


# Preprocessing

### Define features X and Target y

In [4]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,M,ATA,140,289,0,Normal,172,N,0.00,Up
1,49,F,NAP,160,180,0,Normal,156,N,1.00,Flat
2,37,M,ATA,130,283,0,ST,98,N,0.00,Up
3,48,F,ASY,138,214,0,Normal,108,Y,1.50,Flat
4,54,M,NAP,150,195,0,Normal,122,N,0.00,Up
...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.20,Flat
914,68,M,ASY,144,193,1,Normal,141,N,3.40,Flat
915,57,M,ASY,130,131,0,Normal,115,Y,1.20,Flat
916,57,F,ATA,130,236,0,LVH,174,N,0.00,Flat


In [5]:
y

0      0
1      1
2      0
3      1
4      0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 918, dtype: int64

### Encode string columns

In [6]:
X = X.apply(LabelEncoder().fit_transform)
X

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,12,1,1,41,147,0,1,98,0,10,2
1,21,0,2,55,40,0,1,82,0,20,1
2,9,1,1,31,141,0,2,25,0,10,2
3,20,0,0,39,72,0,1,34,1,25,1
4,26,1,2,49,53,0,1,48,0,10,2
...,...,...,...,...,...,...,...,...,...,...,...
913,17,1,3,14,122,0,1,58,0,22,1
914,40,1,0,45,51,1,1,67,0,42,1
915,29,1,0,31,9,0,1,41,1,22,1
916,29,0,1,31,94,0,0,100,0,10,1


### Scaling Data

In [7]:
StandardScalerModel = StandardScaler()
X = StandardScalerModel.fit_transform(X)
X

array([[-1.4331398 ,  0.51595242,  0.22903206, ..., -0.8235563 ,
        -0.87246276,  1.05211381],
       [-0.47848359, -1.93816322,  1.27505906, ..., -0.8235563 ,
         0.12037326, -0.59607813],
       [-1.75135854,  0.51595242,  0.22903206, ..., -0.8235563 ,
        -0.87246276,  1.05211381],
       ...,
       [ 0.37009972,  0.51595242, -0.81699495, ...,  1.21424608,
         0.31894046, -0.59607813],
       [ 0.37009972, -1.93816322,  0.22903206, ..., -0.8235563 ,
        -0.87246276, -0.59607813],
       [-1.64528563,  0.51595242,  1.27505906, ..., -0.8235563 ,
        -0.87246276,  1.05211381]])

### Split data into train and test

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, shuffle=True)

In [9]:
X_train.shape

(734, 11)

In [10]:
X_test.shape

(184, 11)

# Create Model

In [11]:
clf = LazyClassifier(verbose=1)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

 14%|█▍        | 4/29 [00:00<00:01, 14.08it/s]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.8369565217391305, 'Balanced Accuracy': 0.827042116761743, 'ROC AUC': 0.8270421167617429, 'F1 Score': 0.8358749065270805, 'Time taken': 0.11402320861816406}
{'Model': 'BaggingClassifier', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8408180604442287, 'ROC AUC': 0.8408180604442287, 'F1 Score': 0.8427766789470299, 'Time taken': 0.04504704475402832}
{'Model': 'BernoulliNB', 'Accuracy': 0.8152173913043478, 'Balanced Accuracy': 0.8119917465711859, 'ROC AUC': 0.8119917465711858, 'F1 Score': 0.81553027912749, 'Time taken': 0.020058870315551758}
{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.8315217391304348, 'Balanced Accuracy': 0.8223692195654813, 'ROC AUC': 0.8223692195654811, 'F1 Score': 0.8306181904910375, 'Time taken': 0.11676549911499023}


 31%|███       | 9/29 [00:00<00:01, 19.42it/s]

{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.7663043478260869, 'Balanced Accuracy': 0.7699356718048307, 'ROC AUC': 0.7699356718048307, 'F1 Score': 0.7677625472977796, 'Time taken': 0.015985488891601562}
{'Model': 'DummyClassifier', 'Accuracy': 0.5815217391304348, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.4276482892574332, 'Time taken': 0.011894464492797852}
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.8315217391304348, 'Balanced Accuracy': 0.8296516567544605, 'ROC AUC': 0.8296516567544603, 'F1 Score': 0.8319336912882044, 'Time taken': 0.011736154556274414}
{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.8641304347826086, 'Balanced Accuracy': 0.8558684306347858, 'ROC AUC': 0.8558684306347858, 'F1 Score': 0.8634017665250302, 'Time taken': 0.15875768661499023}
{'Model': 'GaussianNB', 'Accuracy': 0.8260869565217391, 'Balanced Accuracy': 0.8213375409637091, 'ROC AUC': 0.8213375409637091, 'F1 Score': 0.8260869565217391, 'Time taken': 0.016239404678344727}


 41%|████▏     | 12/29 [00:00<00:00, 19.43it/s]

{'Model': 'KNeighborsClassifier', 'Accuracy': 0.8532608695652174, 'Balanced Accuracy': 0.850163854836752, 'ROC AUC': 0.850163854836752, 'F1 Score': 0.8533897635462142, 'Time taken': 0.03380942344665527}
{'Model': 'LabelPropagation', 'Accuracy': 0.8206521739130435, 'Balanced Accuracy': 0.8166646437674475, 'ROC AUC': 0.8166646437674474, 'F1 Score': 0.8208097110009284, 'Time taken': 0.10285067558288574}


 62%|██████▏   | 18/29 [00:00<00:00, 19.74it/s]

{'Model': 'LabelSpreading', 'Accuracy': 0.8206521739130435, 'Balanced Accuracy': 0.8166646437674475, 'ROC AUC': 0.8166646437674474, 'F1 Score': 0.8208097110009284, 'Time taken': 0.12416195869445801}
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.8369565217391305, 'Balanced Accuracy': 0.8306833353562326, 'ROC AUC': 0.8306833353562325, 'F1 Score': 0.8366386981947622, 'Time taken': 0.06966042518615723}
{'Model': 'LinearSVC', 'Accuracy': 0.8260869565217391, 'Balanced Accuracy': 0.8176963223692195, 'ROC AUC': 0.8176963223692195, 'F1 Score': 0.8253636484510037, 'Time taken': 0.04872560501098633}
{'Model': 'LogisticRegression', 'Accuracy': 0.8315217391304348, 'Balanced Accuracy': 0.8241898288627261, 'ROC AUC': 0.8241898288627261, 'F1 Score': 0.8310127102957509, 'Time taken': 0.01883697509765625}
{'Model': 'NearestCentroid', 'Accuracy': 0.8315217391304348, 'Balanced Accuracy': 0.835113484646195, 'ROC AUC': 0.8351134846461951, 'F1 Score': 0.8324746761738089, 'Time taken': 0.0191743373870

 97%|█████████▋| 28/29 [00:01<00:00, 22.50it/s]

{'Model': 'RandomForestClassifier', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8335356232552494, 'ROC AUC': 0.8335356232552494, 'F1 Score': 0.8415460491690352, 'Time taken': 0.19326329231262207}
{'Model': 'RidgeClassifier', 'Accuracy': 0.8369565217391305, 'Balanced Accuracy': 0.8306833353562326, 'ROC AUC': 0.8306833353562325, 'F1 Score': 0.8366386981947622, 'Time taken': 0.027472972869873047}
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.8315217391304348, 'Balanced Accuracy': 0.8260104381599709, 'ROC AUC': 0.8260104381599709, 'F1 Score': 0.8313629664875727, 'Time taken': 0.01352238655090332}
{'Model': 'SGDClassifier', 'Accuracy': 0.8097826086956522, 'Balanced Accuracy': 0.8036776307804345, 'ROC AUC': 0.8036776307804345, 'F1 Score': 0.8096033492601624, 'Time taken': 0.014966011047363281}
{'Model': 'SVC', 'Accuracy': 0.8586956521739131, 'Balanced Accuracy': 0.8511955334385242, 'ROC AUC': 0.8511955334385242, 'F1 Score': 0.8581079643664407, 'Time taken': 0.02370476722717285}
{'Mo

100%|██████████| 29/29 [00:02<00:00, 14.29it/s]

{'Model': 'LGBMClassifier', 'Accuracy': 0.8532608695652174, 'Balanced Accuracy': 0.8483432455395072, 'ROC AUC': 0.8483432455395071, 'F1 Score': 0.8531225837149826, 'Time taken': 0.5981218814849854}





In [12]:
score = pd.DataFrame(models)
score

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ExtraTreesClassifier,0.86,0.86,0.86,0.86,0.16
SVC,0.86,0.85,0.85,0.86,0.02
KNeighborsClassifier,0.85,0.85,0.85,0.85,0.03
LGBMClassifier,0.85,0.85,0.85,0.85,0.6
BaggingClassifier,0.84,0.84,0.84,0.84,0.05
XGBClassifier,0.84,0.84,0.84,0.84,0.11
NearestCentroid,0.83,0.84,0.84,0.83,0.02
RandomForestClassifier,0.84,0.83,0.83,0.84,0.19
LinearDiscriminantAnalysis,0.84,0.83,0.83,0.84,0.07
RidgeClassifier,0.84,0.83,0.83,0.84,0.03
