In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file under the read-only Kaggle input mount, one full path per line.
input_root = '/kaggle/input'
for folder, _subdirs, names in os.walk(input_root):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/heart-failure-prediction/heart.csv


## About Lazy Classifier Model

In machine learning, a lazy algorithm refers to a type of learning method that defers the processing of training data until the moment it receives a prediction request. Unlike eager algorithms that proactively build a model during the training phase, lazy algorithms retain the entire training dataset and make predictions only when required. The key characteristic of lazy algorithms lies in their on-demand nature, as they don't commit to a specific model until a prediction is needed.

One of the most common examples of lazy learning is the k-Nearest Neighbors (k-NN) algorithm. In k-NN, predictions for new instances are made based on the majority class or average of the k-nearest data points in the feature space.

The advantages of lazy algorithms include simplicity and adaptability to changing data, as the model is not fixed during training. However, lazy algorithms may be computationally expensive during prediction time, as they require searching the entire training dataset for relevant instances.

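In summary, lazy algorithms in machine learning prioritize flexibility by postponing model construction until prediction is required, making them suitable for scenarios where adaptability to dynamic data is crucial.

Note that the `LazyClassifier` used later in this notebook (from the `lazypredict` package) is not itself a lazy learner in this sense: it is a convenience wrapper that fits a whole collection of classifiers on the training split and reports their scores on the test split side by side. k-NN (`KNeighborsClassifier`) is just one of the models it evaluates.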

In [2]:
# Load the heart-failure dataset (the CSV listed by the first cell) into a DataFrame.
df=pd.read_csv('/kaggle/input/heart-failure-prediction/heart.csv')

In [3]:
# Preview the first five rows of the raw data.
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [4]:
# (rows, columns) of the raw frame.
df.shape

(918, 12)

In [5]:
# Column dtypes and non-null counts; object-dtype columns hold strings and will need encoding.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [6]:
# Count missing (NaN/None) values per column.
# Only true nulls are counted here; sentinel values such as 0 used in place of
# a missing measurement are not detected by isnull().
df.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [7]:
# Count rows that are exact duplicates (in every column) of an earlier row.
df.duplicated().sum()

0

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# NOTE(review): the summary reports a minimum of 0 for both RestingBP and
# Cholesterol, which is not physiologically plausible. These zeros look like
# placeholders for missing measurements; confirm and decide how to handle
# them (drop / impute) before modelling.
df.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


### Data pre-processing

In [9]:
from sklearn.preprocessing import LabelEncoder
# LabelEncoder instance for converting string categories to integer codes.
lb=LabelEncoder()

In [10]:
# Flag the columns stored as strings (object dtype); these are the ones that
# must be encoded numerically before modelling.
df.dtypes=='object'

Age               False
Sex                True
ChestPainType      True
RestingBP         False
Cholesterol       False
FastingBS         False
RestingECG         True
MaxHR             False
ExerciseAngina     True
Oldpeak           False
ST_Slope           True
HeartDisease      False
dtype: bool

In [11]:
# Integer-encode every string-typed column of `df` in place.
#
# A separate LabelEncoder is fitted for each column and stored in
# `label_encoders`, so every column's category <-> code mapping stays available
# afterwards through `label_encoders[col].classes_` / `.inverse_transform(...)`.
# (Refitting one shared encoder column after column would keep only the mapping
# of the last column.)
#
# `lb` is rebound on each iteration, so after the loop it is the encoder fitted
# on the last column ('ST_Slope').
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
label_encoders = {}
for col in categorical_cols:
    lb = LabelEncoder()
    df[col] = lb.fit_transform(df[col])
    label_encoders[col] = lb

In [12]:
# Re-inspect the first rows to confirm the text columns now hold integer codes.
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,1,1,140,289,0,1,172,0,0.0,2,0
1,49,0,2,160,180,0,1,156,0,1.0,1,1
2,37,1,1,130,283,0,2,98,0,0.0,2,0
3,48,0,0,138,214,0,1,108,1,1.5,1,1
4,54,1,2,150,195,0,1,122,0,0.0,2,0


## Train Test Split

In [13]:
# Split the frame column-wise: the label column goes to Y, every other column to X.
Y = df['HeartDisease']
X = df.drop(columns='HeartDisease')

In [14]:
# Show the feature matrix. A bare last expression uses the notebook's rich
# table rendering instead of print()'s wrapped plain text; long frames are
# still truncated to the first and last rows by pandas.
X

     Age  Sex  ChestPainType  RestingBP  Cholesterol  FastingBS  RestingECG  \
0     40    1              1        140          289          0           1   
1     49    0              2        160          180          0           1   
2     37    1              1        130          283          0           2   
3     48    0              0        138          214          0           1   
4     54    1              2        150          195          0           1   
..   ...  ...            ...        ...          ...        ...         ...   
913   45    1              3        110          264          0           1   
914   68    1              0        144          193          1           1   
915   57    1              0        130          131          0           1   
916   57    0              1        130          236          0           0   
917   38    1              2        138          175          0           1   

     MaxHR  ExerciseAngina  Oldpeak  ST_Slope  
0  

In [15]:
# Show the target vector (HeartDisease, values 0/1).
print(Y)

0      0
1      1
2      0
3      1
4      0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 918, dtype: int64


In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state=2 fixes the shuffle so
# the split is the same on every run.
# NOTE(review): the split is not stratified on Y; consider stratify=Y if the
# class ratio in train and test should match exactly.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y, test_size=0.2, random_state=2)

In [17]:
# Sanity-check the split sizes: full, train and test feature-matrix shapes on one line.
print(*(part.shape for part in (X, X_train, X_test)))

(918, 11) (734, 11) (184, 11)


### Scaling the data

In [18]:
from sklearn.preprocessing import StandardScaler

# StandardScaler instance used to standardise the features (zero mean, unit variance per column).
scaler=StandardScaler()

In [19]:
# Fit the scaler on the training split only, then apply those same statistics
# to the test split, so no information from the test rows influences the scaling.
# Both names are overwritten with the scaled NumPy arrays, so this cell is not
# safe to run twice in one session: a second run would refit on (and re-scale)
# already-scaled data. Re-run the train/test split cell first if it must be repeated.
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [20]:
# Inspect the standardised training matrix (now a NumPy array rather than a DataFrame).
X_train

array([[-0.35508576, -1.94868818, -0.82059666, ..., -0.84081909,
        -0.82297207, -0.57689739],
       [-2.04339633,  0.51316573,  0.21967613, ..., -0.84081909,
        -0.82297207,  1.09019979],
       [ 0.80562776,  0.51316573, -0.82059666, ...,  1.18931648,
         0.7585843 , -0.57689739],
       ...,
       [-0.24956635,  0.51316573,  1.25994893, ...,  1.18931648,
         0.7585843 , -0.57689739],
       [ 0.80562776,  0.51316573,  0.21967613, ..., -0.84081909,
        -0.54387389,  1.09019979],
       [ 0.48906953,  0.51316573, -0.82059666, ..., -0.84081909,
        -0.82297207,  1.09019979]])

## Initialize and fit the LazyClassifier

In [21]:
!pip install lazypredict

Collecting lazypredict
  Obtaining dependency information for lazypredict from https://files.pythonhosted.org/packages/49/9b/323d7922bcf4a09b2b63962b021193011b94c74767b382ae681b1437c9b4/lazypredict-0.2.13-py2.py3-none-any.whl.metadata
  Downloading lazypredict-0.2.13-py2.py3-none-any.whl.metadata (12 kB)
Downloading lazypredict-0.2.13-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.13


In [22]:
from lazypredict.Supervised import LazyClassifier

### Fit the LazyClassifier model

In [23]:
# Run lazypredict's benchmark: the classifiers it bundles are trained on
# (X_train, Y_train) and scored on (X_test, Y_test). With verbose=1 a metrics
# dict is printed for each model as it finishes (see the output below).
# `models` is the resulting score table, one row per classifier.
# NOTE(review): the models are ranked by their score on the test split itself,
# so the top scores are an optimistic estimate of generalisation. Use a
# separate validation split or cross-validation before picking a winner.
# NOTE(review): LazyClassifier is constructed without predictions=True, so
# `predictions` presumably does not hold per-model predictions. Verify against
# the lazypredict documentation before using it downstream.
clf = LazyClassifier(verbose=1)
models, predictions = clf.fit(X_train, X_test, Y_train, Y_test)

  3%|▎         | 1/29 [00:00<00:05,  5.20it/s]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.8641304347826086, 'Balanced Accuracy': 0.8641632170313425, 'ROC AUC': 0.8641632170313425, 'F1 Score': 0.8641505154135709, 'Time taken': 0.19232726097106934}
{'Model': 'BaggingClassifier', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8420461265523359, 'ROC AUC': 0.8420461265523359, 'F1 Score': 0.8423586702522373, 'Time taken': 0.06743478775024414}
{'Model': 'BernoulliNB', 'Accuracy': 0.8478260869565217, 'Balanced Accuracy': 0.8476641040804258, 'ROC AUC': 0.8476641040804258, 'F1 Score': 0.8478260869565217, 'Time taken': 0.025094270706176758}


 14%|█▍        | 4/29 [00:00<00:02,  8.93it/s]

{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8413364872856298, 'ROC AUC': 0.8413364872856298, 'F1 Score': 0.8421343463439467, 'Time taken': 0.18538188934326172}
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.7608695652173914, 'Balanced Accuracy': 0.7599053814311059, 'ROC AUC': 0.7599053814311059, 'F1 Score': 0.7605862017444885, 'Time taken': 0.018985748291015625}
{'Model': 'DummyClassifier', 'Accuracy': 0.5163043478260869, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.3516051114227832, 'Time taken': 0.01587080955505371}
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.7880434782608695, 'Balanced Accuracy': 0.7876404494382023, 'ROC AUC': 0.7876404494382022, 'F1 Score': 0.787999591028871, 'Time taken': 0.016906023025512695}


 31%|███       | 9/29 [00:00<00:01, 12.71it/s]

{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8409816676522768, 'ROC AUC': 0.8409816676522768, 'F1 Score': 0.8419655498637715, 'Time taken': 0.24490690231323242}
{'Model': 'GaussianNB', 'Accuracy': 0.8532608695652174, 'Balanced Accuracy': 0.8522176227084566, 'ROC AUC': 0.8522176227084566, 'F1 Score': 0.8530216328029848, 'Time taken': 0.020113468170166016}
{'Model': 'KNeighborsClassifier', 'Accuracy': 0.875, 'Balanced Accuracy': 0.874334713187463, 'ROC AUC': 0.874334713187463, 'F1 Score': 0.874900074019245, 'Time taken': 0.03521871566772461}


 41%|████▏     | 12/29 [00:00<00:01, 13.03it/s]

{'Model': 'LabelPropagation', 'Accuracy': 0.8478260869565217, 'Balanced Accuracy': 0.8465996451803666, 'ROC AUC': 0.8465996451803667, 'F1 Score': 0.8475010833453706, 'Time taken': 0.16287493705749512}


 59%|█████▊    | 17/29 [00:01<00:00, 13.67it/s]

{'Model': 'LabelSpreading', 'Accuracy': 0.8478260869565217, 'Balanced Accuracy': 0.8465996451803666, 'ROC AUC': 0.8465996451803667, 'F1 Score': 0.8475010833453706, 'Time taken': 0.20719122886657715}
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.8695652173913043, 'Balanced Accuracy': 0.8690715552927262, 'ROC AUC': 0.8690715552927262, 'F1 Score': 0.8695034584980236, 'Time taken': 0.07202768325805664}
{'Model': 'LinearSVC', 'Accuracy': 0.8478260869565217, 'Balanced Accuracy': 0.8469544648137197, 'ROC AUC': 0.8469544648137197, 'F1 Score': 0.8476457647464928, 'Time taken': 0.05448317527770996}
{'Model': 'LogisticRegression', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8413364872856298, 'ROC AUC': 0.8413364872856298, 'F1 Score': 0.8421343463439467, 'Time taken': 0.02467060089111328}
{'Model': 'NearestCentroid', 'Accuracy': 0.8478260869565217, 'Balanced Accuracy': 0.8483737433471319, 'ROC AUC': 0.8483737433471319, 'F1 Score': 0.8478620491243213, 'Time taken': 0.0164735317230

 72%|███████▏  | 21/29 [00:01<00:00, 17.87it/s]

{'Model': 'NuSVC', 'Accuracy': 0.8532608695652174, 'Balanced Accuracy': 0.8511531638083973, 'ROC AUC': 0.8511531638083975, 'F1 Score': 0.8524433935503425, 'Time taken': 0.04469180107116699}
{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.8315217391304348, 'Balanced Accuracy': 0.8301005322294501, 'ROC AUC': 0.8301005322294501, 'F1 Score': 0.8310666222681696, 'Time taken': 0.021055221557617188}
{'Model': 'Perceptron', 'Accuracy': 0.8206521739130435, 'Balanced Accuracy': 0.8202838557066825, 'ROC AUC': 0.8202838557066824, 'F1 Score': 0.820615038562891, 'Time taken': 0.01764059066772461}
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.8586956521739131, 'Balanced Accuracy': 0.8574807806031934, 'ROC AUC': 0.8574807806031934, 'F1 Score': 0.8583938631064155, 'Time taken': 0.037122249603271484}


 83%|████████▎ | 24/29 [00:01<00:00, 12.33it/s]

{'Model': 'RandomForestClassifier', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8413364872856298, 'ROC AUC': 0.8413364872856298, 'F1 Score': 0.8421343463439467, 'Time taken': 0.3591439723968506}
{'Model': 'RidgeClassifier', 'Accuracy': 0.8695652173913043, 'Balanced Accuracy': 0.8690715552927262, 'ROC AUC': 0.8690715552927262, 'F1 Score': 0.8695034584980236, 'Time taken': 0.03811764717102051}
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.8695652173913043, 'Balanced Accuracy': 0.8690715552927262, 'ROC AUC': 0.8690715552927262, 'F1 Score': 0.8695034584980236, 'Time taken': 0.019269466400146484}
{'Model': 'SGDClassifier', 'Accuracy': 0.8369565217391305, 'Balanced Accuracy': 0.836782968657599, 'ROC AUC': 0.836782968657599, 'F1 Score': 0.8369565217391305, 'Time taken': 0.02158498764038086}
{'Model': 'SVC', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8413364872856298, 'ROC AUC': 0.8413364872856298, 'F1 Score': 0.8421343463439467, 'Time taken': 0.03410458564758301}


 97%|█████████▋| 28/29 [00:02<00:00, 14.23it/s]

{'Model': 'XGBClassifier', 'Accuracy': 0.842391304347826, 'Balanced Accuracy': 0.8416913069189829, 'ROC AUC': 0.8416913069189829, 'F1 Score': 0.8422653107199176, 'Time taken': 0.14821648597717285}


100%|██████████| 29/29 [00:02<00:00,  9.87it/s]

{'Model': 'LGBMClassifier', 'Accuracy': 0.875, 'Balanced Accuracy': 0.8732702542874039, 'ROC AUC': 0.8732702542874038, 'F1 Score': 0.8744982307989652, 'Time taken': 0.7994582653045654}





### Printing the models

In [24]:
# Show the score table. A bare last expression uses the notebook's rich table
# rendering, which keeps all metric columns in one table instead of print()'s
# wrapped plain-text blocks.
models

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
KNeighborsClassifier               0.88               0.87     0.87      0.87   
LGBMClassifier                     0.88               0.87     0.87      0.87   
RidgeClassifierCV                  0.87               0.87     0.87      0.87   
RidgeClassifier                    0.87               0.87     0.87      0.87   
LinearDiscriminantAnalysis         0.87               0.87     0.87      0.87   
AdaBoostClassifier                 0.86               0.86     0.86      0.86   
QuadraticDiscriminantAnalysis      0.86               0.86     0.86      0.86   
GaussianNB                         0.85               0.85     0.85      0.85   
NuSVC                              0.85               0.85     0.85      0.85   
NearestCentroid                    0.85               0.85     0.85      0.85   
BernoulliNB                 