In [18]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn import pipeline
from sklearn import compose
from sklearn import impute
from sklearn import preprocessing
import time
from sklearn import metrics

from sklearn.tree          import DecisionTreeClassifier
from sklearn.ensemble      import RandomForestClassifier
from sklearn.ensemble      import ExtraTreesClassifier
from sklearn.ensemble      import AdaBoostClassifier
from sklearn.ensemble      import GradientBoostingClassifier
from sklearn.ensemble      import HistGradientBoostingClassifier
from xgboost               import XGBClassifier
from lightgbm              import LGBMClassifier
from catboost              import CatBoostClassifier



In [19]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [20]:
df.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [21]:
X,y = df.drop(['output'], axis=1), df['output']

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [23]:
num_vars = ['age','trtbps','chol','thalachh','oldpeak']
cat_vars = ['sex', 'cp', 'fbs', 'restecg','exng','slp', 'caa', 'thall']


num_4_treeModels = pipeline.Pipeline(steps=[
    ('imputer', impute.SimpleImputer(strategy='constant', fill_value=-9999)),])
cat_4_treeModels = pipeline.Pipeline(steps=[('ordinal', preprocessing.OrdinalEncoder())])

tree_prepro = compose.ColumnTransformer(transformers=[
    ('num',  num_4_treeModels,num_vars),
    ('cat',cat_4_treeModels, cat_vars),
], remainder='drop') 

In [24]:
tree_classifiers = {
    "Decision Tree": DecisionTreeClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "Random Forest": RandomForestClassifier(random_state=0, max_depth=4, n_estimators=200),
    "AdaBoost": AdaBoostClassifier(),
    "Skl GBM": GradientBoostingClassifier(),
    "Skl HistGBM":HistGradientBoostingClassifier(),
    "XGBoost": XGBClassifier(),
    "LightGBM": LGBMClassifier(),
    "CatBoost": CatBoostClassifier()}

tree_classifiers = {name: pipeline.make_pipeline(tree_prepro, model) for name, model in tree_classifiers.items()}

In [25]:
results = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})
for model_name, model in tree_classifiers.items():
    start_time = time.time()        
    model.fit(X_train,y_train)
    pred =model.predict(X_test)

    total_time = time.time() - start_time

    results = results.append({"Model":    model_name,
                            "Accuracy": round(metrics.accuracy_score(y_test, pred)*100),
                            "Bal Acc.": round(metrics.balanced_accuracy_score(y_test, pred)*100),
                            "Time":     total_time},
                            ignore_index=True)
                                
results_ord = results.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_ord.index += 1 
results_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')
results_ord


  results = results.append({"Model":    model_name,
  results = results.append({"Model":    model_name,
  results = results.append({"Model":    model_name,
  results = results.append({"Model":    model_name,
  results = results.append({"Model":    model_name,
  results = results.append({"Model":    model_name,
  results = results.append({"Model":    model_name,




  results = results.append({"Model":    model_name,


Learning rate set to 0.005621
0:	learn: 0.6887887	total: 1.37ms	remaining: 1.37s
1:	learn: 0.6839242	total: 2.75ms	remaining: 1.37s
2:	learn: 0.6798594	total: 4.06ms	remaining: 1.35s
3:	learn: 0.6751661	total: 5.57ms	remaining: 1.39s
4:	learn: 0.6721501	total: 7.48ms	remaining: 1.49s
5:	learn: 0.6687665	total: 9.69ms	remaining: 1.6s
6:	learn: 0.6646959	total: 11.3ms	remaining: 1.6s
7:	learn: 0.6602593	total: 12.8ms	remaining: 1.59s
8:	learn: 0.6557655	total: 14.4ms	remaining: 1.58s
9:	learn: 0.6530049	total: 15.4ms	remaining: 1.52s
10:	learn: 0.6483069	total: 16.7ms	remaining: 1.5s
11:	learn: 0.6439382	total: 17.8ms	remaining: 1.46s
12:	learn: 0.6399754	total: 19.1ms	remaining: 1.45s
13:	learn: 0.6358013	total: 20.5ms	remaining: 1.45s
14:	learn: 0.6328240	total: 22.1ms	remaining: 1.45s
15:	learn: 0.6296357	total: 23.4ms	remaining: 1.44s
16:	learn: 0.6263390	total: 24.7ms	remaining: 1.43s
17:	learn: 0.6227491	total: 26ms	remaining: 1.42s
18:	learn: 0.6205867	total: 26.6ms	remaining: 1.3

  results = results.append({"Model":    model_name,


Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,AdaBoost,90.0,90.0,0.094749
2,Extra Trees,87.0,86.0,0.148637
3,CatBoost,85.0,85.0,1.495695
4,Random Forest,84.0,83.0,0.296217
5,Skl HistGBM,84.0,83.0,0.30116
6,LightGBM,84.0,83.0,0.068817
7,Skl GBM,80.0,80.0,0.091773
8,Decision Tree,79.0,79.0,0.015985
9,XGBoost,79.0,79.0,0.117684
