## IMPORTING LIBRARIES

#### NOTE: YOU WILL NEED THE OPENDATASETS LIBRARY TO DOWNLOAD THE DATA. UNCOMMENT AND RUN THE CELL BELOW TO INSTALL IT.

In [1]:
#!pip install opendatasets

In [37]:
import pandas as pd
import numpy as np
import opendatasets as od

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import MinMaxScaler

import xgboost as xgb

## DATA PREPARATION

In [5]:
# Kaggle dataset page for the heart-failure data. opendatasets prompts for
# Kaggle credentials and downloads the archive into ./heart-failure-prediction.
dataset_url = 'https://www.kaggle.com/datasets/fedesoriano/heart-failure-prediction'
# Pass the variable instead of repeating the URL literal, so there is a single
# place to change the dataset source.
od.download(dataset_url)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: ace991
Your Kaggle Key: ········
Downloading heart-failure-prediction.zip to ./heart-failure-prediction


100%|██████████| 8.56k/8.56k [00:00<00:00, 4.72MB/s]







In [6]:
# Load the downloaded CSV into a DataFrame.
heart_csv_path = './heart-failure-prediction/heart.csv'
df = pd.read_csv(heart_csv_path)

In [7]:
# Convert every object-dtype (string) column to pandas' categorical dtype.
object_columns = df.select_dtypes(['object']).columns
df[object_columns] = df[object_columns].apply(lambda column: column.astype('category'))

In [8]:
# Preview the first rows to sanity-check the loaded columns and values.
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [9]:
# Hold out 20% of the rows for testing (fixed seed for a repeatable split) and
# give each part a fresh 0..n-1 index.
df_train, df_test = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=11)
)

# Pull the target out of each feature frame: `pop` returns the column and
# removes it from the frame in one step.
y_train = df_train.pop('HeartDisease').values
y_test = df_test.pop('HeartDisease').values

In [10]:
# Features passed through the DictVectorizer.
columns_to_encode = [
    'Sex',
    'ChestPainType',
    'FastingBS',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]

# Numeric features rescaled with the MinMaxScaler.
columns_to_scale = [
    'Age',
    'RestingBP',
    'Cholesterol',
    'MaxHR',
    'Oldpeak',
]

In [11]:
# Fit both preprocessors on the training rows only.

# Numeric features: fit the min-max scaler and scale in one step.
scaler = MinMaxScaler()
scaled_columns = scaler.fit_transform(df_train[columns_to_scale])

# Remaining features: one dict per row, vectorized into a dense array.
dv = DictVectorizer(sparse=False)
train_dicts = df_train[columns_to_encode].to_dict(orient='records')
encoded_columns = dv.fit_transform(train_dicts)

# Final training matrix: scaled columns first, then the vectorized ones.
X_train = np.hstack([scaled_columns, encoded_columns])

In [12]:
# Apply the already-fitted preprocessors to the test rows (transform only,
# no refit), keeping the same column order as the training matrix.
test_dicts = df_test[columns_to_encode].to_dict(orient='records')
encoded_xcolumns = dv.transform(test_dicts)
scaled_xcolumns = scaler.transform(df_test[columns_to_scale])

X_test = np.hstack([scaled_xcolumns, encoded_xcolumns])

## TRAINING XGBOOST

In [13]:
# Gradient-boosted tree classifier for the binary HeartDisease target.
model = xgb.XGBClassifier(
    objective='binary:logistic',
    # `learning_rate` is the scikit-learn wrapper's name for the booster's `eta`;
    # using it avoids carrying both `eta` and `learning_rate` on the estimator.
    learning_rate=0.01,
    max_depth=10,
    n_estimators=150,
    # Each tree is grown on a random 30% sample of the training rows ...
    subsample=0.3,
    tree_method='approx',
    # ... so pin the seed explicitly. 0 is the value the fitted estimator
    # already reports, which keeps results unchanged while making the
    # reproducibility intent visible.
    random_state=0,
)
model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False, eta=0.01,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.00999999978, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=10, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=150,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, ...)

## SAVING THE MODEL USING BENTOML

In [16]:
import bentoml

In [15]:
# Fitted preprocessors passed to BentoML as custom objects alongside the model
# (presumably so inference can apply the same scaling/encoding — confirm in the
# serving code).
preprocessing_objects = {
    'dictVectorizer': dv,
    'minmaxscaler': scaler,
}

# Kept as the cell's last expression so the saved model's tag and path are displayed.
bentoml.xgboost.save_model(
    'heart_failure_prediction',
    model,
    custom_objects=preprocessing_objects,
)

Model(tag="heart_failure_prediction:qboblida42xazgim", path="/home/akram/bentoml/models/heart_failure_prediction/qboblida42xazgim/")

## TEST

In [17]:
import json

In [89]:
# Use row 10 of the held-out features as an example request payload and
# pretty-print it as JSON.
sample_row = df_test.iloc[10]
request = sample_row.to_dict()
print(json.dumps(request, indent=2))

{
  "Age": 45,
  "Sex": "F",
  "ChestPainType": "ASY",
  "RestingBP": 132,
  "Cholesterol": 297,
  "FastingBS": 0,
  "RestingECG": "Normal",
  "MaxHR": 144,
  "ExerciseAngina": "N",
  "Oldpeak": 0.0,
  "ST_Slope": "Up"
}
