**Heart Failure Prediction**

*Predicting heart failure in hospital ICU (Intensive Care Unit)*

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

/kaggle/input/in-hospital-mortality-prediction/data01.csv


In [2]:
#It will take 2-3 mins to install this framework...
!pip install pycaret --ignore-installed llvmlite

Collecting pycaret
  Downloading pycaret-2.3.5-py3-none-any.whl (288 kB)
     |████████████████████████████████| 288 kB 4.9 MB/s            
[?25hCollecting llvmlite
  Downloading llvmlite-0.37.0-cp37-cp37m-manylinux2014_x86_64.whl (26.3 MB)
     |████████████████████████████████| 26.3 MB 66.7 MB/s            
[?25hCollecting wordcloud
  Downloading wordcloud-1.8.1-cp37-cp37m-manylinux1_x86_64.whl (366 kB)
     |████████████████████████████████| 366 kB 46.3 MB/s            
[?25hCollecting pyod
  Downloading pyod-0.9.5.tar.gz (113 kB)
     |████████████████████████████████| 113 kB 40.8 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting mlxtend>=0.17.0
  Downloading mlxtend-0.19.0-py2.py3-none-any.whl (1.3 MB)
     |████████████████████████████████| 1.3 MB 33.9 MB/s            
[?25hCollecting textblob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
     |████████████████████████████████| 636 kB 65.9 MB/s         

In [3]:
# Load the dataset into DataFrame df, using the CSV's first column as the row index.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/in-hospital-mortality-prediction/data01.csv',
    index_col=0,
)

In [4]:
# Rows whose 'outcome' label is NaN cannot be used for supervised learning
# (the dataset reportedly contains one such row), so they are removed and the
# remaining rows are renumbered from zero.
df = df.dropna(subset=['outcome']).reset_index(drop=True)
df.head()

Unnamed: 0,ID,outcome,age,gendera,BMI,hypertensive,atrialfibrillation,CHD with no MI,diabetes,deficiencyanemias,...,Blood sodium,Blood calcium,Chloride,Anion gap,Magnesium ion,PH,Bicarbonate,Lactic acid,PCO2,EF
0,125047,0.0,72,1,37.588179,0,0,0,1,1,...,138.75,7.463636,109.166667,13.166667,2.618182,7.23,21.166667,0.5,40.0,55
1,139812,0.0,75,2,,0,0,0,0,1,...,138.888889,8.1625,98.444444,11.444444,1.8875,7.225,33.444444,0.5,78.0,55
2,109787,0.0,83,2,26.572634,0,0,0,0,1,...,140.714286,8.266667,105.857143,10.0,2.157143,7.268,30.571429,0.5,71.5,35
3,130587,0.0,43,2,83.264629,0,0,0,0,0,...,138.5,9.476923,92.071429,12.357143,1.942857,7.37,38.571429,0.6,75.0,55
4,138290,0.0,75,2,31.824842,1,0,0,0,1,...,136.666667,8.733333,104.5,15.166667,1.65,7.25,22.0,0.6,50.0,55


In [5]:
# Preprocessing

from sklearn.impute import KNNImputer

# Predictor columns only. They are selected by name rather than by position so
# that the identifier and the target can never slip into the feature matrix if
# the column order of the CSV changes.
features = df.drop(columns=['ID', 'outcome'])

# Eliminate NaN values: each missing entry is replaced by the unweighted mean
# of that feature over the 2 nearest rows.
# NOTE(review): the imputer is fitted on the full dataset, before any
# train/test split is made — confirm this is acceptable for the evaluation.
imputer = KNNImputer(n_neighbors=2, weights="uniform")
featurespp = imputer.fit_transform(features)

In [6]:
# Column names of the imputed matrix. They are taken from `features` (the frame
# the imputer was fitted on) instead of a hand-maintained list, so the labels
# cannot drift out of sync with the data.
col = features.columns.tolist()

# fit_transform returned a plain array; wrap it back into a labelled DataFrame
# that shares the row index of `features`.
dfpp = pd.DataFrame(data=featurespp, columns=col, index=features.index)

# Re-attach the target; the assignment aligns on the row index shared with df.
dfpp.loc[:, 'outcome'] = df['outcome']
dfpp.head()

Unnamed: 0,age,gendera,BMI,hypertensive,atrialfibrillation,CHD with no MI,diabetes,deficiencyanemias,depression,Hyperlipemia,...,Blood calcium,Chloride,Anion gap,Magnesium ion,PH,Bicarbonate,Lactic acid,PCO2,EF,outcome
0,72.0,1.0,37.588179,0.0,0.0,0.0,1.0,1.0,0.0,1.0,...,7.463636,109.166667,13.166667,2.618182,7.23,21.166667,0.5,40.0,55.0,0.0
1,75.0,2.0,24.005318,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,8.1625,98.444444,11.444444,1.8875,7.225,33.444444,0.5,78.0,55.0,0.0
2,83.0,2.0,26.572634,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,8.266667,105.857143,10.0,2.157143,7.268,30.571429,0.5,71.5,35.0,0.0
3,43.0,2.0,83.264629,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.476923,92.071429,12.357143,1.942857,7.37,38.571429,0.6,75.0,55.0,0.0
4,75.0,2.0,31.824842,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,8.733333,104.5,15.166667,1.65,7.25,22.0,0.6,50.0,55.0,0.0


In [7]:
# initialize setup
from pycaret.classification import *

In [8]:
'''setup()-function initializes the environment in pycaret and creates the transformation pipeline 
to prepare the data for modeling and deployment.'''
# The target column is 'outcome'; session_id fixes the random seed of the run.
s = setup(
    data=dfpp,
    target='outcome',
    session_id=123,
    numeric_imputation='mean',
    silent=True,
    remove_outliers=True,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,outcome
2,Target Type,Binary
3,Label Encoded,"0.0: 0, 1.0: 1"
4,Original Data,"(1176, 49)"
5,Missing Values,False
6,Numeric Features,38
7,Categorical Features,10
8,Ordinal Features,False
9,High Cardinality Features,False


In [9]:
# Compare all available classification models.
# compare_models() trains every estimator in the model library, scores each
# one with cross-validation and returns the best model according to the sort
# metric (Accuracy by default). round=2 rounds the metrics in the score grid
# to two decimals.
# NOTE(review): the target is imbalanced — the Dummy Classifier already reaches
# 0.89 accuracy in the grid below — so ranking by accuracy says little about
# how well positives are detected; consider compare_models(sort='AUC') or
# sort='F1' once downstream cells have been checked.
best_model = compare_models(round=2)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.9,0.0,0.08,0.55,0.14,0.12,0.19,0.01
lr,Logistic Regression,0.89,0.75,0.13,0.32,0.17,0.14,0.17,0.6
rf,Random Forest Classifier,0.89,0.78,0.01,0.1,0.02,0.02,0.03,0.35
et,Extra Trees Classifier,0.89,0.78,0.0,0.0,0.0,0.0,0.0,0.29
xgboost,Extreme Gradient Boosting,0.89,0.77,0.09,0.36,0.14,0.11,0.14,14.36
lightgbm,Light Gradient Boosting Machine,0.89,0.77,0.13,0.43,0.19,0.16,0.19,0.25
catboost,CatBoost Classifier,0.89,0.78,0.03,0.2,0.06,0.05,0.08,6.92
dummy,Dummy Classifier,0.89,0.5,0.0,0.0,0.0,0.0,0.0,0.01
knn,K Neighbors Classifier,0.88,0.62,0.06,0.3,0.1,0.06,0.09,0.07
ada,Ada Boost Classifier,0.88,0.71,0.21,0.35,0.25,0.2,0.21,0.11
