In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef



  #preprocessing
class Preprocessor:
    def __init__(self):
        pass

    def fit(self, data, numer, categ):
        
        # Filling Nun values with means in numerical columns and mode values in categorical columns
        numm = data[numer]
        cat = data[categ]

        self.imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
        self.imp_mode = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
        self.scaler = StandardScaler()
        self.imp_mean.fit(numm)
        self.scaler.fit(numm)
        self.imp_mode.fit(cat)

    def transform(self, data, numer, categ):


        numer = data[numer]
        categ = data[categ]

        n = self.scaler.transform(self.imp_mean.transform(numer))
        c = self.imp_mode.transform(categ)
        
        final_data = pd.concat((pd.DataFrame(data = n, columns = numer.columns),
                                pd.DataFrame(data = c, columns = categ.columns)), axis=1)

        return final_data



In [2]:
data = pd.read_csv('hospital_deaths_train.csv')


target = data['In-hospital_death']
X = data.drop('In-hospital_death', axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, target)
numer = ['Age', 'Glucose_first', 'HR_first',
          'DiasABP_last', 'GCS_last', 'Glucose_last', 'HR_last', 'NIDiasABP_last', 'NIMAP_last',
          'NISysABP_last', 'Temp_last', 'DiasABP_lowest', 'GCS_lowest', 'NIDiasABP_lowest',
          'NIMAP_lowest', 'NISysABP_lowest', 'Temp_lowest', 'GCS_highest', 'Glucose_highest', 'HR_highest',
          'GCS_median', 'Glucose_median', 'HR_median', 'NIDiasABP_median', 'NIMAP_median', 'NISysABP_median',
          'Temp_median', 'BUN_first', 'Creatinine_first', 'HCO3_first', 'Lactate_first',
          'PaO2_first', 'WBC_first', 'Weight_first', 'pH_first', 'BUN_last', 'Creatinine_last', 'FiO2_last',
          'HCO3_last', 'Lactate_last', 'Mg_last',
          'Na_last', 'PaCO2_last', 'SysABP_last', 'WBC_last', 'Weight_last', 'pH_last',
          'MechVentDuration', 'UrineOutputSum']

categ = ['MechVentLast8Hour', 'CSRU']

In [3]:
prep = Preprocessor()
prep.fit(X_train, numer, categ)

In [4]:
X_train_final = prep.transform(X_train, numer, categ)
X_test_final = prep.transform(X_test, numer, categ)



In [7]:
model = keras.models.Sequential([
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


model.fit(X_train_final, y_train, epochs=10, batch_size=32)


loss, accuracy = model.evaluate(X_test_final, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
predictions = model.predict(X_test_final)
y_pred = [1 if pred > 0.30 else 0 for pred in predictions]



In [13]:
cm = confusion_matrix(y_test, y_pred)
Accuracy = (cm[0][0]+cm[1][1]) / (cm[0][0]+cm[1][1]+cm[0][1]+cm[1][0])
Sensitivity = cm[0][0] / (cm[0][0] + cm[0][1])
Specificity = cm[1][1] / (cm[1][1] + cm[1][0])
MCC = matthews_corrcoef(y_test, y_pred)
f1 = cm[1][1]/ (cm[1][1]+1/2*(cm[0][1]+cm[1][0]))
result = np.array(['KNN',Accuracy,Sensitivity,Specificity,MCC,f1]).reshape(1,6)
pand = pd.DataFrame(data = result, columns = ['model','Accuracy','Sensitivity','Specificity','MCC','f1'])
pand.set_index('model', inplace = True)

In [14]:
cm

array([[626,  69],
       [ 64,  54]])

In [15]:
pand

Unnamed: 0_level_0,Accuracy,Sensitivity,Specificity,MCC,f1
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
KNN,0.8364083640836408,0.9007194244604316,0.4576271186440678,0.3522569124268261,0.4481327800829875
