# Hospital

In [1]:
from keras.layers import Dense, Dropout, Input, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential, Model
import keras
import numpy as np
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.callbacks import CSVLogger
import tensorflow as tf
import innvestigate
import innvestigate.utils as iutils
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
from collections import namedtuple

Using TensorFlow backend.


In [2]:
import setGPU

setGPU: Setting GPU to: 5


In [3]:
# Open a TF1-style interactive session; evaluate_model() enters it via
# `sess.as_default()` to evaluate the per-sample loss tensor.
sess = tf.InteractiveSession()

In [4]:
# Default seed. It is passed to loading_hospital() further down, and the same
# name is later reused as the loop variable of the experiment loop.
random_state = 42

In [5]:
# Root directory of the datasets, relative to the notebook; loading_hospital()
# looks for "hospital/hospital" underneath it.
path_to_data = '../data/'

### 1. Loading the hospital data

In [6]:
def loading_hospital(path_to_data, random_state):
    """Load the hospital table as a feature matrix plus one-hot labels.

    Reads ``<path_to_data>/hospital/hospital`` as CSV, keeps eight numeric
    columns unchanged and one-hot encodes the categorical columns. A
    categorical column with exactly two categories is reduced to a single
    indicator column.

    Args:
        path_to_data: Directory containing the ``hospital/hospital`` CSV file.
        random_state: Unused; kept so that existing callers keep working.

    Returns:
        A tuple ``(X, y)``. ``X`` is a NumPy array with one row per CSV row.
        ``y`` is the ``(n_rows, 2)`` one-hot array produced by
        ``keras.utils.to_categorical``: column 1 marks rows whose
        ``readmitted`` value is the first category (code 0), column 0 marks
        every other row.
    """
    df = pd.read_csv(os.path.join(path_to_data, "hospital", "hospital"))

    x = df.loc[:, ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient',
                   'number_emergency', 'number_inpatient', 'number_diagnoses']]

    categorical_var_names = ['gender', 'race', 'age', 'discharge_disposition_id', 'max_glu_serum', 'A1Cresult',
                             'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride',
                             'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone',
                             'acarbose', 'miglitol', 'troglitazone', 'tolazamide', 'examide', 'citoglipton', 'insulin',
                             'glyburide-metformin', 'glipizide-metformin', 'glimepiride-pioglitazone',
                             'metformin-rosiglitazone', 'metformin-pioglitazone', 'change', 'diabetesMed']

    # Collect all pieces first and concatenate once instead of growing the
    # frame column-block by column-block inside the loop.
    pieces = [x]
    for categorical_var_name in categorical_var_names:
        categorical_var = pd.Categorical(df.loc[:, categorical_var_name])
        # Just have one dummy variable if it's boolean
        drop_first = len(categorical_var.categories) == 2
        pieces.append(pd.get_dummies(
            categorical_var,
            prefix=categorical_var_name,
            drop_first=drop_first))
    x = pd.concat(pieces, axis=1)

    # Set the Y labels: category code 0 becomes class 1, every other code
    # (1 = ">30", 2 = "No" per the original note) becomes class 0.
    # The labels must be 0/1 *before* one-hot encoding: with the former -1/+1
    # encoding, to_categorical used -1 as a column index, which wraps around to
    # the last column, so every single row ended up in class 1.
    # NOTE(review): code 0 is presumably "<30" -- confirm against the data.
    readmitted = pd.Categorical(df.readmitted)
    y = (readmitted.codes == 0).astype(int)

    y_temp = keras.utils.to_categorical(y, 2)
    X_temp = np.asarray(x)
    return X_temp, y_temp

In [7]:
def load_hospital(path_to_data='../data2'):
    """Load the hospital table as a feature DataFrame and a -1/+1 label vector.

    Reads ``<path_to_data>/hospital/hospital`` as CSV, keeps eight numeric
    columns unchanged and one-hot encodes the categorical columns. A
    categorical column with exactly two categories is reduced to a single
    indicator column.

    Args:
        path_to_data: Directory containing the ``hospital/hospital`` CSV file.
            Defaults to ``'../data2'``, the location that used to be
            hard-coded here.

    Returns:
        A tuple ``(x, y)``. ``x`` is a DataFrame with the numeric columns
        first, followed by the dummy columns in the order of
        ``categorical_var_names``. ``y`` is a NumPy array holding ``1`` for
        rows whose ``readmitted`` value is the first category (code 0) and
        ``-1`` for every other row.
    """
    df = pd.read_csv(os.path.join(path_to_data, "hospital", "hospital"))

    x = df.loc[:, ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient',
                   'number_emergency', 'number_inpatient', 'number_diagnoses']]

    categorical_var_names = ['gender', 'race', 'age', 'discharge_disposition_id', 'max_glu_serum', 'A1Cresult',
                             'metformin', 'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride',
                             'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide', 'pioglitazone', 'rosiglitazone',
                             'acarbose', 'miglitol', 'troglitazone', 'tolazamide', 'examide', 'citoglipton', 'insulin',
                             'glyburide-metformin', 'glipizide-metformin', 'glimepiride-pioglitazone',
                             'metformin-rosiglitazone', 'metformin-pioglitazone', 'change', 'diabetesMed']

    # Collect all pieces first and concatenate once instead of growing the
    # frame column-block by column-block inside the loop.
    pieces = [x]
    for categorical_var_name in categorical_var_names:
        categorical_var = pd.Categorical(df.loc[:, categorical_var_name])
        # Just have one dummy variable if it's boolean
        drop_first = len(categorical_var.categories) == 2
        pieces.append(pd.get_dummies(
            categorical_var,
            prefix=categorical_var_name,
            drop_first=drop_first))
    x = pd.concat(pieces, axis=1)

    # Set the Y labels
    readmitted = pd.Categorical(df.readmitted)
    y = np.copy(readmitted.codes)
    # Merge codes 1 (>30) and 2 (No) into the label -1, then relabel code 0 as +1.
    # The order matters: the first assignment must not produce any new zeros.
    y[y >= 1] = -1
    y[y == 0] = 1
    return x, y

In [None]:
# Load the full feature matrix and one-hot labels once; the experiment loop
# below draws its `main_indicies` subsample from these arrays.
X_temp, y_temp = loading_hospital(path_to_data, random_state) 

### Training the models

In [None]:
def make_categorical(y, categorical, num_classes):
    """Return the labels either one-hot encoded or as plain integers.

    Args:
        y: NumPy array of class labels.
        categorical: If truthy, one-hot encode with
            ``keras.utils.to_categorical``; otherwise only cast to ``int``.
        num_classes: Number of columns of the one-hot encoding (ignored when
            ``categorical`` is falsy).

    Returns:
        The one-hot matrix, or ``y`` cast to ``int``.
    """
    if not categorical:
        return y.astype(int)
    return keras.utils.to_categorical(y, num_classes)


In [None]:
def create_hospital_base():
    """Build the uncompiled fully connected classifier for the hospital data.

    The network stacks three ``tanh`` Dense layers (1024, 512 and 256 units),
    each followed by a Dropout layer with rate 0, and ends in a 2-unit softmax
    layer. Every kernel is drawn from a normal distribution with mean 0 and
    standard deviation 0.01; every bias starts at zero.

    Returns:
        The ``Sequential`` model (no input shape is set here).
    """
    # Notes left by the original author (not used by this function):
    # batch_size = 512
    # epochs = 50
    # learning_rate = 0.01

    def small_normal():
        # A fresh initializer per layer, exactly as when written out by hand.
        return keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)

    net = Sequential()
    for position, width in enumerate((1024, 512, 256), start=1):
        net.add(Dense(width, activation='tanh', name='base_dense_{}'.format(position),
                      kernel_initializer=small_normal(),
                      bias_initializer='zeros'))
        net.add(Dropout(0, name='dropout_{}'.format(position)))
    net.add(Dense(2, activation='softmax', name='base_dense_4',
                  kernel_initializer=small_normal(),
                  bias_initializer='zeros'))
    return net

In [None]:
def create_hospital(seed, data_size, categorical):
    """Draw class-balanced, disjoint train and test sets from the hospital data.

    The labels returned by ``load_hospital`` are mapped from -1/+1 to 0/1.
    After seeding NumPy's global RNG with ``seed``, the row indices of each
    class are shuffled; the first ``int(data_size / 2)`` indices of each class
    form the training set and the next ``int(data_size / 2)`` the test set.

    Args:
        seed: Seed passed to ``np.random.seed``.
        data_size: Requested number of rows per split (half per class).
        categorical: Forwarded to ``make_categorical`` to choose between
            one-hot and integer labels.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` with float32 feature arrays.
    """
    features, labels = load_hospital()
    labels = (labels + 1) / 2  # -1/+1  ->  0/1

    np.random.seed(seed)
    total = len(labels)
    assert features.shape[0] == total
    per_class_train = int(data_size / 2)
    per_class_test = int(data_size / 2)

    positives = np.where(labels == 1)[0]
    negatives = np.where(labels == 0)[0]
    np.random.shuffle(positives)
    np.random.shuffle(negatives)
    assert len(positives) + len(negatives) == total

    # Per class: the first block of shuffled indices trains, the next one tests.
    train_part = slice(None, per_class_train)
    test_part = slice(per_class_train, per_class_train + per_class_test)
    train_idx = np.concatenate((positives[train_part], negatives[train_part]))
    test_idx = np.concatenate((positives[test_part], negatives[test_part]))
    # Mix the two classes within each split.
    np.random.shuffle(train_idx)
    np.random.shuffle(test_idx)

    def take(rows):
        # Materialise the selected rows and encode their labels.
        selected = np.array(features.iloc[rows, :], dtype=np.float32)
        return selected, make_categorical(labels[rows], categorical, 2)

    x_train, y_train = take(train_idx)
    x_test, y_test = take(test_idx)
    return x_train, y_train, x_test, y_test

In [None]:
def evaluate_model(model,X,y):
    """Compute per-sample loss and gradient-attribution statistics for a model.

    Args:
        model: Trained Keras model; ``model.predict(X)`` must return class
            probabilities.
        X: 2-D feature array, one row per sample.
        y: One-hot labels with the same number of rows as ``X``.

    Returns:
        A tuple of five per-sample arrays:
        ``(loss, prediction_var, analysis_var, analysis_1, analysis_2)``,
        i.e. the categorical cross-entropy, the variance of the predicted
        probabilities, and the variance, L1 norm and L2 norm of the "gradient"
        analysis returned by iNNvestigate (all reduced over axis 1).
    """
    prediction = model.predict(X)
    # Keras losses are called as loss(y_true, y_pred). The arguments used to be
    # passed the other way round, which yields -sum(prediction * log(labels))
    # instead of the cross-entropy. The labels are cast to the prediction dtype
    # so that TensorFlow does not see mixed float types.
    y_true = tf.convert_to_tensor(np.asarray(y, dtype=prediction.dtype))
    y_pred = tf.convert_to_tensor(prediction)
    loss = keras.losses.categorical_crossentropy(y_true, y_pred)
    with sess.as_default():
        loss = loss.eval()
    try:
        # noinspection PyUnresolvedReferences
        model_wo_softmax = iutils.keras.graph.model_wo_softmax(model)
    except Exception:
        # Best effort: if the softmax cannot be stripped, analyse the model as it is.
        model_wo_softmax = model
    analyzer = innvestigate.create_analyzer("gradient", model_wo_softmax)
    analysis = analyzer.analyze(X)
    prediction_var = np.var(prediction,axis=1)
    analysis_var = np.var(analysis,axis=1)
    analysis_1 = np.linalg.norm(analysis,axis=1,ord=1)
    analysis_2 = np.linalg.norm(analysis,axis=1,ord=2)
    return loss, prediction_var, analysis_var, analysis_1, analysis_2

In [None]:
# Experiment configuration.
lr = 0.001                      # learning rate handed to optimizers.Adagrad
decay = 1e-6                    # learning-rate decay handed to optimizers.Adagrad
epochs = 1000                   # number of epochs for model.fit
points_per_model = 10000        # NOTE(review): not referenced by the visible code; the loop hard-codes 10000
verbose = 0                     # verbosity for model.fit
explanation = "gradient"        # NOTE(review): not referenced by the visible code; evaluate_model hard-codes "gradient"
experiment_name = "hospital"    # sub-directory of ThresholdExperiments/ that receives the CSV results

In [None]:
# Train several models per seed and store per-sample statistics
# (loss, prediction variance, gradient-attribution statistics) for the train
# and the test split of each model in ThresholdExperiments/<experiment>/<seed>/.
for random_state in range(1):
    print(random_state)
    print("######")
    path="ThresholdExperiments/{}/{}/".format(experiment_name,random_state)
    # exist_ok: re-running the notebook must not fail because the directory
    # was already created by an earlier run.
    os.makedirs(path, exist_ok=True)
    np.random.seed(random_state)
    # NOTE(review): X and y are not used by the rest of this cell -- confirm
    # whether a later cell needs them.
    main_indicies = np.random.choice(len(X_temp),80000,replace=False)
    X,y = X_temp[main_indicies], y_temp[main_indicies]
    start = time.time()
    for model_number in range(4):
        print(model_number, end=" ")
        # NOTE(review): create_hospital() re-seeds NumPy with the same seed on
        # every iteration, so all models of one seed get the same split --
        # confirm that this is intended.
        x_train, y_train, x_test , y_test =  create_hospital(random_state, points_per_model,categorical=True)
        optimizer = optimizers.Adagrad(lr=lr, decay=decay)
        model = create_hospital_base()
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=epochs, 
              validation_data=(x_test, y_test), verbose=verbose)
        train_loss, train_prediction_var, train_analysis_var, train_analysis_1, train_analysis_2 = evaluate_model(model,x_train,y_train)
        df = pd.DataFrame()
        df['train_loss'] = train_loss
        df['train_prediction_var'] = train_prediction_var
        df['train_analysis_var'] = train_analysis_var
        df['train_analysis_1'] = train_analysis_1
        df['train_analysis_2'] = train_analysis_2
        test_loss, test_prediction_var, test_analysis_var, test_analysis_1, test_analysis_2 = evaluate_model(model,x_test,y_test)
        df['test_loss'] = test_loss
        df['test_prediction_var'] = test_prediction_var
        df['test_analysis_var'] = test_analysis_var
        df['test_analysis_1'] = test_analysis_1
        df['test_analysis_2'] = test_analysis_2
        # One CSV per model: <path>/<model_number>.csv
        df.to_csv(os.path.join(path, "{}.csv".format(model_number)))
        # Seconds elapsed since the first model of this seed started training.
        print(time.time() - start)