## Import Library

In [15]:
import os
#os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" #for Mac
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
#os.environ["CUDA_VISIBLE_DEVICES"]='1'
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
pd.options.display.max_columns = None

## Load Data and Preprocessing

In [2]:
import datatable
df_amc = datatable.fread('./1205_asan_raw_v2.csv', na_strings=['']).to_pandas()
df_brmh = datatable.fread('./brmh_1209.csv', na_strings=['']).to_pandas()
df_eumc = datatable.fread('./1205_eumc_raw2.csv', na_strings=['', 'NA']).to_pandas()
df_snuh = datatable.fread("./snuh_2004_2019_v2.csv", na_strings=['']).to_pandas()

## snuh
df_snuh.loc[df_snuh['sex'] == 'M', 'sex'] = 0
df_snuh.loc[df_snuh['sex'] == 'F', 'sex'] = 1

## eumc
df_eumc.loc[df_eumc['sex'] == 2, 'sex'] = 0
df_eumc['death30'] = df_eumc['death_inhosp'].astype(int)

## brmh
df_brmh['sex'] = df_brmh['sex'].astype(int)

In [16]:
print(df_amc.shape)
print(df_snuh.shape)
print(df_brmh.shape)
print(df_eumc.shape)

(30680, 40)
(225634, 27)
(39185, 32)
(33603, 26)


### Variable Selection

In [3]:
var_list = ['age','bmi','sex', 'preop_alb','preop_bun','preop_cr','preop_glu','preop_gpt','preop_got','preop_hb','preop_k','preop_na','preop_plt','preop_wbc']

## Model

In [4]:
def my_model(hidden_layer_num, node_size, batch_normalization, drop_out, drop_out_rate, learning_rate):
    
    ### clear memory
    if K.backend() == 'tensorflow':
        K.clear_session()
    
    model = keras.Sequential()

    ### Input Layer
    model.add(keras.layers.InputLayer(input_shape=(14,)))
    
    ### Hidden Layer
    for i in range(hidden_layer_num):
        model.add(keras.layers.Dense(node_size))
        if batch_normalization: model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.ReLU())
        if drop_out: model.add(keras.layers.Dropout(drop_out_rate))    
    
    ### Output Layer
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    
    ### Compile
    model.compile(optimizer= keras.optimizers.Adam(learning_rate), 
                  loss='binary_crossentropy', 
                  metrics=["accuracy"])
    
    return model

### Display results

In [5]:
#from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

def plot_result(X_val, y_val, model, options):
    if options == True:
        tmp_list = []
        y_test_proba = model.predict((X_val))

        tmp_list.append(roc_auc_score(y_val, y_test_proba))
        print("AUROC: {}".format(tmp_list[0]))

        fpr, tpr, _ = roc_curve(y_val, y_test_proba)
        #plot_roc_curve(estimator=model, X=fpr, y=tpr)
        plt.figure(figsize=(10,10))
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.plot(fpr, tpr)
        plt.show()

        tmp_list.append(average_precision_score(y_val, y_test_proba))
        print("AUPRC: {}".format(tmp_list[1]))

        prec, recall, _ = precision_recall_curve(y_val, y_test_proba)

        plt.figure(figsize=(10,10))
        plt.plot(recall, prec)
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall Curve')
        plt.show()
    else:
        tmp_list = []
        y_test_proba = model.predict((X_val))

        tmp_list.append(roc_auc_score(y_val, y_test_proba))
        print("AUROC: {}".format(tmp_list[0]))

        tmp_list.append(average_precision_score(y_val, y_test_proba))
        print("AUPRC: {}".format(tmp_list[1]))
        
    return tmp_list

## Transfer Learning - Optimization Test

In [6]:
snuh_set = {'data':df_snuh, 'build_epoch':15, 'build_batch':2**8}
amc_set = {'data':df_amc, 'build_epoch':3, 'build_batch':2**5}
eumc_set = {'data':df_eumc, 'build_epoch':15, 'build_batch':2**5}
brmh_set = {'data':df_brmh, 'build_epoch':10, 'build_batch':2**5}

In [7]:
def transfer_test(build_set, val_set_1, val_set_2, val_set_3, variable):
    
    build_data = build_set['data']
    build_epoch = build_set['build_epoch']
    build_batch = build_set['build_batch']
    
    X_data = build_data[variable]
    y_data = build_data['death30']
    
    ## Missing Imputation
    X_data.fillna(X_data.median(), inplace=True)
    
    ## Train Test Split
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, stratify=y_data, random_state=1004)
    
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train))
    X_test = pd.DataFrame(scaler.transform(X_test))
    
    #transfer_epoch = [1, 2, 3, 4, 5, 6]
    tune_size = [0.2, 0.4, 0.6, 0.8]
    
    result = []
    
    for val_set in tqdm([val_set_1, val_set_2, val_set_3], desc='data_set'):
        
        val_data = val_set['data']
        
        X_val_set = val_data[variable]
        X_val_set.fillna(X_val_set.median(), inplace=True)
        y_val_set = val_data['death30']
        
        for size in tqdm(tune_size, desc='train_size'):        
            X_tune, X_val, y_tune, y_val = train_test_split(X_val_set, y_val_set, train_size=size, stratify=y_val_set, random_state=1004)
            X_tune = scaler.transform(X_tune)
            X_val = scaler.transform(X_val)
            model = my_model(hidden_layer_num=5, node_size=1000, drop_out=True, drop_out_rate=0.4, batch_normalization=True, learning_rate=0.001)
            for val_batch in [2**5, 2**6, 2**7, 2**8, 2**9, 2**10]:
                ## Build Initial Model
                model.fit(X_train, y_train, batch_size=build_batch, epochs=build_epoch, verbose=0)
                print('\nInternal Validation:')
                result.append(plot_result(X_val=X_test, y_val=y_test, model=model, options=False))

                for epoch_num in range(10):
                    ## Transfer Learning
                    model.fit(X_tune, y_tune, batch_size=val_batch, epochs=1, verbose=0)
                    print('External Validation: train_size = {0}, epoch = {1}, batch_size = {2}'.format(size, epoch_num+1, val_batch))
                    result.append(plot_result(X_val=X_val, y_val=y_val, model=model, options=False))

    return result

In [8]:
result_snuh = transfer_test(build_set=snuh_set, 
                            val_set_1=amc_set, val_set_2=eumc_set, val_set_3=brmh_set, 
                            variable=var_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


HBox(children=(FloatProgress(value=0.0, description='data_set', max=3.0, style=ProgressStyle(description_width…

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


HBox(children=(FloatProgress(value=0.0, description='train_size', max=4.0, style=ProgressStyle(description_wid…


Internal Validation:
AUROC: 0.930931307088488
AUPRC: 0.16097492320304022
External Validation: train_size = 0.2, epoch = 1, batch_size = 32
AUROC: 0.9266938651978656
AUPRC: 0.12382356067839169
External Validation: train_size = 0.2, epoch = 2, batch_size = 32
AUROC: 0.8418149629418339
AUPRC: 0.03042418233812497
External Validation: train_size = 0.2, epoch = 3, batch_size = 32
AUROC: 0.6629883030245811
AUPRC: 0.015481288600055636
External Validation: train_size = 0.2, epoch = 4, batch_size = 32
AUROC: 0.8381345905455977
AUPRC: 0.03875915025452585
External Validation: train_size = 0.2, epoch = 5, batch_size = 32
AUROC: 0.8467220145393624
AUPRC: 0.05890968361463278
External Validation: train_size = 0.2, epoch = 6, batch_size = 32
AUROC: 0.8446302318838833
AUPRC: 0.03553625447336624
External Validation: train_size = 0.2, epoch = 7, batch_size = 32
AUROC: 0.8612205324135918
AUPRC: 0.07033047976941943
External Validation: train_size = 0.2, epoch = 8, batch_size = 32
AUROC: 0.8270583201592541


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


HBox(children=(FloatProgress(value=0.0, description='train_size', max=4.0, style=ProgressStyle(description_wid…


Internal Validation:
AUROC: 0.9334268396443821
AUPRC: 0.1571790693346748
External Validation: train_size = 0.2, epoch = 1, batch_size = 32
AUROC: 0.901260177655562
AUPRC: 0.23055600914334798
External Validation: train_size = 0.2, epoch = 2, batch_size = 32
AUROC: 0.9065152059286419
AUPRC: 0.21313587095004208
External Validation: train_size = 0.2, epoch = 3, batch_size = 32
AUROC: 0.899635631999117
AUPRC: 0.2170659022063235
External Validation: train_size = 0.2, epoch = 4, batch_size = 32
AUROC: 0.8986062354078834
AUPRC: 0.24098652058639247
External Validation: train_size = 0.2, epoch = 5, batch_size = 32
AUROC: 0.8693224218894346
AUPRC: 0.22697377033709534
External Validation: train_size = 0.2, epoch = 6, batch_size = 32
AUROC: 0.8229217240006708
AUPRC: 0.16717505083181547
External Validation: train_size = 0.2, epoch = 7, batch_size = 32
AUROC: 0.8777051654593052
AUPRC: 0.20699421904048085
External Validation: train_size = 0.2, epoch = 8, batch_size = 32
AUROC: 0.8638756315085472
AUPR

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


HBox(children=(FloatProgress(value=0.0, description='train_size', max=4.0, style=ProgressStyle(description_wid…


Internal Validation:
AUROC: 0.9323966215872347
AUPRC: 0.1694295511774942
External Validation: train_size = 0.2, epoch = 1, batch_size = 32
AUROC: 0.7456634287911523
AUPRC: 0.016036728673526745
External Validation: train_size = 0.2, epoch = 2, batch_size = 32
AUROC: 0.9054894349434212
AUPRC: 0.10826090016239077
External Validation: train_size = 0.2, epoch = 3, batch_size = 32
AUROC: 0.887794109568975
AUPRC: 0.19053096775207454
External Validation: train_size = 0.2, epoch = 4, batch_size = 32
AUROC: 0.8723249761965796
AUPRC: 0.17497320065511116
External Validation: train_size = 0.2, epoch = 5, batch_size = 32
AUROC: 0.8641909583623246
AUPRC: 0.18925933715528465
External Validation: train_size = 0.2, epoch = 6, batch_size = 32
AUROC: 0.8535869923462849
AUPRC: 0.1810798795116024
External Validation: train_size = 0.2, epoch = 7, batch_size = 32
AUROC: 0.8568498919690921
AUPRC: 0.16767696896543977
External Validation: train_size = 0.2, epoch = 8, batch_size = 32
AUROC: 0.8298787856593548
AU

In [None]:
result_amc = transfer_test(build_set=amc_set, 
                            val_set_1=snuh_set, val_set_2=eumc_set, val_set_3=brmh_set, 
                            variable=var_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


HBox(children=(FloatProgress(value=0.0, description='data_set', max=3.0, style=ProgressStyle(description_width…

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


HBox(children=(FloatProgress(value=0.0, description='train_size', max=4.0, style=ProgressStyle(description_wid…


Internal Validation:
AUROC: 0.7952077547241737
AUPRC: 0.044411342241986285
External Validation: train_size = 0.2, epoch = 1, batch_size = 32
AUROC: 0.8600806608411615
AUPRC: 0.01915425984510568
External Validation: train_size = 0.2, epoch = 2, batch_size = 32
AUROC: 0.9172383229112465
AUPRC: 0.10198318910471153
External Validation: train_size = 0.2, epoch = 3, batch_size = 32
AUROC: 0.9104809097120763
AUPRC: 0.08330397756180911
External Validation: train_size = 0.2, epoch = 4, batch_size = 32
AUROC: 0.9149737165447142
AUPRC: 0.08401820424305481
External Validation: train_size = 0.2, epoch = 5, batch_size = 32
AUROC: 0.9244397332246543
AUPRC: 0.1137943436926821
External Validation: train_size = 0.2, epoch = 6, batch_size = 32
AUROC: 0.9271119981402016
AUPRC: 0.09665414913674858
External Validation: train_size = 0.2, epoch = 7, batch_size = 32
AUROC: 0.9299187819600234
AUPRC: 0.09947015252724442
External Validation: train_size = 0.2, epoch = 8, batch_size = 32
AUROC: 0.9177297632881396


In [None]:
result_eumc = transfer_test(build_set=eumc_set, 
                            val_set_1=amc_set, val_set_2=snuh_set, val_set_3=brmh_set, 
                            variable=var_list)

In [None]:
result_brmh = transfer_test(build_set=brmh_set, 
                            val_set_1=amc_set, val_set_2=eumc_set, val_set_3=snuh_set, 
                            variable=var_list)

In [14]:
np.savez('./transfer_result' , snuh = np.array(result_snuh), amc = np.array(result_amc), eumc = np.array(result_eumc), brmh = np.array(result_brmh))