In [1]:
import pandas as pd 
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import os, sys, time
sys.path.append("..")
from all_funcs import util
from model import Generator, Discriminator, train_discriminator, train_generator
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

tf.keras.backend.set_floatx('float64')
pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',500)

In [2]:
from numba import cuda
os.environ['CUDA_VISIBLE_DEVICES']="0"
print(tf.test.is_gpu_available())

True


In [3]:
df=pd.read_csv("../dataset/df_noOutliner_ana.csv",index_col=0)
df, imp_mode, imp_mean=util.FeatureArrange(df)

In [4]:
## reduce redundant features which can be assembled
dataset=df.drop(['NIHTotal','THD_ID','cortical_CT', 'subcortical_CT',
              'circulation_CT', 'CT_find', 'watershed_CT', 'Hemorrhagic_infarct_CT',
              'CT_left', 'CT_right',],axis=1)

In [5]:
## prepare for inverse tensor values from range(0,1) to original values
params=dict()
params['max']=dataset.max().to_numpy()
params['min']=dataset.min().to_numpy()

In [6]:
sc = MinMaxScaler()
dataset.loc[:,dataset.columns] = sc.fit_transform(dataset.loc[:,dataset.columns])
dataset.shape[1]

63

In [7]:
## setting hyperparameter
latent_dim = dataset.shape[1]-1 
epochs = 15000
batch_size= 128
buffer_size = 6000
# save_interval = 50
n_critic = 5
checkpoint_dir = './training_checkpoints'


In [8]:
generator = Generator(latent_dim)
discriminator = Discriminator()

In [9]:
## create Cross Entropy
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [10]:
gen_opt = tf.keras.optimizers.Adam(0.0001,)
disc_opt = tf.keras.optimizers.Adam(0.00001,)

In [11]:
# save checkpoints
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# checkpoint = tf.train.Checkpoint(generator_optimizer=gen_opt,
#                                  discriminator_optimizer=disc_opt,
#                                  generator=generator,
#                                  discriminator=discriminator)

In [12]:
X_train, X_test = train_test_split(dataset, test_size=0.2,shuffle=True,
                                   stratify=dataset['elapsed_class'],
                                   random_state=None)

In [None]:
# separate to 0,1 dataset
data_1=X_train.loc[X_train['elapsed_class']==1]
data_0=X_train.loc[X_train['elapsed_class']==0]
## store losses
### generator losses
losses_gen = np.array([])
best_loss_gen = np.inf
### discriminator losses
losses_dis = np.array([])
best_loss_dis = np.inf

for epoch in range(epochs):
    start = time.time()
    disc_loss = 0
    gen_loss = 0

    # resample the dataset
    data1_shape_0 = data_1.sample(data_0.shape[0])
    df_same_shape = pd.concat([data1_shape_0, data_0])

    # slices to data and labels
    df_training = df_same_shape.iloc[:, :-1].to_numpy()
    training_labels = df_same_shape.iloc[:, -1].to_numpy().reshape(-1, 1)

    # create batch dataset
    training_dataset = tf.data.Dataset.from_tensor_slices((df_training, training_labels))\
        .shuffle(buffer_size).batch(batch_size, drop_remainder=True)

    for data, label in training_dataset:
        for _ in range(n_critic): # 5*discriminator times, 1*generator of times
            disc_loss += train_discriminator(data, label, generator,
                                         discriminator, disc_opt, latent_dim)

#         if disc_opt.iterations.numpy() % n_critic == 0:
        gen_loss+= train_generator(data, label, generator,
                                        discriminator, gen_opt, params, batch_size, latent_dim)
    
    losses_gen= np.append(losses_gen, gen_loss / batch_size)
    losses_dis= np.append(losses_dis, disc_loss / (batch_size*n_critic))
    print('Time for epoch {} is {} sec - gen_loss = {}, disc_loss = {}'.format(epoch + 1, time.time() - start,
                                                                               gen_loss / batch_size,
                                                                               disc_loss / (batch_size*n_critic)))
    # save best discriminator or generator
    if abs(best_loss_gen) > abs(gen_loss / batch_size):
        best_loss_gen = (gen_loss / batch_size)
        generator.save_weights(checkpoint_prefix+"gen", save_format='tf')
        
    if abs(best_loss_dis) > abs(disc_loss / (batch_size*n_critic)):
        best_loss_dis = (disc_loss / (batch_size*n_critic))
        discriminator.save_weights(checkpoint_prefix+"dis", save_format='tf')
        



To change all layers to have dtype float32 by default, call `tf.keras.backend.set_floatx('float32')`. To change just this layer, pass dtype='float32' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Time for epoch 1 is 23.614110469818115 sec - gen_loss = 79.8617259788368, disc_loss = 266.4508589247705
Time for epoch 2 is 1.6561033725738525 sec - gen_loss = 80.65007720600656, disc_loss = 189.50135236592843
Time for epoch 3 is 1.8201136589050293 sec - gen_loss = 76.19377535319634, disc_loss = 136.94581991924247
Time for epoch 4 is 1.9521222114562988 sec - gen_loss = 76.34106080283873, disc_loss = 98.9377295331921
Time for epoch 5 is 2.0961310863494873 sec - gen_loss = 76.75109614752908, disc_loss = 72.52977266306999
Time for epoch 6 is 2.360147476196289 sec - gen_loss = 70.43104053063168, disc_loss = 53.81337803110497
Time for epoch 7 is 2.8601787090301514 sec - gen_loss = 70.60724799826

Time for epoch 71 is 0.7960500717163086 sec - gen_loss = 30.5944305475934, disc_loss = -0.008273305549353627
Time for epoch 72 is 0.8280515670776367 sec - gen_loss = 30.571309184864468, disc_loss = -0.009932280448531098
Time for epoch 73 is 0.7680480480194092 sec - gen_loss = 30.591769412288368, disc_loss = -0.00978338990239081
Time for epoch 74 is 0.8600537776947021 sec - gen_loss = 30.40931252120938, disc_loss = -0.011142124492327685
Time for epoch 75 is 0.852053165435791 sec - gen_loss = 30.726754263442324, disc_loss = -0.011481748112331075
Time for epoch 76 is 0.8280518054962158 sec - gen_loss = 30.771282392766903, disc_loss = -0.01280510030746931
Time for epoch 77 is 0.8080503940582275 sec - gen_loss = 30.808356088722736, disc_loss = -0.011849472736296668
Time for epoch 78 is 0.836052656173706 sec - gen_loss = 30.266480901256838, disc_loss = -0.013456954727739973
Time for epoch 79 is 0.8640537261962891 sec - gen_loss = 30.61852186625696, disc_loss = -0.014462276672782295
Time for 

Time for epoch 145 is -9.225577354431152 sec - gen_loss = 27.825704420062383, disc_loss = -0.002065479585863564
Time for epoch 146 is 1.596099853515625 sec - gen_loss = 27.759782265667035, disc_loss = -0.0019611479599483876
Time for epoch 147 is 1.5840990543365479 sec - gen_loss = 27.84874989707889, disc_loss = -0.002380747437626397
Time for epoch 148 is 1.8281142711639404 sec - gen_loss = 27.470162772632566, disc_loss = -0.0021606967968925933
Time for epoch 149 is 2.0401275157928467 sec - gen_loss = 27.35401138825831, disc_loss = -0.0026272559238942625
Time for epoch 150 is 1.980123519897461 sec - gen_loss = 27.55864346061911, disc_loss = -0.0021862806961601444
Time for epoch 151 is 2.340146541595459 sec - gen_loss = 27.285077474856934, disc_loss = -0.001965970795318707
Time for epoch 152 is 2.716169834136963 sec - gen_loss = 27.823293272857562, disc_loss = -0.0019197301602099075
Time for epoch 153 is 3.492218017578125 sec - gen_loss = 27.445586018460208, disc_loss = -0.00177527145902

Time for epoch 219 is 0.844052791595459 sec - gen_loss = 24.625784409121536, disc_loss = -0.0029051542485269027
Time for epoch 220 is 0.8920557498931885 sec - gen_loss = 24.665392106702697, disc_loss = -0.002896886903004422
Time for epoch 221 is 0.8680543899536133 sec - gen_loss = 25.049780683947525, disc_loss = -0.002990089026724583
Time for epoch 222 is 0.8320519924163818 sec - gen_loss = 24.54395923274279, disc_loss = -0.0029756750520801485
Time for epoch 223 is 0.9280576705932617 sec - gen_loss = 24.48098262826428, disc_loss = -0.0029990992484426595
Time for epoch 224 is 0.8200514316558838 sec - gen_loss = 24.9043443529044, disc_loss = -0.003104384048850426
Time for epoch 225 is 0.9040563106536865 sec - gen_loss = 24.318774273323967, disc_loss = -0.002893414345239


## Show the training results

In [None]:
plt.title("cADS-GAN training Loss")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.grid()
plt.plot(losses_gen, label='Generator')
plt.plot(losses_dis, label='Discriminator')
plt.legend(loc='best')
plt.savefig("./cADS-GAN_LOSS.png",dpi=300)
plt.show()

In [None]:
## create matrix 0 row*latent_dim columns
arr=np.empty((0,latent_dim))
noise = tf.random.normal([128, latent_dim])

## slice the label and testing dataset
X_test_data = X_test.iloc[:, :-1].to_numpy()
X_test_labels = X_test.iloc[:, -1].to_numpy().reshape(-1, 1)

## batch testing data
testing_dataset = tf.data.Dataset.from_tensor_slices((X_test_data, X_test_labels))\
        .shuffle(buffer_size).batch(batch_size, drop_remainder=True)

## generate dataset
for data, label in testing_dataset:
    gen_=generator(noise, data).numpy()
    arr=np.append(arr,gen_,axis=0)
arr.shape

In [None]:
output_dataset = pd.DataFrame(np.round(sc.inverse_transform(arr)), columns=[
    'BT_NM', 'HR_NM', 'RR_NM', 'HB_NM', 'HCT_NM', 'PLATELET_NM', 'WBC_NM',
    'PTT1_NM', 'PTT2_NM', 'PTINR_NM', 'ER_NM', 'BUN_NM', 'CRE_NM', 'BMI',
    'age', 'PPD', 'THDA_FL', 'THDH_FL', 'THDI_FL', 'THDAM_FL', 'THDV_FL',
    'THDE_FL', 'THDM_FL', 'THDR_FL', 'THDP_FL', 'THDOO_FL', 'Gender',
    'cortical_ACA_ctr', 'cortical_MCA_ctr', 'subcortical_ACA_ctr',
    'subcortical_MCA_ctr', 'PCA_cortex_ctr', 'thalamus_ctr',
    'brainstem_ctr', 'cerebellum_ctr', 'Watershed_ctr',
    'Hemorrhagic_infarct_ctr', 'cortical_ACA_ctl', 'cortical_MCA_ctl',
    'subcortical_ACA_ctl', 'subcortical_MCA_ctl', 'PCA_cortex_ctl',
    'thalamus_ctl', 'brainstem_ctl', 'cerebellum_ctl', 'Watershed_ctl',
    'Hemorrhagic_infarct_ctl', 'NIHS_1a_in', 'NIHS_1b_in', 'NIHS_1c_in',
    'NIHS_2_in', 'NIHS_3_in', 'NIHS_4_in', 'NIHS_5aL_in', 'NIHS_5bR_in',
    'NIHS_6aL_in', 'NIHS_6bR_in', 'NIHS_7_in', 'NIHS_8_in', 'NIHS_9_in',
    'NIHS_10_in', 'NIHS_11_in','elapsed_class'
])
output_dataset

In [None]:
output_dataset.to_csv("../dataset/output_dataset/cADS-GAN_.csv",encoding='utf_8_sig')

In [None]:
X_test_dataset = pd.DataFrame(np.round(sc.inverse_transform(X_test)), columns=[
    'BT_NM', 'HR_NM', 'RR_NM', 'HB_NM', 'HCT_NM', 'PLATELET_NM', 'WBC_NM',
    'PTT1_NM', 'PTT2_NM', 'PTINR_NM', 'ER_NM', 'BUN_NM', 'CRE_NM', 'BMI',
    'age', 'PPD', 'THDA_FL', 'THDH_FL', 'THDI_FL', 'THDAM_FL', 'THDV_FL',
    'THDE_FL', 'THDM_FL', 'THDR_FL', 'THDP_FL', 'THDOO_FL', 'Gender',
    'cortical_ACA_ctr', 'cortical_MCA_ctr', 'subcortical_ACA_ctr',
    'subcortical_MCA_ctr', 'PCA_cortex_ctr', 'thalamus_ctr',
    'brainstem_ctr', 'cerebellum_ctr', 'Watershed_ctr',
    'Hemorrhagic_infarct_ctr', 'cortical_ACA_ctl', 'cortical_MCA_ctl',
    'subcortical_ACA_ctl', 'subcortical_MCA_ctl', 'PCA_cortex_ctl',
    'thalamus_ctl', 'brainstem_ctl', 'cerebellum_ctl', 'Watershed_ctl',
    'Hemorrhagic_infarct_ctl', 'NIHS_1a_in', 'NIHS_1b_in', 'NIHS_1c_in',
    'NIHS_2_in', 'NIHS_3_in', 'NIHS_4_in', 'NIHS_5aL_in', 'NIHS_5bR_in',
    'NIHS_6aL_in', 'NIHS_6bR_in', 'NIHS_7_in', 'NIHS_8_in', 'NIHS_9_in',
    'NIHS_10_in', 'NIHS_11_in','elapsed_class'
])
X_test_dataset

In [None]:
X_test_dataset.to_csv("../dataset/output_dataset/cADS-GAN_xtest_.csv",encoding='utf_8_sig')