In [1]:
import pandas as pd 
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import os, sys, time
sys.path.append("..")
from all_funcs import util
from model import Generator, Discriminator, train_discriminator, train_generator
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

# Use float64 as the default float type for Keras layers and backend ops.
keras.backend.set_floatx("float64")

# Let pandas show up to 500 rows and 500 columns before truncating a display.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

In [2]:
from numba import cuda
# Restrict this process to the CUDA device with index 1.
# NOTE(review): this only takes effect if it runs before TensorFlow first
# initialises its GPU devices, i.e. before the device query below.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices
# is the documented replacement. Prints True when at least one GPU is visible.
print(bool(tf.config.list_physical_devices("GPU")))

True


In [3]:
# Read the CSV (its first column becomes the row index) and pass it straight
# through util.FeatureArrange, which returns the arranged frame plus two more
# values bound here as imp_mode and imp_mean.
# NOTE(review): presumably the fitted mode/mean imputers — confirm in all_funcs.util.
(df, imp_mode, imp_mean) = util.FeatureArrange(
    pd.read_csv("../dataset/df_noOutliner_ana.csv", index_col=0)
)

In [4]:
# Remove redundant features (per the original note: ones that can be assembled
# from the remaining columns) before modelling. `df` itself is left untouched.
redundant_columns = [
    'NIHTotal', 'THD_ID',
    'cortical_CT', 'subcortical_CT', 'circulation_CT',
    'CT_find', 'watershed_CT', 'Hemorrhagic_infarct_CT',
    'CT_left', 'CT_right',
]
dataset = df.drop(columns=redundant_columns)

In [5]:
# Rescale every column to the [0, 1] range. `sc` is kept because a later cell
# calls sc.inverse_transform to map generated samples back to original units.
#
# The scaled values are placed in a fresh float DataFrame instead of being
# written back with `dataset.loc[:, cols] = ...`: assigning floats in place
# into integer-typed columns relies on silent upcasting, which newer pandas
# versions warn about.
#
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split performed later in the notebook, so test-set min/max leak
# into the scaling. Also, re-running this cell refits `sc` on already-scaled
# data; re-run the cell that builds `dataset` first.
sc = MinMaxScaler()
dataset = pd.DataFrame(
    sc.fit_transform(dataset),
    columns=dataset.columns,
    index=dataset.index,
)


In [6]:
## Hyperparameters

# Noise vector width: one entry per column of the scaled dataset.
latent_dim = len(dataset.columns)

# Training schedule.
epochs = 15000
batch_size = 128
n_critic = 5          # generator is updated when the critic step count is a multiple of this

# Shuffle buffer handed to tf.data.Dataset.shuffle.
buffer_size = 6000

# save_interval = 50

# Directory under which checkpoint files are prefixed.
checkpoint_dir = './training_checkpoints'


In [7]:
# Instantiate both networks with their default constructor arguments
# (generator first, then discriminator).
generator, discriminator = Generator(), Discriminator()

In [8]:
## Binary cross-entropy loss object operating on raw logits (no sigmoid applied
## by the caller). It is not referenced by any other cell shown in this notebook.
cross_entropy = keras.losses.BinaryCrossentropy(from_logits=True)

In [9]:
# Adam optimizers for the two networks; the discriminator's learning rate is
# ten times smaller than the generator's.
gen_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
disc_opt = tf.keras.optimizers.Adam(learning_rate=1e-5)

In [10]:
# Checkpoint object tracking both networks and both optimizers.
# Nothing in the cells shown here calls checkpoint.save(); checkpoint_prefix is
# the file prefix such a call would use.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(**{
    "generator_optimizer": gen_opt,
    "discriminator_optimizer": disc_opt,
    "generator": generator,
    "discriminator": discriminator,
})

In [11]:
# Hold out 20% of the rows, stratified on 'elapsed_class' so both splits keep
# the same class ratio. A fixed seed makes the split (and therefore everything
# evaluated on X_test) reproducible across kernel restarts; change SPLIT_SEED
# to draw a different split.
SPLIT_SEED = 42
X_train, X_test = train_test_split(
    dataset,
    test_size=0.2,
    shuffle=True,
    stratify=dataset['elapsed_class'],
    random_state=SPLIT_SEED,
)

In [None]:
# Split the training rows by class label. The comparisons against 1 and 0
# rely on 'elapsed_class' still being exactly 0/1 after min-max scaling.
data_1 = X_train.loc[X_train['elapsed_class'] == 1]
data_0 = X_train.loc[X_train['elapsed_class'] == 0]

# Each epoch undersamples the larger class down to the size of the smaller
# one. Picking the roles from the actual sizes (instead of always sampling
# data_1) avoids the ValueError DataFrame.sample raises when asked for more
# rows than it has, i.e. when class 1 is the smaller class.
if len(data_1) >= len(data_0):
    majority_rows, minority_rows = data_1, data_0
else:
    majority_rows, minority_rows = data_0, data_1

for epoch in range(epochs):
    start = time.time()
    disc_loss = 0
    gen_loss = 0

    # Draw a fresh class-balanced sample for this epoch.
    balanced_rows = pd.concat(
        [majority_rows.sample(len(minority_rows)), minority_rows]
    ).to_numpy()

    # Shuffle and cut into fixed-size batches; an incomplete final batch is dropped.
    training_dataset = (
        tf.data.Dataset.from_tensor_slices(balanced_rows)
        .shuffle(buffer_size)
        .batch(batch_size, drop_remainder=True)
    )

    for data in training_dataset:
        # The discriminator is trained on every batch.
        disc_loss += train_discriminator(data, generator,
                                         discriminator, disc_opt, latent_dim)

        # The generator is trained only when the discriminator optimizer's
        # step counter is a multiple of n_critic.
        # NOTE(review): assumes train_discriminator applies disc_opt exactly
        # once per call — confirm in model.py.
        if disc_opt.iterations.numpy() % n_critic == 0:
            gen_loss += train_generator(data, generator,
                                        discriminator, gen_opt, batch_size, latent_dim)

    # NOTE(review): the accumulated losses are divided by batch_size (and
    # n_critic), not by the number of steps taken this epoch — confirm this
    # normalisation against what the train_* helpers return.
    print('Time for epoch {} is {} sec - gen_loss = {}, disc_loss = {}'.format(
        epoch + 1,
        time.time() - start,
        gen_loss / batch_size,
        disc_loss / (batch_size * n_critic)))



To change all layers to have dtype float32 by default, call `tf.keras.backend.set_floatx('float32')`. To change just this layer, pass dtype='float32' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Time for epoch 1 is 1.1760735511779785 sec - gen_loss = 12.795087008656301, disc_loss = 19.20398057355596
Time for epoch 2 is 0.012000799179077148 sec - gen_loss = 16.794644976006722, disc_loss = 7.808156999994141
Time for epoch 3 is 0.028001785278320312 sec - gen_loss = 16.934784935025633, disc_loss = 3.3722251809084915
Time for epoch 4 is 0.012000799179077148 sec - gen_loss = 16.857927715284305, disc_loss = 1.6323829913681256
Time for epoch 5 is 0.008000373840332031 sec - gen_loss = 15.845829755941852, disc_loss = 0.8492072985687944
Time for epoch 6 is 0.012000799179077148 sec - gen_loss = 12.849808108766398, disc_loss = 0.48945861703641347
Time for epoch 7 is 0.008000612258911133 sec - g

Time for epoch 73 is 0.012000799179077148 sec - gen_loss = 8.011470051890614, disc_loss = -0.001793271812827351
Time for epoch 74 is 0.012000799179077148 sec - gen_loss = 7.538464063040637, disc_loss = -0.0018648952612608065
Time for epoch 75 is 0.012000799179077148 sec - gen_loss = 7.651401727357525, disc_loss = -0.0019177239411713285
Time for epoch 76 is 0.012000799179077148 sec - gen_loss = 5.965423494153471, disc_loss = -0.0016266143379238041
Time for epoch 77 is 0.008000373840332031 sec - gen_loss = 7.541047995028052, disc_loss = -0.0017823417009251354
Time for epoch 78 is 0.012000799179077148 sec - gen_loss = 7.224423386847784, disc_loss = -0.0019757287369547715
Time for epoch 79 is 0.012000799179077148 sec - gen_loss = 7.992927003312695, disc_loss = -0.0018855128435650108
Time for epoch 80 is 0.008000373840332031 sec - gen_loss = 7.717462977156025, disc_loss = -0.001676972228607761
Time for epoch 81 is 0.008000612258911133 sec - gen_loss = 5.306684803206478, disc_loss = -0.00189

Time for epoch 146 is 0.00800013542175293 sec - gen_loss = 5.0562042661809175, disc_loss = -0.0013045960859679965
Time for epoch 147 is 0.012000799179077148 sec - gen_loss = 6.876156576275736, disc_loss = -0.0013671132576620552
Time for epoch 148 is 0.012000799179077148 sec - gen_loss = 6.447655498486049, disc_loss = -0.0012832221277924627
Time for epoch 149 is 0.008000373840332031 sec - gen_loss = 6.311893543366526, disc_loss = -0.0013892822815775652
Time for epoch 150 is 0.008000612258911133 sec - gen_loss = 6.433909603295682, disc_loss = -0.0012693774571107001
Time for epoch 151 is 0.012000799179077148 sec - gen_loss = 5.321931166115407, disc_loss = -0.0013998097745946364
Time for epoch 152 is 0.008000373840332031 sec - gen_loss = 6.927058322604109, disc_loss = -0.0013517741178829137
Time for epoch 153 is 0.008000612258911133 sec - gen_loss = 6.18301652135553, disc_loss = -0.0013240176623692747
Time for epoch 154 is 0.016000986099243164 sec - gen_loss = 6.367452459982686, disc_loss 

In [None]:
# Batch the held-out rows the same way as the training data: shuffled,
# fixed-size batches, incomplete final batch dropped.
testing_dataset = (
    tf.data.Dataset.from_tensor_slices(X_test.to_numpy())
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

# Only the first batch is used. With drop_remainder=True there is no batch at
# all when X_test has fewer than batch_size rows; fail here with a clear
# message rather than leaving `gen_` undefined for the cells below.
data = next(iter(testing_dataset), None)
if data is None:
    raise ValueError(
        "X_test has fewer rows than batch_size ({}); no full batch is "
        "available for generation".format(batch_size)
    )

# One noise vector per row of the batch (batch_size, not a hard-coded 128).
noise = tf.random.normal([batch_size, latent_dim])

# The generator receives the noise together with the real test batch.
# NOTE(review): how `data` is used as the conditioning input is defined in
# model.Generator — confirm there.
gen_ = generator(noise, data).numpy()

In [None]:
# gen_=arr.reshape(-1,62)

In [None]:
# Map the generated samples back to the original feature units and round
# every value to the nearest whole number.
# NOTE(review): the rounding is applied to all columns, including ones that
# look like continuous measurements — confirm that is intended.
generated_rows = np.round(sc.inverse_transform(gen_))

# Label the columns from `dataset` itself: `sc` was fitted on `dataset`, so
# inverse_transform returns columns in exactly that order. Taking the names
# from the frame keeps labels and values aligned if the feature set changes,
# which a hand-maintained list of names cannot guarantee.
output_dataset = pd.DataFrame(generated_rows, columns=dataset.columns)
output_dataset

In [None]:
# output_dataset.to_csv("../dataset/output_dataset/cWGAN_1d_2_models.csv",encoding='utf_8_sig')

In [None]:
# output_dataset = pd.DataFrame(np.round(sc.inverse_transform(data)), columns=[
#     'BT_NM', 'HR_NM', 'RR_NM', 'HB_NM', 'HCT_NM', 'PLATELET_NM', 'WBC_NM',
#     'PTT1_NM', 'PTT2_NM', 'PTINR_NM', 'ER_NM', 'BUN_NM', 'CRE_NM', 'BMI',
#     'age', 'PPD', 'THDA_FL', 'THDH_FL', 'THDI_FL', 'THDAM_FL', 'THDV_FL',
#     'THDE_FL', 'THDM_FL', 'THDR_FL', 'THDP_FL', 'THDOO_FL', 'Gender',
#     'cortical_ACA_ctr', 'cortical_MCA_ctr', 'subcortical_ACA_ctr',
#     'subcortical_MCA_ctr', 'PCA_cortex_ctr', 'thalamus_ctr',
#     'brainstem_ctr', 'cerebellum_ctr', 'Watershed_ctr',
#     'Hemorrhagic_infarct_ctr', 'cortical_ACA_ctl', 'cortical_MCA_ctl',
#     'subcortical_ACA_ctl', 'subcortical_MCA_ctl', 'PCA_cortex_ctl',
#     'thalamus_ctl', 'brainstem_ctl', 'cerebellum_ctl', 'Watershed_ctl',
#     'Hemorrhagic_infarct_ctl', 'NIHS_1a_in', 'NIHS_1b_in', 'NIHS_1c_in',
#     'NIHS_2_in', 'NIHS_3_in', 'NIHS_4_in', 'NIHS_5aL_in', 'NIHS_5bR_in',
#     'NIHS_6aL_in', 'NIHS_6bR_in', 'NIHS_7_in', 'NIHS_8_in', 'NIHS_9_in',
#     'NIHS_10_in', 'NIHS_11_in','elapsed_class'
# ])
# output_dataset