In [1]:
import pandas as pd 
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import os, sys, time
sys.path.append("..")
from all_funcs import util
from model import Generator, Discriminator, train_discriminator, train_generator
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Make float64 the default Keras float type for everything built below.
tf.keras.backend.set_floatx('float64')

# Raise both pandas display limits to 500 so wide/long frames are not truncated early.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

In [2]:
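# Optional GPU selection, currently disabled. The `True` shown below is output saved from an earlier run.
# from numba import cuda
# os.environ['CUDA_VISIBLE_DEVICES']="1"
# print(tf.test.is_gpu_available())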

True


In [3]:
# Read the source CSV (its first column becomes the index) and pass it straight
# to the project's FeatureArrange helper. The helper returns the arranged frame
# plus two further objects, kept here as imp_mode and imp_mean.
source_csv = "../dataset/df_noOutliner_ana.csv"
df, imp_mode, imp_mean = util.FeatureArrange(pd.read_csv(source_csv, index_col=0))

In [4]:
## Remove redundant features that can be assembled from the remaining ones
redundant_features = [
    'NIHTotal', 'THD_ID', 'cortical_CT', 'subcortical_CT',
    'circulation_CT', 'CT_find', 'watershed_CT', 'Hemorrhagic_infarct_CT',
    'CT_left', 'CT_right',
]
dataset = df.drop(columns=redundant_features)

In [5]:
# Scale every column into [0, 1] with a MinMaxScaler. `sc` is kept because
# later cells use it to map values back to their original units.
#
# The scaled values go into a freshly built DataFrame instead of being written
# back through `.loc[:, cols] = ...`. That in-place form relies on pandas
# silently upcasting non-float columns, which recent pandas versions warn about.
# Column labels and the row index are carried over unchanged.
#
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later, so min/max statistics of the held-out rows leak into
# training. Confirm whether that is acceptable for this experiment.
sc = MinMaxScaler()
dataset = pd.DataFrame(sc.fit_transform(dataset),
                       columns=dataset.columns, index=dataset.index)
sc.get_params()

{'copy': True, 'feature_range': (0, 1)}

In [6]:
## Training hyper-parameters
n_critic = 5                      # discriminator updates per generator update
epochs, batch_size = 15000, 128
buffer_size = 6000                # shuffle buffer size handed to tf.data
latent_dim = dataset.shape[1]     # one latent dimension per dataset column
checkpoint_dir = './training_checkpoints'
# save_interval = 50              (disabled, not used below)


In [7]:
# Build both networks from model.py; only the generator is given the latent width.
generator, discriminator = Generator(latent_dim), Discriminator()

In [8]:
## Binary cross-entropy configured to receive raw logits
loss_options = {"from_logits": True}
cross_entropy = tf.keras.losses.BinaryCrossentropy(**loss_options)

In [9]:
# Adam for both networks; the discriminator's learning rate is one tenth of the generator's.
gen_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
disc_opt = tf.keras.optimizers.Adam(learning_rate=1e-5)

In [10]:
# Checkpoint bookkeeping: files live under checkpoint_dir and share the "ckpt" prefix.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

# Objects tracked by the checkpoint: both optimizers and both networks.
tracked_objects = dict(
    generator_optimizer=gen_opt,
    discriminator_optimizer=disc_opt,
    generator=generator,
    discriminator=discriminator,
)
checkpoint = tf.train.Checkpoint(**tracked_objects)

In [11]:
# Hold out 20% of the scaled rows, stratified on `elapsed_class` so both splits
# keep the same class proportions.
# The seed is fixed so the split is identical on every run. X_test is exported
# to CSV further down, and with random_state=None that file would change each
# time the notebook is executed.
X_train, X_test = train_test_split(
    dataset,
    test_size=0.2,
    shuffle=True,
    stratify=dataset['elapsed_class'],
    random_state=42,
)

In [12]:
# Split the training rows by class label so every epoch can train on a
# balanced set: len(data_0) rows drawn from class 1 plus all class-0 rows.
data_1 = X_train.loc[X_train['elapsed_class'] == 1]
data_0 = X_train.loc[X_train['elapsed_class'] == 0]

# Each epoch sees 2 * len(data_0) rows and drops the incomplete last batch, so
# the number of optimisation batches per epoch is fixed and known up front.
n_batches = (2 * len(data_0)) // batch_size
if n_batches == 0:
    # Without this guard an epoch would run zero steps, report a loss of 0 and
    # store the untrained weights as the "best" checkpoint.
    raise ValueError(
        'Balanced training set has {} rows, fewer than batch_size={}'.format(
            2 * len(data_0), batch_size))

# Sample with replacement only when class 1 has fewer rows than class 0;
# otherwise DataFrame.sample would raise. With enough class-1 rows the draw is
# without replacement, exactly as before.
oversample = len(data_1) < len(data_0)

## per-epoch loss history and best value seen so far
### generator losses
losses_gen = np.array([])
best_loss_gen = np.inf
### discriminator losses
losses_dis = np.array([])
best_loss_dis = np.inf

# NOTE(review): the saved output of this cell shows train_generator (model.py)
# failing with "'Tensor' object has no attribute 'numpy'". That has to be fixed
# in model.py; nothing in this cell can work around it.
for epoch in range(epochs):
    start = time.time()
    disc_loss = 0
    gen_loss = 0

    # Redraw the class-1 subset every epoch so different rows are seen over time.
    data1_shape_0 = data_1.sample(n=len(data_0), replace=oversample)
    df_same_shape = pd.concat([data1_shape_0, data_0]).to_numpy()

    # Shuffle and cut into fixed-size batches (the incomplete tail is dropped).
    training_dataset = tf.data.Dataset.from_tensor_slices(df_same_shape)\
        .shuffle(buffer_size).batch(batch_size, drop_remainder=True)

    for data in training_dataset:
        # n_critic discriminator updates for every single generator update.
        for _ in range(n_critic):
            disc_loss += train_discriminator(data, generator,
                                             discriminator, disc_opt, latent_dim)
        gen_loss += train_generator(data, generator,
                                    discriminator, gen_opt, sc, batch_size, latent_dim)

    # Average over the number of update steps actually taken. The sums were
    # previously divided by batch_size (rows per batch), which is not the
    # number of terms accumulated above.
    # Assumes each train_* call returns one scalar loss per step - TODO confirm.
    epoch_gen_loss = gen_loss / n_batches
    epoch_disc_loss = disc_loss / (n_batches * n_critic)

    losses_gen = np.append(losses_gen, epoch_gen_loss)
    losses_dis = np.append(losses_dis, epoch_disc_loss)

    print('Time for epoch {} is {} sec - gen_loss = {}, disc_loss = {}'.format(epoch + 1, time.time() - start,
                                                                               epoch_gen_loss,
                                                                               epoch_disc_loss))

    # Keep the weights whose epoch loss is closest to zero for each network.
    if abs(best_loss_gen) > abs(epoch_gen_loss):
        best_loss_gen = epoch_gen_loss
        generator.save_weights(checkpoint_prefix+"gen", save_format='tf')

    if abs(best_loss_dis) > abs(epoch_disc_loss):
        best_loss_dis = epoch_disc_loss
        discriminator.save_weights(checkpoint_prefix+"dis", save_format='tf')
    



To change all layers to have dtype float32 by default, call `tf.keras.backend.set_floatx('float32')`. To change just this layer, pass dtype='float32' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



AttributeError: in converted code:

    /home/cheyu/data/digital-twins/ADS-GAN-constraint/model.py:95 train_generator  *
        sum_loss = gen_loss + \

    AttributeError: 'Tensor' object has no attribute 'numpy'


In [None]:
## TODO: record the final gen_loss and disc_loss values here (this note mentions 40000 epochs, but `epochs` is set to 15000 above).

## Show the training results

In [None]:
# Draw both per-epoch loss curves on one axes, save the figure at 300 dpi, then show it.
fig, ax = plt.subplots()
ax.plot(losses_gen, label='Generator')
ax.plot(losses_dis, label='Discriminator')
ax.set_title("ADS-GAN-constraint training Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.grid()
ax.legend(loc='best')
fig.savefig("./ADS-GAN-constraint_LOSS.png", dpi=300)
plt.show()

In [None]:
## Generate synthetic rows conditioned on the held-out test rows.

## batch testing data (only full batches are kept; the remainder is dropped)
testing_dataset = tf.data.Dataset.from_tensor_slices(X_test.to_numpy())\
        .shuffle(buffer_size).batch(batch_size, drop_remainder=True)

## generate dataset
# Start from an empty (0, latent_dim) block so the result is still a valid 2-D
# array when the test split is too small to yield a single full batch.
generated_batches = [np.empty((0, latent_dim))]
for data in testing_dataset:
    # Draw fresh noise for every batch. Reusing one noise tensor for all
    # batches would tie every batch to the same latent vectors.
    # The noise height follows batch_size instead of a hard-coded 128, and the
    # dtype is float64 to match the floatx set at the top of the notebook.
    noise = tf.random.normal([batch_size, latent_dim], dtype=tf.float64)
    generated_batches.append(generator(noise, data).numpy())

# One concatenation at the end instead of re-allocating the array per batch.
arr = np.concatenate(generated_batches, axis=0)
arr.shape

In [None]:
# Map the generated rows back to original units and label the columns.
# The labels come from `dataset.columns`, the frame the scaler was fitted on,
# so they follow the scaler's column order by construction. A hand-copied name
# list would have to be kept in sync with it manually.
# NOTE(review): np.round rounds every column to whole numbers, including any
# continuous measurements - confirm that this loss of precision is intended.
output_dataset = pd.DataFrame(np.round(sc.inverse_transform(arr)),
                              columns=dataset.columns)
output_dataset

In [None]:
# Write the generated table to disk using the utf_8_sig encoding.
generated_csv_path = "../dataset/output_dataset/ADS-GAN-constraint_models.csv"
output_dataset.to_csv(generated_csv_path, encoding='utf_8_sig')

In [None]:
# Map the held-out test rows back to original units and export them next to the
# generated data.
# The labels are taken from X_test itself, so they always match the order of the
# values passed to the scaler. A second hand-copied name list is not needed.
# As before, the result gets a fresh 0..n-1 index; X_test's own index is not kept.
# NOTE(review): np.round rounds every column to whole numbers - confirm intended.
X_TEST_dataset = pd.DataFrame(np.round(sc.inverse_transform(X_test)),
                              columns=X_test.columns)
X_TEST_dataset.to_csv("../dataset/output_dataset/ADS-GAN-constraint_XTEST.csv",encoding='utf_8_sig')