In [1]:
import tensorflow as tf
import numpy as np
from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras import backend as K
from keras.utils import multi_gpu_model
from keras.callbacks import ModelCheckpoint
from keras.objectives import binary_crossentropy

import pandas as pd
from scipy import sparse

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


# Load dataset

In [15]:
# Location of the training matrix on disk. The file name suggests one row per
# patient stored in CSR format -- TODO confirm against the preprocessing step.
TRAIN_DATA_PATH = "/home/jl5307/current_research/tutorial/sparse_patient/train_csr_patient.npz"

# load_npz returns a SciPy sparse matrix; it is not densified here.
train_data = sparse.load_npz(TRAIN_DATA_PATH)

# Keras Modeling

In [16]:
def sampling(args):
    """Draw a latent sample z = mu + sigma * eps, with eps ~ N(0, I).

    Writing the sample as a deterministic function of (mu, log sigma^2) plus
    independent noise keeps the draw differentiable with respect to the
    encoder outputs (the reparameterization trick).

    # Arguments
        args: pair ``(z_mean, z_log_var)`` -- mean and log-variance of Q(z|X).
            The code indexes ``shape[0]`` and ``shape[1]``, so both are
            expected to be rank-2 (batch, latent_dim) tensors.

    # Returns
        z: sampled latent tensor with the same shape as ``z_mean``.
    """
    mu, log_sigma_sq = args

    # The batch size is only known at run time, hence the dynamic K.shape;
    # the latent width is static and read from the graph-time shape.
    n_rows = K.shape(mu)[0]
    n_latent = K.int_shape(mu)[1]

    # Standard-normal noise (mean 0 by default); stddev is spelled out so it
    # is easy to tune if unit variance turns out not to work.
    noise = K.random_normal(shape=(n_rows, n_latent), stddev=1)

    # exp(0.5 * log sigma^2) == sigma
    sigma = K.exp(0.5 * log_sigma_sq)
    return mu + sigma * noise

In [17]:
# Model hyperparameters.
# original_dim: width of one input row (number of input features).
# input_shape:  the same width as the 1-tuple that keras.layers.Input expects.
# latent_dim:   size of the latent code z.
original_dim = 17361
input_shape = (original_dim,)
latent_dim = 100

In [18]:
# Encoder graph for Q(z|X): two tanh hidden layers (500 -> 200 units) feeding
# two parallel linear heads of width latent_dim.
# NOTE: the unnamed Dense layers get auto-generated names (dense_N), so the
# creation order below is kept exactly as it was.
inputs = Input(name='encoder_input', shape=input_shape)
x_1 = Dense(units=500, activation='tanh')(inputs)
x_2 = Dense(units=200, activation='tanh')(x_1)

# Both heads read the same 200-unit hidden representation and have no
# activation, so the mean and log-variance are unconstrained.
z_mean = Dense(units=latent_dim, name='z_mean')(x_2)
z_log_var = Dense(units=latent_dim, name='z_log_var')(x_2)

# Turn (mean, log-variance) into one latent sample via `sampling`.
z = Lambda(function=sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

In [19]:
# Wrap the encoder graph in a Model with three outputs, in this order:
# index 0 = z_mean, index 1 = z_log_var, index 2 = sampled z.
encoder = Model(inputs=inputs, outputs=[z_mean, z_log_var, z], name='encoder')

# Print the layer table as a sanity check on shapes and parameter counts.
encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 17361)        0                                            
__________________________________________________________________________________________________
dense_7 (Dense)                 (None, 500)          8681000     encoder_input[0][0]              
__________________________________________________________________________________________________
dense_8 (Dense)                 (None, 200)          100200      dense_7[0][0]                    
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 100)          20100       dense_8[0][0]                    
__________________________________________________________________________________________________
z_log_var 

In [7]:
# Decoder graph for P(X|z): three tanh layers that widen the latent code
# (200 -> 500 -> 5000 units), then a softmax over all original_dim features.
# NOTE: the unnamed Dense layers get auto-generated names (dense_N), so the
# creation order below is kept exactly as it was.
latent_inputs = Input(name='z_sampling', shape=(latent_dim,))
output_1 = Dense(units=200, activation='tanh')(latent_inputs)
output_2 = Dense(units=500, activation='tanh')(output_1)
output_3 = Dense(units=5000, activation='tanh')(output_2)

# Softmax makes each output row a probability distribution over the features.
softmax_val = Dense(units=original_dim, activation="softmax")(output_3)

In [8]:
# Wrap the decoder graph in a stand-alone Model mapping a latent vector to
# per-feature softmax probabilities.
decoder = Model(inputs=latent_inputs, outputs=softmax_val, name='decoder')

# Print the layer table as a sanity check on shapes and parameter counts.
decoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 200)               20200     
_________________________________________________________________
dense_4 (Dense)              (None, 500)               100500    
_________________________________________________________________
dense_5 (Dense)              (None, 5000)              2505000   
_________________________________________________________________
dense_6 (Dense)              (None, 17361)             86822361  
Total params: 89,448,061
Trainable params: 89,448,061
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Chain encoder and decoder into the end-to-end VAE.
# The encoder returns [z_mean, z_log_var, z]; only the sampled z (index 2)
# is passed on to the decoder.
sampled_z = encoder(inputs)[2]
softmax_val = decoder(sampled_z)  # rebinds softmax_val to the VAE output tensor

# Template (single-device) model. It shares its weights with the replicated
# model created below.
multi_vae = Model(inputs=inputs, outputs=softmax_val, name='vae_mlp')

# Data-parallel replica across 3 GPUs; this is the model that gets compiled
# and fit.
parallel_multi_vae = multi_gpu_model(multi_vae, gpus=3)

In [10]:
def vae_loss(inputs, softmax_val, beta=0.5):
    """VAE objective: multinomial reconstruction NLL plus a weighted KL term.

    Keras calls this as ``loss(y_true, y_pred)``, so ``beta`` keeps its
    default unless the function is wrapped or called directly.

    # Arguments
        inputs: target tensor (y_true). Assumed to be a (batch, original_dim)
            matrix of non-negative counts/indicators -- TODO confirm.
        softmax_val: decoder output (y_pred), per-feature probabilities
            produced by a softmax layer.
        beta: weight applied to the KL term. The default of 0.5 reproduces
            the previously hard-coded factor.

    # Returns
        Scalar tensor: batch-mean negative log-likelihood + beta * batch-mean KL.
    """
    # Floor the probabilities before taking the log. A softmax output that
    # underflows to exactly 0 gives log = -inf, and 0 * -inf (target is 0 at
    # that position) is NaN, which would poison the whole loss.
    log_softmax_var = K.log(K.clip(softmax_val, K.epsilon(), 1.0))
    negative_ll = -K.mean(K.sum(log_softmax_var * inputs, axis=-1))

    # KL(Q(z|X) || N(0, I)) in closed form, summed over latent dimensions.
    # z_mean and z_log_var are NOT arguments: they are the module-level
    # encoder output tensors, so this loss only works with that encoder.
    kl_per_sample = 0.5 * K.sum(
        -1 - z_log_var + K.square(z_mean) + K.exp(z_log_var), axis=1)
    kl_loss = K.mean(kl_per_sample)

    return negative_ll + beta * kl_loss

In [11]:
# Compile the multi-GPU replica (not the template model) with the custom
# VAE loss and the Adam optimizer at its default settings.
parallel_multi_vae.compile(
    loss=vae_loss,
    optimizer='adam',
)

In [12]:
class TemplateModelCheckpoint(ModelCheckpoint):
    """ModelCheckpoint that always saves a fixed template model.

    The callback is handed to ``fit`` on the ``multi_gpu_model`` wrapper, so a
    plain ModelCheckpoint would try to save that wrapper. Keras documents that
    saving must go through the template model (the one passed to
    ``multi_gpu_model``); the template shares its weights with the replicas.

    # Arguments
        template_model: the single-device model whose state should be saved.
        *args, **kwargs: forwarded unchanged to ``ModelCheckpoint``.
    """

    def __init__(self, template_model, *args, **kwargs):
        super(TemplateModelCheckpoint, self).__init__(*args, **kwargs)
        self.template_model = template_model

    def set_model(self, model):
        # Ignore the model passed in by fit() and bind to the template
        # instead, so save() acts on it.
        super(TemplateModelCheckpoint, self).set_model(self.template_model)


# Save after every epoch (period=1). `monitor`/`mode` only matter for the log
# message and the {loss} placeholder here, because save_best_only is left at
# its default.
checkpoint = TemplateModelCheckpoint(
    multi_vae,
    filepath="/home/jl5307/current_research/tutorial/checkpoint_VAE/VAE_beta/weights_{epoch:02d}_{loss:.2f}.hdf5",
    monitor="loss",
    verbose=1,
    period=1,
    mode="min")

In [13]:
# Train as an autoencoder: the same matrix is both input and target.
# The checkpoint callback runs at the end of each epoch.
parallel_multi_vae.fit(
    x=train_data,
    y=train_data,
    batch_size=256,
    epochs=10,
    callbacks=[checkpoint],
)

Epoch 1/20
  27648/1414308 [..............................] - ETA: 31:48 - loss: 229.4759

KeyboardInterrupt: 