In [1]:
import sys
import tensorflow as tf
import time
import numpy as np
import os
import pandas as pd
import parser_arguments
from tensorflow.keras.layers.experimental import preprocessing
import loglik_models_missing_normalize

In [2]:
# args = parser_arguments.getArgs(sys.argv[1:])
args = parser_arguments.getArgs_jupyter_notebook()

In [3]:
if not os.path.exists('./Saved_Networks/'+args.save_file):
    os.makedirs('./Saved_Networks/'+args.save_file)

In [4]:
network_file_name = './Saved_Networks/' + args.save_file + '/' + args.save_file + '.ckpt'
log_file_name = './Saved_Networks' + args.save_file + '/log_file_' + args.save_file + '.txt'

In [5]:
print(args)

{'batch_size': 200, 'epochs': 5, 'perp': 10, 'train': 1, 'display': 1, 'save': 1000, 'restore': 0, 'plot': 1, 'dim_latent_s': 4, 'dim_latent_z': 2, 'dim_latent_y': 3, 'dim_latent_y_partition': [], 'miss_perentage_train': 0.0, 'miss_percentage_test': 0.0, 'model_name': 'model_new', 'save_file': 'breast_data_zdim5_ydim10_4images_', 'data_file': './Breast/data.csv', 'types_file': './Breast/data_types.csv', 'miss_file': './Breast/Missing40_4.csv', 'true_miss_file': ''}


In [6]:
# Read functions.read_data 
# Inputs: data_file, types_file, miss_file, true_miss_file

nan_definition = [-9999., 'nan']
# Read types of data from types_file
data_types = pd.read_csv(args.types_file)
types_dict = data_types.to_dict('records')


# Read data from the input file, compute the true missing mask and replace the true missing values with placeholder value
column_names = [item['type']+'_'+str(i) for i, item in enumerate(types_dict)]

data_df = pd.read_csv(args.data_file, header=None, na_values=nan_definition, names=column_names, dtype=np.float64)
filler_value = dict()
for col in column_names:
    if 'cat' in col or 'ordinal' in col: 
        filler_value[col] = np.unique(data_df[col])[0]
    else: 
        filler_value[col] = 0.0
        
data_df_filled = np.array(data_df.fillna(value=filler_value))

true_miss_mask = pd.notna(data_df).astype(int)

# Construct the data matrices
data_complete = []
for i in range(np.shape(data_df_filled)[1]):
    if types_dict[i]['type']=='cat':
        cat_data = [int(x) for x in data_df_filled[:, i]]
        _, indexes = np.unique(cat_data, return_inverse=True)
        # Transform categories to a vector of 0:n_categories
        new_categories = np.arange(int(types_dict[i]['dim']))
        cat_data = new_categories[indexes]
        # Create one-hot encoding for the categories 
        temp = np.zeros([np.shape(data_df_filled)[0], len(new_categories)])
        temp[np.arange(np.shape(data_df_filled)[0]), cat_data] = 1
        data_complete.append(temp)
    elif types_dict[i]['type']=='ordinal':
        cat_data = [int(x) for x in data_df_filled[:, i]]
        _, indexes = np.unique(cat_data, return_inverse=True)
        # Transform categories to a vector of 0:n_categories 
        new_categories = np.arange(int(types_dict[i]['dim']))
        cat_data = new_categories[indexes]
        # Create thermometer encoding for the categories 
        temp = np.zeros([np.shape(data_df_filled)[0], 1+len(new_categories)])
        temp[:, 0] = 1
        temp[np.arange(np.shape(data_df_filled)[0]), 1+cat_data] = -1
        temp = np.cumsum(temp, 1)
        data_complete.append(temp[:, :-1])
    elif types_dict[i]['type'] == 'count':
        if np.min(data_df_filled[:, i]) == 0:
            temp = data_df_filled[:, i] + 1
            data_complete.append(np.transpose([temp]))
        else:
            data_complete.append(np.transpose([data_df_filled[:, i]]))
    else:
        data_complete.append(np.transpose([data_df_filled[:, i]]))
data = np.concatenate(data_complete, 1)

# Read missing mask from .csv (contains positions of missing values)
n_samples = np.shape(data)[0]
n_variables = len(types_dict)
miss_mask = np.ones([np.shape(data)[0], n_variables])
# if there is no mask, assume all data is observed 
missing_positions = np.array(pd.read_csv(args.miss_file, header=None))       
miss_mask[missing_positions[:,0]-1, missing_positions[:,1]-1] = 0

# This cell returns: data, types_dict, miss_mask, true_miss_mask, n_samples

In [7]:
# get an integer number of batches 
if args.batch_size > n_samples:
    args.batch_size = n_samples
# Get an integer number of batches 
n_batches = int(np.floor(np.shape(data)[0]/args.batch_size))
# Compute the real miss_mask
real_miss_mask = np.array(np.multiply(miss_mask, true_miss_mask)).astype(float)


In [8]:
miss_mask.shape

(699, 10)

In [9]:
ftr_index = 0
normalized_data = []
normalization_parameters = []
for i in range(len(types_dict)):
    missing_data, observed_data = tf.dynamic_partition(data[:,ftr_index:ftr_index+types_dict[i]['dim']], real_miss_mask[:,i], num_partitions=2)
    condition_indices = tf.dynamic_partition(tf.range(tf.shape(data)[0]), real_miss_mask[:, i], num_partitions=2)

    if types_dict[i]['type'] == 'real':
        data_mean, data_var = tf.nn.moments(observed_data, 0)
        data_var = tf.clip_by_value(data_var, 1e-6, 1e20) # Avoid zero values
        norm_X = tf.nn.batch_normalization(observed_data, data_mean, data_var, offset=0.0, scale=1.0, variance_epsilon=1e-6)

        normalized_data.append(tf.dynamic_stitch(condition_indices, [missing_data, norm_X]))
        normalization_parameters.append([data_mean, data_var])

    elif types_dict[i]['type'] == 'pos':
        # we transform the log of the data to a gaussian with mean 0 and std 1
        observed_data_log = tf.log(1.0+observed_data)
        data_mean_log, data_var_log = tf.nn.moments(observed_data_log, 0)
        data_var_log = tf.clip_by_value(data_var_log, 1e-6, 1e20) 
        norm_X = tf.nn.batch_normalization(observed_data_log, data_mean_log, data_var_log, offset=0.0, scale=1.0, variance_epsilon=1e-6)

        normalized_data.append(tf.dynamic_stitch(condition_indices, [missing_data, norm_X]))
        normalization_parameters.append([data_mean_log, data_var_log])

    elif types_dict[i]['type'] == 'count':
        aux_X = tf.math.log(observed_data)

        normalized_data.append(tf.dynamic_stitch(condition_indices, [missing_data, aux_X]))
        normalization_parameters.append([0.0, 1.0])

    else:
        normalized_data.append(data[:, ftr_index:ftr_index+types_dict[i]['dim']])
        normalization_parameters.append([0.0, 1.0])
    ftr_index = ftr_index + types_dict[i]['dim']

normalized_data = tf.concat(normalized_data, 1)

# This block returns normalized_data, normalization_parameters    

In [10]:
# Set dimensionality of Y 
y_dim = args.dim_latent_y
y_dim_partition = args.dim_latent_y_partition
if y_dim_partition:
    y_dim_output = np.sum(y_dim_partition)
else:
    y_dim_partition = y_dim*np.ones(len(types_dict), dtype=int)
    y_dim_output = np.sum(y_dim_partition)

In [11]:
real_miss_mask.shape

(699, 10)

In [12]:
augmented_train_dataset = tf.data.Dataset.from_tensor_slices((normalized_data, miss_mask, real_miss_mask)).shuffle(1000)

In [13]:
# UTILITY AREA
# *****************************************************************************************************************
# Perform batch normalization of the data 

# normalizer = preprocessing.Normalization(input_shape=(data.shape[1],))
# normalizer.adapt(data) # normalizer is now a tf.keras layer and can be used as the first layer of your NN model!!
# mean = normalizer.mean.numpy()
# variance = normalizer.variance.numpy()
# *****************************************************************************************************************

In [13]:
class Sampling_s(tf.keras.layers.Layer):
    """ Uses Gumbel Softmax trick to sample s, the mixture component random variable that generates the latent variable z from a GMM. The Gumbel Softmax formulation 
    helps to implement the reparameterization trick for the discrete random variable 's' specified by activations, log_pi_aux. 
    """
    def __init__(self, tau):
        super(Sampling_s, self).__init__()
        self.tau = tau
        
    def call(self, inputs):
        input_activations = inputs
        no_of_samples = tf.shape(input_activations)[0]
        s_dim = tf.shape(input_activations)[1]
        U = -tf.math.log(-tf.math.log(tf.random.uniform([no_of_samples, s_dim])))
        samples_s = tf.nn.softmax((input_activations+U)/self.tau)
        return samples_s    

In [14]:
class Sampling_z_given_s(tf.keras.layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, given the samples of s. The random variable z is assumed to follow a GMM."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5*z_log_var)*epsilon
    

In [15]:
# Build the Encoder 

tau = 1.00
no_of_features = normalized_data.shape[1]
s_dim = args.dim_latent_s
z_dim = args.dim_latent_z

sample_s = Sampling_s(tau) # Create an object

encoder_inputs = tf.keras.Input(shape=(no_of_features, ))
log_pi = tf.keras.layers.Dense(units=s_dim, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_1_'+'enc_s')
x = log_pi(encoder_inputs)
log_pi_aux = tf.math.log(tf.clip_by_value(tf.nn.softmax(x), 1e-6, 1))

s = sample_s(log_pi_aux)

input_data_and_s = tf.concat([encoder_inputs, s], 1)
mean_qz = tf.keras.layers.Dense(units=z_dim, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_1_'+'mean_enc_z')(input_data_and_s)
log_var_qz = tf.keras.layers.Dense(units=z_dim, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_1_'+'logvar_enc_z')(input_data_and_s)

# Avoid numerical problems
log_var_qz = tf.clip_by_value(log_var_qz, -15.0, 15.0)
    
z = Sampling_z_given_s()([mean_qz, log_var_qz])

encoder = tf.keras.Model(encoder_inputs, [log_pi_aux, s, mean_qz, log_var_qz, z], name='encoder')  # check for reuse functionality of some layers here ????????
# encoder = tf.keras.Model(encoder_inputs, log_pi_aux, name='encoder')  # check for reuse functionality of some layers here ????????

encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 92)]         0                                            
__________________________________________________________________________________________________
layer_1_enc_s (Dense)           (None, 4)            372         input_1[0][0]                    
__________________________________________________________________________________________________
tf.nn.softmax (TFOpLambda)      (None, 4)            0           layer_1_enc_s[0][0]              
__________________________________________________________________________________________________
tf.clip_by_value (TFOpLambda)   (None, 4)            0           tf.nn.softmax[0][0]              
____________________________________________________________________________________________

In [17]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()

In [37]:
epochs = 5

encoder.compile(optimizer=optimizer, loss=mse_loss_fn)
encoder.fit(normalized_data, normalized_data[:, 0:4], batch_size=32, epochs=10)
# Iterate over epochs.
# for epoch in range(epochs):
#     print("Start of epoch %d" % (epoch,))

#     # Iterate over the batches of the dataset.
#     for step, x_batch_train in enumerate(train_dataset):
#         with tf.GradientTape() as tape:
#             reconstructed = encoder(x_batch_train)
#             # Compute reconstruction loss
#             loss = mse_loss_fn(x_batch_train[:, 0:4], reconstructed[0])
#             loss += sum(encoder.losses)  # Add KLD regularization loss

#         grads = tape.gradient(loss, encoder.trainable_weights)
#         optimizer.apply_gradients(zip(grads, encoder.trainable_weights))

#         loss_metric(loss)

#         if step % 100 == 0:
#             print("step %d: mean loss = %.4f" % (step, loss_metric.result()))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e81031b148>

In [16]:
def y_partition(samples_y, types_list, y_dim_partition):
    grouped_samples_y = []
    partition_vector_cumsum = np.insert(np.cumsum(y_dim_partition), 0, 0)
    for i in range(len(types_list)):
        grouped_samples_y.append(samples_y[:, partition_vector_cumsum[i]:partition_vector_cumsum[i+1]])

In [17]:
def observed_data_layer(observed_data, missing_data, condition_indices, output_dim, name, bias):
    #Train a layer with the observed data and reuse it for the missing data
    output_layer = tf.keras.layers.Dense(units=output_dim, activation=None,
                         kernel_initializer=tf.random_normal_initializer(stddev=0.05),name=name,use_bias=bias)
    output_layer.trainable=True
    obs_output = output_layer(observed_data)
    
    output_layer.trainable = False
    miss_output = output_layer(missing_data) 
    #Join back the data
    output = tf.dynamic_stitch(condition_indices, [miss_output,obs_output])
    return output


def theta_real_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i, reuse):
    #Mean layer    
    h2_mean = observed_data_layer(tf.concat([observed_y,observed_s],1), tf.concat([missing_y,missing_s],1), condition_indices, output_dim=types_list[i]['dim'], name='layer_h2' + str(i), reuse=reuse, bias=False)
    #Sigma Layer    
    h2_sigma = observed_data_layer(observed_s, missing_s, condition_indices, output_dim=types_list[i]['dim'], name='layer_h2_sigma' + str(i), reuse=reuse, bias=False)
    return [h2_mean, h2_sigma]

def theta_pos_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i, reuse):
    
    #Mean layer
    h2_mean = observed_data_layer(tf.concat([observed_y,observed_s],1), tf.concat([missing_y,missing_s],1), condition_indices, output_dim=types_list[i]['dim'], name='layer_h2' + str(i), reuse=reuse, bias=False)
    
    #Sigma Layer
    h2_sigma = observed_data_layer(observed_s, missing_s, condition_indices, output_dim=types_list[i]['dim'], name='layer_h2_sigma' + str(i), reuse=reuse, bias=False)
    
    return [h2_mean, h2_sigma]

def theta_count_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i, reuse):
    
    #Lambda Layer    
    h2_lambda = observed_data_layer(tf.concat([observed_y,observed_s],1), tf.concat([missing_y,missing_s],1), condition_indices, output_dim=types_list[i]['dim'], name='layer_h2' + str(i), reuse=reuse, bias=False)
    
    return h2_lambda

def theta_cat_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i):
    
    #Log pi layer, with zeros in the first value to avoid the identificability problem   
    h2_log_pi_partial = observed_data_layer(tf.concat([observed_y,observed_s],1), tf.concat([missing_y,missing_s],1), condition_indices, output_dim=int(types_list[i]['dim'])-1, name='layer_h2' + str(i), bias=False)
    h2_log_pi = tf.concat([tf.zeros([nObs,1]), h2_log_pi_partial],1)
    return h2_log_pi

def theta_ordinal_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i):
    
    #Theta layer, Dimension of ordinal - 1
#     print(observed_y.shape)
    h2_theta = observed_data_layer(observed_s, missing_s, condition_indices, output_dim=int(types_list[i]['dim'])-1, name='layer_h2' + str(i), bias=False)
    
    #Mean layer, a single value
    h2_mean = observed_data_layer(tf.concat([observed_y,observed_s],1), tf.concat([missing_y,missing_s],1), condition_indices, output_dim=1, name='layer_h2_sigma' + str(i), bias=False)
    
    return [h2_theta, h2_mean]

In [18]:
def theta_estimation_from_ys(samples_y, samples_s, types_list, miss_list):
    theta = []
    # Independent yd -> Compute p(xd|yd)
    for i, d in enumerate(samples_y):
        # Partition the data in missing data(0) and observed data(1)
        missing_y, observed_y = tf.dynamic_partition(d, miss_list[:, i], num_partitions=2)
        missing_s, observed_s = tf.dynamic_partition(samples_s, miss_list[:, i], num_partitions=2)
        condition_indices = tf.dynamic_partition(tf.range(tf.shape(d)[0]), miss_list[:, i], num_partitions=2)
        nObs = tf.shape(observed_y)[0]
        
        # Different layer models for each type of variable
        if types_list[i]['type']=='real':
            params = theta_real_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i)
        
        elif types_list[i]['type']=='pos':
            params = theta_pos_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i)
            
        elif types_list[i]['type']=='count':
            params = theta_count_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i)
        
        elif types_list[i]['type']=='cat':
            params = theta_cat_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i)
            
        elif types_list[i]['type']=='ordinal':
            params = theta_ordinal_s(observed_y, missing_y, observed_s, missing_s, condition_indices, types_list, nObs, i)
        theta.append(params)
    return theta

In [19]:
# Build the decoder 

decoder_inputs_s = tf.keras.Input(shape=(s_dim, ))
decoder_inputs_z = tf.keras.Input(shape=(z_dim, ))

# params of p(z|s): p_params
mean_pz = tf.keras.layers.Dense(units=z_dim, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_1_'+'mean_dec_z')(decoder_inputs_s)
log_var_pz = tf.zeros([tf.shape(decoder_inputs_z)[0], z_dim])
log_var_pz = tf.clip_by_value(log_var_pz, -15.0, 15.0)

# Create deterministic layer y
samples_y = tf.keras.layers.Dense(units=y_dim, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_h1_')(decoder_inputs_z)

grouped_samples_y = []
partition_vector_cumsum = np.insert(np.cumsum(y_dim_partition), 0, 0)
for i in range(len(types_dict)):
    grouped_samples_y.append(samples_y[:, partition_vector_cumsum[i]:partition_vector_cumsum[i+1]])

theta = theta_estimation_from_ys(grouped_samples_y, decoder_inputs_s, types_dict, miss_mask)

decoder = tf.keras.Model(inputs=[decoder_inputs_s, decoder_inputs_z], outputs=[mean_pz, log_var_pz, theta], name='decoder')
decoder.summary()

Model: "decoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 2)]          0                                            
__________________________________________________________________________________________________
layer_h1_ (Dense)               (None, 3)            9           input_3[0][0]                    
__________________________________________________________________________________________________
tf.__operators__.getitem_10 (Sl (None, 0)            0           layer_h1_[0][0]                  
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 4)]          0                                            
____________________________________________________________________________________________

In [64]:
epochs = 5

decoder.compile(optimizer=optimizer, loss=mse_loss_fn)
decoder.fit(normalized_data[:, 0:4], [normalized_data[:, 0:4], normalized_data[:, 5:6]], batch_size=32, epochs=10)


Epoch 1/10


AssertionError: in user code:

    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:747 train_step
        y_pred = self(x, training=True)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:985 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\functional.py:386 call
        inputs, training=training, mask=mask)
    c:\users\bsankaranarayanan2\pycharmprojects\tf_tutorials\venv\lib\site-packages\tensorflow\python\keras\engine\functional.py:517 _run_internal_graph
        assert x_id in tensor_dict, 'Could not compute output ' + str(x)

    AssertionError: Could not compute output Tensor("Maximum_8:0", shape=(None, 2), dtype=float32)


In [20]:
y_dim

3

In [21]:
def data_as_feature_list(data, types_dict):
    
    data_list = []
    initial_index = 0
    for d in types_dict:
        dim = int(d['dim'])
        data_list.append(data[:,initial_index:initial_index+dim])
        initial_index += dim
    
    return data_list

In [22]:
def loglik_evaluation(normalized_data_as_list, types_dict, miss_mask, theta, tau2, normalization_params):
    log_p_x = []
    log_p_x_missing = []
    samples_x = []
    params_x = []

    #Independet yd -> Compute log(p(xd|yd))
    for i,d in enumerate(normalized_data_as_list):
        # Select the likelihood for the types of variables
        loglik_function = getattr(loglik_models_missing_normalize, 'loglik_' + types_dict[i]['type'])
        out = loglik_function([d,miss_mask[:,i]], types_dict[i], theta[i], normalization_params[i], tau2,
                                      kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_1_mean_dec_x' + str(i))
        log_p_x.append(out['log_p_x'])
    return log_p_x

In [28]:
# Define the VAE as a Model with a custom 'train_step'

tau2 = .001
class VAE(tf.keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder 
        
    def train_step(self, data):
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape: 
            log_pi_aux, s, mean_qz, log_var_qz, z = encoder(data)
            mean_pz, log_var_pz, theta = decoder([s, z])
            reconstruction = mean_pz

            # Computing the loss
            # KL(q(s|x)||p(s))
            log_pi = log_pi_aux
            pi_param = tf.nn.softmax(log_pi)
            KL_s = -tf.nn.softmax_cross_entropy_with_logits(labels=pi_param, logits=log_pi) + tf.math.log(float(s_dim))
            
            # KL(q(z|s,x)||p(z|s))
            KL_z = -0.5*z_dim + 0.5*tf.reduce_sum(tf.exp(log_var_qz-log_var_pz) + tf.square(mean_pz-mean_qz)/tf.exp(log_var_pz) - log_var_qz + log_var_pz, 1)
            
            #Eq[log_p(x|y)]
            normalized_data_as_list = data_as_feature_list(normalized_data, types_dict)
            
            log_p_x = loglik_evaluation(normalized_data_as_list, types_dict, miss_mask, theta, tau2, normalization_parameters)

            loss_reconstruction = tf.reduce_sum(log_p_x, 0)
            
            # Complete ELBO 
            ELBO = tf.reduce_mean(loss_reconstruction - KL_z - KL_s, 0)
            
        grads = tape.gradient(ELBO, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            'loss': ELBO,
            'reconstruction_loss': loss_reconstruction, 
            'kl_loss': KL_s + KL_z,
        }

In [29]:
# Train the VAE 

# (x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
# mnist_digits = np.concatenate([x_train, x_test], axis=0)
# mnist_digits = np.expand_dims(mnist_digits, -1).astype('float32')/255

vae = VAE(encoder, decoder)
vae.compile(optimizer=tf.keras.optimizers.Adam())

In [30]:
import datetime
log_dir = 'logs/vae/'+datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# vae.summary()

In [31]:
normalized_data.shape

TensorShape([699, 92])

In [33]:
vae.fit(normalized_data, normalized_data, epochs=5, batch_size=100, callbacks=tensorboard_callback) 

Epoch 1/5


InvalidArgumentError:  Input to reshape is a tensor with 100 values, but the requested shape has 699
	 [[node softmax_cross_entropy_with_logits/Reshape_2 (defined at <ipython-input-28-8b15a227366d>:22) ]] [Op:__inference_train_function_15551]

Function call stack:
train_function


In [7]:
# Generator function for testing purposes before actually starting the training 
# Inputs: Raw_data, Normalized_data, miss_list, types_list, batch_size, z_dim, y_dim_output, y_dim_partition, s_dim, tau, tau2, normalization_params

samples_test = dict.fromkeys(['s', 'z', 'y', 'x'])
test_params = dict()
X = tf.concat(X_list, 1)

# Create the proposal of q(s|x^o)
_, params = s_proposal_multinomial_encoder(X, s_dim, tau)
samples_test['s'] = tf.one_hot(tf.argmax(params, 1), depth=s_dim)

# Create the proposal of q(z|s,x^o)
_, params = z_proposal_GMM_encoder(X, samples_test['s'], z_dim)
samples_test['z'] = params[0]

# Create deterministic layer y
samples_test['y'] = tf.keras.layers.Dense(units=y_dim, activation=None, kernel_initializer=tf.random_normal_initializer(stddev=0.05), name='layer_h1_', reuse=True)
grouped_samples_y = y_partition(samples_test['y'], types_list, y_dim_partition)

# Compute the parameters h_y
theta = theta_estimation_from_ys(grouped_samples_y, samples_test['s'], types_list, miss_list, batch_size, reuse=True)

# Compute loglik and output of the VAE
log_p_x, log_p_x_missing, samples_test['x'], test_params['x'] = loglik_evaluation(batch_data_list, types_list, miss_list, theta, tau2, normalization_params, reuse=True)

# Returns samples_test, test_params, log_p_x, log_p_x_missing

In [None]:
%tensorboard --logdir logs/vae