<a href="https://colab.research.google.com/github/Debottam/MachinLearningEx/blob/master/VAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K

import numpy as np
import matplotlib.pyplot as plt
import argparse
import os

In [0]:
from keras.datasets import mnist
import numpy as np
# Load MNIST; load_data() yields one (images, labels) pair per split.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [88]:
# Cast to float32, divide by 255 (presumably mapping 8-bit pixel values into
# [0, 1]) and flatten every image into a single feature vector.
# NOTE: the arrays are rebound in place, so re-running this cell rescales again.
x_train = (x_train.astype('float32') / 255.).reshape(
    (x_train.shape[0], np.prod(x_train.shape[1:])))
x_test = (x_test.astype('float32') / 255.).reshape(
    (x_test.shape[0], np.prod(x_test.shape[1:])))
print(x_train.shape, x_test.shape, sep='\n')

(60000, 784)
(10000, 784)


In [0]:
# Background sample: keep every training image whose label is not 4, 5 or 6.
x_train_bkg = x_train[~np.isin(y_train, (4, 5, 6))]

In [90]:
# Sanity check: (number of background images, flattened feature width).
x_train_bkg.shape

(42819, 784)

In [0]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the background images for validation; the fixed random_state
# makes the split repeatable.
# NOTE: x_train_bkg is rebound to the remaining 70%, so re-running this cell
# splits the already-reduced array again.
x_train_bkg, valid_x_train_bkg = train_test_split(
    x_train_bkg, random_state=13, test_size=0.3)

In [92]:
# Sanity check: size of the held-out validation part of the background sample.
valid_x_train_bkg.shape

(12846, 784)

In [93]:
# Width of one flattened image; reused as the input and output size of the VAE.
original_dim = x_train_bkg.shape[-1]
original_dim


784

In [0]:
# network parameters
# -- architecture --
latent_dim = 32                    # width of the z_mean / z_log_var / z layers
intermediate_dim = 512             # width of the hidden ReLU layer in encoder and decoder
input_shape = (original_dim, )     # one flattened image per sample
# -- training --
batch_size = 128
epochs = 50

In [0]:
# VAE model = encoder + decoder
# Encoder: flattened image -> hidden ReLU layer -> two parallel linear heads
# producing the mean and the log-variance of the latent distribution.
inputs = Input(name='encoder_input', shape=input_shape)
x = Dense(units=intermediate_dim, activation='relu')(inputs)
z_mean = Dense(units=latent_dim, name='z_mean')(x)
z_log_var = Dense(units=latent_dim, name='z_log_var')(x)

In [0]:
# use reparameterization trick to push the sampling out as input
def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Computes ``z = mean + exp(0.5 * log_var) * epsilon`` with ``epsilon`` drawn
    from a standard normal, so the randomness is an input to the graph and
    gradients can flow through ``mean`` and ``log_var``.

    # Arguments
        args (list of tensors): ``[z_mean, z_log_var]`` as produced by the
            encoder heads, each of shape (batch, latent_dim).

    # Returns
        Tensor of the same shape as ``z_mean`` holding the sampled latent vector.
    """
    mean, log_var = args
    # Take the batch size from the tensor at run time instead of the global
    # `batch_size`: the training set (29973 samples) is not a multiple of 128,
    # so the last batch of every epoch is smaller than `batch_size`.
    batch = K.shape(mean)[0]
    dim = K.int_shape(mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return mean + K.exp(0.5 * log_var) * epsilon


# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

In [97]:
# Wrap the encoder graph in a Model that exposes three outputs:
# the latent mean, the latent log-variance and the sampled latent vector z.
encoder = Model(inputs=inputs, outputs=[z_mean, z_log_var, z], name='encoder')
encoder.summary()
# Optional diagram:
# plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 784)          0                                            
__________________________________________________________________________________________________
dense_21 (Dense)                (None, 512)          401920      encoder_input[0][0]              
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 32)           16416       dense_21[0][0]                   
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 32)           16416       dense_21[0][0]                   
____________________________________________________________________________________________

In [0]:
# Decoder: latent vector -> hidden ReLU layer -> sigmoid layer as wide as the
# flattened image, so every output value lies in (0, 1).
# NOTE: `x` and `outputs` are rebound here; `x` no longer refers to the encoder's hidden layer.
latent_inputs = Input(name='z_sampling', shape=(latent_dim,))
x = Dense(units=intermediate_dim, activation='relu')(latent_inputs)
outputs = Dense(units=original_dim, activation='sigmoid')(x)

In [99]:
# Wrap the decoder graph in its own Model so it can be applied to any latent tensor.
decoder = Model(inputs=latent_inputs, outputs=outputs, name='decoder')
decoder.summary()
# Optional diagram:
# plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 32)                0         
_________________________________________________________________
dense_22 (Dense)             (None, 512)               16896     
_________________________________________________________________
dense_23 (Dense)             (None, 784)               402192    
Total params: 419,088
Trainable params: 419,088
Non-trainable params: 0
_________________________________________________________________


In [100]:
# End-to-end VAE: feed the encoder's third output (the sampled z) to the decoder.
# `outputs` is rebound to the reconstruction produced by the full model.
latent_sample = encoder(inputs)[2]
outputs = decoder(latent_sample)
vae = Model(inputs=inputs, outputs=outputs, name='vae_mlp')
vae.summary()

Model: "vae_mlp"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 784)               0         
_________________________________________________________________
encoder (Model)              [(None, 32), (None, 32),  434752    
_________________________________________________________________
decoder (Model)              (None, 784)               419088    
Total params: 853,840
Trainable params: 853,840
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Define loss
def kl_reconstruction_loss(inputs, outputs):
  """VAE objective: reconstruction error plus KL divergence, averaged over the batch.

  # Arguments
      inputs: target tensor passed by Keras (here the original images).
      outputs: model prediction (the reconstructed images).

  # Returns
      Scalar tensor: mean over the batch of (reconstruction term + KL term).

  Note: the KL term reads the module-level `z_mean` and `z_log_var` tensors of
  the encoder; it is not derived from the arguments. The two terms are added
  with equal (unit) weight -- no 50/50 averaging is applied.
  """
  # binary_crossentropy averages over the feature axis; multiplying by
  # original_dim turns that mean back into a per-image sum.
  recon_term = binary_crossentropy(inputs, outputs) * original_dim
  # Closed-form KL between N(z_mean, exp(z_log_var)) and a standard normal,
  # summed over the latent dimensions.
  kl_term = -0.5 * K.sum(
      1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
  return K.mean(recon_term + kl_term)

In [0]:
# Compile with Adam and the custom reconstruction + KL loss function defined above.
vae.compile(optimizer='adam', loss=kl_reconstruction_loss)

In [116]:
# Autoencoder training: the images serve as both inputs and targets, for the
# training data and for the held-out validation data alike.
vae.fit(x=x_train_bkg,
        y=x_train_bkg,
        batch_size=batch_size,
        epochs=epochs,
        shuffle=True,
        validation_data=(valid_x_train_bkg, valid_x_train_bkg))

Train on 29973 samples, validate on 12846 samples
Epoch 1/50

InvalidArgumentError: ignored

In [111]:
# Alternative to a compile-time loss function: build the VAE objective from the
# model's own tensors, register it with add_loss, then compile without `loss`.
# Per-image reconstruction error (mean cross-entropy rescaled by the feature count).
reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim
# Closed-form KL divergence to a standard normal, summed over latent dimensions.
kl_loss = -0.5 * K.sum(
    1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
# Batch average of the summed terms -> scalar loss tensor.
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "vae_mlp"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 784)               0         
_________________________________________________________________
encoder (Model)              [(None, 32), (None, 32),  434752    
_________________________________________________________________
decoder (Model)              (None, 784)               419088    
Total params: 853,840
Trainable params: 853,840
Non-trainable params: 0
_________________________________________________________________


In [110]:
# Check the shape of the tensor registered via add_loss; K.mean without an
# axis reduces it to a scalar (empty shape).
vae_loss.shape

TensorShape([])

In [102]:
# Print the layer / parameter table of the end-to-end VAE model.
vae.summary()

Model: "vae_mlp"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 784)               0         
_________________________________________________________________
encoder (Model)              [(None, 32), (None, 32),  434752    
_________________________________________________________________
decoder (Model)              (None, 784)               419088    
Total params: 853,840
Trainable params: 853,840
Non-trainable params: 0
_________________________________________________________________


In [112]:
# Train the add_loss variant: no targets are passed because the objective is
# already attached to the model, hence `None` as the validation target as well.
vae.fit(x=x_train_bkg,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(valid_x_train_bkg, None))

Train on 29973 samples, validate on 12846 samples
Epoch 1/50

InvalidArgumentError: ignored