In [1]:
from keras.layers import Lambda, Input, Dense, Conv2D, Flatten,Reshape,Conv2DTranspose
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K
import numpy as np
import tensorflow as tf
import keras_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Load dataset
dataset_zip = np.load('dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')

print('Keys in the dataset:', dataset_zip.keys())
imgs = dataset_zip['imgs']
latents_values = dataset_zip['latents_values']
latents_classes = dataset_zip['latents_classes']

Keys in the dataset: KeysView(<numpy.lib.npyio.NpzFile object at 0x0000012BD23AE908>)


In [3]:
# MNIST dataset
from sklearn.model_selection import train_test_split
x_train, y_train, x_test, y_test = train_test_split(imgs,latents_values)

image_size = x_train.shape[1]
original_dim = image_size * image_size
x_train = np.reshape(x_train, [-1, 64,64,1])
x_test = np.reshape(x_test, [-1, 64,64,1])
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [22]:
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments
        args (tensor): mean and log of variance of Q(z|X)

    # Returns
        z (tensor): sampled latent vector
    """

    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

In [32]:
# network parameters
nc = 1
input_shape = (64,64,nc)
intermediate_dim = 512
batch_size = 128
latent_dim = 10
epochs = 1
beta = 20

# VAE model = encoder + decoder
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Conv2D(kernel_size = 4,filters=32,padding="same",strides=2,activation="relu",name = "Cov1")(inputs)
x = Conv2D(kernel_size = 4,filters=32,padding="same",strides=2,activation="relu",name = "Cov2")(x)
x = Conv2D(kernel_size = 4,filters=32,padding="same",strides=2,activation="relu",name = "Cov3")(x)
x = Conv2D(kernel_size = 4,filters=32,padding="same",strides=2,activation="relu",name = "Cov4")(x)
x = Flatten()(x)
x = Dense(256, name='Dense1',activation="relu")(x)
x = Dense(256, name='Dense2',activation="relu")(x)
z_mean = Dense(latent_dim,name = "z_mean")(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()
plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(256, activation='relu',name = "deDense1")(latent_inputs)
x = Dense(256, activation='relu',name = "deDense2")(x)
x = Dense(32*4*4, activation='relu',name = "deDense3")(x)
x = Reshape((4,4,32))(x)
x = Conv2DTranspose(kernel_size = 4,padding="same",strides=2,filters = 32,name="DeCov1")(x)
x = Conv2DTranspose(kernel_size = 4,padding="same",strides=2,filters = 32,name="DeCov2")(x)
x = Conv2DTranspose(kernel_size = 4,padding="same",strides=2,filters = 32,name="DeCov3")(x)
outputs = Conv2DTranspose(kernel_size = 4,padding="same",strides=2,filters = nc,name="DeCov4")(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 64, 64, 1)    0                                            
__________________________________________________________________________________________________
Cov1 (Conv2D)                   (None, 32, 32, 32)   544         encoder_input[0][0]              
__________________________________________________________________________________________________
Cov2 (Conv2D)                   (None, 16, 16, 32)   16416       Cov1[0][0]                       
__________________________________________________________________________________________________
Cov3 (Conv2D)                   (None, 8, 8, 32)     16416       Cov2[0][0]                       
__________________________________________________________________________________________________
Cov4 (Conv

In [33]:
is_mse = False
# VAE loss = mse_loss or xent_loss + kl_loss
if is_mse:
    reconstruction_loss = mse(inputs, outputs)
else:
    reconstruction_loss = binary_crossentropy(inputs,
                                              outputs)

reconstruction_loss *= original_dim
reconstruction_loss = K.reshape(reconstruction_loss,(-1,64*64))
reconstruction_loss = K.mean(reconstruction_loss,axis=1)
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + beta*kl_loss)

vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 64, 64, 1)         0         
_________________________________________________________________
encoder (Model)              [(None, 10), (None, 10),  252052    
_________________________________________________________________
decoder (Model)              (None, 64, 64, 1)         249953    
Total params: 502,005
Trainable params: 502,005
Non-trainable params: 0
_________________________________________________________________


In [34]:
# train the autoencoder
np.random.seed(2019)
tf.set_random_seed(2019)
model_filename = 'BetaVAE.{0:03d}.h5'
last_finished_epoch = 0
# vae = load_model(model_filename.format(last_finished_epoch))
vae.fit(x_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, None),
       callbacks=[keras_utils.ModelSaveCallback(model_filename),
                 keras_utils.TqdmProgressCallback()],
        verbose=0)
vae.save_weights('vae_beta_dsprites.h5')


Epoch 1/1


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Model saved in BetaVAE.000.h5



In [16]:
from keras_utils import reset_tf_session
from keras.models import load_model
s = reset_tf_session()  # clear default graph
K.set_learning_phase(0)  # disable dropout
last_finished_epoch = 0
model = load_model(model_filename.format(last_finished_epoch))

ValueError: When passing a list as loss, it should have one entry per model outputs. The model has 1 outputs, but you passed loss=[]

In [None]:
def find_maximum_stimuli(k,model, iterations=20, step=1., verbose=True):
    
    def image_values_to_rgb(x):
        # normalize x: center on 0 (np.mean(x_train2)), ensure std is 0.25 (np.std(x_train2))
        # so that it looks like a normalized image input for our network
        x = (x -np.mean(x))/np.std(x_train2) ### YOUR CODE HERE

        # do reverse normalization to RGB values: x = (x_norm + 0.5) * 255
        x = x*255### YOUR CODE HERE
    
        # clip values to [0, 255] and convert to bytes
        x = np.clip(x, 0, 255).astype('uint8')
        return x

    # this is the placeholder for the input image
    z_tar = Input(shape=(latent_dim,))
    input_img = model.input
    img_width, img_height = input_img.shape.as_list()[1:3]
    
    # find the layer output by name
    _,_,z = model.encoder(input_img)
    

    # we build a loss function that maximizes the activation
    # of the filter_index filter of the layer considered
    loss = mse(z_tar,z)

    # we compute the gradient of the loss wrt input image
    grads = K.gradients(loss, input_img)[0]  # [0] because of the batch dimension!

    # normalization trick: we normalize the gradient
    grads = grads / (K.sqrt(K.sum(K.square(grads))) + 1e-10)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img,z_tar], [loss, grads])
    get_output = K.function([input_img], [z])

    # we start from a gray image with some random noise
    input_img_data = x_train[0]

    # we run gradient ascent
    I = np.eye(latent_dim)
    for i in range(iterations):
        z_ = get_output([input_img_data])
        if i == 0:
            z_ = z_ + I[k,:]
        loss, grads = iterate([input_img_data,z_])
        input_img_data += grads_value * step
        if verbose:
            print('Current loss value:', loss_value)

    # decode the resulting input image
    img = image_values_to_rgb(input_img_data[0])
    
    return img, loss_value

In [None]:
# sample maximum stimuli
def plot_filters_stimuli(model, iterations=20, step=1., verbose=False):
    fig = plt.figure()
    loss = -1e20
    for i in range(latent_dim):
        ax = fig.add_subplot(1, latent_dim, i)
        ax.grid('off')
        ax.axis('off')
        stimuli, loss = find_maximum_stimuli(i,model,iterations, step, verbose=verbose)
        ax.imshow(stimuli)
    plt.show()

In [None]:
plot_filters_stimuli(model=model)