# VAE

In [1]:
# Packages
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras import layers

In [2]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Load images

In [7]:
image_path = "/Users/ivanliu/Desktop/gan-getting-started"
MONET_FILENAMES = tf.io.gfile.glob(str(image_path + '/monet_tfrec/*.tfrec'))
print('Monet TFRecord Files:', len(MONET_FILENAMES))

PHOTO_FILENAMES = tf.io.gfile.glob(str(image_path + '/photo_tfrec/*.tfrec'))
print('Photo TFRecord Files:', len(PHOTO_FILENAMES))

Monet TFRecord Files: 5
Photo TFRecord Files: 20


## Create a sampling layer

In [10]:
# Build the encoder

# Potential latent space, a 2D plane
latent_dim = 2

encoder_inputs = keras.Input(shape=(256, 256, 3))
# Use stride to reduce the size of the feature map
x = layers.Conv2D(32, 3, activation='relu', strides=2, padding='same')(encoder_inputs) 
x = layers.Conv2D(64, 3, activation='relu', strides=2, padding='same')(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation='relu')(x)
# The input image would be encoded as the two parameters
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
# Create an encoder with two outputs
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var], name='encoder')
encoder.summary()

2024-02-19 20:18:20.679100: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-02-19 20:18:20.679127: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-02-19 20:18:20.679141: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-02-19 20:18:20.679212: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-19 20:18:20.679258: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv2d_2 (Conv2D)           (None, 128, 128, 32)         896       ['input_3[0][0]']             
                                                                                                  
 conv2d_3 (Conv2D)           (None, 64, 64, 64)           18496     ['conv2d_2[0][0]']            
                                                                                                  
 flatten (Flatten)           (None, 262144)               0         ['conv2d_3[0][0]']            
                                                                                            

In [None]:
# Create a sampling layer in the latent space
class Sampler(layers.Layer):

    def call(self, z_mean, z_log_var):
        batch_size = tf.shape(z_mean)[0]
        z_size = tf.shape(z_mean)[-1]
        epsilon = tf.random.normal(shape=(batch_size, z_size))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon # VAE formula to sample z points

In [13]:
# VAE decoders: to decode z points back to images

# The input is the sampling z points from the latent space
latent_inputs = keras.Input(shape=(latent_dim,))

# The unit is the same as the encoder "flatten" layer
x = layers.Dense(64**3, activation='relu')(latent_inputs)
# Restore the flatten operation
x = layers.Reshape((64, 64, 64))(x)
# Restore the Conv2D operation
x = layers.Conv2DTranspose(64, 3, activation='relu', strides=2, padding='same')(x)
x = layers.Conv2DTranspose(32, 3, activation='relu', strides=2, padding='same')(x)
# The final output shape is (256, 256, 1)
decoder_outputs = layers.Conv2D(1, 3, activation='sigmoid', padding='same')(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name='decoder')
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 2)]               0         
                                                                 
 dense_1 (Dense)             (None, 262144)            786432    
                                                                 
 reshape (Reshape)           (None, 64, 64, 64)        0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 128, 128, 64)      36928     
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 256, 256, 32)      18464     
 Transpose)                                                      
                                                                 
 conv2d_4 (Conv2D)           (None, 256, 256, 1)       289 

In [None]:
# Create a custom VAE model

class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss = keras.metrics.Mean(name='reconstruction_loss')

        

In [12]:
64**3

262144