# Build format VAE network
Test the VAE build before including it in the larger training framework.

### Imports
Install tensorflow:
``%pip install tensorflow``

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
tf.random.set_seed(2) 

### Create sampling layer

In [2]:
class Sampling(layers.Layer):
    """Reparameterisation layer that draws a latent vector z.

    The layer receives the pair (z_mean, z_log_var) and returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon``, where ``epsilon`` is drawn
    from ``tf.keras.backend.random_normal`` with the same (batch, dim) shape
    as ``z_mean``.
    """

    def call(self, inputs):
        mean, log_var = inputs
        # Read the shape at run time so any batch size / latent width works.
        n_samples, n_latent = tf.shape(mean)[0], tf.shape(mean)[1]
        noise = tf.keras.backend.random_normal(shape=(n_samples, n_latent))
        # exp(0.5 * log-variance) is the standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise


### Build encoder


In [3]:
# def make_encoder():    
#     #filter_1 = 3 #32
#     #filter_2 = 2 #64
#     #kernel_size = 5 #3
#     dense_size = 16; 
#     encoder_inputs = keras.Input(shape=(20, 20,3)) # enter cut-out shape (20,20,3)
#     x = layers.Conv2D(filter_1, kernel_size, activation="relu", strides=2, padding="same")(encoder_inputs)
#     x = layers.Conv2D(filter_2, kernel_size, activation="relu", strides=2, padding="same")(x)
#     x = layers.Flatten()(x) # to vector
#     x = layers.Dense(dense_size, activation="relu")(x) # linked layer
#     z_mean = layers.Dense(latent_dim, name="z_mean")(x)
#     z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
#     z = Sampling()([z_mean, z_log_var])
#     encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
#     encoder.summary()
#     return encoder_inputs, encoder, z , z_mean, z_log_var


def make_encoder(cutout_size,n_bands,
                 filter_1,filter_2,
                 kernel_size_1,kernel_size_2,
                 dense_size,latent_dim):
    """Build the convolutional encoder of the VAE and print its summary.

    Args:
        cutout_size: Height and width of the (square) input cut-out.
        n_bands: Number of channels of the input cut-out.
        filter_1: Number of filters of the first convolution.
        filter_2: Number of filters of the second and third convolutions.
        kernel_size_1: Kernel size of the first convolution.
        kernel_size_2: Kernel size of the second and third convolutions.
        dense_size: Width of the dense layer that follows the flatten layer.
        latent_dim: Size of the latent vector.

    Returns:
        Tuple ``(encoder_inputs, encoder, z, z_mean, z_log_var)``: the input
        tensor, the encoder model (outputs ``[z_mean, z_log_var, z]``) and
        the three symbolic output tensors.
    """
    encoder_inputs = keras.Input(shape=(cutout_size, cutout_size, n_bands))

    # (filters, kernel size, stride) for each convolution, in order:
    # two stride-2 convolutions followed by one stride-1 convolution.
    conv_specs = (
        (filter_1, kernel_size_1, 2),
        (filter_2, kernel_size_2, 2),
        (filter_2, kernel_size_2, 1),
    )
    features = encoder_inputs
    for n_filters, kernel, step in conv_specs:
        conv = layers.Conv2D(n_filters, kernel, activation="relu", strides=step, padding="same")
        features = conv(features)

    # Collapse the feature maps to a vector and pass it through a dense layer.
    features = layers.Flatten()(features)
    features = layers.Dense(dense_size, activation="relu")(features)

    # Two dense heads (no activation) parameterise the latent distribution.
    z_mean = layers.Dense(latent_dim, name="z_mean")(features)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(features)
    z = Sampling()([z_mean, z_log_var])

    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
    encoder.summary()

    return encoder_inputs, encoder, z, z_mean, z_log_var



### Build decoder

In [14]:
# def make_decoder(): 
#     latent_inputs = keras.Input(shape=(latent_dim,))
#     x = layers.Dense(5 * 5 * filter_2, activation="relu")(latent_inputs) # -- shape corresponding to encoder
#     x = layers.Reshape((5, 5, filter_2))(x)
#     x = layers.Conv2DTranspose(filter_2, kernel_size, activation="relu", strides=2, padding="same")(x)
#     x = layers.Conv2DTranspose(filter_1, kernel_size, activation="relu", strides=2, padding="same")(x)
#     decoder_outputs = layers.Conv2DTranspose(3, 3, activation="sigmoid", padding="same")(x) # (1,3) or (3,3)
#     decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
#     decoder.summary()
#     return decoder

def make_decoder(latent_dim,encoder,
                 filter_1,filter_2,
                 kernel_size_1,kernel_size_2,
                 n_bands,
                 output_kernel_size=3):
    """Build the decoder of the VAE, mirroring the given encoder.

    The spatial size to start from is read from the input shape of the
    encoder's flatten layer, so the decoder follows whatever cut-out size
    the encoder was built for.

    Args:
        latent_dim: Size of the latent vector fed to the decoder.
        encoder: Encoder model; must contain a layer whose name contains
            ``'flatten'``.
        filter_1: Number of filters of the last hidden transposed convolution.
        filter_2: Number of filters of the first two transposed convolutions
            (also used as channel count of the reshaped dense output).
        kernel_size_1: Kernel size used together with ``filter_1``.
        kernel_size_2: Kernel size used together with ``filter_2``.
        n_bands: Number of output channels of the reconstruction.
        output_kernel_size: Kernel size of the final sigmoid layer. Defaults
            to 3, the value used before the band count was parameterised.

    Returns:
        The decoder ``keras.Model`` (its summary is printed as a side effect).

    Raises:
        ValueError: If the encoder has no flatten layer.
    """
    latent_inputs = keras.Input(shape=(latent_dim,))

    # Get the shape of the last encoder layer before flattening.
    flat_layers = [layer for layer in encoder.layers if 'flatten' in layer.name]
    if not flat_layers:
        raise ValueError("encoder has no layer with 'flatten' in its name")
    # Input shape of the flatten layer, e.g. (None, 5, 5, 16).
    # NOTE(review): `input_shape` is the layer attribute used by the original
    # code; confirm it is still available if the Keras version is upgraded.
    flat_input = flat_layers[-1].input_shape
    height, width = flat_input[1], flat_input[2]

    # Expand the latent vector back to the encoder's pre-flatten feature map.
    x = layers.Dense(height * width * filter_2, activation="relu")(latent_inputs)
    x = layers.Reshape((height, width, filter_2))(x)

    # Mirror of the encoder: stride-1 layer first, then two stride-2 upsamplings.
    x = layers.Conv2DTranspose(filter_2, kernel_size_2, activation="relu", strides=1, padding="same")(x)
    x = layers.Conv2DTranspose(filter_2, kernel_size_2, activation="relu", strides=2, padding="same")(x)
    x = layers.Conv2DTranspose(filter_1, kernel_size_1, activation="relu", strides=2, padding="same")(x)

    # Output layer: n_bands filters with a fixed kernel size. The kernel size
    # used to be n_bands as well, which silently turned it into a 1x1 kernel
    # for single-band input; (filters, kernel) is meant to be (1,3) or (3,3).
    decoder_outputs = layers.Conv2DTranspose(
        n_bands, output_kernel_size, activation="sigmoid", padding="same")(x)

    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
    decoder.summary()
    return decoder


## Define VAE as model
With custom train_step

Update: instead of defining the VAE as a class, build it with a function.

In [5]:
# Define VAE model.
def make_vae(encoder_inputs, z, z_mean, z_log_var, decoder,alpha=5):
    """Assemble the VAE model and attach its losses and metrics.

    Args:
        encoder_inputs: Input tensor of the encoder.
        z: Sampled latent tensor produced by the encoder.
        z_mean: Mean tensor of the latent distribution.
        z_log_var: Log-variance tensor of the latent distribution.
        decoder: Decoder model mapping ``z`` to a reconstruction.
        alpha: Weight of the KL-divergence term in the training loss.

    Returns:
        A ``tf.keras.Model`` named ``"vae"`` with the reconstruction loss and
        the alpha-weighted KL loss added, plus the metrics ``kl_loss`` and
        ``mse_loss``.
    """
    # Call the decoder once and reuse the tensor for both output and loss.
    reconstruction = decoder(z)
    vae = tf.keras.Model(inputs=encoder_inputs, outputs=reconstruction, name="vae")

    # Reconstruction term: binary cross-entropy summed over the two spatial
    # axes, averaged over the batch.
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(
            keras.losses.binary_crossentropy(encoder_inputs, reconstruction), axis=(1, 2)
        )
    )

    # KL divergence regularization term: summed over the latent axis,
    # averaged over the batch.
    kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))

    # Play with different alpha: -2, 0, 1, 2; 0.2; -0.5; 50.
    # The two terms are added separately, so the alpha weight has to be applied
    # here; previously the weighted total was computed but never added, which
    # made alpha a no-op.
    vae.add_loss(reconstruction_loss)
    vae.add_loss(alpha * kl_loss)

    # Metrics report the unweighted terms. NOTE: 'mse_loss' actually tracks
    # the binary cross-entropy reconstruction loss; the name is kept so that
    # existing logs/callbacks keyed on it keep working.
    vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')
    vae.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')

    return vae
    

## Test build

In [39]:
# Hyper-parameters for the build test.
filter1, filter2 = 32, 16
kernelSize1 = 5
kernelSize2 = kernelSize1
denseSize = 100 # 16

sizeCutOut = 100
bands = (3, 2, 1)  # S2
# bands=[1] # S1

latentDim = 4

# Build the encoder; keep the symbolic tensors for the VAE assembly below.
encoder_inputs, encoder, z, z_mean, z_log_var = make_encoder(
    cutout_size=sizeCutOut,
    n_bands=len(bands),
    filter_1=filter1,
    filter_2=filter2,
    kernel_size_1=kernelSize1,
    kernel_size_2=kernelSize2,
    dense_size=denseSize,
    latent_dim=latentDim,
)

# Analyse encoder layers: print each layer's name and input shape.
for layer in encoder.layers:
    print(layer.name)
    print(layer.input_shape)

# layer_flatten_shape = encode
# print(encoder.layers)
# The flatten layer's input shape is what the decoder needs to mirror.
flat_layer = [lyr for lyr in encoder.layers if 'flatten' in lyr.name]
dense_layer = [lyr for lyr in encoder.layers if 'dense' in lyr.name]
# print(flat_layer[-1].input_shape,dense_layer[-1].input_shape)

# print(encoder_inputs)
decoder = make_decoder(
    latent_dim=latentDim,
    encoder=encoder,
    filter_1=filter1,
    filter_2=filter2,
    kernel_size_1=kernelSize1,
    kernel_size_2=kernelSize2,
    n_bands=len(bands),
)
vae = make_vae(encoder_inputs, z, z_mean, z_log_var, decoder)

# Losses are attached inside make_vae, so only the optimizer is given here.
vae.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
)

# vae.metrics_tensors.append(kl_loss)
# vae.metrics_names.append("kl_loss")

# vae.metrics_tensors.append(reconstruction_loss)
# vae.metrics_names.append("mse_loss")


# vae.save('./test_model')


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_21 (InputLayer)          [(None, 100, 100, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_30 (Conv2D)             (None, 50, 50, 32)   2432        ['input_21[0][0]']               
                                                                                                  
 conv2d_31 (Conv2D)             (None, 25, 25, 16)   12816       ['conv2d_30[0][0]']              
                                                                                                  
 conv2d_32 (Conv2D)             (None, 25, 25, 16)   6416        ['conv2d_31[0][0]']        

In [None]:
# test 0 works:
# filter1 = 64 
# filter2 = 16 
# kernelSize = 5
# denseSize = 16


# # this did not work (out of memory)
# filter1 = 128 
# filter2 = 32 
# kernelSize1 = 8
# kernelSize2= 5
# denseSize = 16

# Configuration for a build test on 20x20 cut-outs.
filter1 = 64 
filter2 = 32 
kernelSize1 = 5
kernelSize2= 5
denseSize = 16

sizeCutOut=20
bands = 3, 2, 1 

latentDim = 4

encoder_inputs, encoder, z, z_mean, z_log_var = make_encoder(
                            sizeCutOut,len(bands),
                            filter1,filter2,
                            kernelSize1,kernelSize2,
                            denseSize,latentDim)

# make_decoder needs the encoder (to read the pre-flatten shape) and the
# number of bands; the previous call used a stale signature and raised a
# TypeError.
decoder = make_decoder(latentDim,encoder,
                       filter1,filter2,
                       kernelSize1,kernelSize2,
                       n_bands=len(bands))
vae = make_vae(encoder_inputs, z, z_mean, z_log_var, decoder)
vae.compile(optimizer=keras.optimizers.Adam())

# number of training steps in 1 epoch 
`model.fit(steps_per_epoch=None)`: When training with input tensors such as TensorFlow data tensors, the default `None` is equal to the number of samples in your dataset divided by the batch size, or 1 if that cannot be determined.

So I have 204,259 samples (window size = 100)
batch_size = 32 (default; do not set it yourself when using tf.datasets)
Number of steps: 2766

In [None]:
# Steps per epoch if the 204259 samples are split into batches of 32.
204259 / 32
# Samples per step implied by the 2766 steps noted above.
204259 / 2766
# 204259 / 64

## Test read config file


In [24]:

import configparser

def parse_config(config):
    """Parse input arguments from a dictionary or a config file.

    Args:
        config: Either a ``dict`` that already holds the settings, or the
            path of an INI file whose ``[train-VAE]`` section holds them.

    Returns:
        A tuple ``(catPath, labPath, outputDir, sizeTestSet, sizeValSet,
        roiFile, bands, sizeCutOut, nEpochMax, nEpochTrain, sizeStep, stride,
        file_DMGinfo, normThreshold, filter1, filter2, kernelSize1,
        kernelSize2, denseSize, latentDim, alpha, batchSize, learnRate)``.
        ``bands`` is a list of ints and ``normThreshold`` a list of floats.

    Raises:
        FileNotFoundError: If a path is given but the file cannot be read.
        KeyError: If the ``train-VAE`` section or a required key is missing.
        ValueError: If a value cannot be converted to its numeric type.
    """
    if not isinstance(config, dict):
        parser = configparser.ConfigParser()
        # ConfigParser.read silently skips unreadable files and returns the
        # list of files it did read; fail loudly instead of hitting a
        # confusing KeyError on the section lookup below.
        if not parser.read(config):
            raise FileNotFoundError(
                "Could not read config file: {}".format(config))
        config = parser["train-VAE"]

    catPath = config['catalogPath']
    labPath = config['labelsPath']
    outputDir = config['outputDirectory']
    sizeTestSet = int(config['sizeTestSet'])
    sizeValSet = int(config['sizeValidationSet'])
    roiFile = config['ROIFile']
    # Whitespace-separated list of band numbers; split() without an argument
    # tolerates repeated spaces, tabs and leading/trailing whitespace.
    bands = [int(i) for i in config['bands'].split()]
    sizeCutOut = int(config['sizeCutOut'])
    sizeStep = int(config['sizeStep'])
    stride = int(config['stride'])
    # DATA
    # balanceRatio = float(config['balanceRatio'])
    file_DMGinfo = config['tiledDamagePixelsCountFile']
    # normThreshold = float(config['normalizationThreshold'])
    # One or more whitespace-separated threshold values.
    normThreshold = [float(i) for i in config['normalizationThreshold'].split()]
    # MODEL
    filter1 = int(config['filter1'])
    filter2 = int(config['filter2'])
    kernelSize1 = int(config['kernelSize1'])
    kernelSize2 = int(config['kernelSize2'])
    denseSize = int(config['denseSize'])
    latentDim = int(config['latentDim'])
    # VAE
    # NOTE(review): alpha is hard-coded and not read from the config.
    alpha = 5
    batchSize = int(config['batchSize'])
    nEpochMax = int(config['nEpochData'])
    nEpochTrain = int(config['nEpochTrain'])
    learnRate = float(config['learningRate'])
#     validationSplit = float(config['validationSplit'])

    return (catPath, labPath, outputDir, sizeTestSet, sizeValSet, roiFile,
            bands, sizeCutOut, nEpochMax, nEpochTrain, sizeStep, stride, file_DMGinfo, normThreshold,
            filter1, filter2, kernelSize1,kernelSize2, denseSize, latentDim,
            alpha, batchSize,learnRate)

# Parse input arguments from the S1 training config file.
# config = config if config is not None else "train-vae.ini"
config = os.path.join(
    '/Users/tud500158/Library/Mobile Documents/com~apple~CloudDocs/Documents/Documents - TUD500158/github/AutomatedDamageDetection/scripts/train-vae/',
    'train-vae-S1.ini',
)
# Unpack in the exact order in which parse_config returns its values.
(
    catPath, labPath, outputDir, sizeTestSet, sizeValSet, roiFile, bands,
    sizeCutOut, nEpochmax, nEpochTrain, sizeStep, stride, file_DMGinfo, normThreshold,
    filter1, filter2, kernelSize1, kernelSize2, denseSize, latentDim,
    alpha, batchSize, learnRate,
) = parse_config(config)

In [25]:
# import os
learnRate

0.1

In [32]:
# Check how many threshold values were parsed, then overwrite the list with a
# single element (presumably to try out the single-threshold case).
len(normThreshold)
normThreshold = [5]

In [33]:
# Two-value unpack: raises ValueError when normThreshold holds a single
# element, as it does after being set to [5].
a,b=normThreshold

ValueError: not enough values to unpack (expected 2, got 1)

In [30]:
a
b

0.0

In [34]:
# Report the two normalisation bounds, each with one decimal.
print('Normalised to {:.1f}-{:.1f}'.format(a, b) )

Normalised to -15.0-0.0
