VAE - Pawsitive Aging
==============
https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73

https://keras.io/examples/generative/vae/

https://www.cs.toronto.edu/~hinton/absps/science.pdf

https://www.youtube.com/watch?v=iwEzwTTalbg

Reparameterization Trick
https://www.youtube.com/watch?v=vy8q-WnHa9A

VAE is an architecture consisting of an encoder-->latent space-->decoder
- a VAE takes in an image, compresses it to a latent space then decodes the data to a similar image, the main difference between a VAE and a regular autoencoder is that we sample the latent space(z) from a Gaussian distribution, there is a issue with sampling. That is sampling is not continuous (there is no derivative - backpropagation will not compute an estimate of the derivative)
- therefore we use Reparameterization trick: z ~ N(mu, sigma): mu = mean, sigma = standard deviation


Library Includes and Function Definitions
=============

In [200]:
# library includes
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split

In [15]:
print(keras.version())

# int_shape in keras.backend was deprecated - using self defined function
def int_shape(x):
    try:
        shape = x.shape
        if not isinstance(shape, tuple):
            shape = tuple(shape.as_list())
        return shape
    except ValueError:
        return None
    
# random_normal was deprecated    
def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
    if dtype is None:
        dtype = keras.backend.floatx()
    if seed is None:
        seed = np.random.randint(10e6)
    return tf.random.normal(
        shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed
    )    

3.0.5


Data Processing and Loading
=========================

In [3]:
# data loading from CSV
data_path = "./data_sources/"
processedDogs_path = "./data_sources/images/processed_dogs/"
data_csv_path = "./data_sources/dog_data.csv"

dogCSV = pd.read_csv(data_csv_path)

dogImagesPaths = []
for fileDirectory in dogCSV["fileDirectory"]:
    dogImagesPaths.append(processedDogs_path + fileDirectory)

dogCSV["fullFilePath"] = dogImagesPaths

# removing all the rows that are labeled other than "middle-aged", "old", "young"
properLabels = ["old", "young", "middle-aged"]
dogCSV = dogCSV[dogCSV["label"].isin(properLabels)]
dogCSV

Unnamed: 0,index,fileDirectory,label,fullFilePath
0,0,dog000000.jpg,middle-aged,./data_sources/images/processed_dogs/dog000000...
1,1,dog000001.jpg,middle-aged,./data_sources/images/processed_dogs/dog000001...
2,2,dog000002.jpg,middle-aged,./data_sources/images/processed_dogs/dog000002...
3,3,dog000003.jpg,old,./data_sources/images/processed_dogs/dog000003...
4,4,dog000004.jpg,middle-aged,./data_sources/images/processed_dogs/dog000004...
...,...,...,...,...
5570,5570,dog005570.jpg,young,./data_sources/images/processed_dogs/dog005570...
5571,5571,dog005571.jpg,middle-aged,./data_sources/images/processed_dogs/dog005571...
5572,5572,dog005572.jpg,middle-aged,./data_sources/images/processed_dogs/dog005572...
5573,5573,dog005573.jpg,young,./data_sources/images/processed_dogs/dog005573...


In [4]:
# data preprocessing
loaded_images = []
r_data = []
g_data = []
b_data = []

for imageFilePath in dogCSV["fullFilePath"]:
    img = load_img(imageFilePath, target_size=(150, 150), color_mode='rgb')
    img_array = img_to_array(img) / 255.0
    imageData = np.array(img_array)
    loaded_images.append(imageData)
    r_data.append(np.array(imageData[:, :, 0]).flatten())
    g_data.append(np.array(imageData[:, :, 1]).flatten())
    b_data.append(np.array(imageData[:, :, 2]).flatten())    
    
dogCSV["RGB_data"] = loaded_images
dogCSV["R_data"] = r_data
dogCSV["G_data"] = g_data
dogCSV["B_data"] = b_data

dogCSV

Unnamed: 0,index,fileDirectory,label,fullFilePath,RGB_data,R_data,G_data,B_data
0,0,dog000000.jpg,middle-aged,./data_sources/images/processed_dogs/dog000000...,"[[[0.49411765, 0.43137255, 0.43529412], [0.501...","[0.49411765, 0.5019608, 0.39607844, 0.43137255...","[0.43137255, 0.4392157, 0.33333334, 0.36862746...","[0.43529412, 0.44313726, 0.3372549, 0.37254903..."
1,1,dog000001.jpg,middle-aged,./data_sources/images/processed_dogs/dog000001...,"[[[0.28235295, 0.39215687, 0.4862745], [0.3098...","[0.28235295, 0.30980393, 0.30980393, 0.2784314...","[0.39215687, 0.41960785, 0.41960785, 0.3882353...","[0.4862745, 0.5137255, 0.5137255, 0.48235294, ..."
2,2,dog000002.jpg,middle-aged,./data_sources/images/processed_dogs/dog000002...,"[[[0.32156864, 0.078431375, 0.13725491], [0.32...","[0.32156864, 0.32156864, 0.32156864, 0.3098039...","[0.078431375, 0.078431375, 0.07058824, 0.05882...","[0.13725491, 0.13725491, 0.13333334, 0.1215686..."
3,3,dog000003.jpg,old,./data_sources/images/processed_dogs/dog000003...,"[[[1.0, 0.99607843, 1.0], [0.9529412, 0.933333...","[1.0, 0.9529412, 1.0, 0.9490196, 0.9843137, 1....","[0.99607843, 0.93333334, 0.99215686, 0.9137255...","[1.0, 0.9490196, 1.0, 0.93333334, 0.95686275, ..."
4,4,dog000004.jpg,middle-aged,./data_sources/images/processed_dogs/dog000004...,"[[[0.011764706, 0.011764706, 0.011764706], [0....","[0.011764706, 0.011764706, 0.011764706, 0.0078...","[0.011764706, 0.011764706, 0.011764706, 0.0078...","[0.011764706, 0.011764706, 0.011764706, 0.0078..."
...,...,...,...,...,...,...,...,...
5570,5570,dog005570.jpg,young,./data_sources/images/processed_dogs/dog005570...,"[[[0.39607844, 0.40392157, 0.4], [0.39607844, ...","[0.39607844, 0.39607844, 0.4, 0.4, 0.40392157,...","[0.40392157, 0.40392157, 0.40784314, 0.4078431...","[0.4, 0.4, 0.40392157, 0.40392157, 0.40784314,..."
5571,5571,dog005571.jpg,middle-aged,./data_sources/images/processed_dogs/dog005571...,"[[[0.023529412, 0.023529412, 0.023529412], [0....","[0.023529412, 0.023529412, 0.023529412, 0.0235...","[0.023529412, 0.023529412, 0.023529412, 0.0235...","[0.023529412, 0.023529412, 0.023529412, 0.0235..."
5572,5572,dog005572.jpg,middle-aged,./data_sources/images/processed_dogs/dog005572...,"[[[0.6117647, 0.39607844, 0.1764706], [0.63137...","[0.6117647, 0.6313726, 0.72156864, 0.43529412,...","[0.39607844, 0.40392157, 0.50980395, 0.3058823...","[0.1764706, 0.21568628, 0.36078432, 0.18431373..."
5573,5573,dog005573.jpg,young,./data_sources/images/processed_dogs/dog005573...,"[[[0.007843138, 0.007843138, 0.007843138], [0....","[0.007843138, 0.003921569, 0.003921569, 0.0039...","[0.007843138, 0.003921569, 0.003921569, 0.0039...","[0.007843138, 0.003921569, 0.003921569, 0.0039..."


Defining the VAE 
=================

In [178]:
# Initializing parameters for the VAE neural network
input_shape = (150, 150, 3)
batch_size = 32
latent_dimension = 10

# Adapted from https://medium.com/@judyyes10/generate-images-using-variational-autoencoder-vae-4d429d9bdb5 and ENSC 413 provided VAE code
class SamplingLayer(keras.layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = z_mean.shape[0]
        epsilon = keras.random.normal(shape = (1, latent_dimension), mean=0.0, stddev=1.0)
        # print(epsilon)
        
        return z_mean + keras.ops.exp(0.5 * z_log_var) * epsilon

In [186]:
# Encoder Definition
encoder_input = keras.layers.Input(input_shape)
encoder = keras.layers.Conv2D(filters=180, kernel_size=5, padding="same", activation="relu")(encoder_input)
encoder = keras.layers.Conv2D(filters=256, kernel_size=5, strides=(2, 2), padding="same", activation="relu")(encoder)
encoder = keras.layers.Conv2D(filters=256, kernel_size=5, strides=(3, 3), padding="same", activation="relu")(encoder)
encoder = keras.layers.Conv2D(filters=256, kernel_size=3, padding="same", activation="relu")(encoder)

encoderOutputShape = encoder.shape
print("Encoder output shape: {}".format(encoderOutputShape))

encoder = keras.layers.Flatten()(encoder)
encoder = keras.layers.Dense(180, activation="relu")(encoder)

Encoder output shape: (None, 25, 25, 256)


In [189]:
# latent vector (both z_mean and the log_variance is determined by the input conv2D layers)
z_mean = keras.layers.Dense(latent_dimension, name="z_mean")(encoder)
z_log_var = keras.layers.Dense(latent_dimension, name="z_log_var")(encoder)

print("Shape of z_mean: {}".format(z_mean.shape))
print("Shape of z_log_var: {}".format(z_log_var.shape))

# sampling from latent vector
z = SamplingLayer()([z_mean, z_log_var])

print("Shape of z: {}\n\n".format(z.shape))

encoderModel = keras.models.Model(encoder_input, [z_mean, z_log_var, z], name="encoder")
encoderModel.summary()

Shape of z_mean: (None, 10)
Shape of z_log_var: (None, 10)
Shape of z: (None, 10)




In [188]:
# Decoder Definition
decoder_input = keras.layers.Input(z.shape[1:])
decoder = keras.layers.Dense(np.prod(encoderOutputShape[1:]), activation="relu")(decoder_input)
decoder = keras.layers.Reshape(encoderOutputShape[1:])(decoder)
decoder = keras.layers.Conv2DTranspose(filters = 256, kernel_size= 3, padding="same", activation="relu")(decoder)
decoder = keras.layers.Conv2DTranspose(filters = 256, kernel_size= 5, padding="same", activation="relu", strides=(3, 3))(decoder)
decoder = keras.layers.Conv2DTranspose(filters = 180, kernel_size= 5, padding="same", activation="relu", strides=(2, 2))(decoder)
decoder = keras.layers.Conv2DTranspose(filters = 3, kernel_size=3, padding="same", activation="sigmoid")(decoder)

decoderModel = keras.models.Model(decoder_input, decoder, name="decoder")
decoderModel.summary()

In [194]:
# Adapted from then ENSC413 sample VAE code https://keras.io/examples/generative/vae/

# class OutputVariationalLayer(keras.layers.Layer):
#     def vae_loss(self, actual_Image, predicted_Image):
#         crossEntropyLoss = keras.losses.binary_crossentropy(actual_Image, predicted_Image)
#         kullback_Leibler_Loss = -5e-4 * keras.ops.mean(1 + z_log_var - keras.ops.square(z_mean) - keras.ops.exp(z_log_var), axis=-1)
#         print("Cross Entropy Loss: {}".format(crossEntropyLoss))
#         print("Kullback_Leibler_Loss: {}".format(kullback_Leibler_Loss))
#         return keras.ops.mean(crossEntropyLoss + kullback_Leibler_Loss)

#     def call(self, inputs):
#         trueImage = inputs[0]
#         predictedImage = inputs[1]
#         loss = self.vae_loss(trueImage, predictedImage)
#         print("LOSS: {}".format(loss))
#         self.add_loss(loss)
#         # self.add_loss(loss, inputs=inputs)
#         # We don't use this output.
#         return trueImage

class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        
    def metrics(self):
        return [self.total_loss_tracker, self.reconstruction_loss_tracker, self.kl_loss_tracker]
    
    def train_step(self, data):
        with tf.GradientTape as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = keras.reduce_mean(tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis=(1,2)))
            kl_loss = -0.5 * (1 + z_log_var - keras.ops.square(z_mean) - keras.ops.exp(z_log_var))
            kl_loss = keras.reduce_mean(keras.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
            gradients = tape.gradient(total_loss, self.trainable_weights)
            self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))
            self.total_loss_tracker.update_state(total_loss)
            self.reconstruction_loss_tracker.update_state(reconstruction_loss)
            self.kl_loss_tracker.update_state(kl_loss)
            return {"loss":self.total_loss_tracker.result(), "reconstruction_loss":self.reconstruction_loss_tracker.result(), "kl_loss":self.kl_loss_tracker.result()} 

In [224]:
# splitting the test set and training set (25% test set, 75% training set)
imagesData = dogCSV["RGB_data"]
imageLabels = dogCSV["label"]

X_train, X_test, y_train, y_test = train_test_split(imagesData, imageLabels, test_size=0.25, random_state=42)

trainingSet_images = []
trainingSet_labels = []

for image, label in zip(X_train, y_train):
    

In [208]:
# instatiating a VAE
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(X_train, epochs=30, batch_size=256)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

NameError: name 'vaeOutput' is not defined

In [212]:
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255

len(mnist_digits)

70000

In [229]:
test = np.array(X_train)


ValueError: cannot reshape array of size 4089 into shape (4089,150,150,3)

In [230]:
y_train

456           young
5068          young
3576    middle-aged
3501    middle-aged
3872    middle-aged
           ...     
3808          young
5296          young
5338            old
5512            old
865     middle-aged
Name: label, Length: 4089, dtype: object

In [231]:
X_train

456     [[[0.62352943, 0.6039216, 0.5803922], [0.63529...
5068    [[[0.92941177, 0.92941177, 0.92941177], [0.929...
3576    [[[0.8392157, 0.77254903, 0.67058825], [0.8392...
3501    [[[0.3372549, 0.32156864, 0.2784314], [0.30196...
3872    [[[0.50980395, 0.54901963, 0.58431375], [0.490...
                              ...                        
3808    [[[0.27058825, 0.13725491, 0.13333334], [0.286...
5296    [[[0.54509807, 0.5882353, 0.3529412], [0.53725...
5338    [[[0.38039216, 0.27450982, 0.15686275], [0.439...
5512    [[[0.07450981, 0.15686275, 0.13725491], [0.074...
865     [[[0.7019608, 0.7411765, 0.7764706], [0.705882...
Name: RGB_data, Length: 4089, dtype: object