# Problem Introduction
The goal of this project was to become familiar with building neural networks of varying architectures using available Python libraries and to experiment with the networks’ success on a dataset. The dataset used is a modified version of the FairFace dataset (https://github.com/joojs/fairface), which contains 86,744 training images, 10,954 validation images, and the classifications for race, age, and gender. These images have been reduced to grayscale and resized to 32 x 32 pixels. The image inputs are normalized with Min-Max scaling, and the networks use Categorical Cross-Entropy as the loss function.

### Imports

In [8]:
#Tensor imports
import tensorflow as tf
from tensorflow import optimizers
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard

#Pillow Imports
from PIL import Image

#Import Pandas
import pandas as pd

#Import Numpy
import numpy as np

#Sci_Kit Imports
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix

#Import datetime
import datetime

### Load TensorBoard

In [9]:
# Load the TensorBoard notebook extension; %reload_ext re-initialises it when
# this cell is executed again in a kernel where it is already loaded.
%load_ext tensorboard
%reload_ext tensorboard
#log_folder = "logs"

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# Task 1 Introduction
Task 1 uses a fully connected feed-forward neural network with three hidden layers: 1024 neurons with hyperbolic tangent activation functions, 512 neurons with sigmoid activation functions, and 100 neurons with rectified linear activation functions. This network architecture will be used to attempt to classify the images into the age and race categories.

In [10]:
def createFeedFoward(inputShape, outputSize, lr):
    """Build and compile the fully connected classifier used in Task 1.

    inputShape: shape tuple handed to the first Dense layer.
    outputSize: number of units in the softmax output layer.
    lr: learning rate for the SGD optimizer.

    Returns the compiled Sequential model.
    """
    network = Sequential()
    # Only the first layer needs to be told the input shape
    network.add(layers.Dense(1024, input_shape=inputShape, activation='tanh'))
    remaining_layers = ((512, 'sigmoid'), (100, 'relu'), (outputSize, 'softmax'))
    for units, activation in remaining_layers:
        network.add(layers.Dense(units, activation=activation))
    network.compile(
        loss='CategoricalCrossentropy',
        optimizer=optimizers.SGD(learning_rate=lr),
        metrics=['accuracy'],
    )
    return network

In [11]:
def createXY(imgPath, labelFile, num):
    """Load the first `num` images and their one-hot age/race labels.

    Images are read from `imgPath` as `1.jpg` .. `<num>.jpg`; labels come from
    the `age` and `race` columns of the CSV at `labelFile` (first `num` rows).

    imgPath: directory prefix (including trailing separator) of the images.
    labelFile: path of the label CSV.
    num: number of images / label rows to load.

    Returns (X, y_age, y_race, age_labels, race_labels) where X is the
    Min-Max scaled pixel matrix, y_* are the binarized label arrays and
    *_labels list the class names in the same order as the columns of y_*.
    """
    #Create and normalize X
    X = []
    for i in range(num):
        fileName = imgPath + str(i+1) + '.jpg'
        # Context manager releases the file handle once the pixels are read
        with Image.open(fileName) as img:
            X.append(list(img.getdata()))
    # NOTE(review): the scaler is fitted on whatever is loaded in this call, so
    # a separately loaded validation set is scaled with its own min/max rather
    # than the training set's -- confirm this is intended.
    X = MinMaxScaler().fit_transform(X)

    #Get labels
    label_df = pd.read_csv(labelFile)

    #Create Binary y arrays (fitted on every label present in the file)
    lb_age = LabelBinarizer(sparse_output=False)
    lb_race = LabelBinarizer(sparse_output=False)
    lb_age.fit(label_df['age'].unique())
    lb_race.fit(label_df['race'].unique())
    y_age = lb_age.transform(list(label_df['age'].head(num)))
    y_race = lb_race.transform(list(label_df['race'].head(num)))

    # classes_ is the binarizer's own column order; the raw unique() order
    # (order of appearance) does not match the one-hot columns.
    return X, y_age, y_race, lb_age.classes_, lb_race.classes_

In [12]:
def printResults(predictions, labels, trueLabels):
    """Print per-class prediction percentages for every sample.

    predictions: sequence of per-sample score rows (one score per label).
    labels: class names, in the same order as the scores in each row.
    trueLabels: the true label of each sample, indexed like `predictions`.

    The first four printed lines are the sizes of the inputs (number of
    samples, scores per sample, number of labels, number of true labels).
    """
    print(len(predictions))
    # Use the first row (not the second) so a single prediction works;
    # report 0 when there are no predictions at all.
    print(len(predictions[0]) if len(predictions) else 0)
    print(len(labels))
    print(len(trueLabels))
    for i, scores in enumerate(predictions):
        # format() instead of "+" so non-string labels do not raise TypeError
        print("Label: {}".format(trueLabels[i]))
        for j, label in enumerate(labels):
            print("{:12}: {:10.2f}%".format(label, (scores[j] * 100)))

In [13]:
def getMax(values):
    """Return a list holding the argmax index of every row in `values`."""
    return list(map(np.argmax, values))

In [14]:
def createCallbacks(log_folder):
    """Return the callback list (a single TensorBoard logger) used by fit().

    log_folder: directory the TensorBoard callback writes its logs to.
    """
    tensorboard_options = {
        'histogram_freq': 1,
        'write_graph': True,
        'write_images': True,
        'update_freq': 'epoch',
        'profile_batch': 2,
        'embeddings_freq': 1,
    }
    return [TensorBoard(log_dir=log_folder, **tensorboard_options)]

In [15]:
# Training split: images plus age/race targets and the label-name arrays
X_train, y_age_train, y_race_train, age_labels, race_labels = createXY(
    imgPath='project3_COSC525/train/',
    labelFile='project3_COSC525/fairface_label_train.csv',
    num=86744,
)
# Validation split: the label-name arrays are discarded
X_test, y_age_test, y_race_test, _ , _ = createXY(
    imgPath='project3_COSC525/val/',
    labelFile='project3_COSC525/fairface_label_val.csv',
    num=10954,
)

In [16]:
def taskOne(X_train, y_train, X_test, y_test, lr, numEpochs, batchSize, log_folder):
    """Train the Task 1 feed-forward network and print its evaluation.

    X_train / y_train: training inputs and one-hot targets.
    X_test / y_test: validation inputs and one-hot targets.
    lr, numEpochs, batchSize: SGD learning rate, epoch count and batch size.
    log_folder: TensorBoard log directory.

    Prints the accuracy and the confusion matrix on the validation data.
    """
    numClasses = len(y_train[0])
    network = createFeedFoward((1024,), numClasses, lr)
    network.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=numEpochs,
        batch_size=batchSize,
        callbacks=createCallbacks(log_folder),
    )

    # Compare class indices rather than one-hot / probability rows
    expected = getMax(y_test)
    predicted = getMax(network.predict(X_test))
    accuracy = tf.keras.metrics.Accuracy()
    accuracy.update_state(expected, predicted)
    print('Accuracy: ', accuracy.result().numpy())
    print(confusion_matrix(expected, predicted))

In [18]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskOne(X_train, y_age_train, X_test, y_age_test, 0.005, 150, 100, log_folder)
%tensorboard --logdir logs --port=6020

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

# Task 1 Results (Age)
The fully connected network used for classifying age was trained over 150 epochs with a learning rate of 0.005 and a batch size of 100. The overall accuracy of the classifier was 37.92%, which, while not great, is to be expected from a shallow network with relatively small images. The epoch-loss graph shows that the training loss was still falling but the validation loss was starting to taper off. This indicates that further training would not improve the overall performance of the network. The confusion matrix for this network shows that it never predicted any person as being over the age of 70. This could indicate an imbalance in the dataset used.
### Epoch-Loss Graph:
![Epoch-Loss Graph](graphs/task1_age_epoch_loss.PNG)
### Epoch-Accuracy Graph:
![Epoch-Accuracy Graph](graphs/task1_age_epoch_accuracy.PNG)
    

In [20]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskOne(X_train, y_race_train, X_test, y_race_test, 0.001, 200, 100, log_folder)
%tensorboard --logdir logs --port=6021

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

# Task 1 Results (Race)
The fully connected network used for classifying race was trained over 200 epochs with a learning rate of 0.001 and a batch size of 100. The learning rate was decreased and the number of epochs increased in an attempt to get a higher accuracy out of the network. However, the network’s overall accuracy was 38.24%. Increasing the number of epochs even further could possibly increase the accuracy, since the accuracy and loss graphs had not started to taper off, but it is doubtful that the accuracy would improve significantly with this network.
### Epoch-Loss Graph:
![Epoch-Loss Graph](graphs/task1_race_epoch_loss.PNG)
### Epoch-Accuracy Graph:
![Epoch-Accuracy Graph](graphs/task1_race_epoch_accuracy.PNG)

# Task 2 Introduction
Task 2 uses a small convolutional network composed of three hidden layers: a convolution layer using rectified linear activation functions with 40 feature detectors and a 5x5 kernel size, a max pooling layer with a 2x2 pooling size, and a fully connected layer with 100 neurons using rectified linear activation functions. This network architecture will be used to attempt to classify the images into the age and race categories.

In [21]:
# Setup CNN network model
def createSmallCNN(inputShape, outputSize, lr):
    """Build and compile the small convolutional classifier used in Task 2.

    inputShape: shape tuple handed to the convolution layer.
    outputSize: number of units in the softmax output layer.
    lr: learning rate for the SGD optimizer.

    Returns the compiled Sequential model.
    """
    stack = [
        layers.Conv2D(40, (5, 5), activation='relu', input_shape=inputShape),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dense(outputSize, activation='softmax'),
    ]
    network = Sequential(stack)
    network.compile(
        loss='CategoricalCrossentropy',
        optimizer=optimizers.SGD(learning_rate=lr),
        metrics=['accuracy'],
    )
    return network

In [22]:
# Get normalized data
#X_train, y_age_train, y_race_train, age_labels, race_labels = createXY('project3_COSC525/train/', 'project3_COSC525/fairface_label_train.csv', 10000)
#X_test, y_age_test, y_race_test, _ , _ = createXY('project3_COSC525/val/', 'project3_COSC525/fairface_label_val.csv', 1000)

In [23]:
# Perform task two; train and evaluate CNN for given train/test data
def taskTwo(X_train, y_train, X_test, y_test, lr, numEpochs, batchSize, log_folder):
    """Train the Task 2 CNN and print its accuracy and confusion matrix.

    X_train / X_test: flattened 32x32 images, one row per image.
    y_train / y_test: one-hot targets.
    lr, numEpochs, batchSize: SGD learning rate, epoch count and batch size.
    log_folder: TensorBoard log directory.
    """
    model = createSmallCNN((32, 32, 1), len(y_train[0]), lr)
    callbacks = createCallbacks(log_folder)

    # Reshape once, with the explicit channel axis the model's input shape
    # (32, 32, 1) declares, instead of relying on implicit rank adjustment.
    train_images = np.reshape(X_train, (-1, 32, 32, 1))
    test_images = np.reshape(X_test, (-1, 32, 32, 1))

    model.fit(train_images, y_train, validation_data=(test_images, y_test), epochs=numEpochs, batch_size=batchSize, callbacks=callbacks)

    y_true = getMax(y_test)
    y_pred = getMax(model.predict(test_images))
    accuracy = tf.keras.metrics.Accuracy()  # local name avoids shadowing eval()
    accuracy.update_state(y_true, y_pred)
    print('Accuracy: ', accuracy.result().numpy())
    c_matrix = confusion_matrix(y_true, y_pred)
    print(c_matrix)

In [None]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskTwo(X_train, y_age_train, X_test, y_age_test, 0.003, 200, 100, log_folder)
%tensorboard --logdir logs --port=6022

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskTwo(X_train, y_race_train, X_test, y_race_test, 0.003, 120, 100, log_folder)
%tensorboard --logdir logs --port=6023

## Task 3: Your own Convolutional Neural Network
1. Build another convolutional neural network, where you choose all the parameters to see if you can get a higher accuracy.
2. Using Min-Max scaling to scale the training dataset and using the same Min and Max values from the training set scale the test dataset: $X_{scaled} = \frac{X - X_{min}}{X_{max} - X_{min}}$.
3. Using mini-batch gradient descent to optimize the loss function: “categorical cross-entropy” on the training dataset. Please record the loss value for each of the epochs and create an epoch-loss plot and an accuracy-loss plot for both the training and validation set.
4. Report the following:
    - Final classification accuracy.
    - The n-class confusion matrix

In [None]:
# Setup CNN network model
def createOwnCNN(inputShape, outputSize, lr):
    """Build and compile the two-convolution CNN used in Task 3.

    inputShape: shape tuple handed to the first convolution layer.
    outputSize: number of units in the softmax output layer.
    lr: learning rate for the SGD optimizer.

    Returns the compiled Sequential model.
    """
    stack = [
        # Two convolution + max-pooling stages
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=inputShape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        # Classifier head with dropout before the output layer
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(outputSize, activation='softmax'),
    ]
    network = Sequential(stack)
    network.compile(
        loss='CategoricalCrossentropy',
        optimizer=optimizers.SGD(learning_rate=lr),
        metrics=['accuracy'],
    )
    return network

In [None]:
# Get normalized data
#X_train, y_age_train, y_race_train, age_labels, race_labels = createXY('project3_COSC525/train/', 'project3_COSC525/fairface_label_train.csv', 10000)
#X_test, y_age_test, y_race_test, _ , _ = createXY('project3_COSC525/val/', 'project3_COSC525/fairface_label_val.csv', 1000)

In [None]:
# Perform task three; train and evaluate CNN for given train/test data
def taskThree(X_train, y_train, X_test, y_test, lr, numEpochs, batchSize, log_folder):
    """Train the Task 3 CNN and print its accuracy and confusion matrix.

    X_train / X_test: flattened 32x32 images, one row per image.
    y_train / y_test: one-hot targets.
    lr, numEpochs, batchSize: SGD learning rate, epoch count and batch size.
    log_folder: TensorBoard log directory.
    """
    model = createOwnCNN((32, 32, 1), len(y_train[0]), lr)
    callbacks = createCallbacks(log_folder)

    # Reshape once, with the explicit channel axis the model's input shape
    # (32, 32, 1) declares, instead of relying on implicit rank adjustment.
    train_images = np.reshape(X_train, (-1, 32, 32, 1))
    test_images = np.reshape(X_test, (-1, 32, 32, 1))

    model.fit(train_images, y_train, validation_data=(test_images, y_test), epochs=numEpochs, batch_size=batchSize, callbacks=callbacks)

    y_true = getMax(y_test)
    y_pred = getMax(model.predict(test_images))
    accuracy = tf.keras.metrics.Accuracy()  # local name avoids shadowing eval()
    accuracy.update_state(y_true, y_pred)
    print('Accuracy: ', accuracy.result().numpy())
    c_matrix = confusion_matrix(y_true, y_pred)
    print(c_matrix)


In [None]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskThree(X_train, y_age_train, X_test, y_age_test, 0.001, 120, 100, log_folder)
%tensorboard --logdir logs --port=6024

In [None]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskThree(X_train, y_race_train, X_test, y_race_test, 0.001, 120, 100, log_folder
%tensorboard --logdir logs --port=6025

## Task 4: Your own Convolutional Neural Network on both Tasks Simultaneously
1. Build another convolutional neural network, where you try and classify both tasks with a single network. After your flatten layer have two more fully connected layers for each “branch”. Note that in order to do so you will not be able to use the Sequential model.
2. Using Min-Max scaling to scale the training dataset and using the same Min and Max values from the training set scale the test dataset: $X_{scaled} = \frac{X - X_{min}}{X_{max} - X_{min}}$.
3. Using mini-batch gradient descent to optimize the loss function: “categorical cross-entropy” on the training dataset. Please record the loss value for each of the epochs and create an epoch-loss plot and an accuracy-loss plot for both the training and validation set.
4. Report the following:
    - Final classification accuracy.
    - The n-class confusion matrix

In [None]:
# Setup CNN network model
def createOwnCNNTwoTasks(inputShape, outputSizes, lr):
    """Build and compile a CNN with a shared trunk and two softmax branches.

    inputShape: shape tuple of the model input.
    outputSizes: pair giving the softmax width of branch 1 and branch 2.
    lr: learning rate for the SGD optimizer.

    Returns the compiled model. Its single output is the concatenation of
    the two branch outputs, so the compiled loss and 'accuracy' metric are
    computed over that concatenated vector.
    """
    image_in = layers.Input(shape = inputShape)

    # Shared convolutional trunk
    trunk = layers.Conv2D(32, (3, 3), activation='relu', name='conv1')(image_in)
    trunk = layers.MaxPooling2D((2, 2), name='max1')(trunk)
    trunk = layers.Conv2D(64, (3, 3), activation='relu', name='conv2')(trunk)
    trunk = layers.MaxPooling2D((2, 2), name='max2')(trunk)
    trunk = layers.Flatten()(trunk)

    # First branch: hidden layer followed by its own softmax
    branch_one = layers.Dense(100, activation='relu', name='fc11')(trunk)
    branch_one = layers.Dense(outputSizes[0], activation='softmax', name='fc12')(branch_one)

    # Second branch: same layout, fed from the same flattened features
    branch_two = layers.Dense(100, activation='relu', name='fc21')(trunk)
    branch_two = layers.Dense(outputSizes[1], activation='softmax', name='fc22')(branch_two)

    # Join both softmax vectors into one output tensor
    joined = layers.concatenate([branch_one, branch_two])
    network = keras.Model(inputs=image_in, outputs=joined)

    network.compile(
        loss='CategoricalCrossentropy',
        optimizer=optimizers.SGD(learning_rate=lr),
        metrics=['accuracy'],
    )
    return network

In [None]:
# Get normalized data
#X_train, y_age_train, y_race_train, age_labels, race_labels = createXY('project3_COSC525/train/', 'project3_COSC525/fairface_label_train.csv', 10000)
#X_test, y_age_test, y_race_test, _ , _ = createXY('project3_COSC525/val/', 'project3_COSC525/fairface_label_val.csv', 1000)

In [None]:
# Perform task four; train and evaluate CNN for given train/test data
def taskFour(X_train, y_trains, X_test, y_tests, lr, numEpochs, batchSize, log_folder):
    """Train the two-branch CNN and print per-task accuracy and confusion matrix.

    X_train / X_test: flattened 32x32 images, one row per image.
    y_trains / y_tests: pairs of one-hot target arrays, one per task, in the
        same order as the model's branches.
    lr, numEpochs, batchSize: SGD learning rate, epoch count and batch size.
    log_folder: TensorBoard log directory.
    """
    callbacks = createCallbacks(log_folder)
    outputSizes = (len(y_trains[0][0]), len(y_trains[1][0]))
    model = createOwnCNNTwoTasks((32, 32, 1), outputSizes, lr)

    # Explicit channel axis to match the model's declared (32, 32, 1) input
    train_images = np.reshape(X_train, (-1, 32, 32, 1))
    test_images = np.reshape(X_test, (-1, 32, 32, 1))

    # The model emits both branches concatenated, so the targets are too
    y_train_joint = np.concatenate((y_trains[0], y_trains[1]), axis=1)
    y_test_joint = np.concatenate((y_tests[0], y_tests[1]), axis=1)

    model.fit(train_images, y_train_joint, validation_data=(test_images, y_test_joint), epochs=numEpochs, batch_size=batchSize, callbacks=callbacks)

    predictions = model.predict(test_images)

    # Evaluate each task on its own slice of the concatenated output. Taking a
    # single argmax over the whole vector would mix the two tasks' classes and
    # always pick the first task's index as the "true" label.
    start = 0
    for task_number, (y_test, size) in enumerate(zip(y_tests, outputSizes), start=1):
        stop = start + size
        y_true = getMax(y_test)
        y_pred = getMax(predictions[:, start:stop])
        accuracy = tf.keras.metrics.Accuracy()
        accuracy.update_state(y_true, y_pred)
        print('Task', task_number, 'accuracy: ', accuracy.result().numpy())
        print(confusion_matrix(y_true, y_pred))
        start = stop

In [None]:
log_folder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
taskFour(X_train, (y_age_train, y_race_train), X_test, (y_age_test, y_race_test), 0.001, 120, 100)
%tensorboard --logdir logs --port=6026

## Task 5: Variational Auto Encoder (COSC 525 only)
1. Build a variational autoencoder with the following specifications (in this one you have a little more flexibility):
    - Should have at least two convolution layers in the encoder and 2 deconvolution layers in the decoder.
    - Latent dimension should be at least 5.
    - Loss should be either MSE or binary cross entropy.
2. Using Min-Max scaling to scale the training dataset and using the same Min and Max values from the training set scale the test dataset: $X_{scaled} = \frac{X - X_{min}}{X_{max} - X_{min}}$.
3. Using mini-batch gradient descent to optimize the loss function on the training dataset. Please record the loss value for each of the epochs and create an epoch-loss plot and an accuracy-loss plot for both the training and validation set.
4. Qualitatively evaluate your model by generating a set of faces by randomly choosing 10 latent vectors and presenting the resulting images

VAE code adapted from https://keras.io/examples/generative/vae/

In [None]:
# Sampling layer
class Sampling(layers.Layer):
    """Draws a latent vector z from the (z_mean, z_log_var) pair it is given."""

    def call(self, inputs):
        """Return z_mean + exp(0.5 * z_log_var) * epsilon, epsilon ~ N(0, 1)."""
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        # One noise value per sample and latent dimension
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * noise

In [None]:
# Encoder model: maps a 32x32x1 image to (z_mean, z_log_var, sampled z)
latent_dim = 15

encoder_inputs = keras.Input(shape=(32, 32, 1))
x = encoder_inputs
# Four stride-2 "same"-padded convolutions
for n_filters in (32, 64, 64, 64):
    x = layers.Conv2D(n_filters, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two parallel heads parameterise the latent distribution
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()

In [None]:
# Decoder model: maps a latent vector back to a single-channel image
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(2 * 2 * 64, activation="relu")(latent_inputs)
x = layers.Reshape((2, 2, 64))(x)
# Four stride-2 transposed convolutions
for n_filters in (64, 64, 64, 32):
    x = layers.Conv2DTranspose(n_filters, 3, activation="relu", strides=2, padding="same")(x)
# Final layer collapses to one channel with sigmoid activation
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()

In [None]:
# Variational AutoEncoder model class
class VAE(keras.Model):
    """Variational autoencoder that trains an encoder/decoder pair jointly.

    The loss minimised in `train_step` is the reconstruction loss (squared
    error summed over the image, averaged over the batch) plus the KL term
    computed from the encoder's `z_mean` and `z_log_var`.
    """

    def __init__(self, encoder, decoder, **kwargs):
        """Store the two sub-models and create the running-mean loss trackers.

        encoder: model returning (z_mean, z_log_var, z) for a batch of images.
        decoder: model mapping z back to images.
        """
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Return the three loss trackers as this model's metrics."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on a batch and return the tracked losses."""
        # fit() may hand over a tuple such as (x,) or (x, y); only the images
        # are used for this unsupervised objective.
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # Sum the squared error over both image axes, then average over
            # the batch.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.mean_squared_error(data, reconstruction), axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [None]:
# Create and train VAE on all face images
# createXY takes (imgPath, labelFile, num); the stray 'age' argument made
# these calls raise TypeError. Only the scaled images are kept.
X_train, _, _, _, _ = createXY('project3_COSC525/train/', 'project3_COSC525/fairface_label_train.csv', 10000)
X_test, _, _ , _, _ = createXY('project3_COSC525/val/', 'project3_COSC525/fairface_label_val.csv', 1000)
all_faces = np.concatenate([X_train, X_test], axis=0)

vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.SGD(learning_rate=0.0005))
# -1 lets numpy infer the sample count instead of hard-coding 11000
vae.fit(np.reshape(all_faces, (-1, 32, 32, 1)), epochs=10000, batch_size=128, verbose=0)

In [None]:
# Test generating some (non-random) faces
# Encode only the first ten faces: reshaping the whole array into
# (10, 32, 32, 1) raises ValueError because the element counts differ.
sample_faces = np.reshape(all_faces[:10], (-1, 32, 32, 1))
preds = vae.encoder.predict(sample_faces)[0]  # [0] selects z_mean
# gen = vae.decoder.predict(np.reshape(np.mean(preds, axis=0), (1, latent_dim)))[0]
# Decode the latent mean of the fifth face (index 4, i.e. train/5.jpg)
gen = vae.decoder.predict(np.reshape(preds[4], (1, latent_dim)))[0]
gen = np.reshape(gen, (32, 32))
im = Image.fromarray(np.uint8(gen * 255), 'L')
im.save("test.png")
# Save the matching original next to the reconstruction for comparison
with Image.open("project3_COSC525/train/5.jpg") as comp_im:
    comp_im.save("test_comp.png")