## Check the GPU

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
import timeit

# Ask TensorFlow for the name of a GPU device and abort the cell early if it is
# not the expected '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')

def cpu():
  """Convolve a random tensor under the '/cpu:0' device scope.

  Draws a (100, 100, 100, 3) tensor from a normal distribution, applies a
  freshly constructed Conv2D layer (32 filters, kernel size 7) to it and
  returns the sum of all output elements.
  """
  with tf.device('/cpu:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv_layer = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv_layer(images))

def gpu():
  """Convolve a random tensor under the '/device:GPU:0' device scope.

  Draws a (100, 100, 100, 3) tensor from a normal distribution, applies a
  freshly constructed Conv2D layer (32 filters, kernel size 7) to it and
  returns the sum of all output elements.
  """
  with tf.device('/device:GPU:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv_layer = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv_layer(images))
  
# Call each function once before timing (presumably a warm-up so that one-time
# initialization cost is not included in the measurements below).
cpu()
gpu()

# Run the op several times.
# Each timeit call executes its statement 10 times and returns the total
# elapsed time in seconds; the setup string imports the function from __main__.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
# The ratio is truncated to an integer by int() before being printed.
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

# Load the dataset

In [None]:
from google.colab import files
# Upload your kaggle.json file with your username and your Kaggle API token.
# files.upload() returns the uploaded files' contents. The trailing semicolon
# stops the notebook from echoing that return value (which would include the
# API token) into the saved cell output.
files.upload();

In [None]:
# Confirm that kaggle.json is present in the current working directory.
!ls -lha kaggle.json
# Next, install the Kaggle API client (-q keeps pip's output quiet).
!pip install -q kaggle
# The Kaggle API client expects this file to be in ~/.kaggle,
# so create that directory if needed and copy the file there.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the file to owner read/write; this permissions change avoids a
# warning on Kaggle tool startup.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Switch the working directory to the project folder on Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive; no mount
# call is visible in this notebook -- confirm.
%cd /content/drive/My\ Drive/ConditionalVAE_DL_Project3

In [None]:
# Run the project's dataloader.py script in a subprocess.
# NOTE(review): presumably this fetches/prepares the dataset using the Kaggle
# credentials configured earlier -- confirm against dataloader.py.
!python3 dataloader.py

# Build the dataset

In [None]:
from celeba import CelebADataset

# Training configuration
learning_rate = 0.001
train_size = 0.01
batch_size = 32
# When True, the test set image IDs and other useful information are stored in
# a pickle file for later use (e.g. Image_Generation.ipynb).
save_test_set = True

# Build the dataset object from the configuration values above.
dataset = CelebADataset(
    train_size=train_size,
    batch_size=batch_size,
    save_test_set=save_test_set,
)

# Define the model

In [3]:
# Hyper-parameters consumed by the model-definition and checkpoint cells.
latent_dim = 128         # passed to Encoder and ConvCVAE; part of the checkpoint dir name
label_dim = 40           # passed to ConvCVAE
image_dim = [64, 64, 3]  # passed to ConvCVAE; presumably height, width, channels -- TODO confirm
beta = 0.65              # passed to ConvCVAE; part of the checkpoint dir name


In [4]:
import tensorflow as tf
from ConvolutionalCondVAE import ConvCVAE, Decoder, Encoder

# Model: an encoder/decoder pair wrapped by the conditional VAE.
encoder = Encoder(latent_dim)
decoder = Decoder()
model = ConvCVAE(
    encoder,
    decoder,
    label_dim=label_dim,
    latent_dim=latent_dim,
    beta=beta,
    image_dim=image_dim,
)

# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Checkpoint

In [5]:
import os

# Checkpoint path
# The directory name embeds latent_dim and beta, so different values of those
# hyper-parameters map to different checkpoint directories.
checkpoint_root = "./CVAE{}_{}_checkpoint".format(latent_dim, beta)
checkpoint_name = "model"
# Path prefix (directory + base name) later handed to checkpoint.save().
save_prefix = os.path.join(checkpoint_root, checkpoint_name)

# Define the checkpoint
# The model is tracked by the checkpoint under the keyword name "module".
checkpoint = tf.train.Checkpoint(module=model)


In [None]:
# Look up the most recent checkpoint under checkpoint_root and, when one
# exists, restore it into the tracked model.
latest = tf.train.latest_checkpoint(checkpoint_root)

if latest is None:
    print("No checkpoint!")
else:
    checkpoint.restore(latest)
    print("Checkpoint restored:", latest)

# Training

In [None]:
import numpy as np
import time
from utils import train_step

# Per-step history for the whole run (one entry per training step).
train_losses = []
train_recon_errors = []
train_latent_losses = []
# Per-epoch means (one entry per epoch); plotted by the loss-visualization cell.
loss = []
reconstruct_loss = []
latent_loss = []

step_index = 0
n_batches = int(dataset.train_size / batch_size)
n_epochs = 30

print("Number of epochs: {},  number of batches: {}".format(n_epochs, n_batches))

# Epochs Loop
# Iterate n_epochs times so the loop agrees with the epoch count printed above;
# n_epochs is the single knob controlling the length of the run.
for epoch in range(n_epochs):
    start_time = time.perf_counter()
    dataset.shuffle()  # Shuffling

    # Position of this epoch's first step in the per-step history. The epoch
    # statistics below are taken from this index onward, so they describe only
    # the current epoch rather than every step since training started.
    epoch_start = len(train_losses)

    # Train Step Loop
    for step_index, inputs in enumerate(dataset):
        total_loss, recon_loss, lat_loss = train_step(inputs, model, optimizer)
        train_losses.append(total_loss)
        train_recon_errors.append(recon_loss)
        train_latent_losses.append(lat_loss)

        # Stop once n_batches steps have been run in this epoch.
        if step_index + 1 == n_batches:
            break

    loss.append(np.mean(train_losses[epoch_start:], 0))
    reconstruct_loss.append(np.mean(train_recon_errors[epoch_start:], 0))
    latent_loss.append(np.mean(train_latent_losses[epoch_start:], 0))

    exec_time = time.perf_counter() - start_time
    print("Execution time: %0.3f \t Epoch %i: loss %0.4f | reconstr loss %0.4f | latent loss %0.4f"
          % (exec_time, epoch, loss[-1], reconstruct_loss[-1], latent_loss[-1]))

    # Save progress every 5 epochs
    if (epoch + 1) % 5 == 0:
        # checkpoint.save() returns the path it actually wrote; report that
        # path rather than the bare prefix.
        saved_path = checkpoint.save(save_prefix + "_" + str(epoch + 1))
        print("Model saved:", saved_path)

# Save the final model
saved_path = checkpoint.save(save_prefix)
print("Model saved:", saved_path)


# Loss Visualization

In [None]:
import matplotlib.pyplot as plt

# Draw each per-epoch loss curve in its own figure. Every figure gets a title
# and axis labels so it can be read on its own when the notebook is skimmed.
loss_curves = [
    (reconstruct_loss, 'g', 'Reconstruction loss'),
    (latent_loss, 'b', 'Latent loss'),
    (loss, 'r', 'Total loss'),
]
for curve_values, curve_color, curve_title in loss_curves:
    plt.plot(curve_values, curve_color, marker='o')
    plt.title(curve_title)
    plt.xlabel('Epoch')
    plt.ylabel('Mean loss')
    plt.grid()
    plt.show()