# Project \#3 - Face generation

### Deep Learning course -  A.Y. 2019-2020

Students:
- Simone Gayed Said
- Pierpasquale Colagrande


## Import of fundamental libraries
Here we import the fundamental libraries, such as TensorFlow, NumPy, etc.

In [5]:
%tensorflow_version 2.x
import tensorflow as tf
# Report which TensorFlow release the runtime actually loaded.
print(f"Tensorflow version {tf.__version__}")

Tensorflow version 2.2.0


In [0]:
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, UpSampling2D, Input, Flatten, Dense, Reshape, Concatenate, Lambda, Concatenate, Layer, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras import metrics
import numpy as np
import matplotlib.pyplot as plt
import os


## Input pipeline

Our input data is stored on Google Cloud Storage. To more fully use the parallelism TPUs offer us, and to avoid bottlenecking on data transfer, we've stored our input data in TFRecord files, 2025 images per file.

Below, we make heavy use of `tf.data.experimental.AUTOTUNE` to optimize different parts of input loading.


In [0]:
# Number of examples per batch produced by the input pipeline.
BATCH_SIZE = 128
# Not referenced in the visible part of the notebook (the shuffle buffer
# defaults to 1000 in prepare_for_training).
BUFFER_SIZE = 60000
# (height, width, channels) that each decoded image is reshaped to.
ORIGINAL_IMAGE_SIZE = (218, 178, 3)
# (height, width, channels) of the images fed to the network after the
# crop + resize step.
TARGET_IMAGE_SIZE = (64, 64, 3)
# Not referenced in the visible part of the notebook.
INTERMEDIATE_SIZE = 3072
# Number of attribute labels per image (size of the model's `labels` input).
ATTRIBUTES_SIZE = 40
# Dimensionality of the VAE latent space (units of z_mean / z_log_var).
LATENT_DIM = 64

In [0]:
# Sentinel that lets tf.data choose parallelism / buffer sizes at runtime.
AUTO = tf.data.experimental.AUTOTUNE

# Glob pattern of the TFRecord shards on Google Cloud Storage.
gcs_pattern = 'gs://celeba-test/tfrecord_*.tfrec'

# Expand the pattern into the concrete list of shard paths.
filenames = tf.io.gfile.glob(gcs_pattern)

def parse_attribute_list(example):
  """Extract the raw attribute-name string from one serialized example.

  Args:
    example: a serialized example holding a scalar string feature
      called "names".

  Returns:
    The scalar string tensor stored under "names".
  """
  schema = {"names": tf.io.FixedLenFeature([], tf.string)}
  parsed = tf.io.parse_single_example(example, schema)
  return parsed['names']

def get_names():
  """Read the attribute names from the attribute-list TFRecord.

  Only the first record of the file is used.

  Returns:
    A list of strings, one per comma-separated entry of that record, each
    stripped of surrounding whitespace and of its first and last character
    (presumably quotes or brackets -- TODO confirm against the file).
  """
  dataset = tf.data.TFRecordDataset('gs://celeba-test/attribute_list.tfrec')
  first_record = next(dataset.map(parse_attribute_list).as_numpy_iterator())
  decoded = first_record.decode("utf-8")

  names = []
  for token in decoded.split(','):
    # Trim whitespace, then drop the first and last character.
    names.append(token.strip()[1:-1])
  return names

# Attribute names, loaded once from the attribute-list record.
att_names_list = get_names()

# Schema used to parse each serialized example: the encoded image bytes and
# a variable-length sequence of int64 labels.
feature_dict = {
      "image": tf.io.FixedLenFeature([], tf.string),
      "labels": tf.io.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
  }

def parse_tfrecord(example):
  """Decode one serialized example into a model-ready feature dict.

  Args:
    example: a serialized example matching `feature_dict`.

  Returns:
    A dict with "encoder_input" (float32 image scaled to [0, 1], cropped to
    rows 45:173 / columns 25:153 and resized to the target height/width) and
    "labels" (the label vector cast to float32).
  """
  parsed = tf.io.parse_single_example(example, feature_dict)

  # Decode, then convert each 0-255 value to a float in the [0, 1] range.
  pixels = tf.cast(tf.image.decode_image(parsed['image']), tf.float32) / 255.0

  # Reshape to the expected original image size.
  height, width, channels = ORIGINAL_IMAGE_SIZE
  full_image = tf.reshape(pixels, [height, width, channels])

  # Crop the central 128x128 region, then resize it to the target size.
  cropped = full_image[45:173, 25:153]
  target_height, target_width = TARGET_IMAGE_SIZE[0], TARGET_IMAGE_SIZE[1]
  image = tf.image.resize(cropped, (target_height, target_width))

  return {"encoder_input": image, "labels": tf.cast(parsed['labels'], tf.float32)}


def load_dataset(filenames):
  """Build a dataset of parsed examples from TFRecord files.

  Args:
    filenames: path(s) of the TFRecord files to read.

  Returns:
    A `tf.data.Dataset` yielding the dicts produced by `parse_tfrecord`.
  """
  # Reads from multiple files are interleaved for throughput; AUTO lets
  # tf.data choose the degree of parallelism for both reading and parsing.
  raw_records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
  parsed_records = raw_records.map(parse_tfrecord, num_parallel_calls=AUTO)
  return parsed_records


all_dataset = load_dataset(filenames)

# Split by position in the stream: the first 50000 examples form the test
# set, the next 50000 the validation set, and everything after that the
# training set.
test_dataset = all_dataset.take(50000) 
temp_dataset = all_dataset.skip(50000)
validation_dataset = temp_dataset.take(50000)
training_dataset = temp_dataset.skip(50000)

In [0]:
def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
  """Cache, shuffle, repeat, batch and prefetch a dataset.

  Args:
    ds: the `tf.data.Dataset` to prepare.
    cache: if a non-empty string, cache to that file path; if otherwise
      truthy, cache in memory; if falsy, do not cache.
    shuffle_buffer_size: number of elements held in the shuffle buffer.

  Returns:
    An endlessly repeating dataset of batches of `BATCH_SIZE` elements,
    with background prefetching enabled.
  """
  if cache:
    ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

  # Shuffle exactly once: a second identical shuffle stage only allocates
  # another buffer without making the pipeline any more useful.
  ds = ds.shuffle(buffer_size=shuffle_buffer_size)

  # Repeat forever
  ds = ds.repeat()

  ds = ds.batch(BATCH_SIZE)

  # `prefetch` lets the dataset fetch batches in the background while the model
  # is training.
  ds = ds.prefetch(buffer_size=AUTO)

  return ds

# Only the training and test splits go through the batching pipeline here;
# validation_dataset is left as produced by the split.
training_dataset = prepare_for_training(training_dataset)
test_dataset = prepare_for_training(test_dataset)

Let's take a peek at the dataset we've created:

In [0]:
def show_batch(image_batch):
  """Plot up to the first 25 images of a batch on a 5x5 grid.

  Args:
    image_batch: indexable, sized batch of images accepted by `plt.imshow`
      (the callers in this notebook pass NumPy arrays).
  """
  plt.figure(figsize=(10,10))
  # Cap at the batch length so batches shorter than 25 do not raise
  # IndexError; the remaining grid cells simply stay empty.
  for n in range(min(25, len(image_batch))):
    plt.subplot(5,5,n+1)
    plt.imshow(image_batch[n])
    plt.axis('off')

In [0]:
# Pull one batch from the training pipeline and display its images.
image_batch = next(iter(training_dataset))
show_batch(image_batch["encoder_input"].numpy())

## Network model
Here, we build the network model.

In [0]:
class Sampling(Layer):
  """Layer that draws a latent sample from a (z_mean, z_log_var) pair."""

  def call(self, inputs):
    """Return `z_mean + exp(0.5 * z_log_var) * epsilon`.

    Args:
      inputs: pair `(z_mean, z_log_var)` of rank-2 tensors; the noise
        `epsilon` is drawn with the same (dim 0, dim 1) shape as `z_mean`.

    Returns:
      The sampled latent tensor.
    """
    mean, log_var = inputs
    mean_shape = tf.shape(mean)
    noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
    std = tf.exp(0.5 * log_var)
    return mean + std * noise

In [0]:
def create_vae():
  """Build the conditional VAE used for face generation.

  The encoder maps an image to `z_mean` / `z_log_var`; the decoder receives
  a latent sample concatenated with the attribute labels and outputs an
  image. The training loss (reconstruction + KL divergence) is attached
  with `add_loss`, so the model is compiled without an explicit loss.

  Input sizes come from `TARGET_IMAGE_SIZE` and `ATTRIBUTES_SIZE`. The
  decoder mirrors the four stride-2 encoder convolutions, so the image
  height/width are expected to be multiples of 16.

  Returns:
    A tuple `(vae, shape_before_flattening)`: the Keras model taking
    `[encoder_input, labels]`, and the (height, width, channels) shape of
    the last encoder feature map, needed to rebuild the decoder.
  """
  # Define encoder model.
  input_img = Input(shape = TARGET_IMAGE_SIZE, name='encoder_input')
  y = Input(shape = (ATTRIBUTES_SIZE,), name='labels')

  x = Conv2D(filters = 32, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', name = 'encoder_conv_0')(input_img)
  x = Conv2D(filters = 64, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', name = 'encoder_conv_1')(x)
  x = Conv2D(filters = 128, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', name = 'encoder_conv_2')(x)
  x = Conv2D(filters = 256, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', name = 'encoder_conv_3')(x)

  # Remember the feature-map shape (without the batch axis) for the decoder.
  shape_before_flattening = K.int_shape(x)[1:]

  x = Flatten()(x)

  z_mean = Dense(LATENT_DIM, name='z_mean')(x)
  z_log_var = Dense(LATENT_DIM, name='z_log_var')(x)


  # Define decoder
  z = Sampling()((z_mean, z_log_var))

  # Condition the decoder on the attribute labels.
  zy = Concatenate()([z,y])

  # To get an exact mirror image of the encoder
  dec1 = Dense(np.prod(shape_before_flattening), name='decoder_dense')(zy)
  dec1 = Reshape(shape_before_flattening)(dec1)

  dec1 = Conv2DTranspose(filters = 128, kernel_size = 3, strides = 2,  padding = 'same', activation = 'relu', name = 'decoder_conv_0')(dec1)
  dec1 = Conv2DTranspose(filters = 64, kernel_size = 3, strides = 2,  padding = 'same', activation = 'relu', name = 'decoder_conv_1')(dec1)
  dec1 = Conv2DTranspose(filters = 32, kernel_size = 3, strides = 2,  padding = 'same', activation = 'relu', name = 'decoder_conv_2')(dec1)
  x_hat = Conv2DTranspose(filters = 3, kernel_size = 3, strides = 2,  padding = 'same', activation = 'sigmoid', name = 'decoder_conv_3')(dec1)


  # Reconstruction loss: binary cross-entropy scaled by the number of pixel
  # values per image (height * width * channels).
  n_pixel_values = int(np.prod(TARGET_IMAGE_SIZE))
  rec_loss = n_pixel_values * tf.keras.losses.binary_crossentropy(Flatten()(input_img), Flatten()(x_hat))
  # Add KL divergence regularization loss.
  kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
  vae_loss = K.mean(rec_loss + kl_loss)

  vae = Model(inputs=[input_img,y], outputs=[x_hat], name = "vae")
  vae.add_loss(vae_loss)
  return vae, shape_before_flattening

vae, shape_before_flattening = create_vae()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
# No loss is passed to compile(): the training loss is attached to the model
# through add_loss() inside create_vae().
vae.compile(optimizer)

vae.summary()

In [0]:
# NOTE(review): 202599 looks like the size of the whole dataset, but
# training_dataset excludes the 100000 examples taken for the test and
# validation splits. Because the dataset repeats, each "epoch" here covers
# the training split roughly twice -- confirm this is intended.
vae.fit(training_dataset, steps_per_epoch=202599//BATCH_SIZE, verbose = 1, epochs = 20)

In [0]:
# Reconstruct 10 batches from the test pipeline and plot the first 25
# reconstructions.
predictions = vae.predict(test_dataset, steps= 10)
show_batch(predictions)

In [0]:
# Build a face generator that can sample from the learned distribution.
# The decoder layers are taken from the trained `vae` so the generator shares
# its weights; instantiating fresh Dense / Conv2DTranspose layers here would
# leave the generator randomly initialised and unrelated to the training.
noise = Input(shape=(LATENT_DIM,))
label = Input(shape=(ATTRIBUTES_SIZE,))
xy = Concatenate()([noise,label])

# The decoder's Dense layer is the only Dense in `vae` besides the two
# encoder heads, so select it by exclusion rather than by its name.
decoder_dense = next(
    layer for layer in vae.layers
    if isinstance(layer, Dense) and layer.name not in ('z_mean', 'z_log_var')
)
dec_out = decoder_dense(xy)
# Reshape has no weights, so a new instance is equivalent.
dec_out = Reshape(shape_before_flattening)(dec_out)
for conv_name in ('decoder_conv_0', 'decoder_conv_1', 'decoder_conv_2', 'decoder_conv_3'):
  dec_out = vae.get_layer(conv_name)(dec_out)
generator = Model(inputs=[noise,label], outputs=[dec_out])



In [0]:
# Draw one latent vector from a standard normal and add a batch axis.
z_sample = np.expand_dims(np.random.normal(size=LATENT_DIM),axis=0)
# Conditioning vector: every attribute set to -1 except index 7, set to 1.
# NOTE(review): assumes the training labels use the same -1/1 encoding and
# that index 7 corresponds to the 8th entry of att_names_list -- confirm.
label = np.array([-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1])
label = np.expand_dims(label, axis=0)
label = tf.convert_to_tensor(label, dtype=tf.float32)
# Decode the (latent, label) pair into an image and display it.
x_decoded = generator.predict([z_sample,label])
plt.figure(figsize=(10, 10))
plt.imshow(x_decoded[0])
plt.show()