This notebook illustrates how you would apply style transfer and custom optimization using TensorFlow 2-compatible code. This example has been adapted from an example by François Chollet.

In [None]:
%tensorflow_version 2.x

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras import backend as K
from tensorflow.keras.applications import vgg19
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
from skimage import io

These first few lines are the same as in the other style transfer notebook.

In [None]:
# URL of the base (content) image, followed by the URL of the style reference image
base_image_path = (
  'http://www.petsexperience.com/wp-content/uploads/2018/05/What-Are-Hybrid-Cat-Breeds.jpg'
)
style_image_path = (
  'https://assets.saatchiart.com/saatchi/467/art/2717235/1787128-EQGJYLSZ-7.jpg'
)

In [None]:
def preprocess_image(path, resize=False):
  """Load an image and turn it into a batch that VGG19 can consume.

  Args:
    path: Location handed to `sk_load_img` (a URL in this notebook).
    resize: Optional `(width, height)` tuple passed to `PIL.Image.resize`.
      `False` (the default) or `None` keeps the original size.

  Returns:
    A tuple `(image_original, image_processed)`: the PIL image exactly as it
    was loaded, and the output of `vgg19.preprocess_input` for a batch that
    contains only this image.
  """
  image_original = sk_load_img(path)
  # `Image.resize` returns a new image and `img_to_array` a new array, so the
  # loaded image is never modified and no defensive copy is needed
  image_processed = image_original
  if resize is not None and resize is not False:
    image_processed = image_original.resize(resize)
  image_processed = image.img_to_array(image_processed)
  # Add the leading batch axis expected by the network
  image_processed = np.expand_dims(image_processed, axis=0)
  image_processed = vgg19.preprocess_input(image_processed)
  return image_original, image_processed

def deprocess_image(arr, size):
  """Undo the VGG19 preprocessing and return a displayable RGB image.

  Args:
    arr: Array holding exactly one preprocessed image, i.e. anything that can
      be reshaped to `size + (3,)`. It is copied, never modified. Input that
      is not of a floating-point dtype is promoted to float32 first.
    size: `(rows, cols)` tuple with the spatial dimensions of the image.

  Returns:
    A `uint8` array of shape `size + (3,)` in (red, green, blue) order.
  """
  x = np.copy(arr)
  if not np.issubdtype(x.dtype, np.floating):
    # Adding a float in place onto an integer array raises a casting error
    x = x.astype(np.float32)
  x = x.reshape(size + (3, ))
  # VGG19 preprocessing subtracts the ImageNet mean of each channel, so we add
  # these back. The means are listed in (blue, green, red) order. See
  # https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py#L135
  for channel, mean in enumerate((103.939, 116.779, 123.68)):
    x[:, :, channel] += mean
  # Convert back from (blue, green, red) to (red, green, blue)
  x = x[:, :, ::-1]
  # Finally, clip all values to the valid 0..255 range
  return np.clip(x, 0, 255).astype('uint8')

In [None]:
# Common size in pixels that both images are resized to (columns = width, rows = height)
img_ncols = 800
img_nrows = 600

In [None]:
def sk_load_img(url):
  """Read the image found at `url` and return it as a PIL image."""
  pixels = io.imread(url)
  return Image.fromarray(pixels)

In [None]:
# Load both images and resize them to the same (width, height)
base_img_original, base_img_processed = preprocess_image(
  base_image_path, resize=(img_ncols, img_nrows))
style_img_original, style_img_processed = preprocess_image(
  style_image_path, resize=(img_ncols, img_nrows))

In [None]:
def content_loss(base, combination):
  """Sum of squared errors (SSE) between the base and the combined features."""
  difference = combination - base
  return K.sum(K.square(difference))

def style_loss(style, combination):
  """Normalized SSE between the Gram matrices of two feature maps.

  Both arguments must be rank-3 tensors. The sum of squared differences of
  their Gram matrices is divided by `4 * channels^2 * size^2`, with
  `channels` fixed to 3 and `size` being `img_nrows * img_ncols`.
  """
  assert K.ndim(style) == 3
  assert K.ndim(combination) == 3
  gram_difference = gram_matrix(style) - gram_matrix(combination)
  n_channels = 3
  n_pixels = img_nrows * img_ncols
  normalizer = 4. * (n_channels ** 2) * (n_pixels ** 2)
  return K.sum(K.square(gram_difference)) / normalizer

def gram_matrix(x):
  """Gram matrix of a rank-3 tensor: flattened features times their transpose."""
  assert K.ndim(x) == 3
  # Move the last axis (channels / depth) to the front ...
  channels_first = K.permute_dimensions(x, (2, 0, 1))
  # ... and flatten everything behind it, leaving a 2d array
  flat = K.batch_flatten(channels_first)
  return K.dot(flat, K.transpose(flat))

def total_variation_loss(x, p=1.25):
  """Total variation of an image batch: how much neighboring pixels differ.

  For every pixel, the squared differences with its neighbor one row down and
  its neighbor one column to the right are added, raised to the power `p`,
  and everything is summed. The last row and the last column are left out
  because they lack one of those neighbors.

  Args:
    x: Rank-4 tensor; axes 1 and 2 are treated as rows and columns.
    p: Exponent applied to the summed squared differences.

  Returns:
    A scalar tensor.
  """
  assert K.ndim(x) == 4
  # Slice relative to the tensor's own size rather than the notebook-level
  # img_nrows / img_ncols, so the loss works for any spatial size. For a
  # tensor of exactly that size the slices are the same as before.
  anchor = x[:, :-1, :-1, :]
  a = K.square(anchor - x[:, 1:, :-1, :])
  b = K.square(anchor - x[:, :-1, 1:, :])
  return K.sum(K.pow(a + b, p))

These lines are comparable as well: we load in a pretrained VGG19 model, and construct a quick helper function to get intermediate outputs.

In [None]:
# VGG19 with ImageNet weights, without the fully-connected layers on top
model = vgg19.VGG19(include_top=False, weights='imagenet')

# Map every layer name to the output of that layer
outputs_dict = {layer.name: layer.output for layer in model.layers}

# Model that returns the outputs of all layers at once, keyed by layer name
feature_extractor = Model(inputs=model.inputs, outputs=outputs_dict)

Now we see a couple of differences. We construct a function here that returns our custom loss value given the three image sets.

In [None]:
# Layers whose outputs are compared for the style loss
style_layer_names = [
  'block1_conv1',
  'block2_conv1',
  'block3_conv1',
  'block4_conv1',
  'block5_conv1',
]
# Layer whose output is compared for the content loss
content_layer_name = 'block5_conv2'

# Weights of the three terms that make up the total loss
total_variation_weight = 1e-6
style_weight = 2e-6
content_weight = 2e-8

def compute_loss(combination_image, base_image, style_reference_image):
  """Weighted sum of the content, style and total variation losses.

  The three images are pushed through `feature_extractor` as a single batch.
  The content loss is taken at `content_layer_name`, the style loss is spread
  evenly over `style_layer_names`, and the total variation loss is computed
  on `combination_image` itself.
  """
  # The batch order fixes the indices used below: 0 = base, 1 = style, 2 = combination
  batch = tf.concat([base_image, style_reference_image, combination_image], axis=0)
  activations = feature_extractor(batch)

  # Start from zero and add the content term
  total = tf.zeros(shape=())
  content_activations = activations[content_layer_name]
  total = total + content_weight * content_loss(content_activations[0],
                                                content_activations[2])

  # Add the style term, with every style layer contributing an equal share
  per_layer_weight = style_weight / len(style_layer_names)
  for name in style_layer_names:
    style_activations = activations[name]
    total += per_layer_weight * style_loss(style_activations[1], style_activations[2])

  # Add the total variation term
  total += total_variation_weight * total_variation_loss(combination_image)
  return total

Now for the main part. We use the `tf.function` decorator here so TensorFlow can compile this function to speed things up. This function returns the loss as well as the gradients. The latter is done using the `GradientTape` mechanism, which we have briefly visited before.

In [None]:
@tf.function
def compute_loss_and_grads(combination_image, base_image, style_reference_image):
  """Return the loss and its gradient with respect to `combination_image`.

  The loss is evaluated while a `tf.GradientTape` is recording, after which
  the tape is asked for the gradient of that loss.
  """
  with tf.GradientTape() as recorder:
    loss_value = compute_loss(combination_image, base_image, style_reference_image)
  gradient = recorder.gradient(loss_value, combination_image)
  return loss_value, gradient

To perform our optimization, we won't use SciPy here but will directly use one of the Keras optimizers (standard SGD in this case). Note that, given our hyperparameters, we need to run for a couple more iterations. Also note the use of `optimizer.apply_gradients`, which is the actual call that changes the combination image based on the gradients we have calculated.

In [None]:
# Plain SGD with a learning rate that starts at 100 and decays by a factor of 0.96 per 1000 steps
optimizer = SGD(
  ExponentialDecay(initial_learning_rate=100., decay_steps=1000, decay_rate=0.96)
)

# The image being optimized starts out as a copy of the preprocessed base image
combination_image = tf.Variable(base_img_processed.copy())

iterations = 5000

for i in range(iterations):
  loss, grads = compute_loss_and_grads(combination_image, base_img_processed, style_img_processed)
  # This call is what actually changes the combination image
  optimizer.apply_gradients([(grads, combination_image)])
  # Report every 100th step, and also after the very last step: otherwise the
  # last image shown would be the one from step 4900, not the finished result
  if i % 100 == 0 or i == iterations - 1:
    print('Iteration', i, ' loss = ', loss)
    combined_image_deprocessed = deprocess_image(combination_image.numpy(), (img_nrows, img_ncols))
    plt.figure(figsize=(4, 5))
    plt.imshow(combined_image_deprocessed)
    plt.show()

Output hidden; open in https://colab.research.google.com to view.