# Neural style transfer

### Mount Google Drive with data

In [1]:
# Colab-only step: expose the user's Google Drive under /content/gdrive.
# Running it triggers the interactive authorisation prompt shown in the output.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Load pretrained model and construct its custom version

In [0]:
import tensorflow as tf
# Switch TensorFlow to eager mode: later cells call `.numpy()` on tensors and
# record gradients with `tf.GradientTape`.
tf.enable_eager_execution()

Prepare the names of the style and content layers

In [0]:
# A single layer supplies the content representation
content_layers = ['block4_conv2']

# The first convolution of every block supplies the style representation
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]

# Layer counts, used to split the model outputs into content / style parts
n_style = len(style_layers)
n_content = len(content_layers)

In [0]:
def get_model():
  """
  Build a feature extractor on top of VGG19 pre-trained on ImageNet
  (convolutional part only, no fully connected layers).

  Return: model mapping a [batch, 512, 512, 3] input to a list of layer
  activations: first the layers named in `content_layers`, then the layers
  named in `style_layers`
  """

  # Frozen convolutional backbone
  backbone = tf.keras.applications.vgg19.VGG19(weights='imagenet',
                                               include_top=False,
                                               input_shape=[512, 512, 3])
  backbone.trainable = False

  # Content outputs come first so callers can split the list at n_content
  layer_names = list(content_layers) + list(style_layers)
  outputs = [backbone.get_layer(name).output for name in layer_names]

  return tf.keras.models.Model(backbone.input, outputs)

### Image preprocessing

In [0]:
def get_and_preprocess_image(image_path, target_size=(512, 512)):
  """
  Read an image file and turn it into a float32 batch holding one RGB image.

  Args:
    image_path: path of the image file to load
    target_size: (height, width) the image is resized to; the default
      matches the input shape the VGG19 feature extractor is built with

  Returns:
    float32 tensor of shape [1, height, width, 3]; values stay on the
    decoded 0-255 pixel scale

  NOTE(review): no VGG-specific normalisation (e.g.
  tf.keras.applications.vgg19.preprocess_input) is applied here - confirm
  that feeding raw pixel values to the network is intended.
  """
  # Read image and transform it to tensor
  raw_bytes = tf.read_file(image_path)

  # Force 3 channels so greyscale or RGBA files still fit the network input
  image = tf.image.decode_image(raw_bytes, channels=3)
  image = tf.image.resize_images(image, list(target_size))

  image = tf.cast(image, 'float32')

  # Add batch dimension
  return tf.expand_dims(image, 0)

The result image (with the style transferred from another image) has the same shape as the preprocessed input image. Both the input image and the style image are resized to 512×512, so they can be passed through the network together as a single batch.

### Feature extraction

In [0]:
import numpy as np


def get_features(model, input_image_path, style_image_path):
  """
  Extract the target representations from the content and the style image.

  Args:
    model: feature extractor whose outputs list the content layers first,
      followed by the style layers
    input_image_path: path of the image providing the content
    style_image_path: path of the image providing the style

  Return: (content_features, style_features) - two lists of tensors, one
  entry per content layer / style layer, without the batch dimension
  """

  # Stack both images into one batch: index 0 = content, index 1 = style
  batch = np.concatenate(
      [get_and_preprocess_image(input_image_path),
       get_and_preprocess_image(style_image_path)],
      axis=0)
  outputs = model(batch)

  # Content layers are read from the content image, style layers from the
  # style image
  content_features = [out[0] for out in outputs[:n_content]]
  style_features = [out[1] for out in outputs[n_content:]]

  return content_features, style_features

### Loss

In [0]:
# Content loss
def get_content_loss(acquired, target):
  """
  Mean squared error between two feature maps.

  Args:
    acquired: features of the image being optimised
    target: features of the content image (broadcast-compatible shape)

  Returns:
    scalar tensor
  """
  difference = acquired - target
  return tf.reduce_mean(tf.square(difference))

In [0]:
# Gram matrix
def get_gram_matrix(A):
  """
  Channel-by-channel Gram matrix of a feature map, averaged over positions.

  Args:
    A: feature map whose last axis is the channel axis; all leading axes
      are flattened together

  Returns:
    [channels, channels] tensor
  """
  # NOTE(review): activations are shifted by -1 before the Gram product;
  # presumably a deliberate "activation shift" trick - confirm the intent.
  shifted = A - 1

  n_channels = int(shifted.shape[-1])
  flat = tf.reshape(shifted, [-1, n_channels])

  # Normalise by the number of flattened positions
  n_positions = tf.cast(tf.shape(flat)[0], tf.float32)
  return tf.matmul(flat, flat, transpose_a=True) / n_positions

# Style loss
def get_style_loss(acquired, target):
  """
  Mean squared difference between the Gram matrices of two feature maps.

  Args:
    acquired: feature map of the image being optimised
    target: feature map of the style image taken from the same layer

  Returns:
    scalar tensor
  """
  gram_acquired = get_gram_matrix(acquired)
  gram_target = get_gram_matrix(target)

  return tf.reduce_mean(tf.square(gram_acquired - gram_target))

In [0]:
def compute_loss(model, loss_weights, init_image, target_style, target_content):
  """
  Weighted style + content loss of `init_image` against the target features.

  Args:
    model: feature extractor returning content layers first, then style
      layers
    loss_weights: pair (style_weight, content_weight)
    init_image: image being optimised, passed straight to `model`
    target_style: per-layer style features to match
    target_content: per-layer content features to match

  Returns:
    (total_loss, style_loss, content_loss); the two partial losses are
    already multiplied by their weights
  """

  style_weight, content_weight = loss_weights

  # Features of the current image, split the same way as the targets
  outputs = model(init_image)
  current_content = outputs[:n_content]
  current_style = outputs[n_content:]

  # Every style layer contributes equally
  per_style_layer = 1 / n_style
  style_loss = sum(
      per_style_layer*get_style_loss(current, wanted)
      for current, wanted in zip(current_style, target_style))

  # Every content layer contributes equally
  per_content_layer = 1 / n_content
  content_loss = sum(
      per_content_layer*get_content_loss(current, wanted)
      for current, wanted in zip(current_content, target_content))

  style_loss = style_loss * style_weight
  content_loss = content_loss * content_weight

  return style_loss + content_loss, style_loss, content_loss

### Gradients

In [0]:
def compute_gradients(cfg):
  """
  Evaluate the loss and its gradient with respect to the optimised image.

  Args:
    cfg: keyword arguments for compute_loss; cfg['init_image'] is the
      variable the gradient is taken with respect to

  Returns:
    (gradient, losses) where losses is the full tuple returned by
    compute_loss
  """
  with tf.GradientTape() as tape:
    losses = compute_loss(**cfg)

  # Only the total loss (first entry) is differentiated
  image_gradient = tape.gradient(losses[0], cfg['init_image'])
  return image_gradient, losses

### Run training

In [0]:
def deprocess_image(image):
  """
  Helper function to turn the optimised image into an encoded PNG

  Args:
    image: tf.Variable or tensor of shape [1, height, width, channels]
      holding pixel values on the 0-255 scale

  Returns:
    png image (array of bytes)
  """

  # Drop the batch dimension
  pixels = tf.squeeze(tf.cast(image, tf.float32), axis=0)

  # Saturate before the uint8 cast: casting out-of-range floats would
  # otherwise produce wrapped / undefined pixel values
  pixels = tf.clip_by_value(pixels, 0.0, 255.0)

  return tf.image.encode_png(tf.cast(pixels, tf.uint8)).numpy()

In [0]:
# Build the feature extractor once; it is reused for the targets and for
# every optimisation step
model = get_model()

# Mark every layer of the extractor as non-trainable
for frozen_layer in model.layers:
  frozen_layer.trainable = False

In [0]:
# Image locations on the mounted drive
GDRIVE = '/content/gdrive/My Drive/neural_style_transfer/'
CONTENT_PATH = GDRIVE + 'content/2.jpg'
STYLE_PATH = GDRIVE + 'styles/scream.jpg'
INIT_PATH = GDRIVE + 'content/2.jpg'

# Extract target features
content_features_target, style_features_target = get_features(
    model, CONTENT_PATH, STYLE_PATH)

# The optimisation starts from the image at INIT_PATH, held in a variable so
# that gradients can be applied to it
init_image = tf.Variable(get_and_preprocess_image(INIT_PATH), dtype=tf.float32)

In [0]:
# Show the intermediate result every `display_num` iterations
display_num = 100

# Total number of optimisation steps
num_iterations = 1000

In [0]:
# Best result seen so far: nothing yet, so any finite loss improves on it
best_img = None
best_loss = float('inf')

In [0]:
# Relative importance of the two loss terms, as (style_weight, content_weight)
loss_weights = (1e3, 1)

# Adam is applied directly to the pixels of the optimised image
optimizer = tf.train.AdamOptimizer(epsilon=1e-1, beta1=0.99, learning_rate=8.0)

In [0]:
# Keyword arguments forwarded to compute_loss by compute_gradients
cfg = dict(
    model=model,
    loss_weights=loss_weights,
    init_image=init_image,
    target_style=style_features_target,
    target_content=content_features_target,
)

In [0]:
import IPython.display as display
import time

In [130]:
# Values clipping
# The optimised image lives in raw pixel space: it is loaded without any mean
# subtraction and is cast straight to uint8 when displayed, so the valid range
# is [0, 255] rather than the mean-shifted range [-mean, 255 - mean].
norm_means = np.array([103.939, 116.779, 123.68])  # kept for reference only; not used for clipping
min_vals = 0.0
max_vals = 255.0


# Training
time_init = time.time()
for i in range(num_iterations):
  grads, loss = compute_gradients(cfg)
  total_loss, style_loss, content_loss = loss

  # Update loss and image
  # The loss just computed belongs to the image *before* this step's update,
  # so the snapshot is taken now. A copy is stored: keeping a reference to
  # the variable itself would always end up pointing at the final image.
  if total_loss < best_loss:
    best_loss = total_loss
    best_img = tf.constant(np.copy(init_image.numpy()))

  optimizer.apply_gradients([(grads, init_image)])

  # Clipping
  clipped = tf.clip_by_value(init_image, min_vals, max_vals)
  init_image.assign(clipped)

  # Display every 100th iteration result
  if i % display_num == 0:
    time_100 = time.time()
    image_out = deprocess_image(init_image)

    print('Iteration: {}\nTime spent: {} min'.format(i, (time_100 - time_init)/60))
    display.display(display.Image(image_out))

Output hidden; open in https://colab.research.google.com to view.

In [0]:
# Encode the best image as PNG and write it to the drive
output_path = GDRIVE + 'output/2+scream.png'
best_image = deprocess_image(best_img)

with open(output_path, 'wb') as png_file:
  png_file.write(best_image)