# Deep Learning and Art Neural Style Transfer

> Using neural networks to create images by style transfer.

- toc: true 
- badges: true
- comments: true
- categories: [tensorflow, deep learning, jupyter]
- author: Thomas Simm

![](ghtop_images/header2.png)

## Introduction 

Use two images, a content image and a style image, to create a new image that shows the content image in the style of the style image.

Source used, Deep Learning Specialization Week 4
https://www.coursera.org/lecture/convolutional-neural-networks/what-is-neural-style-transfer-SA5H8

> youtube: https://youtu.be/R39tWYYKNcI

The method comes from the [original NST paper](https://arxiv.org/abs/1508.06576) by Gatys, Ecker and Bethge (2015). It relies on the VGG network, published by the Visual Geometry Group at the University of Oxford in 2014.

## Code

Some imports

In [1]:
import os
import sys
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor
import pprint
%matplotlib inline

# Mount Google Drive at /content/drive; the image and weight files used
# below are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Set the style image

In [1]:
# Read the style picture from Drive and keep it as a NumPy array.
style_path = "/content/drive/MyDrive/Colab Notebooks/Tiles.jpg"
style_image = np.array(Image.open(style_path))

imshow(style_image)

![](ghtop_images/mosaic.jpg)

And the content image

In [2]:

# Read the content picture from Drive and keep it as a NumPy array.
content_path = "/content/drive/MyDrive/Colab Notebooks/boat.jpg"
content_image = np.array(Image.open(content_path))

# Report the array's shape, then preview the picture.
print(content_image.shape)
imshow(content_image)

![](ghtop_images/house.jpg)

Resize the images

In [4]:
# Side length, in pixels, of the square image fed to the network.
img_size = 1100

# Go back to a PIL image so it can be resized to img_size x img_size.
content_pil = Image.fromarray(content_image).resize((img_size, img_size))
content_image = np.array(content_pil)

# Prepend a batch axis of length 1 and wrap the result as a constant tensor.
content_image = tf.constant(content_image[np.newaxis, ...])



In [5]:
# Resize the style image to the same img_size x img_size square.
style_pil = Image.fromarray(style_image).resize((img_size, img_size))
style_image = np.array(style_pil)

# Prepend a batch axis of length 1 and wrap the result as a constant tensor.
style_image = tf.constant(style_image[np.newaxis, ...])



Load the parameters of the VGG model, a network pretrained for image classification.

https://www.robots.ox.ac.uk/~vgg/research/very_deep/

https://gist.github.com/ksimonyan/3785162f95cd2d5fee77#file-readme-md


In [33]:
# Fix TensorFlow's global random seed.
tf.random.set_seed(272)
pp = pprint.PrettyPrinter(indent=4)

# VGG19 weights file stored on Drive ("notop": no classification head).
vgg_weights_path = '/content/drive/MyDrive/Colab Notebooks/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'

vgg = tf.keras.applications.VGG19(
    weights=vgg_weights_path,
    input_shape=(img_size, img_size, 3),
    include_top=False,
)

# The network's own weights are not trained here, so freeze them.
vgg.trainable = False
pp.pprint(vgg)

<keras.engine.functional.Functional object at 0x7f84e154af90>


Now choose the layers that represent the style of the image and assign a weight to the style cost of each one.
Layers in lower-numbered blocks capture more basic features.

In [34]:
# (layer name, weight) pairs: the VGG19 layers whose activations define the
# style, and the weight each layer's style cost gets in the overall style cost.
STYLE_LAYERS = [
    ('block1_conv1', 0.2),
    ('block2_conv1', 0.2),
    ('block3_conv1', 0.2),
    ('block4_conv1', 0.2),
    ('block5_conv1', 0.2),
]

Compute the "content cost" using TensorFlow.

In [35]:
def compute_content_cost(content_output, generated_output):
    """
    Computes the content cost between the content image and the generated image.

    Arguments:
    content_output -- sequence of layer activations for the content image C; only the last entry is used
    generated_output -- sequence of layer activations for the generated image G; only the last entry is used

    Returns:
    J_content -- scalar tensor: the sum of squared differences between the two
                 activations, scaled by 1 / (4 * n_H * n_W * n_C)
    """
    # The activation used for the content cost is the last entry of each sequence.
    content_act = content_output[-1]
    generated_act = generated_output[-1]

    # Static dimensions of the generated activation: (m, n_H, n_W, n_C).
    m, n_H, n_W, n_C = generated_act.get_shape().as_list()

    # Flatten the two spatial axes of both activations into one.
    unrolled_shape = [m, n_H * n_W, n_C]
    content_flat = tf.reshape(content_act, shape=unrolled_shape)
    generated_flat = tf.reshape(generated_act, shape=unrolled_shape)

    squared_error = tf.reduce_sum(tf.square(tf.subtract(content_flat, generated_flat)))
    scale = 1 / (4 * n_H * n_W * n_C)
    return scale * squared_error

The Gram matrix of $A$ is $G_A = A A^T$.

In [36]:
def gram_matrix(A):
    """
    Computes the Gram matrix of A, i.e. A multiplied by its own transpose.

    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """
    return tf.linalg.matmul(A, A, transpose_b=True)

Compute the style cost for a single layer. 


In [37]:
def compute_layer_style_cost(a_S, a_G):
    """
    Computes the style cost of a single layer from the Gram matrices of the
    style and generated activations.

    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G

    Returns:
    J_style_layer -- tensor representing a scalar value: the sum of squared
                     differences between the two Gram matrices, scaled by
                     1 / (4 * (n_H * n_W)**2 * n_C**2)
    """
    # Static dimensions of a_G; the batch size itself is not needed below.
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # tf.transpose without `perm` reverses every axis, giving (n_C, n_W, n_H, 1).
    # The reshape then leaves one row per channel, of shape (n_C, n_H * n_W).
    # NOTE: this only works for a batch dimension of 1, as documented above.
    a_S = tf.reshape(tf.transpose(a_S), shape=[n_C, n_H * n_W])
    a_G = tf.reshape(tf.transpose(a_G), shape=[n_C, n_H * n_W])

    # Shape trace of the unrolled style activation. When called from inside a
    # tf.function, a Python print like this runs only while the function is traced.
    print(np.shape(a_S))

    # Gram matrices of both images S and G
    GS = gram_matrix(a_S)
    GG = gram_matrix(a_G)

    # Scaled sum of squared differences between the two Gram matrices
    J_style_layer = (1/(4*(n_H*n_W)**2*n_C**2)) * tf.reduce_sum(tf.square(tf.subtract(GS, GG)))

    return J_style_layer

Compute the overall style cost.
This calls the single-layer cost function for each layer and applies the weight given in the variable STYLE_LAYERS.

In [38]:
def compute_style_cost(style_image_output, generated_image_output, STYLE_LAYERS=STYLE_LAYERS):
    """
    Computes the overall style cost from several chosen layers

    Arguments:
    style_image_output -- sequence of layer activations for the style image;
                          the last entry is the content-layer activation and is ignored
    generated_image_output -- sequence of layer activations for the generated image,
                              in the same layer order; the last entry is ignored
    STYLE_LAYERS -- A python list containing, in the same order as the activations:
                        - the names of the layers we would like to extract style from
                        - a coefficient for each of them

    Returns:
    J_style -- weighted sum of the per-layer style costs
               (the int 0 if there is no style layer to process)
    """
    # The last element of each sequence is the content-layer activation,
    # which must not be used for the style cost.
    style_activations = style_image_output[:-1]
    generated_activations = generated_image_output[:-1]

    # initialize the overall style cost
    J_style = 0

    # Walk the style activations, generated activations and layer specs in lockstep.
    for a_S, a_G, layer in zip(style_activations, generated_activations, STYLE_LAYERS):
        # layer[1] is the coefficient of this layer; layer[0] (its name) is not needed here.
        J_style += layer[1] * compute_layer_style_cost(a_S, a_G)

    return J_style

A total cost function including both style and content costs

In [39]:
@tf.function()
def total_cost(J_content, J_style, alpha = 10, beta = 40):
    """
    Combines the content and style costs into one weighted total.

    Arguments:
    J_content -- content cost
    J_style -- style cost
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost

    Returns:
    J -- total cost, alpha * J_content + beta * J_style
    """
    weighted_content = alpha * J_content
    weighted_style = beta * J_style
    return weighted_content + weighted_style

<a name='5-3'></a>
### 5.3 Randomly Initialize the Image to be Generated
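Now, you get to initialize the "generated" image from the content_image. The original exercise adds random noise at this point; in the code below the noise lines are commented out, so the generated image starts as a copy of the content image.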

* The generated image is slightly correlated with the content image.
* By initializing the pixels of the generated image to be mostly noise but slightly correlated with the content image, this will help the content of the "generated" image more rapidly match the content of the "content" image. 

In [6]:
# Start from the content image converted to float32.
initial_image = tf.image.convert_image_dtype(content_image, tf.float32)
# Optional noise on the starting point (disabled: start from the content image itself).
# noise = tf.random.uniform(tf.shape(initial_image), 0, 0.5)
# initial_image = tf.add(initial_image, noise)
initial_image = tf.clip_by_value(initial_image, clip_value_min=0.0, clip_value_max=1.0)
# Wrap in a Variable last: tf.clip_by_value returns a plain tensor, and the
# training step needs a tf.Variable (it calls .assign and apply_gradients on it).
generated_image = tf.Variable(initial_image)


Define a function which takes the loaded VGG19 model and builds a model that returns a list of the outputs of the chosen intermediate layers.

In [41]:
def get_layer_outputs(vgg, layer_names):
    """
    Creates a model that returns the outputs of the selected vgg layers.

    Arguments:
    vgg -- model providing the layers and the input
    layer_names -- iterable of entries whose first item is a layer name,
                   e.g. (name, weight) tuples

    Returns:
    model -- tf.keras.Model mapping vgg's input to the list of selected layer outputs
    """
    # Only the first item of each entry (the layer name) is read here.
    selected_outputs = [vgg.get_layer(spec[0]).output for spec in layer_names]

    return tf.keras.Model([vgg.input], selected_outputs)

Now, define the content layer and build the model.

In [42]:
# Layer used as the content representation, written in the same (name, number)
# form as the STYLE_LAYERS entries; get_layer_outputs only reads the name.
content_layer = [('block5_conv4', 1)]

# One model returning the style-layer outputs followed by the content-layer
# output, so the content activation is always the last element.
vgg_model_outputs = get_layer_outputs(vgg, STYLE_LAYERS + content_layer)

Save the outputs for the content and style layers in separate variables.

In [43]:
# Layer outputs for the content and style image tensors as they are at this point.
# NOTE(review): neither name is referenced again in the visible code - confirm before removing.
content_target = vgg_model_outputs(content_image)  # Content encoder
style_targets = vgg_model_outputs(style_image)     # Style encoder

In [44]:
# Convert the content image to float32 and run it through the model.
# a_C holds the outputs of every selected layer; the content cost reads the last one.
preprocessed_content =  tf.Variable(tf.image.convert_image_dtype(content_image, tf.float32))
a_C = vgg_model_outputs(preprocessed_content)

# a_G holds the outputs of the same layers for the current generated image.
# The model is called directly here, so a_G already contains concrete values.
a_G = vgg_model_outputs(generated_image)

# Compute the content cost
J_content = compute_content_cost(a_C, a_G)

print(J_content)

tf.Tensor(0.0, shape=(), dtype=float32)


Set a_S to be the list of hidden layer activations of the style image for the layers in STYLE_LAYERS.

In [45]:
# Convert the style image to float32 and run it through the model.
# a_S holds the outputs of every selected layer for the style image.
preprocessed_style =  tf.Variable(tf.image.convert_image_dtype(style_image, tf.float32))
a_S = vgg_model_outputs(preprocessed_style)

# Compute the style cost between the style image and the generated image
J_style = compute_style_cost(a_S, a_G)
print(J_style)

(64, 1210000)
(128, 302500)
(256, 75625)
(512, 18769)
(512, 4624)
tf.Tensor(2067.7974, shape=(), dtype=float32)


Utils that you will need to display the images generated by the style transfer model.

In [46]:
def clip_0_1(image):
    """
    Clamps every value of the tensor to the range [0, 1]

    Arguments:
    image -- Tensor

    Returns:
    Tensor with values below 0 set to 0 and values above 1 set to 1
    """
    lower, upper = 0.0, 1.0
    return tf.clip_by_value(image, clip_value_min=lower, clip_value_max=upper)

def tensor_to_image(tensor):
    """
    Turns a tensor into a PIL image

    Arguments:
    tensor -- Tensor; it is multiplied by 255 and cast to uint8. If it has
              more than 3 dimensions, the first one must have length 1

    Returns:
    Image: A PIL image
    """
    # Scale to the 0-255 range and convert to an 8-bit array.
    pixels = np.array(tensor * 255, dtype=np.uint8)

    # Drop the leading batch axis when there is one.
    if pixels.ndim > 3:
        assert pixels.shape[0] == 1
        pixels = pixels[0]

    return Image.fromarray(pixels)

### Train a step
A lower learning rate makes the optimisation slower.

In [47]:
# Adam optimizer used to update the pixels of the generated image.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.03)

@tf.function()
def train_step(generated_image):
    """
    Runs one optimisation step on the generated image.

    Uses the precomputed module-level activations a_S (style) and a_C (content).

    Arguments:
    generated_image -- variable holding the image being optimised; it is updated in place

    Returns:
    J -- total cost, computed before the update is applied
    """
    with tf.GradientTape() as tape:
        # Layer activations of the current generated image.
        generated_outputs = vgg_model_outputs(generated_image)

        style_cost = compute_style_cost(a_S, generated_outputs)
        content_cost = compute_content_cost(a_C, generated_outputs)
        J = total_cost(content_cost, style_cost, alpha = 10, beta = 40)

    # Gradient of the total cost with respect to the image pixels.
    pixel_gradient = tape.gradient(J, generated_image)
    optimizer.apply_gradients([(pixel_gradient, generated_image)])

    # Keep the pixel values inside [0, 1] after the update.
    generated_image.assign(clip_0_1(generated_image))

    return J



### Train the Model


In [7]:
# Restart the style transfer from the content image. train_step updates this
# variable in place, so it has to be a tf.Variable.
generated_image = tf.Variable(tf.image.convert_image_dtype(content_image, tf.float32))
epochs = 5001
for i in range(epochs):
    train_step(generated_image)
    # Only every 100th step is reported; skip the rest.
    if i % 100 != 0:
        continue
    print(f"Epoch {i} ")
    # Preview the current image and save it to disk.
    image = tensor_to_image(generated_image)
    imshow(image)
    image.save(f"image_{i}.jpg")
         

## Some Examples

![](ghtop_images/image_400.jpg)

![](ghtop_images/image_200.jpg)