## Options
#### Color Preservation:
- No preservation (this will allow the style image to override the content image's color)
- Luminance synthesis (this will maintain the content image's color with a focus on maintaining accurate colors)
- Color synthesis (this will maintain the content image's color with a focus on maintaining accurate texture from the style image)

#### Style weighting
- Adjust the ratio to determine how heavily the algorithm should weigh maintaining the content of the content image vs incorporating more of the style image

#### Masked style transfer
- Choose between applying style to background, foreground, or both

#### Automatic upsampling
- Automatically apply an upsampling algorithm after generating the new image. Useful if you're going to print or display at a higher resolution anyway, but may reduce quality

In [1]:
# Imports
import cv2
import tensorflow as tf
from tensorflow.keras.applications import VGG19, vgg19
import numpy as np
import matplotlib.pyplot as plt
import util
import IPython.display as display
import PIL.Image

In [2]:
### Settings
# User-facing switches for the notebook. Several of them are placeholders:
# see the notes on each value for whether later cells actually read them.

# Color preservation
# When True, the preprocessing cell calls util.transfer_luminance on the
# style image before it is resized.
color_preserve = False

# Style weighting
# NOTE(review): this value is reassigned later in the notebook
# (style_weight = content_weight / ratio), so the number set here does not
# appear to reach the loss functions.
style_weight = 8e-5

# Masking
# Placeholder; replaced by the resized mask tensor when the content image is
# downscaled in the next cell.
mask = None

# Upsampling
# NOTE(review): not referenced by any other visible cell.
upsample = False

# Starting Image (content, style, random)
# NOTE(review): not referenced by any other visible cell; the optimised image
# is currently always initialised from the style image.
starter_image = 'random'

# Image paths
style_image_path = './Style images/shipwreck 2.jpg'
content_image_path = './Content images/mom.jpg'
mask_path = './Mask images/mom mask.jpg'

# RGB Images
def _load_rgb(path):
    """Read the image at *path* with OpenCV and return it in RGB channel order.

    cv2.imread returns None (instead of raising) when the file is missing or
    unreadable, so that case is turned into an explicit error here rather
    than a confusing failure further down.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        raise FileNotFoundError("Could not read image file: {!r}".format(path))
    # OpenCV loads images as BGR; the rest of the notebook works in RGB.
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


style_image = _load_rgb(style_image_path)
content_image = _load_rgb(content_image_path)

if mask_path:
    mask_image = _load_rgb(mask_path)
    # 1 wherever the grayscale mask is not pure white (255), 0 elsewhere.
    # NOTE(review): the mask is stored as JPEG, so near-white pixels that are
    # not exactly 255 also end up as 1 -- confirm this is intended.
    mask_image = tf.where((tf.image.rgb_to_grayscale(mask_image) != 255), 1, 0)
else:
    # No mask configured; the resize cell only touches the mask when
    # mask_path is set.
    mask_image = None

# Display selected images
window_name = 'test_view'

In [3]:
# Cap the longest side of the content image at max_size pixels, and bring the
# mask to the same target size so the two stay aligned.
# NOTE(review): the mask is only assigned inside this branch, so for content
# images that are already small enough `mask` keeps its previous value.
max_size = 500
largest_dim = np.argmax(content_image.shape)
if content_image.shape[largest_dim] > max_size:
    # Factor by which the longest side exceeds the limit
    curr_size = content_image.shape[largest_dim]
    scale_ratio = curr_size / max_size
    new_dims = [int(side / scale_ratio) for side in content_image.shape[:2]]

    # Rescale content image (converted back to a uint8 numpy array)
    resized_content = tf.image.resize(content_image, new_dims, preserve_aspect_ratio=True)
    content_image = np.array(resized_content, dtype=np.uint8)

    if mask_path:
        # Rescale mask image and keep only its first channel
        resized_mask = tf.image.resize(mask_image, new_dims, preserve_aspect_ratio=True)
        mask = tf.Variable(resized_mask)[:,:,0]

In [10]:
# Debug cell: show the current mask and the gradient from one training step.
print(mask)
# train_step returns (loss, grad); index 1 is the gradient.
# Note that calling it also applies one optimizer update to `image`.
grad = train_step(image)[1]
print(grad)

tf.Tensor(
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]], shape=(500, 375), dtype=float32)
tf.Tensor(
[[[ 2.5082187e-04  2.1170831e-04  2.0137939e-04]
  [ 2.4693375e-03  2.6559313e-03  2.6231294e-03]
  [ 3.5637739e-04 -5.8971928e-04  1.6507764e-04]
  ...
  [ 3.9118258e-05 -6.0485024e-04 -3.0342329e-05]
  [ 1.3496235e-04 -4.4515100e-04  3.6013712e-06]
  [ 1.1623058e-04 -1.8395182e-04  3.0859897e-05]]

 [[-2.3813157e-04 -5.7723618e-04 -4.1522831e-04]
  [ 2.6404136e-03  2.5273254e-03  2.6610638e-03]
  [-1.5341262e-03 -3.6069597e-03 -2.2925185e-03]
  ...
  [ 2.3564060e-04 -7.4398040e-04  3.5195152e-04]
  [ 2.4127909e-04 -6.8801898e-04  1.6183048e-04]
  [ 1.0562186e-04 -4.2726865e-04 -6.5979507e-06]]

 [[-1.9194557e-04 -5.7436310e-04 -3.9164542e-04]
  [ 3.5474810e-03  3.6927522e-03  3.6597839e-03]
  [ 1.3972468e-03  4.5355748e-05  7.5699372e-04]
  ...
  [ 6.9051073e-04  8.4234052e-0

In [25]:
def mask_grad(grad, mask):
    """Multiply every channel of a gradient by a 2-D mask.

    Args:
        grad: Rank-3 gradient tensor with channels on the last axis.
        mask: Rank-2 mask whose shape matches the first two axes of `grad`.

    Returns:
        A tensor shaped like `grad` in which each channel has been multiplied
        element-wise by `mask`.
    """
    # Cast so boolean or integer masks can be multiplied with a float gradient.
    mask = tf.cast(mask, grad.dtype)
    # A trailing singleton axis lets the mask broadcast across all channels,
    # whatever their number, instead of splitting into exactly three.
    return tf.multiply(grad, tf.expand_dims(mask, axis=-1))

<tf.Tensor: shape=(500, 375), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [36]:
# Sanity check for mask_grad: count the elements where the masked gradient is
# identical to the unmasked one, next to the number of mask entries equal to 1.
print("Equal:", np.sum(mask_grad(grad, mask) == grad))
print("Masked:", np.sum(mask == 1))

Equal: 18444.0
Masked: 18444


In [5]:
# Preprocessing
if color_preserve:
    # Match the style image's luminance to the content image before the transfer
    style_image = util.transfer_luminance(content_image, style_image)

# Both images go through the same network, so the style image is resized to
# the content image's height and width. The unpacked dimensions are module
# globals that later cells rely on.
height, width, channels = content_image_dimensions = content_image.shape
style_image = np.array(tf.image.resize(style_image, (height, width)), dtype=np.uint8)

#style_image = vgg19.preprocess_input(style_image)
#content_image = vgg19.preprocess_input(content_image)

In [6]:
#r, g, b = cv2.split(content_image)
#cv2.imshow("content", cv2.merge((b, g, r)))
#cv2.waitKey(0)
#cv2.destroyAllWindows()

#r, g, b = cv2.split(style_image)
#cv2.imshow('style', cv2.merge((b, g, r)))
#cv2.waitKey(0)
#cv2.destroyAllWindows()

In [7]:
# Reference: https://arxiv.org/pdf/1508.06576.pdf (pages 9 and 10)
# Layer names are looked up on the VGG19 model via model.get_layer(name).

# Which layers are used for style loss
s_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']

# Which layers are used for content loss (try with 'conv5_2')
# NOTE(review): 'conv5_2' is presumably 'block5_conv2' in this model's naming -- confirm.
c_layers = ['block4_conv2']

### Loss functions
def content_loss(content_features_dict, generated):
    """Half the summed squared error between target and generated content features.

    Args:
        content_features_dict: Mapping of layer name to the target content
            features.
        generated: Extractor output; its 'content' entry is a mapping with the
            same layer names.

    Returns:
        The per-layer losses summed, divided by the module-level
        `num_content_layers`, and cast to float64.
    """
    generated_features = generated['content']
    # tf.add_n needs a list of tensors. The closing bracket used to sit before
    # the `for`, which passed a generator of one-element lists instead.
    layer_losses = [
        0.5 * tf.reduce_sum(tf.square(tf.subtract(content_features_dict[name],
                                                  generated_features[name])))
        for name in content_features_dict
    ]
    loss = tf.add_n(layer_losses) / num_content_layers
    return tf.cast(loss, tf.float64)

def style_loss(style_features_dict, generated):
    """Average per-layer squared error between target and generated style features.

    Args:
        style_features_dict: Mapping of layer name to the target style
            features (the extractor returns gram matrices for these layers).
        generated: Extractor output; its 'style' entry is a mapping with the
            same layer names.

    Returns:
        A float64 tensor: the per-layer terms summed and divided by the
        module-level `num_style_layers`.
    """
    produced_by_layer = generated['style']

    def layer_term(name):
        # Summed squared difference for one layer, in float64
        produced = produced_by_layer[name]
        squared_error = tf.reduce_sum(tf.square(tf.subtract(style_features_dict[name], produced)))
        # Normaliser: 4 * (shape[1] ** 2) ** 2 of the generated feature
        side_squared = tf.cast(tf.math.square(produced.shape[1]), tf.float64)
        return tf.cast(squared_error, tf.float64) / (4*(side_squared**2))

    return tf.add_n([layer_term(name) for name in style_features_dict.keys()]) / num_style_layers

def total_loss(content_features, style_features, generated):
    """Combine content and style losses using the module-level weights.

    Returns content_weight * content_loss + style_weight * style_loss for the
    given targets and extractor output.
    """
    weighted_content = content_weight*content_loss(content_features, generated)
    weighted_style = style_weight*style_loss(style_features, generated)
    return weighted_content + weighted_style

### Old loss functions (unused)

def style_loss_old(style_features_dict, generated):
    """Earlier style loss, kept for reference (unused).

    Computes, per layer, the summed squared difference between target and
    generated style features divided by 4 * (shape[1] ** 2) ** 2, then
    averages over the module-level `num_style_layers`.

    Args:
        style_features_dict: Mapping of layer name to target style features.
        generated: Extractor output; its 'style' entry holds the generated
            features under the same layer names.

    Returns:
        A float64 tensor with the averaged loss.
    """
    generated_features = generated['style']
    layer_losses = []
    for name in style_features_dict:
        squared_error = tf.cast(
            tf.math.reduce_sum(tf.math.square(tf.math.subtract(style_features_dict[name],
                                                               generated_features[name]))),
            tf.float64)
        normaliser = 4*(tf.cast(tf.math.square(generated_features[name].shape[1]), tf.float64)**2)
        layer_losses.append(tf.math.divide(squared_error, normaliser))
    # tf.math.add_n needs a list of tensors. The closing bracket used to sit
    # before the `for`, which passed a generator of one-element lists instead.
    return tf.math.add_n(layer_losses) / num_style_layers

def style_content_loss(outputs):
    """Earlier combined loss based on mean absolute error (unused).

    Compares the extractor `outputs` with the module-level `style_targets`
    and `content_targets`, scales each part by its weight divided by its
    layer count, and returns the sum of the two parts.
    """
    def summed_mean_abs_error(produced, targets):
        # Mean absolute difference per layer, summed over all layers
        return tf.add_n([tf.reduce_mean(abs((produced[name]-targets[name])))
                         for name in produced.keys()])

    style_term = summed_mean_abs_error(outputs['style'], style_targets)
    style_term *= style_weight / num_style_layers

    content_term = summed_mean_abs_error(outputs['content'], content_targets)
    content_term *= content_weight / num_content_layers

    return style_term + content_term

In [8]:
# Network definition
# ImageNet-pretrained VGG19 without the classifier head, built for the
# content image's dimensions.
model = VGG19(
    weights='imagenet',
    include_top=False,
    pooling='max',
    input_shape=content_image_dimensions,
)
class NSTModel(tf.keras.models.Model):
    """Feature extractor returning style and content features of an image.

    Wraps a frozen sub-model of `model` whose outputs are the requested style
    layers followed by the requested content layers.
    """

    def __init__(self, style_layers, content_layers, model):
        """Build the extractor.

        Args:
            style_layers: Names of the layers used for style features.
            content_layers: Names of the layers used for content features.
            model: Keras model that contains all of the named layers.
        """
        super().__init__()
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        # Use the names passed in. The constructor used to read the
        # module-level s_layers/c_layers, silently ignoring its arguments.
        layer_names = list(style_layers) + list(content_layers)
        outputs = [model.get_layer(name).output for name in layer_names]
        self.model = tf.keras.Model([model.input], outputs)
        self.model.trainable = False

    def call(self, inputs):
        """Return {'content': {...}, 'style': {...}} features for one image.

        Expects float input in [0,1]. The VGG19 preprocessing step is
        currently disabled (see the commented-out lines below), so the input
        reaches the network as given.
        """
        #inputs = inputs*255.0
        #preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        # Add the batch axis; height/width/channels are module-level globals
        # taken from the content image.
        preprocessed_input = tf.reshape(inputs, (1, height, width, channels))
        outputs = self.model(preprocessed_input)

        # Outputs are ordered style layers first, then content layers.
        style_outputs = outputs[:self.num_style_layers]
        content_outputs = outputs[self.num_style_layers:]

        # Style features are represented by their gram matrices.
        style_outputs = [util.gram_matrix(style_output)
                         for style_output in style_outputs]

        content_dict = dict(zip(self.content_layers, content_outputs))
        style_dict = dict(zip(self.style_layers, style_outputs))

        return {'content': content_dict, 'style': style_dict}

In [9]:
# Build the feature extractor on top of the VGG19 model defined above.
extractor = NSTModel(s_layers, c_layers, model)

# NOTE(review): white_noise is not referenced again in the visible cells.
white_noise = np.random.uniform(size=content_image_dimensions)
# Convert both images to float32 and pass them through util.scale_image
# (presumably scales pixel values to [0, 1], since the display code multiplies
# by 255 -- TODO confirm against util).
style_image = util.scale_image(tf.Variable(tf.cast(tf.convert_to_tensor(style_image), dtype=tf.float32), trainable=True))
content_image = util.scale_image(tf.Variable(tf.cast(tf.convert_to_tensor(content_image), dtype=tf.float32), trainable=True))
# The image being optimised. NOTE(review): it is always initialised from the
# style image; the starter_image setting is not consulted.
image = tf.Variable(tf.cast(tf.convert_to_tensor(style_image), dtype=tf.float32), trainable=True)

# style_grams = [util.gram_matrix(elem) for elem in extractor(style_image)['style'].values()]
# style_targets = dict(zip(s_layers, style_grams))
# Fixed targets: style features of the style image, content features of the
# content image.
style_targets = extractor(style_image)['style']
content_targets = extractor(content_image)['content']

# NOTE(review): lr_schedule is defined but not passed to the optimizer below,
# which uses a constant learning rate instead.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1500,
    decay_rate=0.95)
opt = tf.optimizers.Adam(learning_rate=0.001, epsilon=0.1)

ratio = 1e-2 # Equal to alpha/beta (content/style) in paper - larger number emphasizes content more
content_weight = 1e-3
# Overrides the style_weight chosen in the settings cell.
style_weight = content_weight / ratio
# Only used by the diagnostic print cells and the commented-out term in train_step.
total_variation_weight = 8e-4
# Layer counts read as globals by the loss functions.
num_style_layers = len(s_layers)
num_content_layers = len(c_layers)

@tf.function()
def train_step(image):
    """Run one optimisation step on `image` and return (loss, gradient).

    The gradient of the total loss with respect to `image` is clipped to norm
    5 when the loss is below 30000000, applied with the module-level
    optimizer, and the image is then passed through util.scale_image and
    written back in place.
    """
    with tf.GradientTape() as tape:
        loss = total_loss(content_targets, style_targets, extractor(image))
        #loss += total_variation_weight*tf.cast(tf.image.total_variation(image), tf.float64)

    gradient = tape.gradient(loss, image)
    if loss < 30000000:
        gradient = tf.clip_by_norm(gradient, 5)
    opt.apply_gradients([(gradient, image)])

    image.assign(util.scale_image(image))
    return loss, gradient

In [None]:
import time
start = time.time()

epochs = 200
steps_per_epoch = 50
step_list = np.linspace(start=steps_per_epoch, stop=steps_per_epoch*epochs, num=epochs, dtype=np.int32)
loss_list = []

step = 0
for n in range(epochs):
    for m in range(steps_per_epoch):
        step += 1
        # train_step returns (loss, grad); only the loss is tracked here.
        # Treating the whole tuple as the loss broke the subtraction and the
        # %d formatting below.
        loss = train_step(image)[0]
        print(".", end='', flush=True)

    # Show the current image once per epoch
    display.clear_output(wait=True)
    display.display(PIL.Image.fromarray(np.array(image*255, dtype=np.uint8)))

    # Store a plain Python float; squeeze copes with a scalar or a
    # single-element loss tensor.
    loss_value = float(np.squeeze(loss))
    # First epoch has no predecessor, so the reported change is 0.
    previous_loss = loss_list[-1] if loss_list else loss_value
    loss_list.append(loss_value)
    loss_diff = loss_value - previous_loss
    print("Train step: %d   Total Loss %d   Change in Loss %d"%(step, loss_value, loss_diff))

end = time.time()
print("Total time: {:.1f}".format(end-start))

In [None]:
# Scratch cell: try masking a gradient channel by channel with a random mask.
# NOTE(review): this overwrites the global `mask`, and the hard-coded
# [450, 600] size has to match the gradient's first two dimensions.
mask = np.random.choice([True, False], size=[450, 600])
# train_step returns (loss, grad); wrap the gradient in a Variable so slices
# of it can be assigned. Calling train_step also applies one optimizer update.
tensor = tf.Variable(train_step(image)[1])
#tensor[:,:,0]
tf.multiply(tensor[:,:,2],mask)
# Overwrite each of the three channels with its masked version
for i in range(3):
    tensor[:,:,i].assign(tf.multiply(tensor[:,:,i],mask))
tensor
#np.sum(mask==0)
#tf.boolean_mask(tensor[:,:,0], mask)

In [None]:
# Scratch check: split into 3 parts along axis 2, then concatenate them back on the same axis.
tf.concat(tf.split(tensor, 3, 2), 2)

In [None]:
# Report the individual loss terms for the current image.
# The extractor is run once and its output shared by both loss terms instead
# of doing a separate forward pass for each.
current_outputs = extractor(image)
print("Content loss: %f" % content_loss(content_targets, current_outputs),
      "\nStyle Loss: %f" % style_loss(style_targets, current_outputs),
      "\nTotal Variation: %f" % (tf.image.total_variation(image)*total_variation_weight))

In [None]:
# Report the individual loss terms for the current image.
# The extractor is run once and its output shared by both loss terms instead
# of doing a separate forward pass for each.
current_outputs = extractor(image)
print("Content loss: %f" % content_loss(content_targets, current_outputs),
      "\nStyle Loss: %f" % style_loss(style_targets, current_outputs),
      "\nTotal Variation: %f" % (tf.image.total_variation(image)*total_variation_weight))

In [None]:
# Debug cell: print the gradient of the total loss with respect to the image,
# clipped to norm 1, without applying any update.
with tf.GradientTape() as tape:
    outputs = extractor(image)
    loss = total_loss(content_targets, style_targets, outputs)

grad = tf.clip_by_norm(tape.gradient(loss, image), 1)
print(grad)

In [None]:
#r, g, b = cv2.split(np.array(image*255, dtype=np.uint8))
#bgr_stylized = cv2.merge((b, g, r))
#cv2.imwrite('stylized.png', bgr_stylized)