In [3]:
from __future__ import print_function
from keras.preprocessing.image import load_img, img_to_array
from scipy.misc import imsave
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time
import argparse

from keras.applications import vgg19
from keras import backend as K
import os

In [4]:
DATA_DIR = './data'

In [41]:
base_image_path = os.path.join(DATA_DIR, 'content.jpg')
style_reference_image_path = os.path.join(DATA_DIR, 'style.jpg')

In [42]:
# dimensions of the generated picture.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

# util function to open, resize and format pictures into appropriate tensors


def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg19.preprocess_input(img)
    return img

In [43]:
def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x


In [44]:
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

In [51]:
# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))
combination_image

<tf.Tensor 'Placeholder_1:0' shape=(1, 400, 400, 3) dtype=float32>

In [52]:
combination_image

<tf.Tensor 'Placeholder_1:0' shape=(1, 400, 400, 3) dtype=float32>

In [53]:
base_image

<tf.Variable 'Variable_1:0' shape=(1, 400, 400, 3) dtype=float32_ref>

In [55]:
# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)
input_tensor

<tf.Tensor 'concat_1:0' shape=(3, 400, 400, 3) dtype=float32>

In [56]:
# build the VGG16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)

In [58]:
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
outputs_dict

{'block1_conv1': <tf.Tensor 'block1_conv1/Relu:0' shape=(3, 400, 400, 64) dtype=float32>,
 'block1_conv2': <tf.Tensor 'block1_conv2/Relu:0' shape=(3, 400, 400, 64) dtype=float32>,
 'block1_pool': <tf.Tensor 'block1_pool/MaxPool:0' shape=(3, 200, 200, 64) dtype=float32>,
 'block2_conv1': <tf.Tensor 'block2_conv1/Relu:0' shape=(3, 200, 200, 128) dtype=float32>,
 'block2_conv2': <tf.Tensor 'block2_conv2/Relu:0' shape=(3, 200, 200, 128) dtype=float32>,
 'block2_pool': <tf.Tensor 'block2_pool/MaxPool:0' shape=(3, 100, 100, 128) dtype=float32>,
 'block3_conv1': <tf.Tensor 'block3_conv1/Relu:0' shape=(3, 100, 100, 256) dtype=float32>,
 'block3_conv2': <tf.Tensor 'block3_conv2/Relu:0' shape=(3, 100, 100, 256) dtype=float32>,
 'block3_conv3': <tf.Tensor 'block3_conv3/Relu:0' shape=(3, 100, 100, 256) dtype=float32>,
 'block3_conv4': <tf.Tensor 'block3_conv4/Relu:0' shape=(3, 100, 100, 256) dtype=float32>,
 'block3_pool': <tf.Tensor 'block3_pool/MaxPool:0' shape=(3, 50, 50, 256) dtype=float32>,
 

In [59]:
# the gram matrix of an image tensor (feature-wise outer product)


def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

In [60]:
# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image


def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))

In [61]:
# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image


def content_loss(base, combination):
    return K.sum(K.square(combination - base))

In [62]:
# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent


def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])

In [63]:
# combine these loss functions into a single scalar
loss = K.variable(0.)
loss

In [64]:
layer_features = outputs_dict['block5_conv2']
layer_features

<tf.Tensor 'block5_conv2/Relu:0' shape=(3, 25, 25, 512) dtype=float32>

In [72]:
base_image_features = layer_features[0, :, :, :]
base_image_features

<tf.Tensor 'strided_slice_23:0' shape=(25, 25, 512) dtype=float32>

In [66]:
combination_features = layer_features[2, :, :, :]
combination_features

<tf.Tensor 'strided_slice_21:0' shape=(25, 25, 512) dtype=float32>

In [68]:
total_variation_weight = 1.0
style_weight = 1.0
content_weight = 0.025

loss += content_weight * content_loss(base_image_features,
                                      combination_features)
loss

<tf.Tensor 'add:0' shape=() dtype=float32>

In [70]:
feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']

In [73]:
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
loss += total_variation_weight * total_variation_loss(combination_image)

TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

In [77]:
m = K.variable([[1, 2, 3], [4, 5, 6]])

In [78]:
K.eval(m)

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]], dtype=float32)

In [79]:
tm = K.transpose(m)

In [80]:
K.eval(tm)

array([[ 1.,  4.],
       [ 2.,  5.],
       [ 3.,  6.]], dtype=float32)

In [82]:
s = K.dot(m, tm)

In [83]:
K.eval(s)

array([[ 14.,  32.],
       [ 32.,  77.]], dtype=float32)

In [84]:
m = K.variable([[[1, 2, 3], [4, 5, 6]], [[10, 11, 12], [14, 15, 16]]])

In [85]:
K.eval(m)

array([[[  1.,   2.,   3.],
        [  4.,   5.,   6.]],

       [[ 10.,  11.,  12.],
        [ 14.,  15.,  16.]]], dtype=float32)

In [86]:
m

<tf.Variable 'Variable_6:0' shape=(2, 2, 3) dtype=float32_ref>

In [87]:
tmp = K.batch_flatten(m)

In [88]:
K.eval(tmp)

array([[  1.,   2.,   3.,   4.,   5.,   6.],
       [ 10.,  11.,  12.,  14.,  15.,  16.]], dtype=float32)