In [1]:
import os
import sys
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf  
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import imageio
%matplotlib inline

In [3]:
# Size (in pixels) and channel count shared by the content image, the resized
# style image and the network input.
IMAGE_WIDTH = 458
IMAGE_HEIGHT = 326
COLOR_CHANNELS = 3

# Output image directory
OUTPUT_DIR = '/Users/til018/Desktop/style/output/'

# Content image
CONTENT_IMAGE = '/Users/til018/Desktop/style/content_image1.jpg'

# Style image: the source file is resized down to the content image's size and
# the resized copy is what the rest of the notebook loads.
STYLE_IMAGE = '/Users/til018/Desktop/style/style_image1.jpg'
img = Image.open(STYLE_IMAGE)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
img = img.resize((IMAGE_WIDTH, IMAGE_HEIGHT), Image.LANCZOS)
STYLE_IMAGE = '/Users/til018/Desktop/style/style_image3.jpg'
img.save(STYLE_IMAGE)

In [4]:
# Share of white noise blended into the content image when building the
# starting image (the remainder is the content image itself)
NOISE_RATIO = 0.5
# Weight of the content loss in the total loss
CONTENT_LOSS_RATIO = 5
# Weight of the style loss in the total loss
STYLE_LOSS_RATIO = 100
# File name of the pre-trained VGG19 weights
VGG_MODEL = 'imagenet-vgg-verydeep-19.mat'

# VGG19 per-channel mean: subtracted when an image is loaded and added back
# when it is saved. Shaped (1, 1, 1, 3) so it broadcasts over a batched image.
MEAN_VALUES = np.reshape(np.array([123.68, 116.779, 103.939]), (1, 1, 1, 3))

In [5]:
def load_vgg_model(path):
    """Build the VGG19 feature graph from a pre-trained MATLAB weight file.

    Args:
        path: Location of the ``.mat`` weight file. If nothing exists at
            ``path``, the file of the same name inside the directory this
            notebook originally hard-coded is loaded instead, so passing a
            bare file name keeps working.

    Returns:
        dict mapping layer names ('input', 'conv1_1', ..., 'avgpool5') to
        their tensors. 'input' is a float32 ``tf.Variable`` of shape
        (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS); every weight and bias
        is a ``tf.constant``.

    Raises:
        AssertionError: if the layer stored at an expected index does not
            carry the expected name.
    """
    # Previously the argument was ignored and this directory was always used.
    legacy_dir = '/Users/til018/Desktop/style/'
    model_path = path
    if not os.path.exists(model_path):
        model_path = os.path.join(legacy_dir, os.path.basename(path))

    #Load VGG19 model
    vgg19 = scipy.io.loadmat(model_path)

    vgg_layers = vgg19['layers']

    #Return weight and bias for the given layer
    def _weights(layer, expected_layer_name):
        # The index chain follows the nesting loadmat produces for this file.
        W = vgg_layers[0][layer][0][0][0][0][0]
        b = vgg_layers[0][layer][0][0][0][0][1]
        layer_name = vgg_layers[0][layer][0][0][-2]
        # Guards against reading weights from the wrong position in the file.
        assert layer_name == expected_layer_name
        return W, b

    #Build relu neurons
    def _relu(conv2d_layer):
        return tf.nn.relu(conv2d_layer)

    #Build convolutional layer
    def _conv2d(prev_layer_name, layer, layer_name):
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        # Flatten the stored bias to a 1-D vector so it broadcasts over channels.
        b = tf.constant(np.reshape(b, (b.size)))
        # Weights are passed positionally: the keyword is `filter` in TF 1.x
        # and `filters` in later releases.
        return tf.nn.conv2d(
            prev_layer_name, W, strides=[1, 1, 1, 1], padding='SAME') + b

    #Build convolutional layer + relu
    def _conv2d_relu(prev_layer_name, layer, layer_name):
        return _relu(_conv2d(prev_layer_name, layer, layer_name))

    #Build average pool layer
    def _avgpool(prev_layer_name):
        return tf.nn.avg_pool(prev_layer_name, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model. The integer passed to _conv2d_relu is the
    # position of that convolution inside the weight file's layer list.
    layer = {}
    layer['input']   = tf.Variable(np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)), dtype = 'float32')
    layer['conv1_1']  = _conv2d_relu(layer['input'], 0, 'conv1_1')
    layer['conv1_2']  = _conv2d_relu(layer['conv1_1'], 2, 'conv1_2')
    layer['avgpool1'] = _avgpool(layer['conv1_2'])
    layer['conv2_1']  = _conv2d_relu(layer['avgpool1'], 5, 'conv2_1')
    layer['conv2_2']  = _conv2d_relu(layer['conv2_1'], 7, 'conv2_2')
    layer['avgpool2'] = _avgpool(layer['conv2_2'])
    layer['conv3_1']  = _conv2d_relu(layer['avgpool2'], 10, 'conv3_1')
    layer['conv3_2']  = _conv2d_relu(layer['conv3_1'], 12, 'conv3_2')
    layer['conv3_3']  = _conv2d_relu(layer['conv3_2'], 14, 'conv3_3')
    layer['conv3_4']  = _conv2d_relu(layer['conv3_3'], 16, 'conv3_4')
    layer['avgpool3'] = _avgpool(layer['conv3_4'])
    layer['conv4_1']  = _conv2d_relu(layer['avgpool3'], 19, 'conv4_1')
    layer['conv4_2']  = _conv2d_relu(layer['conv4_1'], 21, 'conv4_2')
    layer['conv4_3']  = _conv2d_relu(layer['conv4_2'], 23, 'conv4_3')
    layer['conv4_4']  = _conv2d_relu(layer['conv4_3'], 25, 'conv4_4')
    layer['avgpool4'] = _avgpool(layer['conv4_4'])
    layer['conv5_1']  = _conv2d_relu(layer['avgpool4'], 28, 'conv5_1')
    layer['conv5_2']  = _conv2d_relu(layer['conv5_1'], 30, 'conv5_2')
    layer['conv5_3']  = _conv2d_relu(layer['conv5_2'], 32, 'conv5_3')
    layer['conv5_4']  = _conv2d_relu(layer['conv5_3'], 34, 'conv5_4')
    layer['avgpool5'] = _avgpool(layer['conv5_4'])
    return layer

In [6]:
#Compute content loss
def content_loss_func(sess, model):
    """Return the content loss tensor measured on layer 'conv4_2'.

    The layer is evaluated once with ``sess.run`` and the resulting array is
    used as the fixed target; the returned tensor compares the live layer
    output against that target.

    Args:
        sess: session used to evaluate the layer.
        model: dict of layer tensors containing the key 'conv4_2'.

    Returns:
        Scalar tensor: sum of squared differences scaled by
        1 / (4 * filters * feature-map size).
    """
    layer_output = model['conv4_2']
    #Snapshot of the layer for whatever the graph input currently holds
    target = sess.run(layer_output)

    #Number of filters
    channels = target.shape[3]
    #Feature map size (height * width)
    positions = target.shape[1] * target.shape[2]

    #The original paper uses 0.5 * tf.reduce_sum(tf.pow(x - p, 2)).
    #According to http://www.chioka.in/tensorflow-implementation-neural-algorithm-of-artistic-style
    #that form is very slow in "painting" and could be missing some constants,
    #so the normalised form below is used instead.
    squared_error = tf.reduce_sum(tf.pow(layer_output - target, 2))
    return (1 / (4 * channels * positions)) * squared_error


In [7]:
#Style layers as (layer name, weight of that layer in the style loss)
STYLE_LAYERS = [
    ('conv1_1', 0.5),
    ('conv2_1', 1.0),
    ('conv3_1', 1.5),
    ('conv4_1', 3.0),
    ('conv5_1', 4.0),
]

#Style loss
def style_loss_func(sess, model):
    """Return the weighted style loss tensor over every layer in STYLE_LAYERS.

    Each listed layer is evaluated once with ``sess.run``; the Gram matrix of
    that snapshot is the fixed target, and the Gram matrix of the live layer
    output is compared against it.

    Args:
        sess: session used to evaluate the layers.
        model: dict of layer tensors containing every name in STYLE_LAYERS.

    Returns:
        Sum over layers of weight * layer loss.
    """

    #Gram matrix: filter-by-filter inner products over all spatial positions
    def _gram(features, channels, positions):
        flat = tf.reshape(features, (positions, channels))
        return tf.matmul(tf.transpose(flat), flat)

    def _layer_loss(target, current):
        # N: number of filters at this layer
        channels = target.shape[3]
        # M: height times width of the feature map at this layer
        positions = target.shape[1] * target.shape[2]
        # A: style representation of the snapshot
        gram_target = _gram(target, channels, positions)
        # G: style representation of the live layer output
        gram_current = _gram(current, channels, positions)
        scale = 1 / (4 * channels**2 * positions**2)
        return scale * tf.reduce_sum(tf.pow(gram_current - gram_target, 2))

    layer_losses = []
    for layer_name, _ in STYLE_LAYERS:
        activations = model[layer_name]
        layer_losses.append(_layer_loss(sess.run(activations), activations))

    return sum(weight * layer_loss
               for (_, weight), layer_loss in zip(STYLE_LAYERS, layer_losses))

In [8]:
#Generate new image with noise
def generate_noise_image(content_image, noise_ratio = NOISE_RATIO):
    """Blend uniform white noise into the content image.

    Args:
        content_image: array holding the (mean-subtracted) content image.
        noise_ratio: weight of the noise; the content image gets
            ``1 - noise_ratio``.

    Returns:
        ``noise * noise_ratio + content_image * (1 - noise_ratio)``, with the
        same shape as ``content_image``.
    """
    #White noise picture, sized from the content image rather than a fixed
    #(1, 326, 458, 3) so any image size works
    noise_image = np.random.uniform(-20, 20, np.shape(content_image)).astype('float32')
    #White noise picture with content image weight
    new_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    return new_image

#Helper function for loading image
def load_image(path):
    """Read an image file and return it as a mean-subtracted batch of one.

    Args:
        path: file to read with ``imageio.imread``.

    Returns:
        The pixel array with a leading axis of length 1 added and MEAN_VALUES
        subtracted.
    """
    pixels = imageio.imread(path)
    #Add the leading batch axis
    batched = np.expand_dims(pixels, axis=0)
    #Subtract VGG mean from image picture
    return batched - MEAN_VALUES

#Helper function for writing out image
def save_image(path, image):
    """Write a mean-subtracted batch-of-one image to ``path``.

    Args:
        path: destination passed to ``imageio.imwrite``.
        image: array whose first axis is the batch axis; only entry 0 is saved.
    """
    #Add back vgg mean to output picture, then drop the batch axis
    restored = (image + MEAN_VALUES)[0]
    #Clamp to the valid 8-bit range before converting
    pixels = np.clip(restored, 0, 255).astype('uint8')
    imageio.imwrite(path, pixels)

In [9]:
# Session used by every sess.run(...) call in the cells below
sess = tf.InteractiveSession()

In [10]:
# Build the VGG19 graph; `model` maps layer names to their tensors
model = load_vgg_model(VGG_MODEL)


In [22]:
# Content image as a mean-subtracted batch of one
content_image = load_image(CONTENT_IMAGE)


In [23]:
# Resized style image as a mean-subtracted batch of one
style_image = load_image(STYLE_IMAGE)


In [24]:
# Starting point for the optimisation: content image blended with white noise
input_image = generate_noise_image(content_image)


In [14]:
# Initialise the graph's variables before any layer is evaluated
sess.run(tf.global_variables_initializer())

In [15]:
#Compute content loss: load the content image into the graph input first, so
#content_loss_func snapshots the content image's conv4_2 activations as target
sess.run(model['input'].assign(content_image))
loss_content = content_loss_func(sess, model)


In [16]:
#Compute style loss: load the style image into the graph input first, so
#style_loss_func snapshots the style image's activations as targets
sess.run(model['input'].assign(style_image))
loss_style = style_loss_func(sess, model)

In [17]:
#Compute total loss as the weighted sum of the content and style losses
loss_total = CONTENT_LOSS_RATIO * loss_content + STYLE_LOSS_RATIO * loss_style

In [18]:
# Adam with learning rate 2.0. The network weights are constants, so the only
# variable the optimizer can update is the input image itself.
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(loss_total)

In [19]:
# Initialise again now that the optimizer exists (covers any variables it
# added to the graph), then load the noisy starting image into the input
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))

array([[[[ -3.75210838e+01,  -3.42395363e+01,  -1.63943920e+01],
         [ -4.38560028e+01,  -3.14248943e+01,  -1.40299482e+01],
         [ -4.76287041e+01,  -2.27805214e+01,  -1.29070234e+01],
         ..., 
         [ -2.38486366e+01,   6.06310129e+00,   1.08051996e+01],
         [ -1.03555174e+01,  -6.96556377e+00,   9.79166031e+00],
         [ -1.33655300e+01,  -6.40230751e+00,   9.72085953e+00]],

        [[ -5.48408737e+01,  -3.81320686e+01,  -2.92360859e+01],
         [ -5.43678627e+01,  -4.27705269e+01,  -2.17258434e+01],
         [ -6.01690407e+01,  -2.61104927e+01,  -2.92723885e+01],
         ..., 
         [ -2.08203678e+01,  -1.45154285e+01,   2.71671772e+00],
         [ -3.01684875e+01,  -3.56768727e+00,  -7.76742411e+00],
         [ -2.46977730e+01,  -2.80330110e+00,  -2.69993600e-02]],

        [[ -5.87749100e+01,  -3.01926365e+01,  -3.66005096e+01],
         [ -4.41099548e+01,  -2.75546513e+01,  -2.02811832e+01],
         [ -4.90691757e+01,  -3.80448418e+01,  -1.202179

In [20]:
#Number of optimisation steps to run (500 for testing)
ITERATIONS = 500

In [21]:
# Start from a clean state: reset all variables and reload the noisy image
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))

# Directory that receives the periodic snapshots. It is created up front so
# the first save cannot fail because the folder is missing.
SNAPSHOT_DIR = '/Users/til018/Desktop/style/output2/'
os.makedirs(SNAPSHOT_DIR, exist_ok=True)

for it in range(ITERATIONS):
    sess.run(train_step)

    # Report progress and save a snapshot every 10th iteration
    if it % 10 == 0:
        # One fetch for both values so they describe the same state
        output_image, cost = sess.run([model['input'], loss_total])
        print('Iteration %d' % (it))
        # Summed with NumPy: calling tf.reduce_sum here would add a new op to
        # the graph on every report
        print('sum : ', np.sum(output_image))
        print('cost: ', cost)

        filename = os.path.join(SNAPSHOT_DIR, '%d.png' % (it))
        save_image(filename, output_image)

Iteration 0
sum :  1.79307e+06
cost:  1.87843e+11
Iteration 10
sum :  1.83378e+06
cost:  3.69226e+10
Iteration 20
sum :  1.86438e+06
cost:  1.40952e+10
Iteration 30
sum :  1.86048e+06
cost:  6.68703e+09
Iteration 40
sum :  1.81871e+06
cost:  4.42749e+09
Iteration 50
sum :  1.7645e+06
cost:  3.27165e+09
Iteration 60
sum :  1.72238e+06
cost:  2.62198e+09
Iteration 70
sum :  1.6907e+06
cost:  2.20032e+09
Iteration 80
sum :  1.66452e+06
cost:  1.90731e+09
Iteration 90
sum :  1.64205e+06
cost:  1.68656e+09
Iteration 100
sum :  1.62142e+06
cost:  1.51235e+09
Iteration 110
sum :  1.60196e+06
cost:  1.37081e+09
Iteration 120
sum :  1.58345e+06
cost:  1.25258e+09
Iteration 130
sum :  1.56573e+06
cost:  1.15248e+09
Iteration 140
sum :  1.54835e+06
cost:  1.0664e+09
Iteration 150
sum :  1.53117e+06
cost:  9.91602e+08
Iteration 160
sum :  1.51416e+06
cost:  9.2599e+08
Iteration 170
sum :  1.49719e+06
cost:  8.67926e+08
Iteration 180
sum :  1.48002e+06
cost:  8.16204e+08
Iteration 190
sum :  1.4624