In [1]:

import numpy as np
import tensorflow as tf
import vgg16
from scipy.misc import imread, imresize, imsave

# A single TF1 session is used for everything that follows in this cell.
sess = tf.Session()

# The image being optimized: one 224x224 RGB image, created as a trainable
# variable whose initializer is small truncated-normal noise.
initial_noise = tf.truncated_normal( shape=[1,224,224,3],
                                     stddev=1e-1,
                                     dtype=tf.float32 )
opt_img = tf.Variable( initial_noise, name='opt_img' )

# The network sees the variable clamped to [0, 255], not the raw variable.
tmp_img = tf.clip_by_value( opt_img, clip_value_min=0.0, clip_value_max=255.0 )

# Build VGG16 on the clamped image; the weights file and the session are
# handed to the constructor.
vgg = vgg16.vgg16( tmp_img, 'vgg16_weights.npz', sess )

def _load_rgb_224( path ):
    """Read the image at `path` as RGB, resize it to 224x224 and reshape it to [1,224,224,3]."""
    pixels = imresize( imread( path, mode='RGB' ), (224, 224) )
    return np.reshape( pixels, [1,224,224,3] )

# Style and content images, each with a leading batch axis of size one.
style_img = _load_rgb_224( 'style.png' )
content_img = _load_rgb_224( 'content.png' )

# Names of the convolutional layers looked up on the VGG16 object, in network order.
layers = [
    'conv1_1', 'conv1_2',
    'conv2_1', 'conv2_2',
    'conv3_1', 'conv3_2', 'conv3_3',
    'conv4_1', 'conv4_2', 'conv4_3',
    'conv5_1', 'conv5_2', 'conv5_3',
]

# Activation node for every layer named above.
ops = [ getattr( vgg, layer_name ) for layer_name in layers ]

# Content is matched on conv4_2; style is matched on the first conv layer of
# each of the five blocks.
content_activation = getattr( vgg, 'conv4_2' )
style_activations = [ getattr( vgg, layer_name )
                      for layer_name in ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1') ]

# Evaluate the target activations once, feeding the fixed content / style
# images in place of the network input.
content_acts = sess.run( content_activation, feed_dict={ vgg.imgs: content_img } )
style_acts = sess.run( style_activations, feed_dict={ vgg.imgs: style_img } )

# Bring the evaluated numpy targets back into the graph as tensors.
target_content = tf.convert_to_tensor( content_acts )
target_styles = list( map( tf.convert_to_tensor, style_acts ) )



#
# --- cost function
#
# `ops` holds the activation node of every conv layer; the losses below are
# built from the content / style subsets of those nodes.

### Content Loss ###
# Half the sum of squared differences between the target content activations
# (evaluated earlier and converted back into a tensor) and the activations
# produced by the image being optimized.
content_loss_sub = target_content - content_activation
content_loss_square = tf.square( content_loss_sub )
content_loss = tf.reduce_sum( content_loss_square ) * 0.5

def computeGram(v):
    """Return the Gram matrix of one activation volume.

    v must have a fully defined static 4-D shape.  The leading (batch)
    dimension is ignored when flattening, so the reshape only succeeds when
    that dimension is 1.

    The result is a [channels, channels] tensor: the flattened
    [height*width, channels] activations, transposed and multiplied by
    themselves.
    """
    _, rows, cols, depth = v.get_shape().as_list()
    features = tf.reshape(v, [rows * cols, depth])
    # transpose_a=True computes features^T . features without a separate transpose op.
    return tf.matmul(features, features, transpose_a=True)

### Style Loss ###
# Gram matrices of the optimized image's style activations and of the
# pre-computed style targets, one per style layer.
style_G = [computeGram(act) for act in style_activations]
target_style_G = [computeGram(act) for act in target_styles]

# Per-layer sum of squared Gram-matrix differences.
style_loss_sub = [target - current for target, current in zip(target_style_G, style_G)]
style_loss_square = [tf.square(diff) for diff in style_loss_sub]
style_loss_rsum = [tf.reduce_sum(sq) for sq in style_loss_square]

# Static activation shapes: M is height * width, N is the channel count.
style_shapes = [act.get_shape().as_list() for act in style_activations]
M = [shape[1] * shape[2] for shape in style_shapes]
N = [shape[3] for shape in style_shapes]

# Per-layer normalization: 1 / (4 * N^2 * M^2).
style_loss_list = [rs / (4.0 * (m * n)**2) for m, n, rs in zip(M, N, style_loss_rsum)]

# Each layer is weighted by one over the number of active style layers.  The
# count is taken from the list itself (5 with the layers above) so the weight
# stays correct if the set of style layers is changed.
style_loss = sum(style_loss_list) / float(len(style_loss_list))

# From the paper:
#   - content is matched on 'conv4_2'; style on 'conv1_1', 'conv2_1',
#     'conv3_1', 'conv4_1' and 'conv5_1' (Fig 2)
#   - the ratio alpha/beta was 1x10-3
#   - w_l was always one divided by the number of active layers (ie, 1/5)
alpha = 1
beta = 1000

# Weighted sum of the content and style terms.
Loss = alpha * content_loss + beta * style_loss

# Adam with learning rate 0.1; var_list restricts the update to the image
# variable so that no other variable is trained.
optimizer = tf.train.AdamOptimizer( learning_rate=0.1 )
train_step = optimizer.minimize( Loss, var_list=[opt_img] )


# Initializing every variable is needed for the Adam optimizer's own
# variables, but it also clobbers the VGG variables, so the weights are
# reloaded from file right afterwards.
# tf.global_variables_initializer replaces the deprecated
# tf.initialize_all_variables.
sess.run( tf.global_variables_initializer() )
vgg.load_weights( 'vgg16_weights.npz', sess )

# Start the optimization from the content image instead of the random init.
sess.run( opt_img.assign( content_img ) )

# --- optimization loop

EPOCHS = 10
# Report and save a snapshot every REPORT_EVERY iterations.
# NOTE(review): with EPOCHS = 10 only iteration 0 is reported; raise EPOCHS
# (or lower REPORT_EVERY) to see intermediate results.
REPORT_EVERY = 50

# No placeholders in this lab: the image is a variable, so nothing is fed.
print("ITER\t\tLOSS\t\t\tSTYLE_LOSS\t\t\tCONTENT LOSS")

for i in range(EPOCHS+1):
    loss, c_loss, s_loss, _ = sess.run([Loss, content_loss, style_loss, train_step])

    if i % REPORT_EVERY == 0:
        # tmp_img is a graph tensor, so evaluate it first and drop the batch
        # axis to get a [224,224,3] array that can be written as an image.
        fname = 'epoch_' + str(i) + '.png'
        snapshot = sess.run(tmp_img)[0]
        # Values are already clipped to [0, 255]; casting to uint8 is meant to
        # have them written as-is (float input may be rescaled by imsave --
        # TODO confirm against the installed scipy version).
        imsave(fname, snapshot.astype(np.uint8))
        print(str(i) + "\t\t" + str(loss) + "\t\t" + str(s_loss) + "\t\t" + str(c_loss))




0 conv1_1_W (3, 3, 3, 64)
1 conv1_1_b (64,)
2 conv1_2_W (3, 3, 64, 64)
3 conv1_2_b (64,)
4 conv2_1_W (3, 3, 64, 128)
5 conv2_1_b (128,)
6 conv2_2_W (3, 3, 128, 128)
7 conv2_2_b (128,)
8 conv3_1_W (3, 3, 128, 256)
9 conv3_1_b (256,)
10 conv3_2_W (3, 3, 256, 256)
11 conv3_2_b (256,)
12 conv3_3_W (3, 3, 256, 256)
13 conv3_3_b (256,)
14 conv4_1_W (3, 3, 256, 512)
15 conv4_1_b (512,)
16 conv4_2_W (3, 3, 512, 512)
17 conv4_2_b (512,)
18 conv4_3_W (3, 3, 512, 512)
19 conv4_3_b (512,)
20 conv5_1_W (3, 3, 512, 512)
21 conv5_1_b (512,)
22 conv5_2_W (3, 3, 512, 512)
23 conv5_2_b (512,)
24 conv5_3_W (3, 3, 512, 512)
25 conv5_3_b (512,)
Instructions for updating:
Use `tf.global_variables_initializer` instead.
0 conv1_1_W (3, 3, 3, 64)
1 conv1_1_b (64,)
2 conv1_2_W (3, 3, 64, 64)
3 conv1_2_b (64,)
4 conv2_1_W (3, 3, 64, 128)
5 conv2_1_b (128,)
6 conv2_2_W (3, 3, 128, 128)
7 conv2_2_b (128,)
8 conv3_1_W (3, 3, 128, 256)
9 conv3_1_b (256,)
10 conv3_2_W (3, 3, 256, 256)
11 conv3_2_b (256,)
12 conv3_3_W