In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy
import scipy.io
from PIL import Image
%matplotlib inline

In [2]:
def content_cost(a_C, a_G):
    m, n_h, n_w, n_c = a_C.shape

    #2d vs 1d unroll?
    unrolled_a_C = tf.reshape(a_C, [-1])
    unrolled_a_G = tf.reshape(a_G, [-1])
    
    sigma = tf.reduce_sum(tf.square(tf.subtract(unrolled_a_C, unrolled_a_G)));
    
    return sigma/(4 * n_h * n_w * n_c)

In [3]:
def gram_matrix(A):
    return tf.matmul(A, A, transpose_b=True)

In [4]:
def layer_style_cost(a_S, a_G):
    m, n_h, n_w, n_c = a_S.shape
    
    # n_c, n_h * n_w
    unrolled_a_S = tf.transpose(tf.reshape(a_S, [n_h * n_w, n_c]))
    unrolled_a_G = tf.transpose(tf.reshape(a_G, [n_h * n_w, n_c]))
    
    
    G_a_S = gram_matrix(unrolled_a_S)
    G_a_G = gram_matrix(unrolled_a_G)
    
    sigma = tf.reduce_sum(tf.square(tf.subtract(G_a_S, G_a_G)))
    
    
    return sigma/(4 * n_h**2 * (n_w * n_c)**2)
    

In [5]:
def style_cost(model, STYLE_LAYERS):
    
    J_style = 0
    
    for name, coeff in STYLE_LAYERS:
        
        out = model[name]
        
        a_S = sess.run(out)
        a_G = out
        
        J_style += coeff * layer_style_cost(a_S, a_G)
    
    return J_style

In [6]:
STYLE_LAYERS = [
    ('conv1_1', 0.2),
    ('conv2_1', 0.2),
    ('conv3_1', 0.2),
    ('conv4_1', 0.2),
    ('conv5_1', 0.2)]

In [7]:
def total_cost(J_content, J_style, alpha=.7, beta=.4):
    return J_content * alpha + J_style * beta

In [8]:
def get_model(path):
       
    vgg = scipy.io.loadmat(path)

    vgg_layers = vgg['layers']
    
    def _weights(layer, expected_layer_name):
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        return W, b

    def _conv2d(prev_layer, layer, layer_name):
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b
    

    def _relu(conv2d_layer):
        return tf.nn.relu(conv2d_layer)

    def _conv2d_relu(prev_layer, layer, layer_name):
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    graph = {}
    graph['input']   = tf.Variable(np.zeros((1, image_size[0], image_size[1], 3)), dtype = 'float32')
    graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    
    return graph

    

In [9]:
img_mean = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))
def reshape_and_normalize_image(image):
    image = np.reshape(image, ((1,) + image.shape))
    image = image - img_mean
    return image

def save_image(path, image):
    image = image + img_mean
    image = np.clip(image[0], 0, 255).astype('uint8')
    image = Image.fromarray(image)
    image.save(path)

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

In [11]:
image_size = (380, 380)

In [12]:
content_img = mpimg.imread("images/content.jpg")
content_img = reshape_and_normalize_image(content_img)

In [13]:
style_img = mpimg.imread("images/style17.jpeg")
style_img = reshape_and_normalize_image(style_img)

In [14]:
def generate_noise_image(content_image, noise_ratio =.8):
    noise_image = np.random.uniform(-20, 20, (1, image_size[0], image_size[1], 3)).astype('float32')
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    
    return input_image

In [15]:
generated_img = generate_noise_image(content_img)

In [16]:
model = get_model("vgg16/imagenet-vgg-verydeep-19.mat")

In [17]:
sess.run(model['input'].assign(content_img))
out = model['conv4_2']
a_C = sess.run(out)
a_G = out
J_content = content_cost(a_C, a_G)

In [18]:
sess.run(model['input'].assign(style_img))
J_style = style_cost(model, STYLE_LAYERS)

In [19]:
J = total_cost(J_content, J_style)

In [20]:
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)

In [21]:
def model_nn(sess, input_image, num_iterations = 500):
    
    sess.run(tf.global_variables_initializer())

    sess.run(model["input"].assign(input_image))
    
    for i in range(num_iterations):
    
        sess.run(train_step)
        generated_image = sess.run(model['input'])

        if i%20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))
            
            save_image("output/" + str(i) + ".jpg", generated_image)
    
    save_image('output/generated_image.jpg', generated_image)
    
    return generated_image

In [22]:
model_nn(sess, generated_img)

Iteration 0 :
total cost = 74764090.0
content cost = 20706.459
style cost = 186873980.0
Iteration 20 :
total cost = 27883420.0
content cost = 23629.246
style cost = 69667200.0
Iteration 40 :
total cost = 14945924.0
content cost = 24886.945
style cost = 37321256.0
Iteration 60 :
total cost = 9594389.0
content cost = 25384.666
style cost = 23941550.0
Iteration 80 :
total cost = 6810007.5
content cost = 25624.148
style cost = 16980176.0
Iteration 100 :
total cost = 5043969.0
content cost = 25732.902
style cost = 12564890.0
Iteration 120 :
total cost = 3779967.0
content cost = 25735.154
style cost = 9404881.0
Iteration 140 :
total cost = 2872944.0
content cost = 25770.639
style cost = 7137261.0
Iteration 160 :
total cost = 2218989.5
content cost = 25752.883
style cost = 5502406.0
Iteration 180 :
total cost = 1739431.6
content cost = 25735.393
style cost = 4303542.0
Iteration 200 :
total cost = 1389082.2
content cost = 25733.111
style cost = 3427672.8
Iteration 220 :
total cost = 1135101.2


array([[[[104.65849  ,  84.35758  ,  78.292076 ],
         [ 25.24112  ,  47.26096  ,  22.872252 ],
         [ 47.489117 ,  57.305836 ,  47.275394 ],
         ...,
         [ 19.811752 ,  25.29337  ,   5.6567483],
         [  8.491561 ,  35.08746  ,  11.81553  ],
         [ -7.101299 ,  32.929398 ,  11.810356 ]],

        [[ 48.601173 ,  10.316775 ,  34.20858  ],
         [ -5.178976 , -18.49542  , -42.708637 ],
         [ -6.8978996, -57.2401   , -24.570187 ],
         ...,
         [ 14.9425125,   6.048366 ,  18.99626  ],
         [ 13.37573  ,  17.87961  ,   8.514842 ],
         [ 17.106684 ,  33.07383  ,  11.855846 ]],

        [[ 40.378613 ,  46.13308  ,  30.95661  ],
         [ -3.0625417,  -9.44101  ,   9.4996395],
         [  2.701304 ,  24.009508 ,  27.612526 ],
         ...,
         [ 30.359495 ,  16.813828 ,   9.163417 ],
         [  7.752055 ,  13.416243 ,   6.580846 ],
         [ 28.526432 ,  27.44254  ,  18.654036 ]],

        ...,

        [[ 50.119385 ,  21.369043 ,  3