In [1]:
import tensorflow as tf
import scipy.io
import numpy as np
from PIL import Image
import os

In [2]:
# Location of the pretrained network file, relative to the working directory.
VGG_path = os.path.join('data', 'imagenet-vgg-verydeep-19.mat')
# Load the whole .mat file; every later lookup indexes into this structure.
VGG_raw = scipy.io.loadmat(VGG_path)
# Per-layer records, indexed by layer number (see the layout note below).
VGG_net = VGG_raw['layers'][0]
# Value subtracted in preprocess() and added back in postprocess(), where it is
# reshaped to (1, 1, 1, 3) -- presumably the per-channel mean of the training
# images; TODO confirm against the .mat file's 'meta' section.
mean_pixels = VGG_raw['meta'][0][0][2][0][0][2][0][0]

'''
VGG_net[x][0][0][y][0]

x: Layer number

y = 0: Name of layer
y = 1: Type of layer {conv, relu, pool}
y = 2: Value in layer {weights and biases, 0, type of pooling}
'''

def get_weights(vgg, layer):
    """Return the stored parameters of one layer as TensorFlow constants.

    Args:
        vgg: Layer records as loaded from the .mat file (``VGG_net``).
        layer: Index of a layer whose value slot holds a (weights, biases) pair.

    Returns:
        Tuple ``(weights, biases)``; the biases are flattened to one dimension.
    """
    # NOTE(review): the kernel is used with the axis order stored in the file;
    # confirm that order matches what tf.nn.conv2d expects (height, width, in, out).
    stored_params = vgg[layer][0][0][2][0]
    kernel, raw_bias = stored_params[0], stored_params[1]
    weights = tf.constant(kernel)
    biases = tf.constant(np.reshape(raw_bias, raw_bias.size))
    return weights, biases

def pooling_type(vgg, layer):
    """Return the value slot (index 2) of a pooling layer's record, i.e. its pooling type."""
    layer_record = vgg[layer][0][0]
    value_slot = layer_record[2]
    return value_slot[0]

In [3]:
def convolution(layer_input, weights, biases):
    """Convolve `layer_input` with `weights` (stride 1, 'SAME' padding) and add `biases`."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(layer_input, weights, unit_strides, padding='SAME') + biases

def relu(layer_input):
    """Apply the rectified-linear activation to `layer_input`."""
    return tf.nn.relu(layer_input)

def pool(layer_input, pooling_type):
    """Downsample `layer_input` with a 2x2 window, stride 2 and 'SAME' padding.

    Max pooling is used when `pooling_type` equals 'max'; any other value
    selects average pooling.
    """
    window = [1, 2, 2, 1]
    pooling_op = tf.nn.max_pool if pooling_type == 'max' else tf.nn.avg_pool
    return pooling_op(layer_input, ksize=window, strides=[1, 2, 2, 1], padding='SAME')

In [4]:
'''
Process images
'''
# Locations of the two input pictures, relative to the working directory.
# Both are loaded with read_img() further down in this cell.
style_img_path = os.path.join('images', 'starry_night.jpg')
content_img_path = os.path.join('images', 'new_york.jpg')

def preprocess(img):
    """Turn an (h, w, 3) image array into a mean-centred (1, h, w, 3) batch.

    The channel axis is reversed, a leading batch axis is added and
    `mean_pixels` is subtracted from every pixel. The input is not modified.
    """
    # Reverse the order of the last (channel) axis.
    # NOTE(review): whether this yields RGB or BGR depends on the order the
    # caller supplies -- confirm it matches the channel order of mean_pixels.
    flipped = img[..., ::-1]
    # (h, w, d) -> (1, h, w, d)
    batch = flipped[np.newaxis, :, :, :]
    mean = np.array(mean_pixels).reshape((1, 1, 1, 3))
    return batch - mean

def postprocess(img):
    """Undo preprocess(): turn a mean-centred (1, h, w, 3) batch into a uint8 image.

    Adds `mean_pixels` back, drops the batch axis, clips to [0, 255], casts to
    uint8 and reverses the channel axis again.

    Args:
        img: Array of shape (1, h, w, 3). It is left untouched.

    Returns:
        uint8 array of shape (h, w, 3).
    """
    # Out-of-place add: `img += ...` would overwrite the caller's array and
    # raise a casting error when `img` has an integer dtype.
    img = img + np.array(mean_pixels).reshape((1, 1, 1, 3))
    # Reshape (1, h, w, d) to (h, w, d)
    img = img[0]
    img = np.clip(img, 0, 255).astype('uint8')
    # Reverse the channel axis, mirroring the reversal done in preprocess()
    img = img[..., ::-1]
    return img

def read_img(img_path):
    """Load an image file and return it as a preprocessed (1, h, w, 3) batch.

    Args:
        img_path: Path of the image file to open.

    Returns:
        The array produced by preprocess() for the decoded picture.
    """
    # The context manager closes the file once the pixels are decoded.
    with Image.open(img_path) as picture:
        # Force three channels so grayscale, palette and RGBA files also fit
        # the (1, 1, 1, 3) mean subtraction performed in preprocess().
        pixels = np.asarray(picture.convert('RGB'), dtype=np.uint8)
    return preprocess(pixels)

# Load both pictures as preprocessed (1, h, w, 3) batches.
style_image = read_img(style_img_path)
content_image = read_img(content_img_path)

# Shapes are kept so later cells can size their graph inputs to match.
style_shape = style_image.shape
content_shape = content_image.shape

In [5]:
'''
Build network
    conv1_1, relu1_1, conv1_2, relu1_2, pool1
    conv2_1, relu2_1, conv2_2, relu2_2, pool2
    conv3_1, relu3_1, conv3_2, relu3_2, conv3_3, relu3_3, conv3_4, relu3_4, pool3
    conv4_1, relu4_1, conv4_2, relu4_2, conv4_3, relu4_3, conv4_4, relu4_4, pool4
    conv5_1, relu5_1, conv5_2, relu5_2, conv5_3, relu5_3, conv5_4, relu5_4, pool5
'''

def build_network(image, vgg):
    """Assemble the five convolutional blocks on top of `image`.

    Every block is a run of conv/relu pairs followed by one pooling layer.
    Layer records are read from `vgg` in order: each conv/relu pair occupies
    two consecutive indices and each pooling layer one.

    Args:
        image: Input tensor of the network.
        vgg: Layer records as loaded from the .mat file (``VGG_net``).

    Returns:
        Dict mapping 'input', 'convB_N', 'reluB_N' and 'poolB' to the
        corresponding tensors, in construction order.
    """
    # Number of conv/relu pairs in blocks 1..5.
    pairs_per_block = (2, 2, 4, 4, 4)

    net = {'input': image}
    current = image
    record_index = 0

    for block, pair_count in enumerate(pairs_per_block, start=1):
        print("Building LAYER %d..." % block)
        for pair in range(1, pair_count + 1):
            kernel, bias = get_weights(vgg, record_index)
            current = convolution(current, kernel, bias)
            net['conv%d_%d' % (block, pair)] = current
            current = relu(current)
            net['relu%d_%d' % (block, pair)] = current
            # Skip over the relu record that follows every conv record.
            record_index += 2
        current = pool(current, pooling_type(vgg, record_index))
        net['pool%d' % block] = current
        record_index += 1

    print("... Finished Building")

    return net

In [8]:
# Names (as assigned by build_network) of the activations compared by the
# content loss and by the style loss.
CONTENT_LAYERS = ('relu4_2', 'relu5_2')
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')

# Layer name -> activations (numpy arrays) of the content / style image.
content = {}
style = {}

# Graph to compute content and style features
feature_g = tf.Graph()
with feature_g.as_default(), tf.device('/cpu:0'):

    # Placeholder for images
    content_img = tf.placeholder(tf.float32, shape=content_shape)
    style_img = tf.placeholder(tf.float32, shape=style_shape)

    # VGG nets
    content_net = build_network(content_img, VGG_net)
    style_net = build_network(style_img, VGG_net)

# Run graph and compute features.
# Each network is evaluated once for all of its layers: fetching the layers one
# at a time would repeat the full forward pass for every layer. Only the
# placeholder a network actually depends on is fed.
with tf.Session(graph=feature_g) as session:
    content.update(zip(
        CONTENT_LAYERS,
        session.run([content_net[name] for name in CONTENT_LAYERS],
                    feed_dict={content_img: content_image})))
    style.update(zip(
        STYLE_LAYERS,
        session.run([style_net[name] for name in STYLE_LAYERS],
                    feed_dict={style_img: style_image})))

Building LAYER 1...
Building LAYER 2...
Building LAYER 3...
Building LAYER 4...
Building LAYER 5...
... Finished Building
Building LAYER 1...
Building LAYER 2...
Building LAYER 3...
Building LAYER 4...
Building LAYER 5...
... Finished Building


In [None]:
# Number of optimizer steps applied to the output image.
iterations = 500

# Graph for stylizing image
style_g = tf.Graph()
with style_g.as_default(), tf.device('/cpu:0'):

    # Initialize output image: random noise with the content image's shape.
    # It is the only trainable variable; the network weights are constants.
    output_img = tf.Variable(tf.random_normal(content_shape))
    net = build_network(output_img, VGG_net)

    # Content Loss: squared distance between the output's activations and the
    # precomputed activations of the content image.
    content_losses = []
    for content_layer in CONTENT_LAYERS:
        content_losses.append(tf.nn.l2_loss(net[content_layer] - content[content_layer]))
    L_content = tf.reduce_sum(content_losses)

    # Style Loss: squared distance between Gram matrices of the output's
    # activations and those of the style image.
    style_losses = []
    for style_layer in STYLE_LAYERS:
        layer = net[style_layer]
        _, height, width, number = map(lambda i: i.value, layer.get_shape())
        size = height * width * number
        features = tf.reshape(layer, (-1, number))
        gram = tf.matmul(tf.transpose(features), features) / size
        # The style activations are already numpy arrays, so their Gram matrix
        # is computed once with numpy. It is normalized by the style feature
        # map's own size: the style image need not match the output's size.
        style_features = np.reshape(style[style_layer], (-1, number))
        style_gram = np.matmul(style_features.T, style_features) / style_features.size
        # style_gram.size is the number of Gram entries (number * number).
        style_losses.append(0.5 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
    L_style = tf.reduce_sum(style_losses)

    # Total Loss
    alpha = 0.5 # content weight
    beta = 0.5 # style weight
    L_total = alpha * L_content + beta * L_style

    # Optimization
    learning_rate = 1e1
    beta1 = 0.9
    beta2 = 0.99
    epsilon = 1e-08
    optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(L_total)

    def progress():
        """Print the current content, style and total loss (needs a default session)."""
        print("\tContent Loss: %g" % L_content.eval())
        print("\t  Style Loss: %g" % L_style.eval())
        print("\t  Total Loss: %g" % L_total.eval())

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(iterations):
            print("Iteration: ", step)
            optimizer.run()
            if (step % 50 == 0):
                progress()
        # Read the optimized image before the session closes; the variable's
        # value is not available afterwards.
        stylized_image = postprocess(output_img.eval())
    

Building LAYER 1...
Building LAYER 2...
Building LAYER 3...
Building LAYER 4...
Building LAYER 5...
... Finished Building
Initialized
Iteration: %d 0
