In [1]:
import numpy as np
import tensorflow as tf

import sys
sys.path.insert(1, '../')
from utils import *

In [2]:
# Ordered layer names walked by Encoder. The first four characters of each
# name ('conv', 'relu' or 'pool') select the operation that is applied, and
# every 'conv' entry consumes one (kernel, bias) pair from the weights file.
ENCODER_LAYERS = (
    # stage 1
    'conv1_1', 'relu1_1',
    'conv1_2', 'relu1_2',
    'pool1',

    # stage 2
    'conv2_1', 'relu2_1',
    'conv2_2', 'relu2_2',
    'pool2',

    # stage 3
    'conv3_1', 'relu3_1',
    'conv3_2', 'relu3_2',
    'conv3_3', 'relu3_3',
    'conv3_4', 'relu3_4',
    'pool3',

    # stage 4
    'conv4_1', 'relu4_1',
    'conv4_2', 'relu4_2',
    'conv4_3', 'relu4_3',
    'conv4_4', 'relu4_4',
    'pool4',

    # stage 5 -- the sequence ends right after the first ReLU
    'conv5_1', 'relu5_1',
)

In [3]:
def conv2d(x, kernel, bias):
    """Reflect-pad ``x``, convolve it with ``kernel`` and add ``bias``.

    Axes 1 and 2 of ``x`` are padded by one element on each side using
    reflection, then a stride-1 'VALID' convolution is applied, so with a
    3x3 kernel the output keeps the size of ``x`` along those two axes.

    Args:
        x: input tensor; axes 0 and 3 are left unpadded.
        kernel: convolution filter passed straight to ``tf.nn.conv2d``.
        bias: bias passed straight to ``tf.nn.bias_add``.

    Returns:
        The biased convolution output tensor.
    """
    # One-element border on axes 1 and 2 only.
    border = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(x, border, mode='REFLECT')

    # 'VALID' because the padding has already been applied by hand above.
    convolved = tf.nn.conv2d(padded, kernel, strides=[1, 1, 1, 1], padding='VALID')
    return tf.nn.bias_add(convolved, bias)

def pool2d(x):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding over axes 1 and 2.

    Args:
        x: input tensor handed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    # Window size and stride are the same: non-overlapping 2x2 windows.
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')

In [4]:
class Encoder:
    """Fixed (non-trainable) convolutional encoder described by ENCODER_LAYERS.

    The constructor reads kernels and biases from an ``.npz`` archive and wraps
    them in non-trainable TensorFlow variables; ``encode`` then builds the
    graph ops layer by layer.
    """

    def __init__(self, weights_path):
        """Load the weights and create one (kernel, bias) variable pair per conv layer.

        Args:
            weights_path: path to an ``.npz`` archive whose arrays are named
                ``arr_0, arr_1, ...`` and alternate kernel, bias in the order
                the 'conv' entries appear in ENCODER_LAYERS.
        """
        idx = 0
        self.weight_vars = []

        # np.load on an .npz returns a lazily-read archive that keeps the file
        # open; the context manager closes it once every array has been copied.
        with np.load(weights_path) as weights:
            # create the TensorFlow variables
            with tf.variable_scope('encoder'):
                for layer in ENCODER_LAYERS:
                    if layer[:4] != 'conv':
                        continue

                    # Axes are permuted to [2, 3, 1, 0]; presumably the file
                    # stores kernels as (out, in, h, w) and this yields the
                    # (h, w, in, out) layout of tf.nn.conv2d -- TODO confirm.
                    kernel = weights['arr_%d' % idx].transpose([2, 3, 1, 0])
                    bias   = weights['arr_%d' % (idx + 1)]
                    kernel = kernel.astype(np.float32)
                    bias   = bias.astype(np.float32)
                    idx += 2

                    with tf.variable_scope(layer):
                        W = tf.Variable(kernel, trainable=False, name='kernel')
                        b = tf.Variable(bias,   trainable=False, name='bias')

                    self.weight_vars.append((W, b))

    def encode(self, image):
        """Build the encoder graph on top of ``image``.

        Args:
            image: input batch fed to the first convolution.

        Returns:
            dict mapping every name in ENCODER_LAYERS to the tensor produced
            by that layer.
        """
        idx = 0
        layers = {}
        current = image

        for layer in ENCODER_LAYERS:
            kind = layer[:4]

            if kind == 'conv':
                # weight_vars holds the pairs in the order the conv layers appear
                kernel, bias = self.weight_vars[idx]
                idx += 1
                current = conv2d(current, kernel, bias)

            elif kind == 'relu':
                current = tf.nn.relu(current)

            elif kind == 'pool':
                current = pool2d(current)

            layers[layer] = current

        # a duplicate name in ENCODER_LAYERS would silently overwrite an entry
        assert len(layers) == len(ENCODER_LAYERS)

        return layers

    @staticmethod
    def _channel_means(mode):
        """Return the three per-channel means in the order selected by ``mode``."""
        if mode == 'BGR':
            return np.array([103.939, 116.779, 123.68])
        # any value other than 'BGR' selects the reversed order
        return np.array([123.68, 116.779, 103.939])

    def preprocess(self, image, mode='BGR'):
        """Subtract the per-channel means from ``image`` (broadcast over its last axis).

        Args:
            image: array or tensor whose last axis has three channels.
            mode: 'BGR' for (103.939, 116.779, 123.68); any other value uses
                the reversed order.

        Returns:
            ``image`` with the means subtracted.
        """
        return image - self._channel_means(mode)

    def deprocess(self, image, mode='BGR'):
        """Inverse of ``preprocess``: add the per-channel means back to ``image``.

        Args:
            image: array or tensor whose last axis has three channels.
            mode: 'BGR' for (103.939, 116.779, 123.68); any other value uses
                the reversed order.

        Returns:
            ``image`` with the means added.
        """
        return image + self._channel_means(mode)

In [9]:
# Input locations; the relative paths resolve against the notebook's working directory.
TRAINING_CONTENT_DIR = '../../_input/content'
TRAINING_STYLE_DIR = '../../_input/style'
# .npz archive handed to Encoder, which reads its conv kernels and biases from it.
ENCODER_WEIGHTS_PATH = '../vgg19_normalised.npz'

TRAINING_IMAGE_SHAPE = (256, 256, 3) # (height, width, color_channels)
HEIGHT, WIDTH, CHANNELS = TRAINING_IMAGE_SHAPE

# list_images is not defined in this notebook (presumably it comes from the
# star-import of utils and returns the image paths found in a directory -- TODO confirm).
content_imgs_path = list_images(TRAINING_CONTENT_DIR)
style_imgs_path   = list_images(TRAINING_STYLE_DIR)

In [10]:
# Build the encoder: the constructor loads the weights file and creates
# non-trainable TensorFlow variables under the 'encoder' variable scope.
encoder = Encoder(ENCODER_WEIGHTS_PATH)

In [11]:
# Take the first 8 paths from each list as a small trial batch.
content_batch_path = content_imgs_path[0:8]
style_batch_path   = style_imgs_path[0:8]

# get_train_images is not defined in this notebook (presumably from utils);
# the keyword names suggest it loads the files and crops them to
# HEIGHT x WIDTH -- TODO confirm the returned shape and dtype.
content_batch = get_train_images(content_batch_path, crop_height=HEIGHT, crop_width=WIDTH)
style_batch   = get_train_images(style_batch_path,   crop_height=HEIGHT, crop_width=WIDTH)

In [12]:
# Cast to float32 so the batch matches the dtype of the encoder's float32 kernels.
# NOTE(review): style_batch is not cast here -- confirm whether it needs the same treatment.
content_batch = content_batch.astype('float32') 

In [13]:
# Build the graph ops for every encoder layer; the result is a dict keyed by layer name.
# NOTE(review): the batch is fed as loaded, without the channel reversal and
# mean subtraction that later cells apply -- confirm this is intended for this trial run.
enc_c_layers = encoder.encode(content_batch)

In [14]:
# Evaluate every layer output in a single run (TensorFlow 1.x session API).
with tf.Session() as sess:
    # the encoder weights are tf.Variable objects and must be initialised first
    sess.run(tf.global_variables_initializer())
    # fetching a dict returns a dict with the same keys holding the evaluated arrays
    layer = sess.run(enc_c_layers)

In [15]:
# List the evaluated layer names; there should be one key per entry of ENCODER_LAYERS.
layer.keys()

dict_keys(['relu3_3', 'relu4_3', 'conv3_1', 'pool1', 'conv5_1', 'relu3_2', 'conv4_2', 'relu4_1', 'relu1_2', 'relu3_4', 'relu1_1', 'conv3_3', 'conv3_2', 'conv4_4', 'conv4_3', 'conv2_1', 'conv2_2', 'relu4_2', 'pool3', 'relu2_2', 'conv1_2', 'relu3_1', 'pool4', 'conv1_1', 'relu4_4', 'conv3_4', 'conv4_1', 'pool2', 'relu5_1', 'relu2_1'])

In [16]:
# Inspect the raw conv4_1 activations.
# NOTE(review): this displays the whole array; showing .shape or a small slice would keep the output readable.
layer['conv4_1']

array([[[[-1.47759140e-01,  2.99163318e+00,  1.02709568e+00, ...,
           1.89978385e+00,  9.27699327e-01, -5.79483271e+00],
         [-1.47759140e-01,  2.99163318e+00,  1.02709568e+00, ...,
           1.89978385e+00,  9.27699327e-01, -5.79483271e+00],
         [-1.47759140e-01,  2.99163318e+00,  1.02709568e+00, ...,
           1.89978385e+00,  9.27699327e-01, -5.79483271e+00],
         ...,
         [ 6.02460504e-01, -3.59573030e+00, -9.36682343e-01, ...,
          -1.63212395e+00,  5.87595761e-01, -9.75105190e+00],
         [ 2.31873721e-01,  3.73829246e+00,  2.70697808e+00, ...,
          -2.40466285e+00,  1.75160336e+00, -1.11949644e+01],
         [-3.70196602e-03,  3.76883173e+00,  1.16560662e+00, ...,
          -2.02361584e+00,  9.15913105e-01, -1.15648298e+01]],

        [[-1.47759140e-01,  2.99163318e+00,  1.02709568e+00, ...,
           1.89978385e+00,  9.27699327e-01, -5.79483271e+00],
         [-1.47759140e-01,  2.99163318e+00,  1.02709568e+00, ...,
           1.89978385e

In [None]:
# Reverse the order of the last axis (the colour channels according to
# TRAINING_IMAGE_SHAPE); presumably this converts RGB to BGR for the
# mode='BGR' preprocessing that follows -- TODO confirm the loader's channel order.
# NOTE(review): style_batch was never cast to float32 the way content_batch was -- confirm its dtype.
content = tf.reverse(content_batch, axis=[-1])
style   = tf.reverse(style_batch,   axis=[-1])

In [None]:
# Subtract the per-channel means in BGR order.
# NOTE(review): both names are reassigned from themselves, so re-running this
# cell subtracts the means a second time -- run it exactly once per kernel session.
content = encoder.preprocess(content, mode='BGR')
style   = encoder.preprocess(style, mode='BGR')

In [None]:
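# NOTE: Encoder.encode currently returns a single dict of layer outputs, so the
# two-value unpacking below must be updated before this cell is re-enabled.
# enc_c, enc_c_layers = encoder.encode(content)
# enc_s, enc_s_layers = encoder.encode(style)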