In [1]:
# Widen the notebook container to 90% of the page so wide outputs stay readable.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
import utils
from vgg.imagenet_classes import class_names
from vgg.VGG import generate_VGG16

In [3]:
# Report the runtime environment: the TensorFlow version string, then whatever
# utils.get_tensorflow_devices() returns.
tf_version = tf.__version__
print("TensorFlow version : {}".format(tf_version))
available_devices = utils.get_tensorflow_devices()
print("Devices : {}".format(available_devices))

TensorFlow version : 1.1.0
Devices : [('/cpu:0', 'CPU')]


In [4]:
# -----------------------------------------------------------------------------
# Paths and image geometry shared by the rest of the notebook.
# -----------------------------------------------------------------------------

# Directory that receives the TensorBoard summaries.
LOGS = 'logs/4.0'
# Directory for the generated images.
OUTPUT_DIR = 'output/'
# Reference style image and reference content image.
STYLE_IMAGE = 'images/udnie.jpg'
CONTENT_IMAGE = 'images/hongkong.jpg'

# Image geometry: width, height and number of colour channels.
IMG_W, IMG_H, CHANNELS = 256, 256, 3

# Weights file handed to generate_VGG16.
MODEL_WEIGHTS = 'vgg/vgg16.npy'

# Folders the training batches are read from.
# NOTE(review): the style folder is the same as the content folder - confirm
# this is intended.
DATA_DIR_CONTENT = 'COCO/train2014/'
DATA_DIR_STYLE = 'COCO/train2014/'

In [5]:
# Start every run from an empty LOGS directory: remove a previous one if it
# exists, then (re)create it.
logs_already_present = tf.gfile.Exists(LOGS)
if logs_already_present:
    tf.gfile.DeleteRecursively(LOGS)
tf.gfile.MakeDirs(LOGS)

In [6]:
# Constant to put more emphasis on content loss.
BETA = 1
# Constant to put more emphasis on style loss.
ALPHA = 25

In [7]:
# Number of images per batch: it is the leading dimension of both input
# placeholders and the batch_size of both data generators.
BATCH_SIZE = 4

In [8]:
# Create the interactive session with the `allow_growth` GPU option enabled.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.InteractiveSession(config=config)

In [9]:
# Both inputs are float32 placeholders with the same fixed 4-D shape.
# NOTE(review): the spatial dimensions are ordered (IMG_W, IMG_H); this is
# harmless while both are 256 - confirm before using non-square images.
_batch_shape = (BATCH_SIZE, IMG_W, IMG_H, CHANNELS)

with tf.name_scope('style_batch'):
    style_batch = tf.placeholder(tf.float32, shape=_batch_shape, name='style_batch')

with tf.name_scope('content_batch'):
    content_batch = tf.placeholder(tf.float32, shape=_batch_shape, name='content_batch')

In [10]:
from vgg.VGG import generate_VGG16

In [11]:
# Options shared by both VGG16 encoders.
_encoder_options = dict(weights_file=MODEL_WEIGHTS,
                        apply_preprocess=True,
                        remove_top=True)

# The style encoder is built first; the scope it returns is handed to the
# content encoder (presumably so both share the same weights - confirm in
# vgg.VGG).
with tf.name_scope("style_encoder_VGG"):
    style_encoder_VGG, vgg_scope = generate_VGG16(input_tensor=style_batch,
                                                  **_encoder_options)

with tf.name_scope("content_encoder_VGG"):
    content_encoder_VGG, _ = generate_VGG16(input_tensor=content_batch,
                                            scope=vgg_scope,
                                            **_encoder_options)

In [12]:
with tf.name_scope("Adaptive_IN"):
    # Small constant added to the content standard deviation to avoid a
    # division by zero.
    eps = 1e-6

    # Feature maps stored under 'conv4_1' by each encoder.
    encoded_style = style_encoder_VGG['conv4_1']
    encoded_content = content_encoder_VGG['conv4_1']

    # Mean and variance over axes 1 and 2, kept as size-1 axes so they
    # broadcast against the feature maps.
    spatial_axes = [1, 2]
    mean_c, var_c = tf.nn.moments(encoded_content, spatial_axes, keep_dims=True)
    mean_s, var_s = tf.nn.moments(encoded_style, spatial_axes, keep_dims=True)

    # Normalize the content features with their own statistics, then rescale
    # and shift them with the style statistics.
    std_s = tf.sqrt(var_s)
    std_c = tf.sqrt(var_c)
    centered_content = encoded_content - mean_c
    target = std_s * centered_content / (std_c + eps) + mean_s

In [13]:
# Print the static shape of every tensor involved in the AdaIN step.
shape_report = [
    ("encoded_content.shape : ", encoded_content),
    ("encoded_style.shape : ", encoded_style),
    ("mean_c.shape : ", mean_c),
    ("var_c.shape: ", var_c),
    ("mean_s.shape : ", mean_s),
    ("var_s.shape: ", var_s),
    ("target.shape : ", target),
]
for label, tensor in shape_report:
    print(label, tensor.shape)

encoded_content.shape :  (4, 32, 32, 512)
encoded_style.shape :  (4, 32, 32, 512)
mean_c.shape :  (4, 1, 1, 512)
var_c.shape:  (4, 1, 1, 512)
mean_s.shape :  (4, 1, 1, 512)
var_s.shape:  (4, 1, 1, 512)
target.shape :  (4, 32, 32, 512)


#### Moments for Batch Normalization

```python
mean, var = tf.nn.moments(encoded_content, [0,1,2], keep_dims=True)
print("mean.shape : ", mean.shape)
print("var.shape: ", var.shape)
```
```
mean.shape :  (1, 1, 1, 512)
var.shape:  (1, 1, 1, 512)
```

In [14]:
def _conv_block(prev_layer,kernel_size, nb_fmaps, stride, layer_name, relu=True):
    """Build a 2-D convolution with bias, optionally followed by a ReLU.

    Args:
        prev_layer: input tensor; its last dimension is read as the number of
            input feature maps.
        kernel_size: side length of the square convolution kernel.
        nb_fmaps: number of output feature maps.
        stride: stride applied to the two middle dimensions.
        layer_name: name of the enclosing name scope and of the returned op.
        relu: when True (default) the biased convolution goes through a ReLU.

    Returns:
        The ReLU output when `relu` is True, otherwise the biased convolution.
    """
    with tf.name_scope(layer_name):
        in_channels = prev_layer.get_shape().as_list()[-1]
        kernel_shape = [kernel_size, kernel_size, in_channels, nb_fmaps]
        # Kernel drawn from a truncated normal (stddev 0.1); bias starts at 1.0.
        w = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name='W')
        b = tf.Variable(tf.constant(1.0, shape=[nb_fmaps]), name='b')
        strides = [1, stride, stride, 1]
        conv = tf.nn.conv2d(prev_layer, w, strides, padding='SAME')
        if not relu:
            return tf.nn.bias_add(conv, b, name=layer_name)
        return tf.nn.relu(tf.nn.bias_add(conv, b), name=layer_name)

### Up Sampling in TensorFlow : 

In [15]:
def _up_sampling(prev_layer, up_factor, layer_name):
    """Up-sample a 4-D tensor spatially with nearest-neighbour interpolation.

    Args:
        prev_layer: input tensor whose static shape unpacks as
            (batch, height, width, channels).
        up_factor: multiplier applied to both the height and the width.
        layer_name: name of the enclosing name scope and of the resize op.

    Returns:
        The resized tensor produced by `tf.image.resize_nearest_neighbor`.
    """
    with tf.name_scope(layer_name):
        _, height, width, _ = prev_layer.get_shape().as_list()
        # The resize op expects its size as [new_height, new_width]; passing
        # width first would silently transpose the target size of non-square inputs.
        new_size = [int(height * up_factor), int(width * up_factor)]
        return tf.image.resize_nearest_neighbor(prev_layer, new_size, name=layer_name)

**Example** :

```python
x = tf.Variable(tf.truncated_normal([16, 8, 8, 56], stddev=2.0))
sess.run(tf.global_variables_initializer())
x_np = sess.run(x)
print(x_np.shape)
# output : (16, 8, 8, 56)


up_x = _up_sampling(x, 2, 'test_up')
up_x_np = sess.run(up_x)
print(up_x_np.shape)
# output : (16, 16, 16, 56)
```

In [16]:
# Decoder: turns the AdaIN target feature map back into an image through
# 3x3 convolution blocks interleaved with three 2x nearest-neighbour
# up-samplings. Every intermediate tensor is kept in the `decoder` dict under
# its layer name.
with tf.name_scope('decoder'):
    decoder = {}
    decoder['conv_block1_1'] = _conv_block(target, 3, 256, 1, 'conv_block1_1')
    decoder['up_sampling1'] = _up_sampling(decoder['conv_block1_1'], 2, 'up_sampling1')

    decoder['conv_block2_1'] = _conv_block(decoder['up_sampling1'], 3, 256, 1, 'conv_block2_1')
    decoder['conv_block2_2'] = _conv_block(decoder['conv_block2_1'], 3, 256, 1, 'conv_block2_2')
    decoder['conv_block2_3'] = _conv_block(decoder['conv_block2_2'], 3, 128, 1, 'conv_block2_3')
    decoder['up_sampling2'] = _up_sampling(decoder['conv_block2_3'], 2, 'up_sampling2')

    decoder['conv_block3_1'] = _conv_block(decoder['up_sampling2'], 3, 128, 1, 'conv_block3_1')
    decoder['conv_block3_2'] = _conv_block(decoder['conv_block3_1'], 3, 64, 1, 'conv_block3_2')
    decoder['up_sampling3'] = _up_sampling(decoder['conv_block3_2'], 2, 'up_sampling3')

    # The scope/op name matches the dictionary key (it used to duplicate
    # 'conv_block3_1').
    decoder['conv_block4_1'] = _conv_block(decoder['up_sampling3'], 3, 64, 1, 'conv_block4_1')
    decoder['conv_block4_2'] = _conv_block(decoder['conv_block4_1'], 3, 64, 1, 'conv_block4_2')

    # The final 9x9 convolution (no ReLU) reads the last block of the chain,
    # so 'conv_block4_2' is part of the network instead of a dead branch.
    decoder['final_conv'] = _conv_block(decoder['conv_block4_2'], 9, 3, 1, 'final_conv', relu=False)
    # NOTE(review): tanh(x / 255) * 255 bounds the output to (-255, 255), so
    # negative values are possible - confirm this is the range expected by the
    # VGG preprocessing and by utils.save_image.
    decoder['output'] = tf.multiply(tf.tanh(decoder['final_conv']/255.0), 255, name="output")

In [17]:
# Run the decoder output through VGG16 again, passing the scope returned by
# the style encoder, so the content loss can be measured on its feature maps.
with tf.name_scope("VGG_content_loss"):
    vgg_content_loss, _ = generate_VGG16(
        input_tensor=decoder['output'],
        scope=vgg_scope,
        weights_file=MODEL_WEIGHTS,
        apply_preprocess=True,
        remove_top=True,
    )

In [18]:
with tf.name_scope('content_loss'):
    # Mean squared difference between the 'conv4_1' features of the generated
    # image and the AdaIN target.
    feature_gap = vgg_content_loss['conv4_1'] - target
    content_loss = tf.reduce_mean(tf.pow(feature_gap, 2))

In [19]:
# The style loss reads features of the very same tensor (decoder['output'])
# that `vgg_content_loss` already encodes with identical options. Reuse that
# feature dictionary instead of building a second, redundant VGG16 forward
# pass over the same input.
vgg_style_loss = vgg_content_loss

In [20]:
# (layer name, weight) pairs entering the style loss; every layer weighs 1.0.
_STYLE_LAYER_NAMES = ('conv1_2', 'conv2_2', 'conv3_3', 'conv4_3')
STYLE_LAYERS = [(name, 1.0) for name in _STYLE_LAYER_NAMES]

def _gram_matrix_tf(F, B, N, M):
    """Return the per-sample Gram matrix of a batch of feature maps.

    Args:
        F: feature tensor; it is reshaped to (B, M, N).
        B: batch size.
        N: number of feature maps.
        M: number of positions per feature map.

    Returns:
        A (B, N, N) tensor equal to (1 / M) * F^T F for every sample.
    """
    flat = tf.reshape(F, (B, M, N))
    flat_t = tf.transpose(flat, perm=[0, 2, 1])
    scale = 1 / M
    return scale * tf.matmul(flat_t, flat)

with tf.name_scope("style_loss"):
    style_loss = 0
    for layer_name, weight in STYLE_LAYERS:
        # Features of the generated image and of the style batch at this
        # layer; both are graph tensors.
        generated_features = vgg_style_loss[layer_name]
        style_features = style_encoder_VGG[layer_name]

        # Static shape of the generated features: index 0 is the batch size,
        # index 3 the number of feature maps, indices 1 and 2 the positions.
        shape = generated_features.get_shape().as_list()
        B, N = shape[0], shape[3]
        M = shape[1] * shape[2]

        G_style = _gram_matrix_tf(style_features, B, N, M)
        G = _gram_matrix_tf(generated_features, B, N, M)

        # Weighted mean squared difference between the two Gram matrices.
        layer_term = tf.reduce_mean(tf.pow(G - G_style, 2))
        style_loss += weight * layer_term
        

In [21]:
with tf.name_scope('total_loss'):
    # Weighted sum of both losses, also exported as a scalar summary.
    weighted_content = BETA * content_loss
    weighted_style = ALPHA * style_loss
    total_loss = weighted_content + weighted_style
    tf.summary.scalar('total_loss', total_loss)

In [22]:
# Merge every summary registered so far into a single op.
merged = tf.summary.merge_all()
# Writer that stores summaries (and the session graph) under LOGS.
writer = tf.summary.FileWriter(LOGS, sess.graph)

In [23]:
with tf.name_scope('train'):
    # Adam optimizer with a learning rate of 0.02.
    optimizer = tf.train.AdamOptimizer(0.02)
    # No var_list is given, so minimize() uses its default set of variables.
    # NOTE(review): confirm that generate_VGG16 does not create trainable
    # variables, otherwise the VGG weights would be updated as well.
    train_step = optimizer.minimize(total_loss)

In [24]:
# Run the initializer of all global variables in the interactive session.
sess.run(tf.global_variables_initializer())

In [26]:
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [None]:
# Augmentation settings shared by both generators.
_augmentation = dict(rotation_range=20,
                     width_shift_range=0.2,
                     height_shift_range=0.2,
                     vertical_flip=True)

# Directory-iterator settings shared by both generators; class_mode=None
# makes the iterators yield image batches only.
# NOTE(review): Keras documents target_size as (height, width); passing
# (IMG_W, IMG_H) is only harmless because both are 256 - confirm before using
# non-square sizes.
_flow_options = dict(target_size=(IMG_W, IMG_H),
                     batch_size=BATCH_SIZE,
                     class_mode=None)

style_generator = ImageDataGenerator(**_augmentation).flow_from_directory(
    DATA_DIR_STYLE, **_flow_options)

content_generator = ImageDataGenerator(**_augmentation).flow_from_directory(
    DATA_DIR_CONTENT, **_flow_options)


In [None]:
%%time

# Number of optimisation steps.
ITERATIONS = 2000

for it in range(ITERATIONS):
    # Draw one batch from each generator (content first, then style).
    content = content_generator.next()
    style = style_generator.next()
    feed = {content_batch: content, style_batch: style}

    # Every 50th step (49, 99, ...) additionally records a full execution trace.
    trace_this_step = it % 50 == 49
    if trace_this_step:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        summary, _ = sess.run([merged, train_step],
                              feed_dict=feed,
                              options=run_options,
                              run_metadata=run_metadata)
    else:
        summary, _ = sess.run([merged, train_step], feed_dict=feed)

    writer.add_summary(summary, it)
    if trace_this_step:
        # Tag the trace with the loop counter `it`; the name `i` is not defined
        # in this notebook and raised a NameError here.
        writer.add_run_metadata(run_metadata, 'step%03d' % it)

    if it % 500 == 0:
        # Every 500 steps, stylize a fixed content/style pair and save it.
        # NOTE(review): `content_image` and `style_image` are not defined in
        # any visible cell of this notebook (execution count 25 is missing);
        # they must be loaded before this loop runs - confirm where they come
        # from.
        preview_feed = {
            content_batch: [content_image[0]] * BATCH_SIZE,
            style_batch: [style_image[0]] * BATCH_SIZE,
        }

        _image = sess.run(decoder['output'], feed_dict=preview_feed)
        print('Iteration %d' % (it))
        # Build the path from OUTPUT_DIR instead of repeating the folder name.
        filename = OUTPUT_DIR + 'stylized_feedforward_IN_iter{}.png'.format(it)
        utils.save_image(filename, _image)
    elif it % 50 == 0:
        print("--> {} / {}".format(it, ITERATIONS))