# AnimeGM - Style Transfer
GOAL: Generate new anime-style images

View model: `$ tensorboard --logdir=autoencoder`

Methodology:
1. Load the images and preprocess them to a consistent size and shape
2. Build models for a, p, x using a pretrained model (VGG19)
3. Construct a placeholder array for the style weights of the style layers
4. Extract feature representations to construct p and a, for each selected layer
5. Optimize with the L-BFGS algorithm

Important Functions:
1. Define a function to calculate the content loss
2. Define a function to calculate the style loss
3. Define a function to calculate the total loss
4. Define a function to calculate the gradient

### Imports

In [1]:
import tensorflow.contrib.keras.api.keras.backend as K
from tensorflow.contrib.keras.api.keras.applications import vgg19
import os
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imresize, imsave
from scipy.optimize import fmin_l_bfgs_b
import seaborn as sns; sns.set()
import tensorflow as tf
%matplotlib inline

  from ._conv import register_converters as _register_converters


### Functions

In [2]:
def show_image(image, resize_dim):
    """Display a square 3-channel image with the axis ticks hidden.

    image: pixel data holding resize_dim * resize_dim * 3 values; it is
        reshaped to (resize_dim, resize_dim, 3) before plotting.
    resize_dim: side length of the image in pixels.
    """
    pixels = image.reshape((resize_dim, resize_dim, 3))

    plt.imshow(pixels, cmap='gray')

    # passing an empty list clears the ticks on each axis
    plt.xticks([])
    plt.yticks([])

    plt.show()

In [3]:
def preprocess(img_path, resize_dim):
    """Load an image file and turn it into a VGG19-ready batch of one.

    img_path: path of the image file to load.
    resize_dim: the image is resized to resize_dim x resize_dim pixels.

    Returns the result of vgg19.preprocess_input applied to the float32
    image with a leading batch axis. The preprocessed image is also
    displayed as a side effect.
    """
    # read the file and convert the loaded image to an array
    raw = tf.keras.preprocessing.image.img_to_array(
        tf.keras.preprocessing.image.load_img(img_path))

    # resize, then cast to float32
    resized = imresize(raw, (resize_dim, resize_dim, 3)).astype('float32')

    # prepend the batch axis and apply the VGG19 input preprocessing
    batch = vgg19.preprocess_input(np.expand_dims(resized, axis=0))

    # show the preprocessed image (it will be clipped)
    show_image(batch, resize_dim)

    return batch

In [4]:
def content_loss(content, gen):
    """Return the sum of squared differences between the content and
    generated feature maps taken from one layer of the model.

    The squared errors are summed, not averaged.
    """
    squared_error = K.square(gen - content)
    return K.sum(squared_error)

In [5]:
def gram_matrix(x):
    """Compute the Gram matrix of a rank-3 feature map.

    The last axis is moved to the front, the result is batch-flattened,
    and the flattened features are multiplied with their own transpose.
    """
    assert K.ndim(x) == 3
    last_axis_first = K.permute_dimensions(x, (2, 0, 1))
    flat = K.batch_flatten(last_axis_first)
    return K.dot(flat, K.transpose(flat))

In [6]:
def style_loss(style, gen, resize_dim, channels):
    """Return the scaled sum of squared differences between the Gram
    matrices of the style and generated feature maps.

    style, gen: rank-3 feature maps (checked with asserts).
    resize_dim: image side length; resize_dim * resize_dim is the size term.
    channels: channel count used in the normalisation.
    """
    assert K.ndim(style) == 3
    assert K.ndim(gen) == 3

    gram_diff = gram_matrix(style) - gram_matrix(gen)
    size = resize_dim * resize_dim

    # normalise each squared difference by 4 * channels^2 * size^2
    scale = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_diff) / scale)

In [7]:
def eval_loss_and_grads(x, f_output, resize_dim, channels):
    """Evaluate f_output on x and split the result into loss and gradients.

    x: array holding resize_dim * resize_dim * channels values.
    f_output: callable that takes a one-element list containing the image
        reshaped to (1, resize_dim, resize_dim, channels) and returns a
        sequence whose first item is the loss and whose remaining items
        are gradient arrays.

    Returns (loss, grads), where grads is a flat float64 array.
    """
    batch = x.reshape((1, resize_dim, resize_dim, channels))
    results = f_output([batch])
    loss_value, grad_parts = results[0], results[1:]

    # a lone gradient is used directly; several are stacked before flattening
    if len(grad_parts) == 1:
        stacked = grad_parts[0]
    else:
        stacked = np.array(grad_parts)

    return loss_value, stacked.flatten().astype('float64')

In [8]:
class Evaluator(object):
    """Caches one loss/gradient evaluation so both can be served separately.

    loss() computes the loss and the gradients together and stores them;
    the next grads() call returns a copy of the stored gradients and clears
    the cache. The asserts enforce that the two are called alternately,
    starting with loss().
    """

    def __init__(self):
        # both stay None while no evaluation is pending
        self.loss_value = self.grads_value = None

    def loss(self, x):
        """Evaluate x, store loss and gradients, and return the loss."""
        assert self.loss_value is None
        # f_output, resize_dim and channels are read from module scope
        self.loss_value, self.grads_value = eval_loss_and_grads(
            x, f_output, resize_dim, channels)
        return self.loss_value

    def grads(self, x):
        """Return a copy of the stored gradients and reset the cache.

        x is accepted for the optimizer's callback signature but not used.
        """
        assert self.loss_value is not None
        cached = np.copy(self.grads_value)
        self.loss_value = self.grads_value = None
        return cached

In [9]:
def deprocess_image(img, resize_dim, channels):
    """Convert a mean-centred BGR image back into a displayable RGB image.

    img: array holding resize_dim * resize_dim * channels values, in BGR
        channel order with the mean pixel removed.
    resize_dim: side length of the image in pixels.
    channels: number of channels; the mean-pixel step touches channels 0-2.

    Returns a new (resize_dim, resize_dim, channels) uint8 array with values
    clipped to [0, 255]. The input array is left unmodified.
    """
    # Work on a float copy. reshape() can return a view of the input, so the
    # in-place additions below would otherwise alter the caller's array
    # (for example the optimizer state that is fed into the next run), and
    # they would fail outright on integer input.
    img = np.array(img, dtype='float64').reshape((resize_dim, resize_dim, channels))

    # Remove zero-center by mean pixel
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68

    # 'BGR'->'RGB'
    img = img[:, :, ::-1]

    return np.clip(img, 0, 255).astype('uint8')

### Variables

In [10]:
# input image locations
content_path, style_path = 'images/content.png', 'images/style2.jpg'

# image geometry: side length in pixels and number of channels
resize_dim, channels = 128, 3

In [11]:
# hyper parameters
# content and style terms get the same weight in the total loss
content_weight = style_weight = 0.5

# number of optimizer runs (one image is saved per run)
iterations = 10

# maxiter passed to each optimizer run
loss_iterations = 1000

## Configure GPUs

In [12]:
# restrict CUDA to the device with index 0
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

NameError: name 'os' is not defined  (stale output: this cell was run before the Imports cell, which is where `os` is imported)

## 1. Load images
content image = p  
style image = a  
generated image = x

In [None]:
# preprocess (and display) the content image first, then the style image
content_img, style_img = [
    preprocess(path, resize_dim) for path in (content_path, style_path)
]

## Build the model

In [None]:
# the two fixed images become backend variables; the generated image is a
# placeholder that is fed at evaluation time
content_img_var = K.variable(content_img)
style_img_var = K.variable(style_img)
gen_img = K.placeholder(shape=(1, resize_dim, resize_dim, 3))

# stack along axis 0: index 0 = content, 1 = style, 2 = generated
input_tensor = K.concatenate(
    [content_img_var, style_img_var, gen_img],
    axis=0,
)

# VGG19 without its top layers, with imagenet weights, fed by the stacked tensor
model = vgg19.VGG19(
    input_tensor=input_tensor,
    weights='imagenet',
    include_top=False,
)

# look up any layer's output by the layer's name
outputs_dict = {layer.name: layer.output for layer in model.layers}

## Losses

### Content Loss

In [None]:
# the total loss starts at zero; the content term is added first
loss = 0

# higher layers in the model are better for overall shapes
layer_features = outputs_dict['block5_conv2']

# in the batched input tensor, index 0 is the content image and
# index 2 is the generated image
content_img_features, gen_img_features = (
    layer_features[0, :, :, :],
    layer_features[2, :, :, :],
)

# content_weight sets how much the content term counts in the mix
content_term = content_loss(content_img_features, gen_img_features)
loss += content_weight * content_term

### Style loss

In [None]:
# the style loss uses the first conv layer of each of the five blocks
feature_layer_names = ['block{}_conv1'.format(block) for block in range(1, 6)]

# style_weight is shared equally among the feature layers
layer_weight = style_weight / len(feature_layer_names)

# add one style term per feature layer
for name in feature_layer_names:
    layer_features = outputs_dict[name]

    # in the batched input tensor, index 1 is the style image and
    # index 2 is the generated image
    style_features = layer_features[1, :, :, :]
    gen_img_features = layer_features[2, :, :, :]

    layer_style_loss = style_loss(style_features, gen_img_features, resize_dim, channels)
    loss += layer_weight * layer_style_loss

## Evaluating the gradients and loss

In [None]:
# gradients of the total loss with respect to the generated image
grads = K.gradients(loss, gen_img)

# outputs is the loss followed by the gradient(s); grads may be a
# list/tuple or a single object
if isinstance(grads, (list, tuple)):
    outputs = [loss] + list(grads)
else:
    outputs = [loss, grads]

# a single call to f_output returns the loss and the gradients together
f_output = K.function([gen_img], outputs)

## Optimization

In [None]:
evaluator = Evaluator()

# the optimization starts from the preprocessed content image
x = content_img

# imsave does not create missing directories, so make sure the target exists
if not os.path.isdir('output'):
    os.makedirs('output')

for i in range(iterations):
    print('Step {}'.format(i))

    # run the L-BFGS optimizer
    # this is the least memory intensive
    # the more iterations the better
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxiter=loss_iterations)
    print('     loss: {}'.format(min_val))

    # save img
    # deprocess a copy: deprocess_image adds the mean pixel in place on a
    # reshaped view of its argument, which would otherwise shift x itself
    # and make the next optimizer run start from a corrupted image
    img = deprocess_image(x.copy(), resize_dim, channels)
    show_image(img, resize_dim)
    imsave('output/img{}.jpg'.format(i), img)