# Image Style Transferの実装

--------------------------------
Gatys, Ecker, and Bethge(2016)の画像スタイル変換を実装する。

【参考文献】  
L. A. Gatys, A. S. Ecker, and M. Bethge,  
Image style transfer using convolutional neural networks,  
In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 2414-2423, 2016.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Path of the content image, i.e. the picture whose style is to be converted.
CONTENT_FILE = '/home/ishiyama/image_style_transfer/image/input/test_input_01.JPG'
# Path of the style image, i.e. the picture that supplies the desired style.
STYLE_FILE = '/home/ishiyama/image_style_transfer/image/style/test_style_01.jpg'

In [3]:
def read_images_as_jpeg(content_file, style_file):
    """Build the graph ops that read and JPEG-decode the two image files.

    Both paths are put into one filename queue. A whole-file reader takes
    an entry from that queue and its contents are decoded as JPEG.
    The original author notes that the images are expected to be square
    for now; nothing in this function enforces that.

    Args:
        content_file : str. Path of the image file whose style is to be
                       converted.
        style_file   : str. Path of the image file that has the desired
                       style.

    Returns:
        The single Tensor produced by ``tf.image.decode_jpeg``.
        NOTE(review): only one decode op is created, so this is not a
        (content, style) pair; presumably each evaluation yields whichever
        file the queue hands out next -- confirm against the caller.
    """
    paths = [content_file, style_file]
    queue = tf.train.string_input_producer(paths)
    # The key (file name) returned by the reader is not needed here.
    _, contents = tf.WholeFileReader().read(queue)
    return tf.image.decode_jpeg(contents)

In [4]:
# Build the read/decode ops for the content and style image files.
images = read_images_as_jpeg(
    content_file=CONTENT_FILE,
    style_file=STYLE_FILE
)

In [5]:
images

<tf.Tensor 'DecodeJpeg:0' shape=(?, ?, ?) dtype=uint8>

# VGGを実装する
------------------------
画像の特徴量を抽出するアルゴリズムにはSimonyan and Zisserman(2015)で提案されたCNN(VGG19)の畳込み層とプーリング層が使われている。  
ここでは、「TensorFlowで学ぶディープラーニング入門」の多層CNNの実装を参考にVGG19を構築する。  

【参考文献】  
K. Simonyan and A. Zisserman, Very Deep Convolutional Networks For Large-Scale Image Recognition, arXiv: 1409.1556v6, 2015  
中井悦司, TensorFlowで学ぶディープラーニング入門〜畳み込みニューラルネットワーク徹底解説, マイナビ出版, 2016

### 畳み込み層を実装する

In [7]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

# We are still investigating how to use the images once they are loaded,
# so for now the image-import code from Nakai (2016), cited in the
# references above, is borrowed as-is.
mnist = input_data.read_data_sets('/tmp/data/', one_hot=True)

# NOTE(review): `filters` is not referenced by any code visible here.
filters = 32
# Height, width and channel count used to reshape each flat input row.
image_height = 28
image_width = 28
image_channels = 1

# Number of values in one flattened image.
image_size = image_height * image_width
# Placeholder for a batch of flattened images; the batch size is left open.
x = tf.placeholder(tf.float32, [None, image_size])
# The same data reshaped to [batch, height, width, channels].
x_image = tf.reshape(x, [-1, image_height, image_width, image_channels])


def calculate_convolutional_layer(x, filter_conf, output_channels):

    """
    Apply one convolution + bias + ReLU step to ``x``.

    Args:
        x               : The input tensor handed to ``tf.nn.conv2d``
                          (presumably [batch, height, width, channels] --
                          confirm against the caller).
        filter_conf     : A dict, or a dict-like object, holding the
                          configuration of the convolution filter. It must
                          provide the keys "height", "width", "channels"
                          and "num".
        output_channels : Length of the bias vector that is added to the
                          convolution result.

    Returns:
        The ReLU activation of the convolution output plus the bias.

    Raises:
        TypeError  : If filter_conf is neither a dict nor dict-like.
        ValueError : If filter_conf lacks one of the required keys.
    """

    # Check the type first, so a non-mapping argument gets the type error
    # rather than a misleading "not enough elements" message. The check is
    # duck-typed because the contract explicitly allows dict-like objects.
    if not (hasattr(filter_conf, 'keys')
            and hasattr(filter_conf, '__getitem__')):
        raise TypeError('filter_conf must be the dict type, or a dict like.')

    required_keys = ('height', 'width', 'channels', 'num')
    available_keys = set(filter_conf.keys())
    if any(key not in available_keys for key in required_keys):
        raise ValueError('filter_conf must have "height", "width", "channels" and "num".\n'
                         'Please specify these 4 conditions.')

    W = tf.Variable(
        tf.truncated_normal(
            [filter_conf['height'],
             filter_conf['width'],
             filter_conf['channels'],
             filter_conf['num']],
            stddev=0.1
        )
    )
    # Convolve the tensor that was passed in. The previous code read the
    # module-level ``x_image`` here, so stacked layers never saw the output
    # of the preceding layer.
    h = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    # ``shape`` is given as a list of dimensions instead of a bare int.
    # NOTE(review): for ``h + b`` to broadcast, output_channels presumably
    # has to equal filter_conf['num'] -- confirm with the callers.
    b = tf.Variable(tf.constant(0.1, shape=[output_channels]))
    convoluted_image = tf.nn.relu(h + b)

    return convoluted_image

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


### Maxプーリング層を実装する

In [9]:
def calculate_max_pooling_layer(x, ksize, strides):

    """Max-pool ``x`` through tf.nn.max_pool with 'SAME' padding.

    This is a thin wrapper: the arguments are forwarded unchanged and only
    the padding mode is fixed.

    Args:
        x       : A Tensor produced by calculate_convolutional_layer.
        ksize   : A list of ints that has length >= 4. The window size
                  for each dimension of the input tensor.
        strides : A list of ints that has length >= 4. The stride of the
                  sliding window for each dimension of the input tensor.

    Returns:
        The pooled image returned by tf.nn.max_pool.
    """

    pooling_args = dict(ksize=ksize, strides=strides, padding='SAME')
    return tf.nn.max_pool(x, **pooling_args)

### 畳込みとプーリング処理の途中経過を保持するクラスを実装する

In [15]:
class ConvNetProgressHolder(object):

    """Plain record of the intermediate results of one conv/pool unit.

    It is used like a C struct: it only stores values and defines no
    methods other than the constructor.

    Attributes:
        input_data (Tensor) : The image that convolution and pooling are applied to.
        conv (list)         : The convolution outputs; each element is a Tensor object.
        pool (Tensor)       : The image pooled after the convolutional layers.
    """

    def __init__(self):
        # Every field starts out empty; the code that builds a unit is
        # expected to fill them in afterwards.
        self.input_data = None
        # A fresh list per instance, so holders never share conv outputs.
        self.conv = list()
        self.pool = None

    
def adopt_unit(x, channels, num_conv):

    """Build one unit: ``num_conv`` stacked convolutions, then max pooling.

    Args:
        x        : The input tensor of the unit.
        channels : Passed as ``output_channels`` to every convolutional
                   layer of the unit.
        num_conv : The number of convolutional layers to stack.
                   Must be >= 2.

    Returns:
        A ConvNetProgressHolder holding the input, the output of every
        convolutional layer and the pooled result.

    Raises:
        ValueError : If num_conv is smaller than 2.
    """

    if num_conv < 2:
        raise ValueError('num_conv must be >= 2.')

    conv_holder = ConvNetProgressHolder()
    conv_holder.input_data = x

    # Each layer consumes the output of the previous one; the first layer
    # consumes the unit input.
    # NOTE(review): FILTER_CONF is not defined in the code visible here and
    # the same configuration is used for every layer -- confirm that it is
    # defined at module level and that its 'channels'/'num' entries fit
    # both the first and the following layers.
    layer_input = conv_holder.input_data
    for _ in range(num_conv):
        layer_input = calculate_convolutional_layer(
            x=layer_input,
            filter_conf=FILTER_CONF,
            output_channels=channels
        )
        conv_holder.conv.append(layer_input)

    # Pool the LAST convolution output. The previous code indexed
    # conv[i - 1], which is the second-to-last layer, so the final
    # convolution never reached the pooling step.
    # NOTE(review): a 1x1 window with stride 1 appears to leave the tensor
    # unchanged; VGG pools over 2x2 windows with stride 2 -- confirm the
    # intended ksize/strides.
    conv_holder.pool = calculate_max_pooling_layer(
        x=conv_holder.conv[-1],
        ksize=[1, 1, 1, 1],
        strides=[1, 1, 1, 1]
    )

    return conv_holder


### VGGの畳込みとプーリング層を構築する

In [None]:
# Stack the five conv/pool units of VGG19 (2, 2, 4, 4 and 4 convolutions).
# The first unit is fed x_image, the 4-D reshaped view of the input,
# because tf.nn.conv2d cannot take the flat 2-D placeholder x.
unit1 = adopt_unit(x=x_image, channels=64, num_conv=2)
unit2 = adopt_unit(x=unit1.pool, channels=128, num_conv=2)
unit3 = adopt_unit(x=unit2.pool, channels=256, num_conv=4)
unit4 = adopt_unit(x=unit3.pool, channels=512, num_conv=4)
unit5 = adopt_unit(x=unit4.pool, channels=512, num_conv=4)