# AnimeGM - Autoencoder
GOAL: Generate new anime-style images
    
Methodology:
1. Build a dataset of images suitable for our needs
2. Build a generative model.
3. Train the model
4. Generate a new image with random numbers
5. Transfer style to a photo
6. Examine results

## 1. Build a dataset
Pull in an existing dataset and modify it for my needs.

Let's start with the Danbooru 2017 anime image dataset. This dataset is huge, so we are only going to use one torrent of the SFW subset.

### Imports

In [None]:
import pandas as pd
from scipy import misc
import numpy as np
import matplotlib.pyplot as plt
from skimage import color
import seaborn as sns; sns.set()
from skimage.transform import resize
from os import listdir
from os.path import join
from skimage import data
import tensorflow as tf
from tensorflow.python.framework.ops import reset_default_graph
%matplotlib inline

### Functions

In [None]:
def gray_scale_resize(image, resize_dim):
    """Convert an image to grayscale and resize it to a square.

    Working in grayscale keeps the computation small while experimenting.

    Args:
        image: image array accepted by ``skimage.color.rgb2gray``.
        resize_dim: edge length, in pixels, of the square output.

    Returns:
        The grayscale image resized to ``(resize_dim, resize_dim)``.
    """
    target_shape = (resize_dim, resize_dim)
    return resize(color.rgb2gray(image), target_shape)

In [None]:
def load_images(paths_list, n_images, resize_dim, base_path=None):
    """Load up to ``n_images`` files as grayscale, square images.

    Args:
        paths_list: file names relative to the image directory.
        n_images: maximum number of images to load.
        resize_dim: edge length, in pixels, each image is resized to.
        base_path: directory that contains the files. When omitted, the
            module-level ``path`` is used so existing callers keep working.

    Returns:
        A float32 array of shape ``(n_loaded, resize_dim, resize_dim)``
        where ``n_loaded`` is ``min(n_images, len(paths_list))``.
    """
    if base_path is None:
        base_path = path

    # slice paths list to the amount we want
    slice_paths_list = paths_list[:n_images]

    # size the array by what is actually available: if the directory holds
    # fewer files than requested, padding with all-zero images would skew
    # the dataset mean and standard deviation
    images = np.zeros((len(slice_paths_list), resize_dim, resize_dim),
                      dtype=np.float32)

    for i, image_path in enumerate(slice_paths_list):
        # read image
        image = misc.imread(join(base_path, image_path))

        # convert to grayscale and rescale
        images[i] = gray_scale_resize(image, resize_dim)

    return images

In [None]:
def show_image(image):
    """Display a single image in grayscale with the axis ticks hidden."""
    plt.imshow(image, cmap='gray')
    # clear the tick marks on both axes
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks([])
    plt.show()

In [None]:
def show_images(images, figsize=(10, 10)):
    """Plot a stack of images on the smallest square grid that fits them.

    Args:
        images: array whose first axis indexes the images to draw.
        figsize: size of the matplotlib figure.

    Returns:
        None. Grid cells beyond the last image are left empty.
    """
    # get the number of images
    n_images = images.shape[0]
    if n_images == 0:
        # nothing to draw, and a 0x0 grid is not a valid subplot layout
        return

    # side length of the square grid
    grid_size = int(np.ceil(np.sqrt(n_images)))

    # squeeze=False keeps `ax` a 2-D array even for a 1x1 grid, so `.flat`
    # works when a single image is passed
    fig, ax = plt.subplots(grid_size, grid_size, figsize=figsize,
                           squeeze=False,
                           subplot_kw=dict(xticks=[], yticks=[]))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)

    # zip stops after the last image, leaving any surplus axes untouched
    for axi, image in zip(ax.flat, images):
        axi.imshow(image, cmap='gray')

In [None]:
def get_batch(batch_size, images, rng=None):
    """Draw a random batch of distinct images.

    Args:
        batch_size: number of images to draw. Sampling is without
            replacement, so it must not exceed the number of images.
        images: array whose first axis indexes the images.
        rng: optional ``np.random.RandomState`` to sample from, for
            reproducible batches. Defaults to NumPy's global generator.

    Returns:
        The rows of ``images`` at ``batch_size`` distinct random indices.
    """
    # merely constructing a RandomState does not seed np.random, so sample
    # through the caller's generator when one is given
    sampler = np.random if rng is None else rng

    # pick batch_size distinct indices out of range(number of images)
    random_indices = sampler.choice(images.shape[0], batch_size, replace=False)

    # use random indices to grab a batch from images
    return images[random_indices]

In [None]:
# get the normalized image
def preprocess(images, mean_image, std_dev_image):
    """Standardise images with the dataset's per-pixel mean and std deviation.

    Args:
        images: image data to normalize.
        mean_image: per-pixel mean, broadcastable against ``images``.
        std_dev_image: per-pixel standard deviation, broadcastable
            against ``images``.

    Returns:
        ``(images - mean_image) / std_dev_image``, except that pixels whose
        standard deviation is exactly zero are only centred, not scaled.
    """
    std = np.asarray(std_dev_image)

    # a pixel that never varies has std 0; dividing by it yields NaN/inf,
    # so divide those pixels by 1 instead
    safe_std = np.where(std == 0, np.ones_like(std), std)

    norm_image = (images - mean_image) / safe_std
    return norm_image

In [None]:
# remove normalization
def deprocess(norm_image, mean_image, std_dev_image):
    """Undo the normalization: scale by the std deviation, then add the mean."""
    rescaled = norm_image * std_dev_image
    return rescaled + mean_image

### Variables

In [None]:
# directory that holds the image files
path = 'danbooru-small'

# file names found in that directory
paths_list = listdir(path)

# how many images go into the dataset
n_images = 1000

# edge length, in pixels, of the square each image is resized to
resize_dim = 64

# one feature per pixel of the flattened image
n_features = resize_dim * resize_dim

# hyperparameters
batch_size = 10
n_batches = n_images // batch_size
n_epochs = 10
# output widths of the encoder layers, first hidden layer to bottleneck
dimensions = [2048, 1024, 512, 256]

# how many images are reconstructed for visual inspection
n_tests = 10

### Prepare data

In [None]:
# load the first n_images files as grayscale resize_dim x resize_dim arrays
images = load_images(paths_list, n_images, resize_dim)

In [None]:
# plot every loaded image on one square grid to eyeball the dataset
show_images(images)

In [None]:
# flatten each image into one row: (n, resize_dim, resize_dim) -> (n, n_features)
reshaped_images = images.reshape(-1, n_features)

In [None]:
# per-pixel mean over the whole dataset (axis 0 indexes the images)
# preprocess() subtracts it from every batch to centre the inputs,
# which keeps the values in a well-behaved range
mean_image = reshaped_images.mean(axis=0)

# reshape the flat mean vector back into a 2-D image for display
mean_image_reshaped = mean_image.reshape(resize_dim, resize_dim)

# inspect it
show_image(mean_image_reshaped)

In [None]:
# per-pixel standard deviation over the whole dataset (axis 0 indexes the images)
std_dev_image = reshaped_images.std(axis=0)

# reshape the flat std-dev vector back into a 2-D image for display
std_dev_image_reshaped = std_dev_image.reshape(resize_dim, resize_dim)

# inspect it
show_image(std_dev_image_reshaped)

In [None]:
# images to reconstruct after each epoch
# NOTE: these are the first n_tests rows of the training data, not a held-out set
test_images = reshaped_images[:n_tests]

# reshape the flat rows back into 2-D images for display
reshaped_test_images = test_images.reshape(-1, resize_dim, resize_dim)

# plot them
show_images(reshaped_test_images)

## 2. Build a generative model
Let's build an autoencoder.

In [None]:
# start from an empty default graph before defining the model below
reset_default_graph()

In [None]:
# placeholder for a batch of flattened images; None leaves the batch size open
X = tf.placeholder(tf.float32, [None, n_features])

In [None]:
# Build the encoder half of the autoencoder: one fully connected layer per
# entry in `dimensions`, each producing the width given by that entry
# start from the X placeholder; current_input always holds the latest layer output
current_input = X
n_input = n_features

# weight matrices in creation order; the decoder reuses them (transposed)
Ws = []

# loop over the list of dimensions and create a layer
# layer_i = index of current element
# n_output = element (width of this layer's output)
for layer_i, n_output in enumerate(dimensions):
    # use variable scope to encapsulate variables
    # prefix all variables created in this scope
    with tf.variable_scope("encoder/layer/{}".format(layer_i)):
        
        # create a weight matrix of the shape [n_input, n_output]
        W = tf.get_variable(
                name='W',
                shape=[n_input, n_output],
                initializer=tf.random_normal_initializer(mean=0.0, stddev=0.02))
        
        # create bias vector of the shape [n_output]
        b = tf.get_variable(
                name='b',
                shape=[n_output],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        
        # multiply the layer input and the weight matrix
        # and add the bias
        h = tf.nn.bias_add(
                name='h',
                value=tf.matmul(current_input, W),
                bias=b)
        
        # apply the ReLU activation; the result is the input of the next layer
        current_input = tf.nn.relu(h)
        
        # store the weight matrix so that we can build the decoder
        Ws.append(W)
        
        # this layer's output width is the next layer's input width
        n_input = n_output

In [None]:
# reverse the order of the weight matrices so the decoder walks back through
# the encoder layers, last one first
# NOTE: this cell rebinds Ws and dimensions, so running it twice without
# re-running the variables and encoder cells undoes the reversal
Ws = Ws[::-1]

# reverse the order of the dimensions and drop the first entry (the bottleneck
# width, which is the decoder's input rather than one of its outputs),
# then append the original feature count as the final output width
dimensions = dimensions[::-1][1:] + [n_features]

In [None]:
# Build the decoder half: one layer per entry in the (reversed) dimensions
# list, reusing the encoder weights transposed (tied weights)
# layer_i = index of current element
# n_output = element (width of this layer's output)
last_layer_i = len(dimensions) - 1
for layer_i, n_output in enumerate(dimensions):
    # use variable scope to encapsulate variables
    # prefix all variables created in this scope
    with tf.variable_scope("decoder/layer/{}".format(layer_i)):

        # grab the weight matrix from the encoder and transpose it
        W = tf.transpose(Ws[layer_i])

        # create bias vector of the shape [n_output]
        b = tf.get_variable(
                name='b',
                shape=[n_output],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))

        # multiply the layer input and the weight matrix
        # and add the bias
        h = tf.nn.bias_add(
                name='h',
                value=tf.matmul(current_input, W),
                bias=b)

        # hidden layers use ReLU, but the final layer stays linear: the
        # network is trained to reproduce mean/std-normalized pixels, which
        # are frequently negative, and a ReLU output can never be below zero
        if layer_i < last_layer_i:
            current_input = tf.nn.relu(h)
        else:
            current_input = h

        # update the input dimensions with the current layer output
        n_input = n_output

In [None]:
# the output of the last decoder layer is the reconstruction Y
Y = current_input

### Define cost function
Define the training signal.
This will be a cost function to measure the success of the network

In [None]:
# mean squared difference over the pixels of each image (axis 1),
# giving one value per image in the batch
pixel_cost = tf.reduce_mean(tf.squared_difference(X, Y), 1)

# average the per-image costs over the batch
cost = tf.reduce_mean(pixel_cost)

# use an Adam optimizer for training which tries to minimize cost
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

## 3. Train the model

In [None]:
# create a session to run the graph and initialise all variables
# NOTE: the session is not closed in the cells visible here; the later
# generation and transfer cells keep using it
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [None]:
# train by running n_batches random batches for each of n_epochs epochs,
# then reconstruct the test images after every epoch to watch progress

# the test images and the normalization statistics never change, so
# preprocess them once instead of once per epoch
preprocessed_test_images = preprocess(test_images, mean_image, std_dev_image)

for epoch_i in range(n_epochs):
    for batch_i in range(n_batches):
        # get batch
        batch_X = get_batch(batch_size, reshaped_images)

        # preprocess batch
        preprocessed_batch = preprocess(batch_X, mean_image, std_dev_image)

        # train
        sess.run(optimizer, feed_dict={X: preprocessed_batch})

        # show the cost on this batch after the update step
        print(epoch_i, batch_i, sess.run(cost, feed_dict={X: preprocessed_batch}))

    # reconstruct the test images
    reconstructed_test_images = sess.run(Y, feed_dict={X: preprocessed_test_images})

    # deprocess the test images
    deprocessed_test_images = deprocess(reconstructed_test_images, mean_image, std_dev_image)

    # reshape the flat outputs back into 2-D images
    reshaped_deprocessed_test_images = deprocessed_test_images.reshape(-1, resize_dim, resize_dim)

    # plot the results for this epoch
    show_images(reshaped_deprocessed_test_images)

## 4. Generate a new image
Use the trained model to generate a new image

In [None]:
# number of random input images to push through the trained autoencoder
n_new_images = 10

In [None]:
# generate images of uniform random noise in [0, 1)
# draw from the seeded generator itself: constructing a RandomState and
# discarding it does not seed np.random, so the images would differ per run
rng = np.random.RandomState(seed=0)
new_images = rng.rand(n_new_images, resize_dim, resize_dim)

# flatten the images
reshaped_new_images = new_images.reshape(-1, n_features)

# view the images
show_images(new_images)

### Reconstruct without normalizing against training dataset

In [None]:
# feed the raw (un-normalized) random images straight through the autoencoder
reconstructed_images = sess.run(Y, feed_dict={X: reshaped_new_images})

# reshape the flat outputs back into 2-D images
reshaped_reconstructed_images = (reconstructed_images).reshape(-1, resize_dim, resize_dim)

# plot them
show_images(reshaped_reconstructed_images)

### Reconstruct with normalizing against training dataset

In [None]:
# normalize the random images with the training set's mean and std deviation
preprocessed_new_images = preprocess(reshaped_new_images, mean_image, std_dev_image)

# run the normalized images through the autoencoder
reconstructed_images = sess.run(Y, feed_dict={X: preprocessed_new_images})

# undo the normalization on the reconstructions
deprocessed_new_images = deprocess(reconstructed_images, mean_image, std_dev_image)
    
# reshape the flat outputs back into 2-D images
reshaped_deprocessed_images = deprocessed_new_images.reshape(-1, resize_dim, resize_dim)

# plot them
show_images(reshaped_deprocessed_images)

## 5. Transfer style to an image

In [None]:
# grab a sample photo from skimage.data
transfer_image = data.astronaut()

# resize the image and put it into gray scale
resized_transfer_image = gray_scale_resize(transfer_image, resize_dim)

# flatten the image into a single row so it can be fed as a batch of one
reshaped_transfer_image = resized_transfer_image.reshape(-1, n_features)

# display the resized grayscale image
show_image(resized_transfer_image)

### Reconstruct without normalizing against training dataset

In [None]:
# feed the raw (un-normalized) photo straight through the autoencoder
reconstructed_transfer_image = sess.run(Y, feed_dict={X: reshaped_transfer_image})

# reshape the single flat output back into a 2-D image
reshaped_reconstructed_transfer_image = reconstructed_transfer_image.reshape(resize_dim, resize_dim)

# plot it
show_image(reshaped_reconstructed_transfer_image)

### Reconstruct with normalizing against training dataset

In [None]:
# normalize the photo with the training set's mean and std deviation
preprocessed_transfer_images = preprocess(reshaped_transfer_image, mean_image, std_dev_image)

# run the normalized photo through the autoencoder
reconstructed_transfer_image = sess.run(Y, feed_dict={X: preprocessed_transfer_images})

# undo the normalization on the reconstruction
deprocessed_transfer_images = deprocess(reconstructed_transfer_image, mean_image, std_dev_image)
  
# reshape the single flat output back into a 2-D image
reshaped_deprocessed_transfer_image = deprocessed_transfer_images.reshape(resize_dim, resize_dim)

# plot it
show_image(reshaped_deprocessed_transfer_image)

## 6. Examine results

This does a much better job of reconstructing the images than the PCA model. It still doesn't work well, so there are a few things we can do:
    - get more data
    - get better data
    
    
Let's explore a convolutional autoencoder first.