In [1]:
#import all required libraries
import os

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers,Model,Sequential
from tensorflow.keras.applications.vgg19 import VGG19,preprocess_input

In [2]:
#read the style and content images
def _read_image(path):
    """Read an image file with OpenCV, failing loudly if it cannot be loaded.

    cv2.imread reports failure by returning None rather than raising, which
    would otherwise only surface later as an obscure error during resizing.
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError('could not read image file: {}'.format(path))
    return img

# NOTE(review): OpenCV is documented to return pixels in BGR channel order;
# the rest of the notebook keeps that order all the way to cv2.imwrite.
simg = _read_image('iris-scott-finger-painting-1.jpg')
cimg = _read_image('paintacat.jpg')

In [3]:
#image loading helper: resizes to a fixed square, converts to float32 in [0,1] scale, and adds a leading batch dimension for conv2d
def load_img(img):
    """Turn a raw image array into a batched float tensor.

    The image is resized to 600x600 (the aspect ratio is not preserved),
    cast to float32, divided by 255, and given a new leading axis.

    Args:
        img: image array accepted by tf.image.resize
            (presumably height x width x channels with 0-255 values - confirm against caller).

    Returns:
        The resized, rescaled tensor with one extra leading dimension.
    """
    resized = tf.image.resize(img, (600, 600))
    scaled = tf.cast(resized, 'float32') / 255
    return scaled[tf.newaxis, :]

In [4]:
#preprocess the reference images and create the trainable variable: a (1,600,600,3) tensor of uniform random noise in [0,1)
s_reference = load_img(simg)
c_reference = load_img(cimg)
gen_img = tf.Variable(
    tf.random.uniform((1,600,600,3), minval=0.0, maxval=1.0, dtype='float32')
)

In [5]:
#gen_img = cv2.imread('StyleTransferExampleEpoch17.jpeg')
#gen_img=gen_img/255
#gen_img = gen_img[tf.newaxis,:]
#gen_img = tf.Variable(gen_img,dtype='float32')

In [6]:
#load the convolutional part of VGG19 (no classifier head) with ImageNet weights and print its layer table
vgg = VGG19(
    weights='imagenet',
    include_top=False,
)
vgg.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [7]:
#names of the VGG19 layers whose activations are used for the style loss (a list) and the content loss (a single name)
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]
content_layers = 'block5_conv2'

In [8]:
#build the model on top of VGG19: a Model object that takes images as input and outputs the activations of the chosen layers
#the style and content models are sub-models of one VGG19 instance; call() returns their outputs
#in an organized fashion

class NeuralStyleTransfer(Model):
    """Feature extractor returning style and content activations from VGG19.

    The class owns its VGG19 instance (``self.vgg``), freezes it, and builds
    both sub-models from that same instance, so it does not depend on any
    module-level ``vgg`` variable existing in the notebook.

    Attributes:
        vgg: frozen VGG19 backbone (include_top=False, ImageNet weights).
        style_layers: layer names used for the style outputs.
        content_layers: name of the single layer used for the content output.
        style_model: Model mapping an image batch to the list of style-layer outputs.
        content_model: Model mapping an image batch to the content-layer output.
    """

    def __init__(self,style_layers,content_layers):
        """Create the backbone and the two sub-models.

        Args:
            style_layers: iterable of VGG19 layer names for the style outputs.
            content_layers: a single VGG19 layer name for the content output.
        """
        super(NeuralStyleTransfer,self).__init__()
        self.vgg = VGG19(include_top=False,weights = 'imagenet')
        self.vgg.trainable = False
        self.style_layers = style_layers
        self.content_layers = content_layers
        # Wire both sub-models to self.vgg (the frozen instance) instead of a
        # global created in another cell.
        self.style_model = Model(
            self.vgg.input,
            [self.vgg.get_layer(name).output for name in style_layers],
        )
        # The single output is passed bare (not wrapped in a list) so the
        # sub-model returns a tensor rather than a one-element list.
        self.content_model = Model(
            self.vgg.input,
            self.vgg.get_layer(content_layers).output,
        )

    def call(self,inputs):
        """Return the style and content activations for a batch of images.

        Args:
            inputs: image batch scaled to [0, 1]; it is multiplied by 255 and
                passed through VGG19's preprocess_input before the sub-models.
                NOTE(review): preprocess_input is documented to expect
                RGB-ordered pixels - confirm the channel order of the inputs.

        Returns:
            dict with key 'style' (list of style-layer outputs) and key
            'content' (content-layer output).
        """
        inputs = preprocess_input(inputs*255)
        style_out = self.style_model(inputs)
        content_out = self.content_model(inputs)
        return {'style':style_out,'content':content_out}

In [9]:
#create the model
model = NeuralStyleTransfer(style_layers,content_layers)

In [10]:
#compute the fixed optimisation targets: style-layer activations of the style image and content-layer activation of the content image
s_pixels = preprocess_input(s_reference*255)
c_pixels = preprocess_input(c_reference*255)
style_target = model.style_model(s_pixels)
content_target = model.content_model(c_pixels)

In [11]:
#function to compute the gram matrix: for every pair of channels, the products of their activations are summed over
#all spatial positions, then the result is divided by the number of positions
def gram_matrix(inputs):
    """Return the spatially averaged channel-by-channel Gram matrix.

    Args:
        inputs: 4-D tensor; axes 1 and 2 are summed over and the last axis
            supplies the rows/columns of the result
            (presumably batch x height x width x channels - confirm).

    Returns:
        Tensor indexed as (axis 0, channel, channel), divided by the product
        of the sizes of axes 1 and 2.
    """
    dims = tf.shape(inputs)
    num_positions = tf.cast(dims[1] * dims[2], 'float32')
    channel_products = tf.einsum('aijb,aijc->abc', inputs, inputs)
    return channel_products / num_positions

In [12]:
#compute the weighted sum of the style and content losses
def compute_loss(outputs,w_style = 1e-2,w_content = 1e4):
    """Combine style and content losses for the generated image.

    The style loss is the mean squared difference between the Gram matrices
    of the target and generated activations, averaged over however many
    style layers are present (previously hard-coded to 5). The content loss
    is the mean squared difference between the content activations.

    Reads the notebook-level ``style_target`` and ``content_target``.

    Args:
        outputs: dict with keys 'style' (sequence of activations) and
            'content' (activation), as returned by the model.
        w_style: weight applied to the style loss.
        w_content: weight applied to the content loss.

    Returns:
        Scalar tensor: style_loss * w_style + content_loss * w_content.
    """
    style_out = outputs['style']
    content_out = outputs['content']
    style_loss = 0
    for i, generated in enumerate(style_out):
        style_loss += tf.reduce_mean((gram_matrix(style_target[i])-gram_matrix(generated))**2)
    # Average over the actual number of style layers; skip the division when
    # there are none so an empty style list does not divide by zero.
    num_style_layers = len(style_out)
    if num_style_layers:
        style_loss /= num_style_layers
    content_loss = tf.reduce_mean((content_target-content_out)**2)
    return style_loss*w_style+content_loss*w_content

In [13]:
#create the train step function, which uses tf.function for auto-graph and GradientTape for auto-differentiation
optimizer = tf.optimizers.Adam(learning_rate=0.01)
@tf.function
def train_step(gen_img):
    """Run one optimisation step on the generated image, in place.

    Args:
        gen_img: tf.Variable holding the image being optimised; it is
            updated by the optimizer and then clipped to [0, 1].

    Returns:
        The loss computed before the update (callers may ignore it).
    """
    with tf.GradientTape() as tape:
        outputs = model(gen_img)
        loss = compute_loss(outputs)
    grad = tape.gradient(loss,gen_img)
    optimizer.apply_gradients([(grad,gen_img)])
    # Write the clipped values back into the variable. Plain assignment
    # (gen_img = ...) would only rebind the local name and leave the
    # variable's contents unclipped.
    gen_img.assign(tf.clip_by_value(gen_img,clip_value_min = 0.0, clip_value_max = 1.0))
    return loss

In [14]:
#train and save the intermediate images.
epochs = 15
steps_per_epoch = 100

# cv2.imwrite does not create missing directories and reports failure only
# through its return value, so make sure the target folder exists up front.
os.makedirs('NeuralStyleTrainingSamples', exist_ok=True)

for n in range(epochs):
    for m in range(steps_per_epoch):
        train_step(gen_img)
    img = np.array(tf.squeeze(gen_img))
    # Convert explicitly to an 8-bit image: scale to 0-255, round, and clip so
    # any out-of-range values cannot wrap around.
    frame = np.clip(np.rint(img*255), 0, 255).astype(np.uint8)
    out_path = 'NeuralStyleTrainingSamples/StyleTransferExampleEpoch{}-4.jpeg'.format(n)
    if not cv2.imwrite(out_path, frame):
        raise IOError('failed to write image: {}'.format(out_path))

W0619 15:06:05.310869 139665799026432 deprecation.py:323] From /home/zhang_boyang_00/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
