In [None]:
import os
import sys
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras import Input
import sys
import cv2
# Report whether the 'google.colab' module is loaded in this kernel (i.e. whether we are on Colab).
print ('Running in colab:', 'google.colab' in sys.modules)

In [None]:
# Switch off eager execution: the rest of the notebook builds a graph and evaluates it
# through a tf.compat.v1 session (sess.run), so this must run before any tensors are created.
tf.compat.v1.disable_eager_execution()

In [None]:
def compute_content_cost(a_C, a_G):
    """Return the normalised squared-error content cost between two activations.

    Args:
        a_C: Activations of the content image at one layer. Its static shape
            is unpacked as (n_H, n_W, n_C).
        a_G: Activations of the generated image at the same layer; it is
            reshaped using the dimensions read from ``a_C``.

    Returns:
        Scalar tensor equal to sum((a_C - a_G)**2) / (4 * n_H * n_W * n_C).
    """
    height, width, channels = a_C.get_shape().as_list()
    unrolled_shape = [height * width, channels]

    content_flat = tf.reshape(a_C, shape=unrolled_shape)
    generated_flat = tf.reshape(a_G, shape=unrolled_shape)

    # The original paper scales the sum by 0.5. This notebook instead divides by
    # 4 * s, where s = n_H * n_W * n_C is the product of the dimensions (the
    # notebook author reports this choice works faster in practice).
    squared_error = tf.square(tf.subtract(content_flat, generated_flat))
    normaliser = 4 * height * width * channels

    return tf.reduce_sum(squared_error) / normaliser

In [None]:
def gram_matrix(A):
    """Return the Gram matrix of ``A``, i.e. the product of ``A`` with its own transpose.

    Args:
        A: Matrix-shaped tensor; callers in this notebook pass activations
            unrolled to (n_C, n_H * n_W).

    Returns:
        Tensor holding A multiplied by A transposed.
    """
    return tf.matmul(A, A, transpose_b=True)

In [None]:
def compute_layer_style_cost(a_S, a_G):
    """Return the style cost of a single layer from its Gram matrices.

    Args:
        a_S: Activations of the style image at one layer; its static shape is
            unpacked as (n_H, n_W, n_C).
        a_G: Activations of the generated image at the same layer; it is
            reshaped using the dimensions read from ``a_S``.

    Returns:
        Scalar tensor: sum((G_S - G_G)**2) / (4 * n_C**2 * (n_H * n_W)**2),
        where G_S and G_G are the Gram matrices of the unrolled activations.
    """
    height, width, channels = a_S.get_shape().as_list()
    positions = height * width
    unrolled_shape = [positions, channels]

    # Unroll to (n_H * n_W, n_C), then transpose so each row is one channel.
    style_rows = tf.transpose(tf.reshape(a_S, shape=unrolled_shape))
    generated_rows = tf.transpose(tf.reshape(a_G, shape=unrolled_shape))

    style_gram = gram_matrix(style_rows)
    generated_gram = gram_matrix(generated_rows)

    gram_gap = tf.reduce_sum(tf.square(tf.subtract(style_gram, generated_gram)))
    normaliser = 4 * (channels ** 2) * positions ** 2

    return gram_gap / normaliser

In [None]:
# (layer name, weight) pairs: the style cost of each listed VGG-19 layer is
# multiplied by its weight and the results are summed in compute_style_cost.
STYLE_LAYERS = [
    ("block1_conv1", 0.5),
    ("block2_conv1", 0.5),
    ("block3_conv1", 0.5),
    ("block4_conv1", 0.5),
    ("block5_conv1", 0.5),
]

In [None]:
# (layer name, weight) pairs used for the content loss. Combining several
# layers in this way was found to produce good results.
CONTENT_LAYERS = [
    ("block1_conv1", 0.3),
    ("block3_conv1", 0.5),
    ("block5_conv1", 0.5),
]

In [None]:
def compute_style_cost(feature_extractor,input_block,STYLE_LAYERS):
    """Return the weighted sum of per-layer style costs.

    Args:
        feature_extractor: Callable that maps ``input_block`` to a mapping from
            layer name to that layer's activations.
        input_block: Batch tensor made of the generated, content and style
            images concatenated in that order, so index 0 is the generated
            image and index 2 is the style image.
        STYLE_LAYERS: Iterable of (layer_name, coefficient) pairs.

    Returns:
        The sum over the listed layers of
        coefficient * compute_layer_style_cost(style, generated).
        This is the integer 0 when ``STYLE_LAYERS`` is empty.
    """
    # Call the extractor once and reuse its outputs for every layer. Calling it
    # inside the loop would add another copy of the network's ops to the graph
    # for each style layer without changing the result.
    features = feature_extractor(input_block)

    J_style = 0
    for layer_name, coeff in STYLE_LAYERS:
        layer_features = features[layer_name]
        a_S = layer_features[2,:,:,:]
        a_G_style = layer_features[0,:,:,:]
        J_style += coeff * compute_layer_style_cost(a_S, a_G_style)

    return J_style

In [None]:
# Target dimensions (in pixels) that every image is resized to.
# Choosing images whose dimensions are already close to the 224x224 VGG-19
# input size is highly recommended: resizing images of very different
# dimensions may stretch or compress them, and larger sizes slow computation.
baseheight, basewidth = 224, 224

# Per-channel means subtracted from every image before it is fed to VGG, and
# added back when the generated image is saved. According to the original
# notebook note, these are the values recommended by the authors of the VGG
# paper for the network trained on ImageNet. They are applied to channels
# 0, 1, 2 of the arrays loaded with OpenCV (which reads images in BGR order).
mean_centering_for_vgg_dataset = [103.939, 116.779, 123.68]

In [None]:
def resize_normalise_image(im, mean=None):
    """Mean-centre an image and add a leading batch axis.

    Despite its name this function does not resize anything as written (the
    Pillow resizing code below is commented out); callers in this notebook
    resize with OpenCV beforehand.

    Args:
        im: Array-like image, either (H, W) greyscale or (H, W, C) with at
            least three channels. The input is not modified.
        mean: Optional sequence of three per-channel means, given in the same
            channel order as ``im``. Defaults to the notebook-level
            ``mean_centering_for_vgg_dataset``, which is ordered for images
            read with OpenCV (BGR). Pass the reversed list for RGB input such
            as images opened with the Image library.

    Returns:
        float32 numpy array of shape (1, H, W, C) with the means subtracted
        from the first three channels.

    Raises:
        ValueError: If the image is not 2-D, or 3-D with at least three
            channels, or if ``mean`` does not hold exactly three values.
    """
    if mean is None:
        mean = mean_centering_for_vgg_dataset

    #The commented code below is if you are using the Image library for preprocessing.
    #Please adjust the subtraction of mean as OpenCV reads in BGR format and Image library reads in RGB format.

    #hpercent = (baseheight / float(im.size[1]))
    #basewidth = int((float(im.size[0]) * float(hpercent)))
    #im = im.resize((basewidth, baseheight), Image.ANTIALIAS)

    # astype() returns a new array, so the caller's image is left untouched.
    pixels = np.asarray(im).astype('float32')

    # A greyscale image is stacked three times so that it has 3 channels.
    if pixels.ndim == 2:
        pixels = np.stack([pixels, pixels, pixels], axis=2)

    if pixels.ndim != 3 or pixels.shape[2] < 3:
        raise ValueError(
            "expected an (H, W) or (H, W, C>=3) image, got shape " + str(pixels.shape)
        )

    channel_means = np.asarray(mean, dtype='float32')
    if channel_means.shape != (3,):
        raise ValueError("mean must contain exactly three per-channel values")

    # Broadcast the three means over the first three channels.
    pixels[:, :, :3] -= channel_means
    return np.expand_dims(pixels, axis=0)

In [None]:
#Utilise OpenCV to set the size of all images to basewidth x baseheight (224x224)
#cv2.imread returns None instead of raising when a file is missing or unreadable,
#so each read is checked and a clear error is raised straight away.
#cv2.resize takes its target size as (width, height).

#The content image is read as im1
im1 = cv2.imread('paris.jpeg',cv2.IMREAD_COLOR)
if im1 is None:
    raise FileNotFoundError("Could not read the content image 'paris.jpeg'")
#If you want to convert from BGR TO RGB
#im1 = im1[...,::-1]
im1 = cv2.resize(im1,(basewidth,baseheight))

#The style image is read as im2
im2 = cv2.imread('pattern.jpg',cv2.IMREAD_COLOR)
if im2 is None:
    raise FileNotFoundError("Could not read the style image 'pattern.jpg'")
#If you want to convert from BGR TO RGB
#im2 = im2[...,::-1]
im2 = cv2.resize(im2,(basewidth,baseheight))

# If using the Image library use the code below. 
#Please uncomment the lines in function resize_normalise_image() if you use Image library.
# im1 = Image.open('/content/mypic.jpg')
# im2 = Image.open('/content/cubes.jpg')

In [None]:
# Mean-centred float32 batches of shape (1, height, width, channels).
content_img = resize_normalise_image(im1)
style_img = resize_normalise_image(im2)
# Generate a random noise image with exactly the same shape as the content batch.
# The previous hard-coded shape listed width before height, which only matched
# the image layout because both dimensions happen to be 224.
noise_ratio = 0.5
noise_image = np.random.uniform(-20, 20, content_img.shape).astype('float32')
# Set the input image to be a weighted average of the content image and the noise image
new_img = noise_image * noise_ratio + content_img * (1 - noise_ratio)

In [None]:
# Reset the default graph so the tensors created in the following cells start from an empty graph
tf.compat.v1.reset_default_graph()
# Start an interactive session; it is kept in the notebook-level name `sess`,
# which later cells use for sess.run(...) and finally close with sess.close()
sess = tf.compat.v1.InteractiveSession()

In [None]:
# The content and style batches never change, so they are graph constants.
content_img = tf.constant(content_img, dtype=tf.float32, name='ContentImage')
style_img = tf.constant(style_img, dtype=tf.float32, name='StyleImage')
# The generated image is the only quantity being optimised, so it is a variable.
# dtype must be passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, not the dtype.
new_img = tf.Variable(new_img, dtype=tf.float32, name='InputImage')

In [None]:
#Load the VGG model. We dont need to include the output layer and hence the size of download is approximately 80MB only.
#Keras expects input_shape as (height, width, channels), matching the layout of the image batches.
model = VGG19(weights='imagenet',input_shape=(baseheight,basewidth,3),include_top=False)

In [None]:
#Map every VGG layer name to its output tensor, so that calling the extractor
#returns a dict from which activations can be looked up by layer name.
outputs_dict = {layer.name: layer.output for layer in model.layers}
feature_extractor = Model(inputs=model.inputs, outputs=outputs_dict)

#We set these values as tensorflow variables if we wish to write code to change the values
#over the course of execution of program based on number of iterations.
#The metric can be adjusted according to the value of loss achieved after certain number of iterations.
#alpha weights the content loss and beta weights the style loss.
#dtype is passed by keyword because the second positional parameter of tf.Variable
#is `trainable`; these weights are hyper-parameters, so they are marked non-trainable.
alpha = tf.Variable(50.0, trainable=False, dtype=tf.float32, name="Alpha")
beta = tf.Variable(50.0, trainable=False, dtype=tf.float32, name="Beta")

In [None]:
#This computes the loss over one pass
def compute_block_loss(feature_extractor,new_img,content_img,style_img):
    """Build the total loss tensor for one pass over the three images.

    Args:
        feature_extractor: Callable mapping an image batch to a mapping from
            layer name to that layer's activations.
        new_img: Generated image batch (the quantity being optimised).
        content_img: Content image batch.
        style_img: Style image batch.

    Returns:
        alpha * J_content + beta * J_style + tv_loss, where ``alpha`` and
        ``beta`` are the notebook-level weights, J_content is the weighted sum
        over ``CONTENT_LAYERS`` and J_style the weighted sum over
        ``STYLE_LAYERS``.
    """
    #Stack the images along the batch axis: index 0 = generated, 1 = content, 2 = style.
    input_block = tf.keras.layers.Concatenate(axis = 0)([new_img,content_img,style_img])

    #Run the extractor once for all content layers. Calling it inside the loop
    #would add another copy of the network's ops to the graph per layer.
    features = feature_extractor(input_block)

    J_content = 0
    for content_layer,coeff in CONTENT_LAYERS:
        content_features = features[content_layer]
        a_G_content = content_features[0,:,:,:]
        a_C = content_features[1,:,:,:]
        J_content += coeff * compute_content_cost(a_C,a_G_content)

    #Extract features for style layer
    J_style = compute_style_cost(feature_extractor,input_block,STYLE_LAYERS)
    #The loss below is set for adjusting the distortion of pixels in the generated image.
    #The constant value was found by trial and error.
    tv_loss = 0.00005*tf.reduce_sum(tf.image.total_variation(input_block[0,:,:,:]))
    J_total = alpha * J_content + beta *J_style + tv_loss

    return J_total

In [None]:
#Adam optimiser used to update the generated image.
#The learning rate can be adjusted as per requirement.
#If you are varying alpha and/or beta during execution of program it is recommended
#to set a low learning rate to prevent distortion.
opt = tf.compat.v1.train.AdamOptimizer(learning_rate=2.3,)
#Define the function which is to be minimised (the total loss tensor).
cost = compute_block_loss(feature_extractor,new_img,content_img,style_img)
#Write the session graph to the "logs" directory so it can be viewed with tensorboard.
#Open a terminal in the directory of the notebook, type the command "tensorboard --logdir logs" without the quotes and open the link given in the terminal (localhost:6006 typically).
#Note: the graph is written here, before opt.minimize() is called below, so the logged graph is the one that exists at this point.
writer = tf.compat.v1.summary.FileWriter("logs")
writer.add_graph(sess.graph)
writer.close()
#Pass the variable we want to apply gradient descent on, i.e. new_img; var_list restricts the update to it.
train_step = opt.minimize(cost,var_list=[new_img])

In [None]:
def run_model(feature_extractor,new_img,content_img,style_img,train_step,num_iterations = 2000):
    """Optimise the generated image and save snapshots to disk.

    Uses the notebook-level ``sess`` (session) and ``cost`` (loss tensor).

    Args:
        feature_extractor: Keras model whose weights hold the pretrained VGG
            values; these weights are deliberately NOT re-initialised.
        new_img: Variable holding the generated image batch.
        content_img: Unused here; kept for interface compatibility.
        style_img: Unused here; kept for interface compatibility.
        train_step: Op that performs one optimisation step on ``new_img``.
        num_iterations: Number of optimisation steps to run.

    Side effects:
        Prints the loss every iteration, writes "outputs/op<N>.png" every 100
        iterations (creating the "outputs" folder if needed) and finally
        writes "generated_image.png".
    """
    # Initialise everything except the extractor's weights. Running the global
    # initializer here would also re-run the initializers of the VGG variables,
    # replacing the pretrained ImageNet weights loaded by Keras.
    pretrained_names = {weight.name for weight in feature_extractor.weights}
    to_initialise = [
        variable for variable in tf.compat.v1.global_variables()
        if variable.name not in pretrained_names
    ]
    sess.run(tf.compat.v1.variables_initializer(to_initialise))

    # Make sure the snapshot folder exists instead of requiring it to be created by hand.
    os.makedirs("outputs", exist_ok=True)

    for i in range(num_iterations):
        # One session call performs the update and fetches the loss that was
        # used for it, so the network is evaluated once per iteration instead
        # of twice. The printed cost is therefore the value before this update.
        _, Jt = sess.run([train_step, cost])

        print("Iteration: " + str(i) + " -- Cost:" + str(Jt))
        #The code below is my approach to dynamically adjust the value of alpha and beta every 250 iterations.
        #This is done to increase the loss and thereby increase the rate of gradient descent which tends to slow down
        #after certain number of iterations thereby slowing down changes in the generated image.
#         if((i+1)%250 == 0):
#             assign = tf.compat.v1.assign(beta,sess.run(beta)*5)
#             sess.run(assign)
#             assign = tf.compat.v1.assign(alpha,sess.run(alpha)*3)
#             sess.run(assign)
#             print("New alpha and beta are " + str(sess.run(alpha)) + " " + str(sess.run(beta)))

        # Save a snapshot every 100 iterations; the image is only converted when it is needed.
        if (i+1)%100 == 0:
            #Can also choose to remove the characters 'outputs/' if want to store in same directory.
            _deprocess_image(sess.run(new_img)).save("outputs/op" + str(i+1) + ".png")

    # Save the last generated image. The current value is fetched here, so this
    # also works when num_iterations is 0.
    _deprocess_image(sess.run(new_img)).save("generated_image.png")


def _deprocess_image(batch):
    """Turn a mean-centred (1, H, W, 3) batch into an RGB PIL image."""
    # Drop the batch axis, changing (1, x, y, 3) to (x, y, 3), on a private copy.
    pixels = np.array(batch[0], dtype='float32')
    # Undo the mean-centering applied before the image was fed to the network.
    pixels += np.asarray(mean_centering_for_vgg_dataset, dtype='float32')
    # Keep the values between 0 and 255.
    pixels = np.clip(pixels, 0, 255)
    # Reverse the channel axis to convert the image to RGB format.
    return Image.fromarray(pixels[..., ::-1].astype('uint8'), 'RGB')

In [None]:
#Run the optimisation with run_model's default num_iterations (2000); run_model writes the images to disk.
run_model(feature_extractor,new_img,content_img,style_img,train_step)
#Close the session after execution
sess.close()