In [185]:
#Assumed that the user will run it on Google Colab, it will run 
#the same on local machine but please install the prerequisite libraries.
import os
import sys
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras import Input
import sys
import cv2
print ('Running in colab:', 'google.colab' in sys.modules)

Running in colab: True


In [186]:
tf.compat.v1.disable_eager_execution()

In [187]:
def compute_content_cost(a_C, a_G):
    n_H, n_W, n_C = a_C.get_shape().as_list()
    m=1
    a_C_unrolled = tf.reshape(a_C,shape=[m,n_H * n_W,n_C])
    a_G_unrolled = tf.reshape(a_G,shape=[m,n_H * n_W,n_C])
    
    #compute the cost with tensorflow
    #J_content = 0.5*tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled,a_G_unrolled)))
    #The multiplying factor is mentioned as 0.5 in original paper but it is found that 1/(s) speeds up computation
    #where s = n_H * n_W * n_C (product of dimensions)
    J_content = 1/(4*n_H*n_W*n_C)*tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled,a_G_unrolled)))
    
    return J_content

In [188]:
def gram_matrix(A):
    GA = tf.matmul(A,tf.transpose(A))    
    return GA

In [189]:
def compute_layer_style_cost(a_S, a_G):

    n_H, n_W, n_C = a_S.get_shape().as_list()
    m = 1
    a_S = tf.transpose(tf.reshape(a_S,shape=[n_H * n_W,n_C * m]))
    a_G = tf.transpose(tf.reshape(a_G,shape=[n_H * n_W,n_C * m]))

    GS = gram_matrix(a_S)
    GG = gram_matrix(a_G)

    J_style_layer = 1/(4 * (n_C**2) * (n_H*n_W)**2)*tf.reduce_sum(tf.square(tf.subtract(GS,GG)))
        
    return J_style_layer

In [190]:
STYLE_LAYERS = [
    ('block1_conv1', 0.2),
    ('block2_conv1', 0.2),
    ('block3_conv1', 0.2),
    ('block4_conv1', 0.2),
    ("block5_conv1",0.2)]

In [191]:
content_layer = "block5_conv2"

In [192]:
def compute_style_cost(feature_extractor,input_block,STYLE_LAYERS):
    J_style = 0

    for layer_name, coeff in STYLE_LAYERS:
        features_style = feature_extractor(input_block)[layer_name]
        a_S = features_style[2,:,:,:]
        a_G_style = features_style[0,:,:,:]
        
        J_style_layer = compute_layer_style_cost(a_S, a_G_style)
        J_style += coeff * J_style_layer

    return J_style

In [193]:
def total_cost(J_content, J_style, alpha = 20, beta = 50):
    J = alpha * J_content + beta * J_style
    return J

In [194]:
#We will set the dimensions we want the images to have.
#It is highly recommended to try and choose the images whose dimensions match as 
#resizing of image may result in unwanted stretching or compression.
baseheight = 600
basewidth = 600

# The setting below is recommended by the authors of VGG paper who trained the dataset on ImageNet and
# found this combination of mean-centering to give best results
mean_centering_for_vgg_dataset = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3)).astype('float32')

In [195]:
def resize_normalise_image(im):
    #The commented code below is if you are using the Image library for preprocessing
    #hpercent = (baseheight / float(im.size[1]))
    #basewidth = int((float(im.size[0]) * float(hpercent)))
    #im = im.resize((basewidth, baseheight), Image.ANTIALIAS)
    
    pixels = np.asarray(im)
    pixels = pixels.astype('float32')
    #If image is grayscale then we need to stack it so that it has 3 channels
    if(len(pixels.shape) == 2):
      original_features = [pixels,pixels,pixels]
      # Stack them into one array
      pixels = np.stack(original_features, axis=2)
    return (np.expand_dims(pixels,axis=0) - mean_centering_for_vgg_dataset)

In [196]:
#Please specify the correct path if using it on local machine.

#We use OpenCV to set the size of all images to 600x600
im1 = cv2.imread('/content/mypic.jpg',1)
#To convert from BGR TO RGB
im1 = im1[...,::-1]
im1 = cv2.resize(im1,(basewidth,baseheight))

im2 = cv2.imread('/content/cubes.jpg',1)
#To convert from BGR TO RGB
im2 = im2[...,::-1]
im2 = cv2.resize(im2,(basewidth,baseheight))

# If using the Image library use the code below
# im1 = Image.open('/content/mypic.jpg')
# im2 = Image.open('/content/cubes.jpg')

In [197]:
content_img = resize_normalise_image(im1)
style_img = resize_normalise_image(im2)
# Generate a random noise_image
noise_ratio = 0.6
noise_image = np.random.uniform(-20, 20, (1, basewidth, baseheight, 3)).astype('float32')
# Set the input_image to be a weighted average of the content_image and a noise_image
new_img = noise_image * noise_ratio + content_img * (1 - noise_ratio)

In [198]:
# Reset the graph
tf.compat.v1.reset_default_graph()
# Start interactive session
sess = tf.compat.v1.InteractiveSession()



In [199]:
content_img = tf.constant(content_img,tf.float32,name='ContentImage')
style_img = tf.constant(style_img,tf.float32,name='StyleImage')
new_img = tf.Variable(new_img,tf.float32,name='InputImage')

In [200]:
#Load the VGG model. We dont need to include the output layer and hence the size of download is approximately 80MB only.
model = VGG19(weights='imagenet',input_shape=(basewidth,baseheight,3),include_top=False)

In [201]:
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
feature_extractor = Model(inputs=model.inputs, outputs=outputs_dict)

In [202]:
#This computes the loss over one pass
def compute_block_loss(feature_extractor,new_img,content_img,style_img):
    #Extract the features for content_layer
    input_block = tf.keras.layers.Concatenate(axis = 0)([new_img,content_img,style_img])
    content_features = feature_extractor(input_block)[content_layer]
    a_G_content = content_features[0,:,:,:]
    a_C = content_features[1,:,:,:]
    J_content = compute_content_cost(a_C,a_G_content)
    #Extract features for style layer
    J_style = compute_style_cost(feature_extractor,input_block,STYLE_LAYERS)
    J = total_cost(J_content,J_style)
    return J

In [203]:
sess.run(tf.compat.v1.global_variables_initializer())
#The learning rate can be adjusted as per requirement.
opt = tf.compat.v1.train.AdamOptimizer(learning_rate=2.5)
#Define the function which is to be minimises
cost = compute_block_loss(feature_extractor,new_img,content_img,style_img)
#Create a graph which can be accessed using tensorboard. 
#Open terminal in directory of notebook followed by typing the command "tensorboard --logdir logs" without the quotes and open the link given in terminal. (localhost:6006 typically)
writer = tf.compat.v1.summary.FileWriter("logs")
writer.add_graph(sess.graph)
writer.close()
#Pass the variable we want to apply gradient descent on, i.e, new_image
train_step = opt.minimize(cost,var_list=[new_img])

In [204]:
def run_model(feature_extractor,new_img,content_img,style_img,train_step,num_iterations = 2000):
    sess.run(tf.compat.v1.global_variables_initializer())
    im = None
    for i in range(num_iterations):
  
        #Compute the gradient descent
        sess.run(train_step)
        Jt = sess.run(cost)
        print("Iteration: " + str(i) + " -- Cost:" + str(Jt))
        #Make sure to re-adjust the dimensions of image by changing it from (1,x,y,3) to (x,y,3) (similar to np.squueze) and the values being between 0 and 255.
        generated_image = np.clip((sess.run(new_img) + mean_centering_for_vgg_dataset)[0],0,255)
        im = Image.fromarray(generated_image.astype('uint8'),'RGB')
        if (Jt < 900):
          break
        # Print every 100 iteration.
        if i%50 == 0:            
          im.save("op" + str(i) + ".png")
    
    # save last generated image
    im.save("generated_image.png")

In [205]:
run_model(feature_extractor,new_img,content_img,style_img,train_step)
sess.close()

Iteration: 0 -- Cost:1590268.2
Iteration: 1 -- Cost:1534505.1
Iteration: 2 -- Cost:1478065.2
Iteration: 3 -- Cost:1421301.6
Iteration: 4 -- Cost:1364362.1
Iteration: 5 -- Cost:1307248.4
Iteration: 6 -- Cost:1249912.4
Iteration: 7 -- Cost:1192333.5
Iteration: 8 -- Cost:1134574.2
Iteration: 9 -- Cost:1076776.6
Iteration: 10 -- Cost:1019152.44
Iteration: 11 -- Cost:961941.3
Iteration: 12 -- Cost:905380.6


KeyboardInterrupt: ignored

In [None]:
sess.close()