# CMU 17-400/17-700 auto-graded notebook

Before you turn this assignment in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel$\rightarrow$Restart) and then **run all cells** (in the menubar, select Cell$\rightarrow$Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE."

---

# Homework 4: Style Transfer


In [None]:
# Who did you collaborate with on this assignment?
# If no one, `collaborators` should contain a single empty string;
# otherwise list your collaborators' names below.

# collaborators = [""]
# YOUR CODE HERE
raise NotImplementedError()

In [None]:
# Verify that `collaborators` was defined by the student.
try:
    collaborators
except NameError:
    # Only an undefined name means the list is missing; any other exception
    # (e.g. KeyboardInterrupt) must not be reported as a missing list.
    raise AssertionError("you did not list your collaborators, if any") from None

In this assignment, we will implement the style transfer application described in the paper [A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576) by Leon A. Gatys et al.

### Five major steps:
1. Visualize the data and expected outputs
2. Process the data
3. Create the model
4. Define the loss function 
5. Optimize for the loss function and evaluation


# Import Packages and Supporting Files


In [None]:
# Import packages
import tensorflow as tf
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.python.keras.preprocessing import image as k_image
import matplotlib.pyplot as plt
import numpy as np
import time

from PIL import Image

%matplotlib inline

In [None]:
import tensorflow as tf
from tensorflow.python.keras.preprocessing import image as kp_image
from tensorflow.python.keras import models 
from tensorflow.python.keras import losses
from tensorflow.python.keras import layers
from tensorflow.python.keras import backend as K

In [None]:
# Download the style image, the content image, and the reference stylized output
!wget 'https://upload.wikimedia.org/wikipedia/commons/c/cd/VanGogh-starry_night.jpg' -O style.jpg
!wget 'https://upload.wikimedia.org/wikipedia/commons/e/e6/T%C3%BCbingen_Neckarfront_August_2013.jpg' -O content.jpg
!wget 'https://raw.githubusercontent.com/17-700/data/master/hw4/styleTransfer/my_styled_image.jpg' -O style_output.jpg

In [None]:
# Local file names of the two input images and of the reference stylized output
content_path = 'content.jpg'        # photograph whose content is kept
style_path = 'style.jpg'            # painting whose style is transferred
styled_ref = 'style_output.jpg'     # reference result used by the similarity test

# Visualize the Inputs and the Reference Stylized Output

In [None]:
def resize_img(path_to_img):
    """
    Load an image and scale it so that its longest side is 512 pixels.

    Arguments:
        path_to_img: path of the image file to open with PIL.

    Returns:
        the resized image as an array with a leading batch dimension of size 1.
    """
    max_dim = 512

    # `Image.ANTIALIAS` was removed in Pillow 10; it was an alias of LANCZOS.
    # Newer releases expose the filter on `Image.Resampling`, older ones
    # directly on `Image`, so look it up in a way that works for both.
    lanczos = getattr(Image, 'Resampling', Image).LANCZOS

    # The context manager closes the underlying file once the resized copy exists
    with Image.open(path_to_img) as source:
        scale = max_dim / max(source.size)
        new_size = (round(source.size[0] * scale), round(source.size[1] * scale))
        resized = source.resize(new_size, lanczos)

    img = kp_image.img_to_array(resized)

    # We need to broadcast the image array such that it has a batch dimension
    img = np.expand_dims(img, axis=0)
    return img

In [None]:
def imshow(img, title=None):
    """
    Draw a batched image on the current matplotlib axes.

    Arguments:
        img: image array with a leading batch dimension of size 1.
        title: optional title shown above the image.
    """
    # Remove the batch dimension
    out = np.squeeze(img, axis=0)
    # Cast to uint8 for display (this is a plain cast, values are not rescaled)
    out = out.astype('uint8')
    # Draw once; the original drew the same image a second time after the title
    plt.imshow(out)
    if title is not None:
        plt.title(title)

In [None]:
# Show the content and style images side by side, then the reference output
plt.figure(figsize=(10,10))
# Load the three images as uint8 arrays (each keeps its batch dimension)
content = resize_img(content_path).astype('uint8')
style = resize_img(style_path).astype('uint8')
ref_output=resize_img(styled_ref).astype('uint8')
# Left panel: content image
plt.subplot(1, 2, 1)
imshow(content, 'Content Image')
# Right panel: style image
plt.subplot(1, 2, 2)
imshow(style, 'Style Image')
plt.show()
# The reference stylized output is drawn after the first figure has been shown
imshow(ref_output,'Reference Output after Style Transfer')
plt.show()

# Prepare the Data
Let's implement some helper functions that will allow us to load and preprocess our images more easily. We will be using VGG as the backbone model. It is worth noting that VGG is trained on images whose channels are in BGR order, with each channel centered by subtracting `mean = [103.939, 116.779, 123.68]`.

In [None]:
def load_and_process_img(path_to_img):
    """
    Load an image and preprocess it for VGG19.

    Load the image using the resize_img function and
    use keras.applications.vgg19.preprocess_input to change the image from RGB to BGR format

    Arguments:
        path_to_img: path to the image file.

    Return: processed_img
    """
    
    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # img = <FILL IN>
    # # Preprocess them with respect to VGG19 stats
    # img = tf.keras.applications.vgg19.preprocess_input(img)
    
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # `img` must be defined by the code written above
    return img
    
    

In [None]:
# Test load_and_process_img: the mean of the processed content image
# must fall strictly between -20 and -10
assert -10> np.mean(load_and_process_img(content_path))> -20

In order to visualize the output image of our model, we need to perform the inverse preprocessing step. Furthermore, since our output image may take its values anywhere between $- \infty$ and $\infty$, we must clip to maintain our values within the 0-255 range.   

In [None]:
def deprocess_img(processed_img):
    """
    Invert the VGG preprocessing so that an image can be displayed.

    The input may have dimension [1, height, width, channel] or
    [height, width, channel]; a leading batch dimension is squeezed away.
    Remember that you need to add mean = [103.939, 116.779, 123.68] with channels BGR.
    Also for any image the pixel value should be between 0-255.

    Arguments:
        processed_img: array holding the preprocessed image (it is copied,
            the caller's array is left untouched).

    Returns:
        x: the deprocessed image with 3 dimensions.

    Raises:
        ValueError: if the input does not reduce to exactly 3 dimensions.
    """

    x = processed_img.copy()
    if len(x.shape) == 4:
        x = np.squeeze(x, 0)
    # Validate with an explicit exception instead of `assert`: asserts are
    # stripped under `python -O`, and the old assert made the ValueError
    # check that followed it unreachable in normal runs.
    if len(x.shape) != 3:
        raise ValueError("Input to deprocess image must be an image of "
                         "dimension [1, height, width, channel] or [height, width, channel]")

    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # # add mean
    # <FILL IN>
    # # clip
    # x = <FILL IN>

    # YOUR CODE HERE
    raise NotImplementedError()

    return x


In [None]:
# Test deprocess_img
content_img_test = load_and_process_img(content_path)
# `d` is reused later by the content-loss test cell
d = deprocess_img(content_img_test)
mean_d = np.mean(d)

# The mean pixel value of the deprocessed image must lie strictly between 50 and 200
assert 200 > mean_d > 50

# Build the Model 
We will load [VGG19](https://keras.io/applications/#vgg19), and feed in our input tensor to the model to extract the feature maps of the content, style, and generated images.


### Extract content and style representations from VGG
For content layers we will use `conv2`  of `block5`.

For style layers we will use `conv1` of each block. For example, `block1_conv1`.


In [None]:
import ssl
# NOTE(review): this swaps the default HTTPS context factory for the unverified
# one, so HTTPS requests made through the standard library after this point no
# longer verify TLS certificates. Presumably a workaround for certificate
# errors while downloading the VGG19 weights -- confirm it is still required.
ssl._create_default_https_context = ssl._create_unverified_context

# Load VGG19 with include_top=False and ImageNet weights, then print its layers
vgg = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')
vgg.summary()

In [None]:
# Configuration of the VGG19 layers used for the content and style
# representations (layer names such as 'block2_conv1', see the test cell below)
'''
Define 4 variables:
`content_layers` and `style_layers`: lists containing the names of the blocks to be used
`num_content_layers` and `num_style_layers`: provide the length of the lists 
'''

# # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code

# content_layers = <FILL IN>
# 
# # Style layers we are interested in
# style_layers = <FILL IN>
# num_content_layers = <FILL IN>
# num_style_layers = <FILL IN>

# YOUR CODE HERE
raise NotImplementedError()

In [None]:
# Test the layer configuration: second style layer name and both layer counts
assert style_layers[1]=='block2_conv1'
assert num_content_layers==1
assert num_style_layers==5

In order to access the intermediate layers corresponding to our style and content feature maps, we will get the corresponding outputs and use the Keras [**Functional API**](https://keras.io/getting-started/functional-api-guide/). We then define our model with the desired output activations. 


In [None]:
def model_VGG():
    """
    Creates our model with access to intermediate layers.

    This function will load the VGG19 model and access the intermediate layers.
    These layers will then be used to create a new model that will take input image
    and return the representations from these intermediate layers via VGG.

    The layer names are read from the module-level `style_layers` list (and,
    in the code to be filled in, presumably `content_layers`).

    Returns:
        returns a keras model that takes image inputs and outputs the style and
        content intermediate layers (style outputs first, then content outputs).
    """
    
    # Load a pretrained VGG on Imagenet
    vgg = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False
    
    # Get output layers corresponding to style and content layers 
    style_outputs = [vgg.get_layer(name).output for name in style_layers]
    
    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # content_outputs = <FILL IN>
    # model_outputs = style_outputs + content_outputs
    
    # YOUR CODE HERE
    raise NotImplementedError()

    # `model_outputs` must be defined by the code written above
    return models.Model(vgg.input, model_outputs)


## Define the Loss function (content loss + style loss)

### Content Loss

Content loss 

 $$L^l_{content}(p, x) = \sum_{i, j} (F^l_{ij}(x) - P^l_{ij}(p))^2$$

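where $F^l$ is the feature representation of the generated image $x$ and $P^l$ is the feature representation
of the content image $p$, both taken at layer $l$. The paper suggests that we use the feature map from the layer
'conv4_2'; in this notebook we use `conv2` of `block5` instead, as stated in the model section.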



In [None]:
def compute_content_loss(base_content, target):
    """
    Compute the content loss between two feature representations.

    Arguments:
        base_content, target: the two feature representations to compare.

    Returns:
        the content loss (to be implemented below).
    """
    # return the content loss (see equation above)
    # hint: you can use tf.reduce_mean
    
    # YOUR CODE HERE
    raise NotImplementedError()

In [None]:
# Test the Content Loss Function
content_img=load_and_process_img(content_path)
# NOTE(review): the result of this call is discarded; the same value is
# recomputed inside the assert below. `d` comes from the deprocess_img test cell.
compute_content_loss(d,content_img).numpy()

# The loss must lie strictly between 1000 and 30000
assert 30000>compute_content_loss(d,content_img).numpy()>1000

### Style Loss

We define the style loss of the base input image, $x$, and the style image, $a$, as the distance between the style representations (the Gram matrices) of these images. We describe the style representation of an image as the correlation between different filter responses given by the Gram matrix $G^l$, where $G^l_{ij}$ is the inner product between the vectorized feature maps $i$ and $j$ in layer $l$. In other words, $G^l_{ij}$ computed over the feature maps of a given image represents the correlation between feature maps $i$ and $j$.



$$E_l =  \sum_{i,j}(G^l_{ij} - A^l_{ij})^2$$

where $G^l_{ij}$ and $A^l_{ij}$ are the respective style representation in layer $l$ of $x$ and $a$. $N_l$ describes the number of feature maps, each of size $M_l = height * width$. Thus, the total style loss across each layer is 
$$L_{style}(a, x) = \sum_{l \in L} w_l E_l$$
where we weight the contribution of each layer's loss by giving more emphasis to
deep layers. For example, the weight for ‘conv1_1’ can be 1, the weight for ‘conv2_1’ can be 2, and so on.

### Computing the style loss
We will first implement the Gram matrix computation that will be used when calculating the style loss in `compute_style_loss()` below.

In [None]:
def gram_matrix(input_tensor):
    """
    Create and return the gram matrix for `input_tensor`
    Hint: you'll first have to find the channels then
    reshape the input_tensor, compute gram_matrix and
    divide it by shape of input_tensor

    Arguments:
        input_tensor: tensor whose last dimension is the channel dimension.

    Returns:
        `gram` divided by `n` cast to float32 (both defined in the code
        to be filled in below).
    """

    # The channel count is the size of the last dimension
    channels = int(input_tensor.shape[-1])
    
    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # a = tf.reshape(<FILL IN>)
    # gram = <FILL IN>
    # n = <FILL IN>
    
    # YOUR CODE HERE
    raise NotImplementedError()
     
    return gram / tf.cast(n, tf.float32)
 
 

In [None]:
# Test Gram Matrix on a fixed 4x4 tensor
sample_tensor_x=tf.constant([[1.,2.,4,2],[1.,2.,4,44.],[2.,2,2.,2],[12.,4.,1,8]])
np_gram=np.mean(gram_matrix(sample_tensor_x).numpy())

# The mean of the resulting gram matrix must lie strictly between 40 and 200
assert 200>np_gram>40

In [None]:
def compute_style_loss(base_style, gram_target):
    """
    Compute the style loss between a feature map and a target gram matrix.

    Arguments:
        base_style: tensor of dimension h, w, c: height, width, num filters
            of the layer (its shape is unpacked into exactly three values).
        gram_target: gram matrix to compare against -- presumably the
            precomputed gram matrix of the style image; confirm against callers.

    Return: style loss, the mean over all entries of (G-A)**2
    """
    
    height, width, channels = base_style.get_shape().as_list()
    
    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # gram_style = <FILL IN>
    
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # `gram_style` must be defined by the code written above
    return tf.reduce_mean(tf.square(gram_style - gram_target)) 

In [None]:
# Test style Loss
sample_tensor_y=tf.constant([[[1.,2.,4],[1.,2.,4],[2.,2,2],[12.,4.,1]],[[1.,2.,4],[1.,2.,4],[2.,2,2.],[12.,4.,1]],[[1.,2.,4],[1.,2.,4],[2.,2,2.],[12.,4.,1]],[[1.,2.,4],[1.,2.,4],[2.,2,2.],[12.,4.,1]]])
sample_tensor_z=tf.constant([[1.,2.,4],[1.,2.,4],[2.,2,2]])
test_style_output=compute_style_loss(sample_tensor_y, sample_tensor_z).numpy()
assert 210>test_style_output>150

## Apply style transfer to our images


In [None]:
def compute_features(model, content_path, style_path):
    """
    Compute the style features and the content features of the two input images.

    Arguments:
        model: The model that we are using.
        content_path: The path to the content image.
        style_path: The path to the style image

    Returns:
        returns the style features and the content features, in that order.
    """
    
    # Load our images in 
    content_image = load_and_process_img(content_path)
    style_image = load_and_process_img(style_path)
    
    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    
    # # batch compute content and style features
    # style_outputs = model(style_image)
    # content_outputs = <FILL IN>
    # # Get the style and content feature representations from our model  
    # style_features = [style_layer[0] for style_layer in style_outputs[:num_style_layers]]
    # content_features = <FILL IN>
  
    
    # YOUR CODE HERE
    raise NotImplementedError()

    # Both names must be defined by the code written above
    return style_features, content_features



In [None]:
# Test compute_features
# (test_style_features / test_content_features are reused by the calculate_loss test cell)
test_style_features, test_content_features = compute_features(model_VGG() , content_path, style_path)
test_style_len=len(test_style_features)
test_content_len=len(test_content_features)
# Mean activation of the first style feature and of the first content feature
test_style_mean=np.mean(test_style_features[0].numpy())
test_content_mean=np.mean(test_content_features[0].numpy())
assert test_style_len>=5
assert test_content_len>=1
assert 30>test_style_mean> 20
assert 25>test_content_mean>15

### Computing the loss and gradients


In [None]:
def calculate_loss(model, loss_weights, init_image, gram_style_features, content_features):
    """
    This function will compute the total loss.
  
    Arguments:
        model: The model that will give us access to the intermediate layers
        loss_weights: The weights of each contribution of each loss function. 
                  (style weight, content weight)
        init_image: the base image
        gram_style_features: Precomputed gram matrices corresponding to the 
                        defined style layers
        content_features: Precomputed outputs from defined content layers  
      
    Returns:
        total_loss, style_loss, content_loss
        (the style and content losses are returned after being scaled by
        their respective weights; the total is their sum)
    """

    style_weight, content_weight = loss_weights
    
    # Feed our init image through our model. This will give us the content and 
    # style representations at our desired layers. Since we're using eager
    # our model is callable just like any other function!
    model_outputs = model(init_image)
  
    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # style_output_features = <FILL IN>
    # content_output_features = <FILL IN>

    # YOUR CODE HERE
    raise NotImplementedError()
  
    style_loss = 0
    content_loss = 0

    # Accumulate style losses from all layers
    # One weight per style layer, increasing with depth; `wg_layer` is
    # presumably meant to be used in the expression filled in below.
    weight_per_style_layer = [1.0,2.0,3.0,4.0,5.0]
    for target_style, comb_style, wg_layer in zip(gram_style_features, style_output_features,weight_per_style_layer):
        # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
        # style_loss += <FILL IN>
        
        # YOUR CODE HERE
        raise NotImplementedError()
        
    # Accumulate content losses from all layers     
    # Equal weight for every content layer; presumably meant to be used in
    # the expression filled in below.
    weight_per_content_layer = 1.0 / float(num_content_layers)
    for target_content, comb_content in zip(content_features, content_output_features):
        # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
        # content_loss += <FILL IN>
        
        # YOUR CODE HERE
        raise NotImplementedError()
  
    # Scale each accumulated loss by its global weight before summing
    style_loss *= style_weight
    content_loss *= content_weight
    
    
    loss = style_loss + content_loss 
    return loss, style_loss, content_loss

In [None]:
# Test calculate_loss
init_image = tf.convert_to_tensor(load_and_process_img(content_path))
# (style weight, content weight)
loss_weights=(1000, 0.001)
# Reuses test_style_features / test_content_features from the compute_features test cell
gram_style_features = [gram_matrix(style_feature) for style_feature in test_style_features]
test_loss, test_style_loss, test_content_loss = calculate_loss(model_VGG(),
                                                             loss_weights,
                                                             init_image,
                                                             gram_style_features,
                                                             test_content_features)
assert 1e16 > test_loss>1e3
assert 1e16 > test_style_loss>1e3
assert test_content_loss>=0

# Training Loop

Now we will execute the training loop. Here we use [**tf.GradientTape**](https://www.tensorflow.org/programmers_guide/eager#computing_gradients) to compute the gradient. 

In [None]:
import IPython.display
def run_style_transfer(content_path, style_path, num_iterations):
    '''
    Optimize an image so that it keeps the content of one image and takes
    the style of another.

    Arguments:
      content_path: path to the content image
      style_path: path to the style image
      num_iterations: number of optimization steps to run

    Major steps:
      1. Load the VGG model and set layers trainable to False.
        Since we don't need to train any layers of our model,
        we set trainable to false
      2. Get style and content features from compute_features
      3. Compute the gram_matrix for each style feature
      4. Set the initial image which would get trained to feature the content image
      5. Define the optimizer. You can play with learning_rate, beta, epsilon
        Hint: use Adam with learning_rate=5,
                            beta_1=0.99,
                            epsilon=1e-1
      6. Define loss weights style_weight, content_weight
      7. Define num_iterations
      8. Compute_grad using tf.GradientTape() to compute gradient

    Return:
        best_img, best_loss ,imgs
        best_img: deprocessed image with the lowest total loss seen so far
        best_loss: that lowest total loss
        imgs: deprocessed snapshots collected every `display_interval` iterations

    Note that best_img will be used to autograde based on
    similarity of best_img with our provided styled_ref
    '''

    model = model_VGG()
    for layer in model.layers:
        layer.trainable = False

    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = compute_features(model, content_path, style_path)
    gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]

    # Set initial image
    init_image = load_and_process_img(content_path)
    init_image = tf.Variable(init_image, dtype=tf.float32)

    # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
    # # Create our optimizer
    # opt = tf.optimizers.<FILL IN>

    # YOUR CODE HERE
    raise NotImplementedError()

    # Store our best result
    best_loss, best_img = float('inf'), None

    # Create a nice config
    content_weight = 1e3
    style_weight = 1e-2
    loss_weights = (style_weight, content_weight)
    cfg = {
      'model': model,
      'loss_weights': loss_weights,
      'init_image': init_image,
      'gram_style_features': gram_style_features,
      'content_features': content_features
    }

    # For displaying
    num_rows = 2
    num_cols = 5
    # Use an integer interval of at least 1. The previous float division made
    # `i % display_interval == 0` miss most iterations whenever num_iterations
    # was not a multiple of num_rows * num_cols, so fewer snapshots than the
    # grid size were collected.
    display_interval = max(1, num_iterations // (num_rows * num_cols))
    start_time = time.time()
    global_start = time.time()

    # Valid value range of a preprocessed image (0-255 minus the channel means);
    # available for clipping in the code to be filled in below.
    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means

    imgs = []

    def compute_grads(cfg):
        with tf.GradientTape() as tape:
            all_loss = calculate_loss(**cfg)
        # Compute gradients wrt input image
        total_loss = all_loss[0]
        return tape.gradient(total_loss, cfg['init_image']), all_loss

    for i in range(num_iterations):

        # # TODO: Uncomment the lines below and replace <FILL IN> with appropriate code
        # grads, all_loss = compute_grads(cfg)
        # loss, style_score, content_score = <FILL IN>
        # opt.<FILL IN>
        # clipped = <FILL IN>

        # YOUR CODE HERE
        raise NotImplementedError()

        init_image.assign(clipped)

        if loss < best_loss:
            # Update best loss and best image from total loss.
            best_loss = loss
            best_img = deprocess_img(init_image.numpy())

        if i % display_interval == 0:
            # Use the .numpy() method to get the concrete numpy array
            plot_img = init_image.numpy()
            plot_img = deprocess_img(plot_img)
            imgs.append(plot_img)

            print('Iteration: {}'.format(i))
            print('Total loss: {:.4e}, '
                'style loss: {:.4e}, '
                'content loss: {:.4e}, '
                'time: {:.4f}s'.format(loss, style_score, content_score, time.time() - start_time))
            start_time = time.time()

    # Report the total time once, after the loop (it used to print on every iteration)
    print('Total time: {:.4f}s'.format(time.time() - global_start))

    return best_img, best_loss, imgs

In [None]:
# Run 100 optimization iterations on the content and style images
best, best_loss ,imgs = run_style_transfer(content_path,style_path, num_iterations=100)

In [None]:
# Select the image which can pass the similarity test with the provided sample styled image.
# NOTE(review): this replaces the `best` returned by run_style_transfer with the
# snapshot at index 9, so `imgs` must hold at least 10 entries.
best=imgs[9]
Image.fromarray(best)

In [None]:
best.shape  # Just note the shape of the output image: (345, 512, 3)

Since there can be a variety of stylized outputs, we have given a wide margin for testing the output.

In [None]:
#Test stylized images

def mse(imageA, imageB):
    """
    Return the squared error between two images, averaged per pixel.

    The squared differences are summed over every element (all channels
    included) and divided by height * width of `imageA`.
    """
    difference = imageA.astype("float") - imageB.astype("float")
    squared_total = np.sum(difference ** 2)
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return squared_total / pixel_count

# Compare the reference stylized image with our result.
# NOTE(review): the reference is loaded at its stored resolution -- presumably
# it matches best.shape; confirm.
a=np.array(Image.open(styled_ref))
mse_style_output= mse(a,best)

# Wide margin: the per-pixel squared error only has to stay below 15000
assert mse_style_output<15000


# Visualize outputs
We "deprocess" the output image in order to remove the processing that was applied to it. 

In [None]:
def show_results(best_img, content_path, style_path, show_large_final=True):
    """
    Plot the content and style images side by side and, optionally, the
    stylized output in a larger separate figure.

    Arguments:
        best_img: the stylized output image to display.
        content_path: path to the content image.
        style_path: path to the style image.
        show_large_final: when True, also draw `best_img` in its own figure.
    """
    plt.figure(figsize=(10, 5))
    content_picture = resize_img(content_path)
    style_picture = resize_img(style_path)

    # One subplot per input image, left to right
    panels = ((content_picture, 'Content Image'), (style_picture, 'Style Image'))
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        imshow(picture, caption)

    if not show_large_final:
        return

    plt.figure(figsize=(10, 10))
    plt.imshow(best_img)
    plt.title('Output Image')
    plt.show()

In [None]:
# Display the inputs together with the selected stylized output
show_results(best, content_path, style_path)