# Style Transfer with PyTorch

Style transfer Jupyter notebook based on the paper "Image Style Transfer Using Convolutional Neural Networks" by L. A. Gatys et al. (CVPR 2016):
https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf 

This notebook uses PyTorch.

In [1]:
import os
import sys

import numpy as np
import torch
import torch.optim as optim
from PIL import Image, ImageOps
from torchvision import transforms, models

In [2]:
# Notebook configuration: input files, folders and optimisation settings.

# Images to process. Every filename listed here must exist in both the
# content_images and the style_images folder; the result is stored under
# the same filename in the output_images folder.
image_filenames = ['image1.jpg']

# Folders for the three kinds of images
content_images, style_images, output_images = (
    './content_images/',
    './style_images/',
    './output_images/',
)

# Per-layer weights used when summing up the style loss
style_weights = dict(
    conv1_1=1.,
    conv2_1=0.8,
    conv3_1=0.6,
    conv4_1=0.4,
    conv5_1=0.2,
)

# Factors applied to the content loss and the style loss in the total loss
content_weight, style_weight = 1, 1_000_000

# Number of optimisation iterations run for each image
iterations = 5000

# Image size to use.
# An output size of 600x600 uses approx. 3 GB of memory on your GPU.
output_max_size = 800


Check if GPU with CUDA is available and then load the VGG19 model

In [3]:
# Check if GPU w/CUDA is available else use CPU
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained VGG19 feature layers into GPU memory (or regular memory
# when no GPU w/CUDA is available) and put them into inference mode.
vgg19_features = models.vgg19(pretrained=True).features.to(torch_device).eval()

# The network is only used as a fixed feature extractor: the optimizer updates
# the output image, never the VGG weights. Freezing the weights stops autograd
# from computing and accumulating gradients for them on every backward pass.
for param in vgg19_features.parameters():
    param.requires_grad_(False)

# Last expression of the cell: display the layer structure of the model
vgg19_features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace)
  (18): MaxPool2d(kernel_size=2, stride=2, padding=0, 

Now define some helper functions

In [4]:
def images_loader(image_file):
    """Load a content/style image pair and prepare both as network input.

    The content image is read from ``content_images`` and the style image from
    ``style_images``, both under the name ``image_file``. If the longer side of
    the content image exceeds ``output_max_size``, the image is scaled down,
    keeping its aspect ratio, so that the longer side equals
    ``output_max_size``. Smaller images keep their size. The style image is
    always resized to the dimensions chosen for the content image.

    Args:
        image_file: Filename that exists in both image folders.

    Returns:
        Tuple ``(content_image, style_image)`` of normalized tensors, each with
        a leading batch dimension, i.e. shape ``(1, 3, height, width)``.
    """
    content_image = Image.open(content_images + image_file).convert('RGB')
    style_image = Image.open(style_images + image_file).convert('RGB')

    # Normalizer based on mean and std defined here:
    # https://discuss.pytorch.org/t/whats-the-range-of-the-input-value-desired-to-use-pretrained-resnet152-and-vgg19/1683
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

    # Work out the target size (PIL sizes are (width, height)). Scale both
    # sides by the same factor so the longer one lands on output_max_size.
    width, height = content_image.size
    longest_side = max(width, height)
    if longest_side > output_max_size:
        scale = output_max_size / longest_side
        # round() avoids losing a pixel to floating point error; max() keeps
        # extremely elongated images from collapsing to a zero-sized side.
        width = max(1, round(width * scale))
        height = max(1, round(height * scale))
    output_size = (width, height)

    # Resize, convert to a normalized tensor and add the batch dimension.
    # The images are already RGB, so there is no alpha channel to strip.
    # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias.
    content_image = transform(content_image.resize(output_size, Image.LANCZOS)).unsqueeze(0)
    style_image = transform(style_image.resize(output_size, Image.LANCZOS)).unsqueeze(0)

    return content_image, style_image


def get_features(image, model):
    """Feed ``image`` through ``model`` and collect selected layer outputs.

    The sub-modules of ``model`` are applied one after another. Whenever the
    name of a sub-module is listed in the lookup table below, the output of
    that sub-module is stored in the result under the corresponding layer
    label (e.g. ``'conv1_1'``).

    Returns:
        Dict mapping layer label to the output recorded for that layer.
    """
    # Sub-module name -> layer label for the style and content
    # representations of the image. For more details see
    # https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf
    tracked_layers = {
        # style representation
        '0': 'conv1_1',
        '5': 'conv2_1',
        '10': 'conv3_1',
        '19': 'conv4_1',
        '28': 'conv5_1',
        # content representation
        '21': 'conv4_2',
    }

    captured = {}
    activation = image
    for module_name, module in model._modules.items():
        activation = module(activation)
        label = tracked_layers.get(module_name)
        if label is not None:
            captured[label] = activation
    return captured


def gram(x):
    """Compute the Gram matrix of a single feature map.

    Args:
        x: 4-D tensor of shape ``(1, c, h, w)``. The batch dimension must be 1
            because the tensor is flattened to ``(c, h * w)``.

    Returns:
        Tensor of shape ``(c, c)``: the flattened feature map multiplied with
        its own transpose.
    """
    _, c, h, w = x.size()
    # reshape (unlike view) also accepts non-contiguous input, e.g. a feature
    # map that was transposed or sliced with a stride before being passed in.
    flat = x.reshape(c, h * w)
    # Multiply the flattened map with its transpose
    return torch.mm(flat, flat.t())
  
    
def convert_back_to_image(tensor, image_filename):
    """Turn a normalized image tensor into a PIL image and write it to disk.

    Args:
        tensor: Image tensor to convert.
        image_filename: Path the image is saved to.

    Returns:
        The PIL image that was saved.
    """
    # Detached copy in regular (CPU) memory, as a NumPy array
    array = tensor.to("cpu").clone().detach().numpy()
    # Drop the singleton dimensions, then move the first axis to the end
    array = np.transpose(np.squeeze(array), (1, 2, 0))

    # Undo the normalization: multiply by the std, then add the mean
    std = np.array((0.229, 0.224, 0.225))
    mean = np.array((0.485, 0.456, 0.406))
    array = array * std + mean

    # Limit all values to the range 0..1 before scaling to 8-bit
    array = np.clip(array, 0, 1)
    pixels = (array * 255).astype(np.uint8)

    # Build the image from the array and store it
    picture = Image.fromarray(pixels)
    picture.save(image_filename)
    return picture

Now loop through the images and create the new images

In [None]:
# print out a little warning when no GPU with CUDA present
if torch_device == torch.device("cpu"):
    print('Running on CPU. This will take a while ...')

# Make sure the output folder exists before starting, so that a long
# optimisation run cannot fail at the very end when the result is saved.
os.makedirs(output_images, exist_ok=True)

# Iterate over the list of filenames
for image_filename in image_filenames:

    print(f'Processing image: {image_filename}')

    content_image, style_image = images_loader(image_filename)
    content_image = content_image.to(torch_device)
    style_image = style_image.to(torch_device)

    # The content features and style Gram matrices are fixed targets. Computing
    # them without autograd tracking keeps them out of the graph that is
    # back-propagated on every iteration.
    with torch.no_grad():
        # Content and style features
        content_features = get_features(content_image, vgg19_features)
        style_features = get_features(style_image, vgg19_features)
        # Gram matrices
        style_grams = {layer: gram(style_features[layer]) for layer in style_features}

    # Create output image as copy from content_image (as opposed to random, zeros etc.).
    # content_image is already on torch_device, so the clone is too; it must stay
    # a leaf tensor for the optimizer to accept it.
    output_image = content_image.clone().requires_grad_(True)

    # Use Adam optimizer, set learning rate
    optimizer = optim.Adam([output_image], lr=0.003)

    # Now loop x times (number of iterations defined by variable iterations above) over the model
    for i in range(iterations):
        output_features = get_features(output_image, vgg19_features)

        # Content loss
        content_loss = torch.mean((output_features['conv4_2'] - content_features['conv4_2'])**2)
        # Initialize the style loss with zero
        style_loss = 0
        # Iterate through layers and compute style loss
        for layer in style_weights:
            output_feature = output_features[layer]
            _, c, h, w = output_feature.shape
            output_gram = gram(output_feature)
            style_gram = style_grams[layer]
            layer_style_loss = style_weights[layer] * torch.mean((output_gram - style_gram)**2)
            style_loss += layer_style_loss / (c * h * w)

        # Compute loss
        total_loss = content_weight * content_loss + style_weight * style_loss
        # Clear gradients
        optimizer.zero_grad()

        # The graph for output_image is rebuilt by get_features on every
        # iteration and the targets carry no graph, so nothing has to be
        # retained between iterations.
        total_loss.backward()
        optimizer.step()

        if not(i % 100):
            sys.stdout.write('.')
            sys.stdout.flush()

    print()
    print(f'Saving output image: {image_filename}')
    # now save the resulting image in the output_images folder
    convert_back_to_image(output_image, output_images + image_filename)

    del content_image
    del style_image
    torch.cuda.empty_cache()
    

Processing image: image1.jpg
........

In [None]:
# Display the configured list of image filenames
image_filenames