In [119]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import cv2 
import copy

In [86]:
MAX_IMAGE_SIZE = 300

OPTIMIZER = 'adam' 
ADAM_LR = 10
CONTENT_WEIGHT = 5e0
STYLE_WEIGHT = 1e2
TV_WEIGHT = 1e-3
NUM_ITER = 500
SHOW_ITER = 100


In [100]:
MODEL_PATH = "model/vgg19-d01eb7cb.pth"
CONTENT_IMG = "content_img/content10.jpg"
STYLE_IMG = "style_img/style1.jpg"
IMAGE_HEIGHT = 225
IMAGE_WIDTH = 300
COLOR_CHANNELS = 3
DEVICE = "cuda"

In [102]:
content_image = matplotlib.pyplot.imread(CONTENT_IMG)
style_image = matplotlib.pyplot.imread(STYLE_IMG)
content_tensor = itot(content_image).to(device)
style_tensor = itot(style_image).to(device)

(225, 300)
(225, 300)


In [108]:
def noise_tensor(content_tensor, init_image='random'):
    B, C, H, W = content_tensor.shape
    tensor = torch.randn(C, H, W).mul(0.001).unsqueeze(0)
    
    return tensor

content_tensor = itot(content_image).to(device)
style_tensor = itot(style_image).to(device)
g = noise_tensor(content_tensor)
g = g.to(device).requires_grad_(True)

(225, 300)
(225, 300)


AttributeError: 'str' object has no attribute 'shape'

In [95]:
generate_noise_image(content_tensor, noise_ratio = 0.6)

torch.Size([1, 225, 300, 3]) torch.Size([1, 3, 384, 512])


RuntimeError: The size of tensor a (3) must match the size of tensor b (512) at non-singleton dimension 3

torch.Size([225, 300, 3])

In [93]:

def itot(img):
    H, W, C = img.shape
    image_size = tuple([int((float(MAX_IMAGE_SIZE) / max([H,W]))*x) for x in [H, W]])
    print(image_size)
    itot_t = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(image_size),
        transforms.ToTensor()
    ])
    
    normalize_t = transforms.Normalize([103.939, 116.779, 123.68],[1,1,1])
    tensor = normalize_t(itot_t(img)*255)   # multiplying by 255 because .ToTensor() method convert it in range (0,1)
    
    # Add the batch_size dimension
    tensor = tensor.unsqueeze(dim=0)
    return tensor

def ttoi(tensor):
    ttoi_t = transforms.Compose([
        transforms.Normalize([-103.939, -116.779, -123.68],[1,1,1])]
    )
    
    # Remove the batch_size dimension
    tensor = tensor.squeeze()
    img = ttoi_t(tensor)
    img = img.cpu().numpy()
    
    # Transpose from [C, H, W] -> [H, W, C]
    img = img.transpose(1, 2, 0)
    return img


In [70]:
vgg = models.vgg19(pretrained=False)
vgg.load_state_dict(torch.load(MODEL_PATH), strict=False)

# Change Pooling Layer
def pool_(model, pool='avg'):
    if (pool=='avg'):
        ct=0
        for layer in model.children():
            if isinstance(layer, nn.MaxPool2d):
                model[ct] = nn.AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=False)
            ct+=1
            
    return model

print(vgg.features)
model = copy.deepcopy(vgg.features)
model.to(DEVICE)                     # load model to GPU

# Turn-off unnecessary gradient tracking
for param in model.parameters():
    param.requires_grad = False

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPoo

In [74]:
mse_loss = torch.nn.MSELoss()

In [113]:
def content_loss(a_C, a_G):
    J_content = mse_loss(a_G,a_C)
    
    return J_content

def gram(tensor):
    B, C, H, W = tensor.shape
    x = tensor.view(C, H*W)
    
    return torch.mm(x, x.t())

def style_loss(a_G,a_S):
    c1,c2 = a_G.shape
    loss = mse_loss(a_G, a_S)
    return loss / 4*(c1**2) 
    

In [114]:
def get_features(model, tensor):
    layers = {
        '3': 'relu1_2',   # Style layers
        '8': 'relu2_2',
        '17' : 'relu3_3',
        '26' : 'relu4_3',
        '35' : 'relu5_3',
        '22' : 'relu4_2', # Content layers
    }
    
    # Get features
    features = {}
    x = tensor
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            if (name=='22'):   # relu4_2
                features[layers[name]] = x
            elif (name=='31'): # relu5_2
                features[layers[name]] = x
            else:
                b, c, h, w = x.shape
                features[layers[name]] = gram(x) / (h*w)
                
            # Terminate forward pass
            if (name == '35'):
                break
            
    return features

In [120]:
def fit(iteration=NUM_ITER):     
    # Get features representations/Forward pass
    content_layers = ['relu4_2']
    style_layers = ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3']
    style_weights = {'relu1_2': 0.2, 'relu2_2': 0.2, 'relu3_3': 0.2, 'relu4_3': 0.2, 'relu5_3': 0.2}
    c_feat = get_features(model, content_tensor)
    s_feat = get_features(model, style_tensor)
    
    i = [0]
    while i[0] < iteration:
        def closure():
            # Zero-out gradients
            optimizer.zero_grad()

            # Forward pass
            g_feat = get_features(model, g)

            # Compute Losses
            c_loss=0
            s_loss=0
            for j in content_layers:
                c_loss += content_loss(g_feat[j], c_feat[j])
            for j in style_layers:
                s_loss += style_weights[j] * style_loss(g_feat[j], s_feat[j])
            
            c_loss = CONTENT_WEIGHT * c_loss
            s_loss = STYLE_WEIGHT * s_loss
            total_loss = c_loss + s_loss

            # Backprop
            total_loss.backward(retain_graph=True)
            
            # Print Loss, show and save image
            i[0]+=1
            if (((i[0] % SHOW_ITER) == 1) or (i[0]==NUM_ITER)):
                print("Style Loss: {} Content Loss: {} TV Total Loss : {}".format(s_loss.item(), c_loss.item(), total_loss.item()))
#                 if (PRESERVE_COLOR=='True'):
#                     g_ = transfer_color(ttoi(content_tensor.clone().detach()), ttoi(g.clone().detach()))
#                 else:
                g_ = ttoi(g.clone().detach())
                plt.show(g_)
                saveimg(g_, i[0]-1)
                plt.show()
            
            return (total_loss)
        
        # Weight/Pixel update
        optimizer.step(closure)

    return g

In [121]:
optimizer = optim.Adam([g], lr=ADAM_LR)
out = fit(iteration=NUM_ITER)

Style Loss: 1.1932658847648317e+18 Content Loss: 3126704.5 TV Total Loss : 1.1932658847648317e+18


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()