In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F
from PIL import Image
from torchvision.models import vgg19, VGG19_Weights

In [2]:
# Load the style reference image. Converting to RGB guarantees three channels
# (grayscale, palette or RGBA files would otherwise yield a tensor the VGG
# input convolution cannot accept), and the `with` block releases the file
# handle as soon as the pixel data has been copied by `convert`.
with Image.open("./notebook_images/starry_night.jpg") as _style_file:
    style_img = _style_file.convert("RGB")

In [3]:
# Side length, in pixels, of the square images used throughout the notebook.
img_size = 128

# Preprocessing pipeline: resize to img_size x img_size, then convert the
# PIL image to a tensor.
transform = transforms.Compose(
    [transforms.Resize((img_size, img_size)), transforms.ToTensor()]
)

In [None]:
# Preprocess the style image and prepend a batch axis of size 1.
style_tensor = torch.unsqueeze(transform(style_img), 0)
print(f"style_tensor shape: {style_tensor.shape}")

In [None]:
# Starting point for the optimisation: Gaussian noise with the same spatial
# size as the preprocessed style image (img_size x img_size, 3 channels).
# A dedicated seeded generator makes the run reproducible without touching
# the global RNG state.
noise_rng = torch.Generator().manual_seed(0)
noise_tensor = torch.randn(1, 3, img_size, img_size, generator=noise_rng)
print(f"noise_tensor shape: {noise_tensor.shape}")

In [None]:
# Pretrained VGG19 feature extractor (convolutional part only), in eval mode.
vgg19_model = vgg19(weights=VGG19_Weights.DEFAULT).features.eval()

# Rebuild the leading VGG19 layers under readable names (conv_1, relu_1, ...)
# so that individual activations can be addressed by name. The copy stops
# right after the layer named by `last_layer_name`.
last_layer_name = "conv_5"

model = nn.Sequential()
conv_idx = 0  # number of Conv2d layers seen so far; suffix for all layer names
for layer in vgg19_model.children():
    if isinstance(layer, nn.Conv2d):
        conv_idx += 1
        name = f"conv_{conv_idx}"
    elif isinstance(layer, nn.ReLU):
        name = f"relu_{conv_idx}"
        # An in-place ReLU would overwrite the preceding conv output, which
        # is read again afterwards, so use the out-of-place version.
        layer = nn.ReLU(inplace=False)
    elif isinstance(layer, nn.MaxPool2d):
        name = f"pool_{conv_idx}"
        # Max pooling is swapped for average pooling with the same 2x2 window.
        layer = nn.AvgPool2d(kernel_size=2, stride=2)
    elif isinstance(layer, nn.BatchNorm2d):
        name = f"bn_{conv_idx}"
    else:
        # Without this guard `name` would keep its value from the previous
        # iteration and add_module would silently replace that layer.
        raise RuntimeError(f"Unrecognized layer: {layer.__class__.__name__}")

    model.add_module(name, layer)

    if name == last_layer_name:
        break

print(model)

In [7]:
def gram_matrix(input: torch.Tensor) -> torch.Tensor:
    """Return the normalised Gram matrix of a 4-D feature tensor.

    Args:
        input: tensor of size (a, b, c, d), where a is the batch size
            (1 in this notebook), b the number of feature maps and (c, d)
            the spatial dimensions of each feature map.

    Returns:
        Tensor of size (a * b, a * b) holding the inner products between the
        flattened feature maps, divided by the total number of elements
        a * b * c * d. Because the batch and channel axes are merged, a
        batch larger than 1 yields one Gram matrix spanning all samples.

    Raises:
        ValueError: if ``input`` does not have exactly four dimensions.
    """
    a, b, c, d = input.size()

    # One row per feature map, each row flattened to c * d values.
    # reshape (rather than view) also accepts non-contiguous inputs such as
    # permuted or sliced tensors.
    features = input.reshape(a * b, c * d)

    # Inner product between every pair of feature maps.
    G = torch.mm(features, features.t())

    # Normalise by the number of elements so that layers with larger
    # feature maps do not dominate purely because of their size.
    return G.div(a * b * c * d)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# Layers whose Gram matrices make up the style loss, the factor applied to the
# summed loss, and the SGD step size.
style_layers = ("conv_1", "conv_2", "conv_3", "conv_4", "conv_5")
style_weight = 1000
learning_rate = 1e-3

model = model.to(device)
# Only the image is optimised; freezing the network avoids computing and
# storing gradients for its weights on every backward pass.
model.requires_grad_(False)

# Run the style image through the network once and keep every layer's output.
# No autograd graph is needed for these fixed targets.
style_tensor_output = style_tensor.clone().to(device)
style_tensor_results = {}
with torch.no_grad():
    for name, layer in model.named_children():
        style_tensor_output = layer(style_tensor_output)
        style_tensor_results[name] = style_tensor_output

# The target Gram matrices never change, so compute them once up front.
style_grams = {
    name: gram_matrix(style_tensor_results[name]) for name in style_layers
}

# The image being optimised, initialised from the noise tensor.
required_tensor = torch.nn.Parameter(noise_tensor.clone().to(device))
optimizer = optim.SGD([required_tensor], lr=learning_rate)

epochs = 1000
for epoch in range(epochs):
    optimizer.zero_grad()

    # Keep the image inside the valid [0, 1] pixel range before each pass.
    with torch.no_grad():
        required_tensor.clamp_(0, 1)

    output_tensor = required_tensor
    loss = 0.0
    for name, layer in model.named_children():
        output_tensor = layer(output_tensor)

        if name in style_grams:
            loss = loss + F.mse_loss(gram_matrix(output_tensor), style_grams[name])

    # Scale the summed loss exactly once. Scaling inside the layer loop would
    # multiply earlier terms by the weight again for every subsequent layer.
    loss = loss * style_weight

    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    if epoch % 50 == 0:
        print(f"{epoch}/{epochs} loss: {loss_value:.4f}")

    if loss_value == 0.0:
        print(f"Train finished")
        break

# Clamp the optimised image itself; after the loop `output_tensor` holds the
# last layer's activation, not the image.
with torch.no_grad():
    required_tensor.clamp_(0, 1)

# Expose the final image under `output_tensor` as well.
output_tensor = required_tensor.detach()