### Task: Neural Style Transfer 

#### For this task, I have selected two datasets. First, the dataset of best artworks of all time, and other is the dataset of Images of Dragon Ball Z characters. I will choose a style image from the art dataset and train the CNN network to transfer its style to the anime characters' images.

I have used the autoencoder architecture in which the original image will be taken as an input, and the 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from PIL import Image
import matplotlib.pyplot as plt
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
import cv2

  from .autonotebook import tqdm as notebook_tqdm


### Creating the Dataset

In [3]:
class ContentDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform

        files = os.listdir(folder_path)
        image_files = [file for file in files if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

        self.images = [Image.open(os.path.join(folder_path, image_file)) for image_file in image_files]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]

        if self.transform:
            image = self.transform(image)

        return image

# Define a transformation to be applied to the images
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
]) 

# Create a custom dataset
dataset = ContentDataset(folder_path="C:\\Users\\awast\\Downloads\\archive_3", transform=transform)

In [4]:
dataset_size=len(dataset)
dataset_size

3145

#### Importing Pretrained VGG network

In [5]:
import torchvision.models as models


In [7]:


def plot_image(tensor):
#     if not torch.jit.is_scripting() and not torch.jit.is_tracing():
#         _log_api_usage_once(save_image)
#     grid = make_grid(tensor, **kwargs)
    # Add 0.5 after unnormalizing to [0, 255] to round to the nearest integer
    ndarr = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()
    im = Image.fromarray(ndarr)
    plt.imshow(im)

# Load image file
def load_image(path):
    # Images loaded as BGR
    img = cv2.imread(path)
    return img


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# image_size = 224



# original_img = dataset[0]
# style_img = load_image(styles[2])

## Creating the Model

#### I have used an encoder decoder architecture, which will take the original image as the input and output the stylized image as the output

In [None]:
import torch
import torch.nn as nn

class ConvLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, norm="instance"):
        super(ConvLayer, self).__init__()
        # Padding Layers
        padding_size = kernel_size // 2
        self.reflection_pad = nn.ReflectionPad2d(padding_size)

        # Convolution Layer
        self.conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, stride)

        # Normalization Layers
        self.norm_type = norm
        if (norm=="instance"):
            self.norm_layer = nn.InstanceNorm2d(out_channels, affine=True)
        elif (norm=="batch"):
            self.norm_layer = nn.BatchNorm2d(out_channels, affine=True)

    def forward(self, x):
        x = self.reflection_pad(x)
        x = self.conv_layer(x)
        if (self.norm_type=="None"):
            out = x
        else:
            out = self.norm_layer(x)
        return out

class ResidualLayer(nn.Module):
    
    def __init__(self, channels=128, kernel_size=3):
        super(ResidualLayer, self).__init__()
        self.conv1 = ConvLayer(channels, channels, kernel_size, stride=1)
        self.relu = nn.ReLU()
        self.conv2 = ConvLayer(channels, channels, kernel_size, stride=1)

    def forward(self, x):
        identity = x                     # preserve residual
        out = self.relu(self.conv1(x))   # 1st conv layer + activation
        out = self.conv2(out)            # 2nd conv layer
        out = out + identity             # add residual
        return out

class DeconvLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, output_padding, norm="instance"):
        super(DeconvLayer, self).__init__()

        # Transposed Convolution 
        padding_size = kernel_size // 2
        self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding_size, output_padding)

        # Normalization Layers
        self.norm_type = norm
        if (norm=="instance"):
            self.norm_layer = nn.InstanceNorm2d(out_channels, affine=True)
        elif (norm=="batch"):
            self.norm_layer = nn.BatchNorm2d(out_channels, affine=True)

    def forward(self, x):
        x = self.conv_transpose(x)
        if (self.norm_type=="None"):
            out = x
        else:
            out = self.norm_layer(x)
        return out

### EncoderDecoder Architecture

In [8]:
class EncoderDecoder(nn.Module):
   
    def __init__(self):
        super(EncoderDecoder, self).__init__()
        self.ConvBlock = nn.Sequential(
            ConvLayer(3, 32, 9, 1),
            nn.ReLU(),
            ConvLayer(32, 64, 3, 2),
            nn.ReLU(),
            ConvLayer(64, 128, 3, 2),
            nn.ReLU()
        )
        self.ResidualBlock = nn.Sequential(
            ResidualLayer(128, 3), 
            ResidualLayer(128, 3), 
            ResidualLayer(128, 3), 
            ResidualLayer(128, 3), 
            ResidualLayer(128, 3)
        )
        self.DeconvBlock = nn.Sequential(
            DeconvLayer(128, 64, 3, 2, 1),
            nn.ReLU(),
            DeconvLayer(64, 32, 3, 2, 1),
            nn.ReLU(),
            ConvLayer(32, 3, 9, 1, norm="None")
        )

    def forward(self, x):
        x = self.ConvBlock(x)
        x = self.ResidualBlock(x)
        out = self.DeconvBlock(x)
        return out

#### Using Pretrained VGG16 with original Caffe weights trained of ImageNet

For Feature extraction, outputs from layers 3,8,15,22 will be used to calculate the style loss and the content loss

In [9]:
class VGG16(nn.Module):
    def __init__(self, vgg_path="vgg16-00b39a1b.pth"):
        super(VGG16, self).__init__()
        # Load VGG Skeleton, Pretrained Weights
        vgg16_features = models.vgg16(pretrained=False)
        vgg16_features.load_state_dict(torch.load(vgg_path), strict=False)
        self.features = vgg16_features.features

        # Turn-off Gradient History
        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        layers = {'3': 'relu1_2', '8': 'relu2_2', '15': 'relu3_3', '22': 'relu4_3'}
        features = {}
        for name, layer in self.features._modules.items():
            x = layer(x)
            if name in layers:
                features[layers[name]] = x
                if (name=='22'):
                    break

        return features

In [10]:
# train_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)

#### Calculating Gram Matrix to quantify style loss

In [11]:
# Gram Matrix
def gram(tensor):
    B, C, H, W = tensor.shape
    x = tensor.view(B, C, H*W)
    x_t = x.transpose(1, 2)
    return  torch.bmm(x, x_t) / (C*H*W)

#### Defining Some utility functions

In [12]:
image_size=224
# Show image
def show(img):
    
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    img = np.array(img/255).clip(0,1)
    
    plt.figure(figsize=(10, 5))
    plt.imshow(img)
    plt.show()

def saveimg(img, image_path):
    img = img.clip(0, 255)
    cv2.imwrite(image_path, img)

# Preprocessing ~ Image to Tensor
def image_to_tensor(img, max_size=None):
    # Rescale the image
    if (max_size==None):
        itot_t = transforms.Compose([
            #transforms.ToPILImage(),
#             transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ])    
    else:
        H, W, C = img.shape
        image_size = tuple([int((float(max_size) / max([H,W]))*x) for x in [H, W]])
        itot_t = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ])

    # Convert image to tensor
    tensor = itot_t(img)

    # Add the batch_size dimension
    tensor = tensor.unsqueeze(dim=0)
    return tensor

# Preprocessing ~ Tensor to Image
def tensor_to_image(tensor):
    
    tensor = tensor.squeeze()
    #img = ttoi_t(tensor)
    img = tensor.cpu().numpy()
    
    # Transpose from [C, H, W] -> [H, W, C]
    img = img.transpose(1, 2, 0)
    return img


In [13]:
# plt.imshow(styles[3])
# STYLE_IMAGE_PATH = "images/mosaic.jpg"
style_datapath="C:\\Users\\awast\\Downloads\\artworks"
files = os.listdir(style_datapath)
image_files = [file for file in files if file.lower().endswith(('.png', '.jpg', '.jpeg'))]

styles = [Image.open(os.path.join(style_datapath, image_file)) for image_file in image_files]


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import random
import numpy as np
import time

# import vgg
# import transformer
# import utils

TRAIN_IMAGE_SIZE = 224
DATASET_PATH = "C:\\Users\\awast\\Downloads\\dragon_ball"
NUM_EPOCHS = 2


BATCH_SIZE = 4 
CONTENT_WEIGHT = 1
STYLE_WEIGHT = 10
ADAM_LR = 0.001
SAVE_MODEL_PATH = "C:/Users/awast/dashtoon/models/"
SAVE_IMAGE_PATH = "C:/Users/awast/dashtoon/images/out/"
SAVE_MODEL_EVERY = 100 # 2,000 Images with batch size 4
SEED = 35
train_dataset = datasets.ImageFolder(DATASET_PATH, transform=transform)
train_set, val_set = torch.utils.data.random_split(train_dataset, [3000, 145])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

def train():
    # Seeds
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    # Device
    device = ("cuda" if torch.cuda.is_available() else "cpu")

    # Dataset and Dataloader
    transform = transforms.Compose([
        transforms.Resize(TRAIN_IMAGE_SIZE),
        transforms.CenterCrop(TRAIN_IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
   

    # Load networks
    TransformerNetwork = EncoderDecoder().to(device)
    VGG =VGG16().to(device)

    # Saving Style Features in Advance as they wont change and we wont have to recompute them
    imagenet_neg_mean = torch.tensor([-103.939, -116.779, -123.68], dtype=torch.float32).reshape(1,3,1,1).to(device)
    style_image = load_image("C:\\Users\\awast\\Downloads\\artworks\\Pablo_Picasso_381.jpg")
    style_tensor = image_to_tensor(style_image).to(device)
    style_tensor = style_tensor.add(imagenet_neg_mean)
    B, C, H, W = style_tensor.shape
    style_features = VGG(style_tensor.expand([BATCH_SIZE, C, H, W]))
    style_gram = {}
    for key, value in style_features.items():
        style_gram[key] = gram(value)

    # Optimizer
    optimizer = optim.Adam(TransformerNetwork.parameters(), lr=ADAM_LR)

    # Loss trackers
    content_loss_history = []
    style_loss_history = []
    total_loss_history = []
    batch_content_loss_sum = 0
    batch_style_loss_sum = 0
    batch_total_loss_sum = 0

    # Optimization/Training Loop
    batch_count = 1
    start_time = time.time()
    for epoch in range(NUM_EPOCHS):
        print("========Epoch {}/{}========".format(epoch+1, NUM_EPOCHS))
        for content_batch, _ in train_loader:
            # Get current batch size in case of odd batch sizes
            curr_batch_size = content_batch.shape[0]

            # Free-up unneeded cuda memory
            torch.cuda.empty_cache()

            # Zero-out Gradients
            optimizer.zero_grad()

            # Generate images and get features
            content_batch = content_batch[:,[2,1,0]].to(device)
            generated_batch = TransformerNetwork(content_batch)
            content_features = VGG(content_batch.add(imagenet_neg_mean))
            generated_features = VGG(generated_batch.add(imagenet_neg_mean))

            # Content Loss
            MSELoss = nn.MSELoss().to(device)
            content_loss = CONTENT_WEIGHT * MSELoss(generated_features['relu2_2'], content_features['relu2_2'])            
            batch_content_loss_sum += content_loss

            # Style Loss
            style_loss = 0
            for key, value in generated_features.items():
                s_loss = MSELoss(gram(value), style_gram[key][:curr_batch_size])
                style_loss += s_loss
            style_loss *= STYLE_WEIGHT
            batch_style_loss_sum += style_loss.item()

            # Total Loss
            total_loss = content_loss + style_loss
            batch_total_loss_sum += total_loss.item()

            # Backprop and Weight Update
            total_loss.backward()
            optimizer.step()

            # Save Model and Print Losses
            if (((batch_count-1)%SAVE_MODEL_EVERY == 0) or (batch_count==NUM_EPOCHS*len(train_loader))):
                # Print Losses
                
                # Save Model
                checkpoint_path = SAVE_MODEL_PATH + "checkpoint_" + str(batch_count-1) + ".pth"
                torch.save(TransformerNetwork.state_dict(), checkpoint_path)
                

                # Save sample generated image
                sample_tensor = generated_batch[0].clone().detach().unsqueeze(dim=0)
                sample_image = tensor_to_image(sample_tensor.clone().detach())
                sample_image_path = SAVE_IMAGE_PATH + "sample2_" + str(batch_count-1) + ".png"
                saveimg(sample_image, sample_image_path)
                print("Saved sample tranformed image at {}".format(sample_image_path))

                # Save loss histories
                content_loss_history.append(batch_total_loss_sum/batch_count)
                style_loss_history.append(batch_style_loss_sum/batch_count)
                total_loss_history.append(batch_total_loss_sum/batch_count)

            # Iterate Batch Counter
            batch_count+=1


    # Save TransformerNetwork weights
    TransformerNetwork.eval()
    TransformerNetwork.cpu()
    final_path = SAVE_MODEL_PATH + "transformer_weight.pth"
#     print("Saving TransformerNetwork weights at {}".format(final_path))
    torch.save(TransformerNetwork.state_dict(), final_path)
#     print("Done saving final model")

#     # Plot Loss Histories
#     if (PLOT_LOSS):
#         utils.plot_loss_hist(content_loss_history, style_loss_history, total_loss_history)

train()



	Content Loss:	318455.59
	Style Loss:	35116648.00
	Total Loss:	35435104.00
Time elapsed:	0.4330017566680908 seconds
Saved TransformerNetwork checkpoint file at C:/Users/awast/dashtoon/models/checkpoint_0.pth
Saved sample tranformed image at C:/Users/awast/dashtoon/images/out/sample2_0.png
	Content Loss:	443173.94
	Style Loss:	18303304.47
	Total Loss:	18746478.42
Time elapsed:	19.906001567840576 seconds
Saved TransformerNetwork checkpoint file at C:/Users/awast/dashtoon/models/checkpoint_100.pth
Saved sample tranformed image at C:/Users/awast/dashtoon/images/out/sample2_100.png
	Content Loss:	497884.44
	Style Loss:	12354724.98
	Total Loss:	12852609.36
Time elapsed:	41.65099835395813 seconds
Saved TransformerNetwork checkpoint file at C:/Users/awast/dashtoon/models/checkpoint_200.pth
Saved sample tranformed image at C:/Users/awast/dashtoon/images/out/sample2_200.png
	Content Loss:	528413.81
	Style Loss:	9056620.03
	Total Loss:	9585034.00
Time elapsed:	64.04099941253662 seconds
Saved Tran

#### Examples of Results

In [None]:
def test():
    # Device
    device = ("cuda" if torch.cuda.is_available() else "cpu")

    # Load Transformer Network
    net = EncoderDecoder()
    net.load_state_dict(torch.load(SAVE_MODEL_PATH + "transformer_weight.pth"))
    net = net.to(device)

    with torch.no_grad():
#         content_image_path = input("Enter the image path: ")
        content_image = load_image(content_image_path)
        
        content_tensor = utils.image_to_t(content_image).to(device)
        generated_tensor = net(content_tensor)
        generated_image = tensor_to_image(generated_tensor.detach())
        
        show(generated_image)
        saveimg(generated_image, "generated.jpg")