In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as T
import torch.nn.functional as F
import numpy as np
from torchvision.transforms.functional import resize
from torchvision.utils import save_image
from torchvision.models import vgg19

from PIL import Image
from torchvision.transforms.transforms import Resize

# Encoder

In [None]:
class VGGEncoder(torch.nn.Module):
    """Frozen feature extractor built from four consecutive slices of VGG-19's `features`."""

    def __init__(self):
        super().__init__()
        backbone = vgg19(weights='DEFAULT').features
        # The slice boundaries (and the attribute names) define the state-dict keys,
        # so they must stay exactly as they are for saved checkpoints to load.
        self.slice1 = backbone[: 2]
        self.slice2 = backbone[2: 7]
        self.slice3 = backbone[7: 12]
        self.slice4 = backbone[12: 21]
        # The encoder is never trained: switch off gradients for every parameter.
        self.requires_grad_(False)

    def forward(self, images, output_last_feature=False):
        """
        Run the images through the four slices in order.

        :param images: input batch fed to the first slice
        :param output_last_feature: when True, return only the output of the last slice
        :return: the last slice's output, or a tuple with the outputs of all four slices
        """
        stage_outputs = []
        h = images
        for stage in (self.slice1, self.slice2, self.slice3, self.slice4):
            h = stage(h)
            stage_outputs.append(h)
        if output_last_feature:
            return stage_outputs[-1]
        return tuple(stage_outputs)

# ADAIN

In [None]:
def calc_mean_std(features):
    """
    Per-sample, per-channel mean and standard deviation over the spatial positions.

    :param features: shape of features -> [batch_size, c, h, w]
    :return: (mean, std), each of shape [batch_size, c, 1, 1]; std has 1e-6 added
             so that it can be used as a divisor
    """
    n, channels = features.size()[:2]
    # Collapse all spatial positions into one axis so the statistics are per (sample, channel).
    flat = features.reshape(n, channels, -1)
    stat_shape = (n, channels, 1, 1)
    features_mean = flat.mean(dim=2).reshape(stat_shape)
    features_std = flat.std(dim=2).reshape(stat_shape) + 1e-6
    return features_mean, features_std


def adain(content_features, style_features):
    """
    Adaptive Instance Normalization.

    Standardizes the content features per sample and channel, then rescales and
    shifts them with the style features' per-channel std and mean.

    :param content_features: shape -> [batch_size, c, h, w]
    :param style_features: shape -> [batch_size, c, h, w]
    :return: normalized_features shape -> [batch_size, c, h, w]
    """
    c_mean, c_std = calc_mean_std(content_features)
    s_mean, s_std = calc_mean_std(style_features)
    centered = content_features - c_mean
    return s_std * centered / c_std + s_mean

# Decoder

In [None]:
class RC(torch.nn.Module):
    """Reflection padding followed by a convolution, with an optional ReLU on the output."""

    def __init__(self, in_channels, out_channels, kernel_size=3, pad_size=1, activated=True):
        super().__init__()
        # A single int pads all four sides by the same amount.
        self.pad = nn.ReflectionPad2d(pad_size)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size)
        self.activated = activated

    def forward(self, x):
        """Pad, convolve, and apply ReLU unless the layer was built with activated=False."""
        out = self.conv(self.pad(x))
        return F.relu(out) if self.activated else out


class Decoder(nn.Module):
    """Nine RC layers with three 2x upsampling steps, mapping 512-channel features to 3 channels."""

    def __init__(self):
        super().__init__()
        # Attribute names rc1..rc9 are part of the state-dict keys; keep them unchanged.
        self.rc1 = RC(512, 256, 3, 1)
        self.rc2 = RC(256, 256, 3, 1)
        self.rc3 = RC(256, 256, 3, 1)
        self.rc4 = RC(256, 256, 3, 1)
        self.rc5 = RC(256, 128, 3, 1)
        self.rc6 = RC(128, 128, 3, 1)
        self.rc7 = RC(128, 64, 3, 1)
        self.rc8 = RC(64, 64, 3, 1)
        # The final layer has no ReLU.
        self.rc9 = RC(64, 3, 3, 1, False)

    def forward(self, features):
        """Decode a feature map; each of the three interpolate calls doubles height and width."""
        h = F.interpolate(self.rc1(features), scale_factor=2)
        for layer in (self.rc2, self.rc3, self.rc4, self.rc5):
            h = layer(h)
        h = F.interpolate(h, scale_factor=2)
        for layer in (self.rc6, self.rc7):
            h = layer(h)
        h = F.interpolate(h, scale_factor=2)
        return self.rc9(self.rc8(h))

# Image Generation

In [None]:
class Model(nn.Module):
    """Style-transfer network: VGG encoder, AdaIN on its last feature map, then the decoder."""

    def __init__(self):
        super().__init__()
        self.vgg_encoder = VGGEncoder()
        self.decoder = Decoder()

    def generate(self, content_images,style_images,alpha=1.0):
        """
        Produce stylized images by aligning the content features' mean and std with
        those of the style features (AdaIN) and decoding the result.

        Args:
            content_images (torch.Tensor): content batch of shape (batch_size, channels, height, width)
            style_images (torch.Tensor): style batch of shape (batch_size, channels, height, width)
            alpha (float): stylization strength; 0 decodes the plain content features,
                1 decodes the fully re-normalized features.

        Returns:
            out (torch.Tensor): the decoder output for the blended features, i.e. the
                generated stylized image(s)
        """
        content_feat = self.vgg_encoder(content_images, output_last_feature=True)
        style_feat = self.vgg_encoder(style_images, output_last_feature=True)

        stylized_feat = adain(content_feat, style_feat)
        # Linear blend between the stylized and the original content features.
        blended = alpha * stylized_feat + (1 - alpha) * content_feat
        return self.decoder(blended)


# Transforms

In [None]:
# (mean, std) per RGB channel used for normalization.
stats = ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
def transforms(H,W):
  """Build the preprocessing pipeline: resize to (H, W), convert to a tensor, normalize with `stats`."""
  mean, std = stats
  steps = [
    T.Resize((H, W)),
    T.ToTensor(),
    T.Normalize(mean, std, inplace=True),
  ]
  return T.Compose(steps)

# Denormalization

In [None]:
def denorm(tensor):
  """
  Undo the ImageNet normalization (multiply by std, add mean) on an image tensor.

  The statistics are created on the same device as `tensor`, so the function does
  not depend on any global device variable being defined first.
  The result is not clamped to [0, 1]; callers that need a valid image range must
  clamp it themselves.

  Args:
    tensor: Normalized tensor of shape [batch_size, c, h, w] or [c, h, w] with c == 3
  Returns:
    res: De-normalized tensor with the same shape as the input
  """
  # Shape (3, 1, 1) broadcasts over height/width and over an optional batch axis.
  std = torch.tensor([0.229, 0.224, 0.225], device=tensor.device).reshape(-1, 1, 1)
  mean = torch.tensor([0.485, 0.456, 0.406], device=tensor.device).reshape(-1, 1, 1)
  res = tensor * std + mean
  return res

# Choosing device

In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Device shared by the rest of the notebook.
device = get_default_device()

# Image prediction

In [None]:
def predict_image(content_path, style_path, model,output_path,alpha=1.0):
  """
  Generates a stylized image by aligning the mean and variance of the
  content features with those of the style features using the
  provided model and saves the result to `<output_path>.jpg`.

  Args:
  content_path (str): the file path to the content image
  style_path (str): the file path to the style image
  model (nn.Module): the pre-trained model used for style transfer; must expose
    `generate(content, style, alpha)`
  output_path (str): the output file path WITHOUT extension ('.jpg' is appended)
  alpha (float):  to adjust the degree of stylization. It should be a value between 0 and 1 (default).
  """
  torch.cuda.empty_cache()
  # Load both images and force 3 channels so grayscale / RGBA / palette files
  # match the 3-channel normalization statistics. The context managers close the files.
  with Image.open(content_path) as content_file:
    c = content_file.convert('RGB')
  with Image.open(style_path) as style_file:
    s = style_file.convert('RGB')
  # PIL reports (width, height); the transforms and resize take (height, width).
  og_size = (c.size[1], c.size[0])
  s_size = s.size
  # Converting image to tensors and normalizing them.
  # Content images taller than 2000 px are processed at 90% of their size.
  if og_size[0]<=2000:
    tfms = transforms(int(og_size[0]), int(og_size[1]))
  else:
    tfms = transforms(int(og_size[0]*0.9), int(og_size[1]*0.9))
  c_tensor = tfms(c).unsqueeze(0).to(device)
  # The style image is processed at its own native size.
  tfms = transforms(s_size[1], s_size[0])
  s_tensor = tfms(s).unsqueeze(0).to(device)
  # Generate stylized image using the model
  model.eval()
  with torch.no_grad():
    out = model.generate(c_tensor, s_tensor, alpha)
  # Drop only the batch axis; a bare squeeze() would also drop a size-1 height or width.
  img = out.squeeze(0)
  # Denormalizing the image and saving it at the content image's original size
  img = denorm(img).cpu().detach()
  save_image(resize(img, size=og_size), f'{output_path}.jpg', nrow=1)
  torch.cuda.empty_cache()


# File paths

In [None]:
# Creating a folder for outputs (safe to re-run: an existing folder is not an error)
import os
os.makedirs('/content/output', exist_ok=True)

mkdir: cannot create directory ‘/content/output’: File exists


In [None]:
# Loading the pre-trained model.
model = Model().to(device)
# map_location remaps the checkpoint's tensors onto the selected device, so a
# checkpoint saved on a GPU still loads when the notebook falls back to CPU.
model.load_state_dict(torch.load('/content/drive/MyDrive/ADAIN/final.pth', map_location=device))

<All keys matched successfully>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os

In [None]:
# Render nine stylized images, one per stylization strength alpha = 0, 0.125, ..., 1.0.
# Each result is saved as /content/output/<alpha>.jpg.
# NOTE(review): the content image used here differs from the one shown as "Content"
# in the figure cells further down ('pexels-pixabay-208745.jpg') - confirm which is intended.
for strength in np.linspace(0, 1, 9):
  predict_image(
    content_path='/content/istockphoto-1359275011-170667a.jpg',
    style_path='/content/starry-night-g89b7431a8_1920.jpg',
    model=model,
    output_path=f'/content/output/{str(strength)}',
    alpha=strength,
  )

In [None]:
# Images for the overview figure: the content image, the style image, and then
# the names of the files in the output folder in sorted order.
names = sorted(os.listdir('/content/output'))
file = [
  '/content/pexels-pixabay-208745.jpg',
  '/content/starry-night-g89b7431a8_1920.jpg',
  *names,
]

In [None]:
def show_images(names):
    """
    Plot the content image, the style image and the stylized outputs in a 3-column
    grid and save the figure to /content/alpha.png.

    :param names: list whose first two entries are full paths (content, style) and
        whose remaining entries are file names inside /content/output; the stylized
        images are labelled alpha = 0.0, 0.125, ... in the order given
    """
    ncols = 3
    # Keep the 12-row canvas, growing it only if more images are passed than it can hold.
    nrows = max(12, -(-len(names) // ncols))
    alpha_step = 0.125
    fontsize = 60
    fig = plt.gcf()
    fig.set_size_inches(ncols * 20, nrows * 20)
    for i, name in enumerate(names):
        # The first two entries are already full paths; the rest live in the output folder.
        path = name if i < 2 else '/content/output/' + name
        img = mpimg.imread(path)
        # Set up subplot; subplot indices start at 1
        sp = plt.subplot(nrows, ncols, i + 1)
        sp.axis('Off') # Don't show axes (or gridlines)
        sp.imshow(img)
        if i == 0:
            title = 'Content'
        elif i == 1:
            title = 'Style'
        else:
            title = f'α = {str((i - 2) * alpha_step)}'
        sp.set_title(title, fontsize=fontsize)
    # Save once, after every panel is drawn, instead of re-writing the file on each iteration.
    if len(names) > 0:
        fig.savefig('/content/alpha.png', bbox_inches='tight')


In [None]:
# Draw the content / style / stylized-output grid; the figure is also written to /content/alpha.png.
show_images(file)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
import numpy as np
import cv2

# Build one side-by-side strip: content | style | output at alpha = 1.0.
image_paths = [
    '/content/pexels-pixabay-208745.jpg',
    '/content/starry-night-g89b7431a8_1920.jpg',
    '/content/output/1.0.jpg',
]

# cv2.imread signals an unreadable or missing file by returning None instead of
# raising, so check explicitly to get a clear error rather than an AttributeError.
images = []
for path in image_paths:
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {path}')
    images.append(img)
img1, img2, img3 = images

# canvas height is the tallest image; canvas width is the sum of all widths
h_max = max(img.shape[0] for img in images)
w_max = sum(img.shape[1] for img in images)

# create an empty (black) canvas of the required size
img_concat = np.zeros((h_max, w_max, 3), dtype=np.uint8)

# paste the images left to right, each aligned to the top edge
x_offset = 0
for img in images:
    h, w = img.shape[:2]
    img_concat[:h, x_offset:x_offset + w, :] = img
    x_offset += w

# save the concatenated image
cv2.imwrite('/content/img_concat.jpg', img_concat)


True