In [1]:
import torch
import os
from torchsummary import summary
from realesrgan import RealESRGANer
from basicsr.archs.rrdbnet_arch import RRDBNet

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


In [2]:
torch.cuda.is_available()

True

In [3]:
# Upsampler

model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64)

upsampler = RealESRGANer(
    scale=4,
    model_path='realesrgan/experiments/pretrained_models/RealESRGAN_x4plus.pth',
    model=model,
    tile=False,
    tile_pad=10,
    pre_pad=0,
    half=False)

In [7]:
from src.helpers import utils
from compress import make_deterministic
from src.loss.perceptual_similarity import perceptual_loss as ps
from default_config import ModelModes

In [8]:
# Compression

# Reproducibility
make_deterministic()
perceptual_loss_fn = ps.PerceptualLoss(model='net-lin', net='alex', use_gpu=torch.cuda.is_available())

# Load model
device = torch.device('cpu')#utils.get_device()
print(device)
logger = utils.logger_setup(logpath=os.path.join('images', 'logs'), filepath=os.path.abspath('1'))
loaded_args, compression, _ = utils.load_model('experiments/hific_low.pt', logger, device, model_mode=ModelModes.EVALUATION,
    current_args_d=None, prediction=True, strict=False)
print('logger done')

#compression.Hyperprior.hyperprior_entropy_model.build_tables()

Setting up Perceptual loss...


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to C:\Users\user/.cache\torch\hub\checkpoints\alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:26<00:00, 9.39MB/s] 
21:26:07 INFO - logger_setup: d:\Documents\project\thesis\thesis\1


Loading model from: d:\Documents\project\thesis\thesis\src\loss\perceptual_similarity\weights\v0.1\alex.pth
...[net-lin [alex]] initialized
...Done
cpu
Building prior probability tables...


100%|██████████| 64/64 [00:00<00:00, 84.56it/s] 


Setting up Perceptual loss...
Loading model from: d:\Documents\project\thesis\thesis\src\loss\perceptual_similarity\weights\v0.1\alex.pth
...[net-lin [alex]] initialized
...Done


21:26:42 INFO - load_model: Loading model ...
21:26:42 INFO - load_model: MODEL TYPE: compression_gan
21:26:42 INFO - load_model: MODEL MODE: evaluation
21:26:42 INFO - load_model: Model(
  (Encoder): Encoder(
    (pre_pad): ReflectionPad2d((3, 3, 3, 3))
    (asymmetric_pad): ReflectionPad2d((0, 1, 1, 0))
    (post_pad): ReflectionPad2d((1, 1, 1, 1))
    (conv_block1): Sequential(
      (0): ReflectionPad2d((3, 3, 3, 3))
      (1): Conv2d(3, 60, kernel_size=(7, 7), stride=(1, 1))
      (2): ChannelNorm2D()
      (3): ReLU()
    )
    (conv_block2): Sequential(
      (0): ReflectionPad2d((0, 1, 1, 0))
      (1): Conv2d(60, 120, kernel_size=(3, 3), stride=(2, 2), padding_mode=reflect)
      (2): ChannelNorm2D()
      (3): ReLU()
    )
    (conv_block3): Sequential(
      (0): ReflectionPad2d((0, 1, 1, 0))
      (1): Conv2d(120, 240, kernel_size=(3, 3), stride=(2, 2), padding_mode=reflect)
      (2): ChannelNorm2D()
      (3): ReLU()
    )
    (conv_block4): Sequential(
      (0): Reflect

logger done


In [9]:
# Input size: [2, 220, 8, 8]

input = torch.rand(1, 220, 8, 8)

result = compression.Generator(input)
summary(compression.Generator, (220, 8, 8), 1, device='cpu')


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
     ChannelNorm2D-1             [1, 220, 8, 8]               0
   ReflectionPad2d-2           [1, 220, 10, 10]               0
   ReflectionPad2d-3           [1, 220, 10, 10]               0
            Conv2d-4             [1, 960, 8, 8]       1,901,760
     ChannelNorm2D-5             [1, 960, 8, 8]               0
   ReflectionPad2d-6           [1, 960, 10, 10]               0
            Conv2d-7             [1, 960, 8, 8]       8,295,360
     ChannelNorm2D-8             [1, 960, 8, 8]               0
   ReflectionPad2d-9           [1, 960, 10, 10]               0
           Conv2d-10             [1, 960, 8, 8]       8,295,360
    ChannelNorm2D-11             [1, 960, 8, 8]               0
    ResidualBlock-12             [1, 960, 8, 8]               0
  ReflectionPad2d-13           [1, 960, 10, 10]               0
           Conv2d-14             [1, 96

In [10]:
summary(compression, (3, 128, 128), 2, device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ReflectionPad2d-1           [2, 3, 134, 134]               0
   ReflectionPad2d-2           [2, 3, 134, 134]               0
            Conv2d-3          [2, 60, 128, 128]           8,880
     ChannelNorm2D-4          [2, 60, 128, 128]               0
              ReLU-5          [2, 60, 128, 128]               0
   ReflectionPad2d-6          [2, 60, 129, 129]               0
   ReflectionPad2d-7          [2, 60, 129, 129]               0
   ReflectionPad2d-8          [2, 60, 129, 129]               0
   ReflectionPad2d-9          [2, 60, 129, 129]               0
  ReflectionPad2d-10          [2, 60, 129, 129]               0
           Conv2d-11           [2, 120, 64, 64]          64,920
    ChannelNorm2D-12           [2, 120, 64, 64]               0
             ReLU-13           [2, 120, 64, 64]               0
  ReflectionPad2d-14           [2, 120,

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


TypeError: can't multiply sequence by non-int of type 'list'

In [None]:
summary(upsampler.model, (3, 128, 128), 1, device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [1, 64, 128, 128]           1,792
            Conv2d-2          [1, 32, 128, 128]          18,464
         LeakyReLU-3          [1, 32, 128, 128]               0
            Conv2d-4          [1, 32, 128, 128]          27,680
         LeakyReLU-5          [1, 32, 128, 128]               0
            Conv2d-6          [1, 32, 128, 128]          36,896
         LeakyReLU-7          [1, 32, 128, 128]               0
            Conv2d-8          [1, 32, 128, 128]          46,112
         LeakyReLU-9          [1, 32, 128, 128]               0
           Conv2d-10          [1, 64, 128, 128]         110,656
ResidualDenseBlock-11          [1, 64, 128, 128]               0
           Conv2d-12          [1, 32, 128, 128]          18,464
        LeakyReLU-13          [1, 32, 128, 128]               0
           Conv2d-14          [1, 32, 

In [11]:
compression.train(False)
compression(torch.rand((16, 3, 128, 128)))

(tensor([[[[0.7201, 0.7382, 0.7393,  ..., 0.7338, 0.7193, 0.7259],
           [0.7143, 0.7198, 0.7212,  ..., 0.7274, 0.7201, 0.7341],
           [0.7176, 0.7298, 0.7260,  ..., 0.7360, 0.7179, 0.7346],
           ...,
           [0.7763, 0.7779, 0.7661,  ..., 0.7281, 0.7511, 0.7639],
           [0.7703, 0.7638, 0.7451,  ..., 0.7346, 0.7423, 0.7643],
           [0.7617, 0.7592, 0.7430,  ..., 0.7546, 0.7471, 0.7668]],
 
          [[0.7550, 0.7638, 0.7597,  ..., 0.7348, 0.7218, 0.7184],
           [0.7441, 0.7400, 0.7376,  ..., 0.7182, 0.7135, 0.7192],
           [0.7437, 0.7480, 0.7374,  ..., 0.7219, 0.7080, 0.7198],
           ...,
           [0.7594, 0.7492, 0.7375,  ..., 0.7098, 0.7295, 0.7316],
           [0.7596, 0.7451, 0.7266,  ..., 0.7154, 0.7159, 0.7251],
           [0.7531, 0.7498, 0.7340,  ..., 0.7366, 0.7112, 0.7150]],
 
          [[0.7413, 0.7609, 0.7575,  ..., 0.7574, 0.7380, 0.7422],
           [0.7231, 0.7309, 0.7265,  ..., 0.7483, 0.7332, 0.7423],
           [0.7188, 0.73

In [12]:
class Model(torch.nn.Module):
    def __init__(self, compression, upsampler):
        super().__init__()
        self.compression = compression
        self.upsampler = upsampler
    
    def forward(self, x):
        y, loss = self.compression(x)
        y = self.upsampler(y)
        return y

    def train(self, train=True):
        self.compression.train(False)
        self.compression.Generator.train(train)
        self.upsampler.train(train)

In [13]:
m = Model(compression, upsampler.model)

In [14]:
x = torch.rand(1, 3, 128, 128)
result = m(x)

print(result.shape)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

### Remarks
- Conv2D with 2x2 padding that's equivalent to Conv2DTranspose with no padding.
- Checkerboard artifacts can start to become an issue when using strides (even after stacking multiple layers).

### Things to try: 

1. To avoid checkerboard artifacts, an alternative upsampling method that’s gaining popularity is to apply classical upsampling followed by a regular convolution (that preserves the spatial dimensions).

In [17]:
import torch
import torch.nn.functional as F
from src.normalisation import channel, instance

class ResidualBlock(torch.nn.Module):
    def __init__(self, input_dims, kernel_size=3, stride=1, 
                 channel_norm=True, activation='relu'):
        """
        input_dims: Dimension of input tensor (B,C,H,W)
        """
        super(ResidualBlock, self).__init__()

        self.activation = getattr(F, activation)
        in_channels = input_dims[1]
        norm_kwargs = dict(momentum=0.1, affine=True, track_running_stats=False)

        if channel_norm is True:
            self.interlayer_norm = channel.ChannelNorm2D_wrap
        else:
            self.interlayer_norm = instance.InstanceNorm2D_wrap

        pad_size = int((kernel_size-1)/2)
        self.pad = torch.nn.ReflectionPad2d(pad_size)
        self.conv1 = torch.nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride)
        self.conv2 = torch.nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride)
        self.norm1 = self.interlayer_norm(in_channels, **norm_kwargs)
        self.norm2 = self.interlayer_norm(in_channels, **norm_kwargs)

    def forward(self, x):
        identity_map = x
        res = self.pad(x)
        res = self.conv1(res)
        res = self.norm1(res) 
        res = self.activation(res)

        res = self.pad(res)
        res = self.conv2(res)
        res = self.norm2(res)

        return torch.add(res, identity_map)

class Upsampler(torch.nn.Module):

    def __init__(self, input_dims, batch_size, C=220, activation='relu',
                 n_residual_blocks=8, channel_norm=True, sample_noise=False,
                 noise_dim=32, silent=True):
        super(Upsampler, self).__init__()
        self.silent = silent

        kernel_dim = 3
        filters = [960, 480, 240, 120, 60]
        self.n_residual_blocks = n_residual_blocks
        self.sample_noise = sample_noise
        self.noise_dim = noise_dim

        # Layer / normalization options
        cnn_kwargs = dict(stride=2, padding=1, output_padding=1)
        norm_kwargs = dict(momentum=0.1, affine=True, track_running_stats=False)
        activation_d = dict(relu='ReLU', elu='ELU', leaky_relu='LeakyReLU')
        self.activation = getattr(torch.nn, activation_d[activation])  # (leaky_relu, relu, elu)
        self.n_upsampling_layers = 4
        
        if channel_norm is True:
            self.interlayer_norm = channel.ChannelNorm2D_wrap
        else:
            self.interlayer_norm = instance.InstanceNorm2D_wrap

        self.pre_pad = torch.nn.ReflectionPad2d(1)
        self.asymmetric_pad = torch.nn.ReflectionPad2d((0,1,1,0))  # Slower than tensorflow?
        self.post_pad = torch.nn.ReflectionPad2d(3)

        H0, W0 = input_dims[1:]
        heights = [2**i for i in range(5,9)]
        widths = heights
        H1, H2, H3, H4 = heights
        W1, W2, W3, W4 = widths 


        # (16,16) -> (16,16), with implicit padding
        self.conv_block_init = torch.nn.Sequential(
            self.interlayer_norm(C, **norm_kwargs),
            self.pre_pad,
            torch.nn.Conv2d(C, filters[0], kernel_size=(3,3), stride=1),
            self.interlayer_norm(filters[0], **norm_kwargs),
        )

        if sample_noise is True:
            # Concat noise with latent representation
            filters[0] += self.noise_dim

        for m in range(n_residual_blocks):
            resblock_m = ResidualBlock(input_dims=(batch_size, filters[0], H0, W0), 
                channel_norm=channel_norm, activation=activation)
            self.add_module(f'resblock_{str(m)}', resblock_m)
        
        self.upconv_block1 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(filters[0], filters[1], kernel_dim, **cnn_kwargs),
            self.interlayer_norm(filters[1], **norm_kwargs),
            self.activation(),
        )
        self.upconv_block2 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(filters[1], filters[2], kernel_dim, **cnn_kwargs),
            self.interlayer_norm(filters[2], **norm_kwargs),
            self.activation(),
        )
        self.upconv_block3 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(filters[2], filters[3], kernel_dim, **cnn_kwargs),
            self.interlayer_norm(filters[3], **norm_kwargs),
            self.activation(),
        )
        self.upconv_block4 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(filters[3], filters[4], kernel_dim, **cnn_kwargs),
            self.interlayer_norm(filters[4], **norm_kwargs),
            self.activation(),
        )
        self.conv_block_out = torch.nn.Sequential(
            self.post_pad,
            torch.nn.Conv2d(filters[-1], 3, kernel_size=(7,7), stride=1),
        )

    def forward(self, x):
        if not self.silent:
            print(f'{"INPUT : ": <15}', x.shape)
        head = self.conv_block_init(x)
        if not self.silent:
            print(f'{"INITIAL_CONV : ": <15}', head.shape)

        if self.sample_noise is True:
            B, C, H, W = tuple(head.size())
            z = torch.randn((B, self.noise_dim, H, W)).to(head)
            head = torch.cat((head,z), dim=1)

        for m in range(self.n_residual_blocks):
            resblock_m = getattr(self, f'resblock_{str(m)}')
            if m == 0:
                x = resblock_m(head)
            else:
                x = resblock_m(x)
            if not self.silent:
                print(f'{f"RESIDUAL_{m}": <15}', x.shape)
        
        x += head
        x = self.upconv_block1(x)
        if not self.silent:
            print(f'{"UPCONV_1 : ": <15}', x.shape)
        x = self.upconv_block2(x)
        if not self.silent:
            print(f'{"UPCONV_2 : ": <15}', x.shape)
        x = self.upconv_block3(x)
        if not self.silent:
            print(f'{"UPCONV_3 : ": <15}', x.shape)
        x = torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')
        if not self.silent:
            print(f'{"BILINEAR : ": <15}', x.shape)
        x = self.upconv_block4(x)
        if not self.silent:
            print(f'{"UPCONV_4 : ": <15}', x.shape)
        out = self.conv_block_out(x)
        if not self.silent:
            print(f'{"UPCONV_5 : ": <15}', out.shape)

        return out

In [5]:
upsampler = Upsampler([220, 8, 8], 2)

input = torch.rand(2, 220, 8, 8)
output = upsampler(input)

print(output.shape)

INPUT :         torch.Size([2, 220, 8, 8])
INITIAL_CONV :  torch.Size([2, 960, 8, 8])
RESIDUAL_0      torch.Size([2, 960, 8, 8])
RESIDUAL_1      torch.Size([2, 960, 8, 8])
RESIDUAL_2      torch.Size([2, 960, 8, 8])
RESIDUAL_3      torch.Size([2, 960, 8, 8])
RESIDUAL_4      torch.Size([2, 960, 8, 8])
RESIDUAL_5      torch.Size([2, 960, 8, 8])
RESIDUAL_6      torch.Size([2, 960, 8, 8])
RESIDUAL_7      torch.Size([2, 960, 8, 8])
UPCONV_1 :      torch.Size([2, 480, 16, 16])
UPCONV_2 :      torch.Size([2, 240, 32, 32])
UPCONV_3 :      torch.Size([2, 120, 64, 64])
BILINEAR :      torch.Size([2, 120, 128, 128])
UPCONV_4 :      torch.Size([2, 60, 256, 256])
UPCONV_5 :      torch.Size([2, 3, 256, 256])
torch.Size([2, 3, 256, 256])


In [17]:
summary(upsampler, (220, 8, 8), 2, device='cpu')

INPUT :         torch.Size([2, 220, 8, 8])
INITIAL_CONV :  torch.Size([2, 960, 8, 8])
RESIDUAL_0      torch.Size([2, 960, 8, 8])
RESIDUAL_1      torch.Size([2, 960, 8, 8])
RESIDUAL_2      torch.Size([2, 960, 8, 8])
RESIDUAL_3      torch.Size([2, 960, 8, 8])
RESIDUAL_4      torch.Size([2, 960, 8, 8])
RESIDUAL_5      torch.Size([2, 960, 8, 8])
RESIDUAL_6      torch.Size([2, 960, 8, 8])
RESIDUAL_7      torch.Size([2, 960, 8, 8])
UPCONV_1 :      torch.Size([2, 480, 16, 16])
UPCONV_2 :      torch.Size([2, 240, 32, 32])
UPCONV_3 :      torch.Size([2, 120, 64, 64])
BILINEAR :      torch.Size([2, 120, 128, 128])
UPCONV_4 :      torch.Size([2, 60, 256, 256])
UPCONV_5 :      torch.Size([2, 3, 256, 256])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
     ChannelNorm2D-1             [2, 220, 8, 8]               0
   ReflectionPad2d-2           [2, 220, 10, 10]               0
   ReflectionPad2d-3           [2, 220, 

Try using interpolation to upscale feature maps. Need to match shapes.

In [18]:
import torch
from torch.nn import functional as F

shape = (2, 220, 8, 8)

input = torch.rand(shape)
output1 = F.interpolate(input, scale_factor=2, mode='nearest')

print(input.shape, output1.shape)

torch.Size([2, 220, 8, 8]) torch.Size([2, 220, 16, 16])


### Load model from checkpoint and modify structure of Generator

In [18]:
# Compression
import torch
from src.helpers import utils
from compress import make_deterministic
from src.loss.perceptual_similarity import perceptual_loss as ps
from default_config import ModelModes

# Reproducibility
make_deterministic()
perceptual_loss_fn = ps.PerceptualLoss(model='net-lin', net='alex', use_gpu=torch.cuda.is_available())

# Load model
device = torch.device('cpu')#utils.get_device()
logger = utils.logger_setup(logpath=os.path.join('images', 'logs'), filepath=os.path.abspath('1'))
loaded_args, compression, _ = utils.load_model('experiments/hific_low.pt', logger, device, model_mode=ModelModes.EVALUATION,
    current_args_d=None, prediction=True, strict=False, silent=True)

#compression.Hyperprior.hyperprior_entropy_model.build_tables()

Setting up Perceptual loss...
Loading model from: d:\Documents\project\thesis\thesis\src\loss\perceptual_similarity\weights\v0.1\alex.pth


15:21:22 INFO - logger_setup: d:\Documents\project\thesis\thesis\1
15:21:22 INFO - logger_setup: d:\Documents\project\thesis\thesis\1


...[net-lin [alex]] initialized
...Done
Building prior probability tables...


100%|██████████| 64/64 [00:02<00:00, 22.89it/s]


Setting up Perceptual loss...
Loading model from: d:\Documents\project\thesis\thesis\src\loss\perceptual_similarity\weights\v0.1\alex.pth
...[net-lin [alex]] initialized
...Done


15:22:08 INFO - load_model: Loading model ...
15:22:08 INFO - load_model: Loading model ...
15:22:08 INFO - load_model: Estimated model size (under fp32): 593.146 MB
15:22:08 INFO - load_model: Estimated model size (under fp32): 593.146 MB
15:22:08 INFO - load_model: Model init 46.549s
15:22:08 INFO - load_model: Model init 46.549s


In [19]:
upsampler = Upsampler((220, 8, 8), 1)

In [20]:
compression.train(False)
compression.Generator = upsampler
compression.Generator.train(True)

Upsampler(
  (pre_pad): ReflectionPad2d((1, 1, 1, 1))
  (asymmetric_pad): ReflectionPad2d((0, 1, 1, 0))
  (post_pad): ReflectionPad2d((3, 3, 3, 3))
  (conv_block_init): Sequential(
    (0): ChannelNorm2D()
    (1): ReflectionPad2d((1, 1, 1, 1))
    (2): Conv2d(220, 960, kernel_size=(3, 3), stride=(1, 1))
    (3): ChannelNorm2D()
  )
  (resblock_0): ResidualBlock(
    (pad): ReflectionPad2d((1, 1, 1, 1))
    (conv1): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1))
    (conv2): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1))
    (norm1): ChannelNorm2D()
    (norm2): ChannelNorm2D()
  )
  (resblock_1): ResidualBlock(
    (pad): ReflectionPad2d((1, 1, 1, 1))
    (conv1): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1))
    (conv2): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1))
    (norm1): ChannelNorm2D()
    (norm2): ChannelNorm2D()
  )
  (resblock_2): ResidualBlock(
    (pad): ReflectionPad2d((1, 1, 1, 1))
    (conv1): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1))


In [21]:
input = torch.rand((1, 3, 128, 128))
output = compression(input)

print(f'OUTPUT SHAPE : {output[0].shape}')

OUTPUT SHAPE : torch.Size([1, 3, 128, 128])
