### GANomaly Architecture
<hr/> 

##### As the input resolution increases, the network size grows as well!

ex.1) resolution (width = height): 64 -> parameter size: 40 MB  
ex.2) resolution (width = height): 128 -> parameter size: 140 MB

![GANomaly architecture](https://raw.githubusercontent.com/openvinotoolkit/anomalib/main/docs/source/images/ganomaly/architecture.jpg)

Load Library

In [1]:
import torch
import torch.nn as nn
from torchsummary import summary
import numpy as np
from collections import OrderedDict
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

<< Important Formula >>

- Convolution Shape Formula: $\lfloor (n + 2p - f) / s \rfloor + 1$
- Deconvolution (transposed convolution) Shape Formula: $s(n - 1) + f - 2p$

* n: input size
* p: padding
* s: stride
* f: kernel size

GANomaly Encoder

In [2]:
class Encoder(nn.Module):
    """Convolutional encoder: (icnl x isize x isize) -> (z_length x 1 x 1).

    A first stride-2 convolution maps the input to 64 channels at half the
    resolution. Further stride-2 convolutions halve the resolution and double
    the channel count until the feature map is 4x4, and a final 4x4
    convolution collapses it to a (z_length x 1 x 1) tensor.

    Args:
        isize: input height/width. Must be a power of two >= 8
            (8, 16, 32, 64, 128, ...), otherwise the feature map never
            reaches exactly 4x4 and the last convolution cannot be applied.
        icnl: number of input channels.
        z_length: length of the latent vector z (output channels).

    Raises:
        ValueError: if isize is not a power of two >= 8.
    """

    def __init__(self, isize, icnl, z_length):
        super().__init__()

        # Fail fast: with any other size the repeated halving skips 4x4 and
        # the final kernel_size=4 convolution would only fail at forward time.
        size = int(isize)
        if size != isize or size < 8 or size & (size - 1):
            raise ValueError(
                'isize must be a power of two >= 8 (8, 16, 32, 64, ...), got {!r}'.format(isize))

        def CBLR2d(in_channels,out_channels,kernel_size,stride=1,padding=0,bias=True,batchnorm=True):
            """Conv2d -> (optional BatchNorm2d) -> LeakyReLU(0.2) block."""
            layers=[]
            layers+=[nn.Conv2d(in_channels=in_channels,out_channels=out_channels,
            kernel_size=kernel_size,stride=stride,padding=padding,bias=bias)]
            if batchnorm:
                layers+=[nn.BatchNorm2d(num_features=out_channels)]
            layers+=[nn.LeakyReLU(0.2, inplace=True)]
            conv=nn.Sequential(*layers)
            return conv

        # make layers
        layers=nn.Sequential()
        layer_num=0

        # first layer (no batch norm)
        # (icnl x isize x isize) -> (fcnl x fsize x fsize) // fcnl = 64, fsize = isize / 2
        layers.add_module('layer_{}'.format(layer_num),
        CBLR2d(in_channels=icnl, out_channels=64, kernel_size=4, stride=2, padding=1, bias=False, batchnorm=False))
        # fsize: current feature-map size, fcnl: current feature channels
        fsize, fcnl = size // 2, 64
        layer_num+=1

        # other layers
        # (fcnl x fsize x fsize) -> (fcnl*2 x fsize/2 x fsize/2) until fsize becomes 4
        while fsize > 4:
            layers.add_module('layer_{}'.format(layer_num),
            CBLR2d(in_channels=fcnl, out_channels=fcnl*2, kernel_size=4, stride=2, padding=1, bias=False))
            fsize //= 2
            fcnl *= 2
            layer_num+=1

        # last layer: plain convolution, no normalization or activation
        # (fcnl x 4 x 4) -> (z_length x 1 x 1)
        layers.add_module('layer_{}'.format(layer_num),
        nn.Conv2d(in_channels=fcnl, out_channels=z_length, kernel_size=4, stride=1, padding=0, bias=False))

        self.layers=layers

    def forward(self,x):
        """Apply the layer stack to x and return the result."""
        output=self.layers(x)
        return output

GANomaly Decoder

In [5]:
class Decoder(nn.Module):
    """Transposed-convolution decoder: (z_length x 1 x 1) -> (ocnl x osize x osize).

    A first transposed convolution expands the latent vector to a 4x4 map
    with 64 * 2^(k-1) channels, where k = log2(osize / 4). Stride-2
    transposed convolutions then double the resolution and halve the channel
    count until the map is osize/2, and a final transposed convolution
    followed by Tanh produces the (ocnl x osize x osize) output.

    Args:
        osize: output height/width. Must be a power of two >= 8
            (8, 16, 32, 64, 128, ...); the stack can only produce sizes of
            the form 4 * 2^k, so any other value would yield a different
            output size than requested.
        ocnl: number of output channels.
        z_length: length of the latent vector z (input channels).

    Raises:
        ValueError: if osize is not a power of two >= 8.
    """

    def __init__(self, osize, ocnl, z_length):
        super().__init__()

        # Fail fast instead of silently building a decoder whose output size
        # differs from osize (the old float log2 was truncated by int()).
        size = int(osize)
        if size != osize or size < 8 or size & (size - 1):
            raise ValueError(
                'osize must be a power of two >= 8 (8, 16, 32, 64, ...), got {!r}'.format(osize))

        def CTBR2d(in_channels,out_channels,kernel_size,stride=1,padding=0,bias=True,batchnorm=True):
            """ConvTranspose2d -> (optional BatchNorm2d) -> ReLU block."""
            layers=[]
            layers+=[nn.ConvTranspose2d(in_channels=in_channels,out_channels=out_channels,
            kernel_size=kernel_size,stride=stride,padding=padding,bias=bias)]
            if batchnorm:
                layers+=[nn.BatchNorm2d(num_features=out_channels)]
            layers+=[nn.ReLU(inplace=True)]
            convT=nn.Sequential(*layers)
            return convT

        # make layers
        layers=nn.Sequential()
        layer_num=0

        # first layer
        # (z_length x 1 x 1) -> (64*{2^(k-1)} x 4 x 4) // k = log2(osize/4)
        k = (size // 4).bit_length() - 1  # exact integer log2 of a power of two
        out = 64 * 2 ** (k - 1)
        layers.add_module('layer_{}'.format(layer_num),
        CTBR2d(in_channels=z_length, out_channels=out, kernel_size=4, stride=1, padding=0, bias=False))
        # fsize: current feature-map size, fcnl: current feature channels
        fsize, fcnl = 4, out
        layer_num+=1

        # other layers
        # (fcnl x fsize x fsize) -> (fcnl/2 x fsize*2 x fsize*2) until fsize becomes osize/2
        while fsize < (size // 2):
            layers.add_module('layer_{}'.format(layer_num),
            CTBR2d(in_channels=fcnl, out_channels=fcnl//2, kernel_size=4, stride=2, padding=1, bias=False))
            fsize *= 2
            fcnl //= 2
            layer_num+=1

        # last layer: transposed convolution + Tanh, no batch norm
        # (fcnl x osize/2 x osize/2) -> (ocnl x osize x osize)
        layer=[]
        layer+=[nn.ConvTranspose2d(in_channels=fcnl,out_channels=ocnl,kernel_size=4,stride=2,padding=1,bias=False)]
        layer+=[nn.Tanh()]
        last_layer=nn.Sequential(*layer)
        layers.add_module('layer_{}'.format(layer_num),last_layer)

        self.layers=layers

    def forward(self,x):
        """Apply the layer stack to x and return the result."""
        output=self.layers(x)
        return output

Show Encoder 

(3x64x64 -> 100x1x1)

In [4]:
# Build a 3x64x64 -> 100x1x1 encoder on the selected device.
model = Encoder(isize=64, icnl=3, z_length=100).to(device)
# torchsummary's summary() prints the per-layer table itself, so it is not
# wrapped in print() (that only appended its return value to the output).
summary(model, (3, 64, 64))
# Show the module tree; print(model.parameters) printed a bound-method repr.
print(model)
# Input Shape: [-1, 3, 64, 64]

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           3,072
         LeakyReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3          [-1, 128, 16, 16]         131,072
       BatchNorm2d-4          [-1, 128, 16, 16]             256
         LeakyReLU-5          [-1, 128, 16, 16]               0
            Conv2d-6            [-1, 256, 8, 8]         524,288
       BatchNorm2d-7            [-1, 256, 8, 8]             512
         LeakyReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 512, 4, 4]       2,097,152
      BatchNorm2d-10            [-1, 512, 4, 4]           1,024
        LeakyReLU-11            [-1, 512, 4, 4]               0
           Conv2d-12            [-1, 100, 1, 1]         819,200
Total params: 3,576,576
Trainable params: 3,576,576
Non-trainable params: 0
---------------------------

Show Decoder

(100x1x1 -> 3x64x64)

In [27]:
# Build a 100x1x1 -> 3x64x64 decoder on the selected device.
model = Decoder(osize=64, ocnl=3, z_length=100).to(device)
# torchsummary's summary() prints the per-layer table itself, so it is not
# wrapped in print() (that only appended its return value to the output).
summary(model, (100, 1, 1))
# Show the module tree; print(model.parameters) printed a bound-method repr.
print(model)
# Input Shape: [-1, 100, 1, 1]

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1            [-1, 512, 4, 4]         819,200
       BatchNorm2d-2            [-1, 512, 4, 4]           1,024
              ReLU-3            [-1, 512, 4, 4]               0
   ConvTranspose2d-4            [-1, 256, 8, 8]       2,097,152
       BatchNorm2d-5            [-1, 256, 8, 8]             512
              ReLU-6            [-1, 256, 8, 8]               0
   ConvTranspose2d-7          [-1, 128, 16, 16]         524,288
       BatchNorm2d-8          [-1, 128, 16, 16]             256
              ReLU-9          [-1, 128, 16, 16]               0
  ConvTranspose2d-10           [-1, 64, 32, 32]         131,072
      BatchNorm2d-11           [-1, 64, 32, 32]             128
             ReLU-12           [-1, 64, 32, 32]               0
  ConvTranspose2d-13            [-1, 3, 64, 64]           3,072
             Tanh-14            [-1, 3,

Make Generator & Encoder

In [28]:
class NetG(nn.Module):
    """
    GENERATOR & ENCODER NETWORK

    Chains encoder -> decoder -> encoder: the input is encoded to a latent
    tensor, decoded back to an image, and that image is encoded again.
    `setting` must expose `isize`, `icnl` and `z_length`.
    """

    def __init__(self, setting):
        super().__init__()
        size, channels, latent = setting.isize, setting.icnl, setting.z_length
        # Registration order (encoder1, decoder, encoder2) is kept on purpose.
        self.encoder1 = Encoder(isize=size, icnl=channels, z_length=latent)
        self.decoder = Decoder(osize=size, ocnl=channels, z_length=latent)
        self.encoder2 = Encoder(isize=size, icnl=channels, z_length=latent)

    def forward(self, x):
        """Return (fake, z, z_carat): reconstruction, its latent, and the re-encoded latent."""
        latent = self.encoder1(x)
        reconstruction = self.decoder(latent)
        return reconstruction, latent, self.encoder2(reconstruction)

Make Discriminator

In [29]:
class NetD(nn.Module):
    """
    DISCRIMINATOR NETWORK

    Built from an Encoder with a single output channel: every layer but the
    last forms `features`, and the last layer followed by a Sigmoid forms
    `classifier`. `setting` must expose `isize` and `icnl`.
    """

    def __init__(self, setting):
        super().__init__()
        # z_length=1: one output value per image (one-class classification)
        backbone = Encoder(isize=setting.isize, icnl=setting.icnl, z_length=1)
        blocks = list(backbone.layers.children())
        head = blocks.pop()

        self.features = nn.Sequential(*blocks)
        self.classifier = nn.Sequential(head)
        self.classifier.add_module('Sigmoid', nn.Sigmoid())

    def forward(self, x):
        """Return (scores, features); scores are flattened to one value per sample."""
        feature_map = self.features(x)
        scores = self.classifier(feature_map)
        # collapse to a 1-D tensor of scores
        scores = scores.view(-1, 1).squeeze(1)
        return scores, feature_map

Make Setting

In [30]:
class setting:
    """
    Container for the values used to build the networks.

    isize: input_size <isize has to be a 16 x 2^i (i>0)>
    icnl: input_channel
    z_length: latent vector z length
    """

    def __init__(self, isize, icnl, z_length):
        self.isize, self.icnl, self.z_length = isize, icnl, z_length

    def show(self):
        """Print each stored value on its own line as '<name>: <value>'."""
        for field in ('isize', 'icnl', 'z_length'):
            print('{}:'.format(field), getattr(self, field))

In [31]:
# Settings shared by the NetG / NetD cells below: 64x64 input, 3 channels,
# latent vector of length 100.
my_setting=setting(isize=64,icnl=3,z_length=100)
# Print the stored values as a quick sanity check.
my_setting.show()

isize: 64
icnl: 3
z_length: 100


Show Generator & Encoder

(3x64x64 -> 100x1x1 -> 3x64x64 -> 100x1x1)

In [32]:
# Build the generator (encoder -> decoder -> encoder) on the selected device.
model = NetG(my_setting).to(device)
# torchsummary's summary() prints the per-layer table itself, so it is not
# wrapped in print() (that only appended its return value to the output).
summary(model, (3, 64, 64))
# Show the module tree; print(model.parameters) printed a bound-method repr.
print(model)
# start: [-1, 3, 64, 64]

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           3,072
         LeakyReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3          [-1, 128, 16, 16]         131,072
       BatchNorm2d-4          [-1, 128, 16, 16]             256
         LeakyReLU-5          [-1, 128, 16, 16]               0
            Conv2d-6            [-1, 256, 8, 8]         524,288
       BatchNorm2d-7            [-1, 256, 8, 8]             512
         LeakyReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 512, 4, 4]       2,097,152
      BatchNorm2d-10            [-1, 512, 4, 4]           1,024
        LeakyReLU-11            [-1, 512, 4, 4]               0
           Conv2d-12            [-1, 100, 1, 1]         819,200
          Encoder-13            [-1, 100, 1, 1]               0
  ConvTranspose2d-14            [-1, 51

Show Discriminator

(3x64x64 -> 1x1x1)

In [33]:
# Build the discriminator on the selected device.
model = NetD(my_setting).to(device)
# torchsummary's summary() prints the per-layer table itself, so it is not
# wrapped in print() (that only appended its return value to the output).
summary(model, (3, 64, 64))
# Show the module tree; print(model.parameters) printed a bound-method repr.
print(model)
# Input Shape: [-1, 3, 64, 64]

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           3,072
         LeakyReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3          [-1, 128, 16, 16]         131,072
       BatchNorm2d-4          [-1, 128, 16, 16]             256
         LeakyReLU-5          [-1, 128, 16, 16]               0
            Conv2d-6            [-1, 256, 8, 8]         524,288
       BatchNorm2d-7            [-1, 256, 8, 8]             512
         LeakyReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 512, 4, 4]       2,097,152
      BatchNorm2d-10            [-1, 512, 4, 4]           1,024
        LeakyReLU-11            [-1, 512, 4, 4]               0
           Conv2d-12              [-1, 1, 1, 1]           8,192
          Sigmoid-13              [-1, 1, 1, 1]               0
Total params: 2,765,568
Trainable param