In [2]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torchinfo import summary

import easydict

from network_module import *


In [2]:
# Experiment options collected in a single EasyDict.
opt = easydict.EasyDict(dict(
    data_dir='../dataset',
    input_length=220500,
    image_height=1025,
    image_width=431,
    bbox_shape=120,
    mask_type='time_masking',
    in_channels=2,
    out_channels=1,
    latent_channels=32,
    pad_type='zero',
    activation='lrelu',
    norm='in',
    init_type='xavier',
    init_gain=0.02,
    stage_num=1,
    batch_size=4,
))

In [15]:
class jj_Discriminator(nn.Module):
    """Convolutional discriminator that maps an image to a one-channel score map.

    Layer stack, in forward order:
      * ``conv1``: (1, 4) kernel with stride (1, 2), so only the width is reduced.
      * ``conv2_<n>`` / ``norm_<n>`` for n in 1..n_layers-1: 3x3 convolutions with
        stride 2; the channel multiplier doubles each time and is capped at 8.
      * ``conv3`` / ``norm3``: 3x3, stride 1, to ``ndf * min(2**n_layers, 8)`` channels.
      * ``conv4``: 3x3, stride 1, down to a single channel.
    Every convolution except ``conv4`` is followed by ``norm_layer`` and
    ``LeakyReLU(0.2)``.

    Args:
        input_nc: number of channels of the input tensor.
        ndf: number of filters produced by ``conv1``.
        n_layers: depth control; ``n_layers - 1`` stride-2 blocks are created.
        norm_layer: callable that builds a normalisation layer from a channel count.
        use_sigmoid: if True the score map is squashed with a sigmoid; if False
            the raw ``conv4`` output is returned.
    """

    def __init__(self, input_nc=1, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=True):
        # Zero-argument super() keeps working even if the class name is later
        # rebound (e.g. by re-running or redefining the cell in a notebook).
        super().__init__()
        self.n_layers = n_layers
        # Honour the caller's choice; forward() must only touch self.sig when it exists.
        self.use_sigmoid = use_sigmoid
        # Convolutions get a bias only when the norm layer is exactly InstanceNorm2d.
        use_bias = norm_layer == nn.InstanceNorm2d
        self.relu = nn.LeakyReLU(0.2, True)

        self.conv1 = nn.Conv2d(input_nc, ndf, kernel_size=(1, 4), stride=(1, 2),
                               padding=(0, 1), bias=use_bias)
        self.bn1 = norm_layer(ndf)

        # Stride-2 blocks; module names are kept stable so saved state_dicts still load.
        nf_mult = 1
        for n in range(1, n_layers):
            nf_mult_prev = nf_mult
            nf_mult = min(2 ** n, 8)
            self.add_module('conv2_' + str(n),
                            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
                                      kernel_size=(3, 3), stride=2, padding=1, bias=use_bias))
            self.add_module('norm_' + str(n), norm_layer(ndf * nf_mult))

        nf_mult_prev = nf_mult
        nf_mult = min(2 ** n_layers, 8)
        self.conv3 = nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
                               kernel_size=3, stride=1, padding=1, bias=use_bias)
        self.norm3 = norm_layer(ndf * nf_mult)
        self.conv4 = nn.Conv2d(ndf * nf_mult, 1,
                               kernel_size=3, stride=1, padding=1, bias=use_bias)
        if use_sigmoid:
            self.sig = nn.Sigmoid()

    def forward(self, input):
        """Return the score map for ``input`` (a tensor with ``input_nc`` channels)."""
        net = self.relu(self.bn1(self.conv1(input)))
        for n in range(1, self.n_layers):
            net = getattr(self, 'conv2_' + str(n))(net)
            net = getattr(self, 'norm_' + str(n))(net)
            net = self.relu(net)
        net = self.relu(self.norm3(self.conv3(net)))
        net = self.conv4(net)
        if self.use_sigmoid:
            net = self.sig(net)
        return net

In [16]:
# Build the discriminator with its default arguments and summarise it on CPU
# for an input of shape (4, 1, 1024, 428).
discriminator = jj_Discriminator()
summary(discriminator, input_size=(4, 1, 1024, 428), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
jj_Discriminator                         --                        --
├─Conv2d: 1-1                            [4, 64, 1024, 214]        256
├─BatchNorm2d: 1-2                       [4, 64, 1024, 214]        128
├─LeakyReLU: 1-3                         [4, 64, 1024, 214]        --
├─Conv2d: 1-4                            [4, 128, 512, 107]        73,728
├─BatchNorm2d: 1-5                       [4, 128, 512, 107]        256
├─LeakyReLU: 1-6                         [4, 128, 512, 107]        --
├─Conv2d: 1-7                            [4, 256, 256, 54]         294,912
├─BatchNorm2d: 1-8                       [4, 256, 256, 54]         512
├─LeakyReLU: 1-9                         [4, 256, 256, 54]         --
├─Conv2d: 1-10                           [4, 512, 256, 54]         1,179,648
├─BatchNorm2d: 1-11                      [4, 512, 256, 54]         1,024
├─LeakyReLU: 1-12                        [4, 512, 256, 54]    

In [47]:
class jj_Discriminator(nn.Module):
    """Mask-conditioned convolutional discriminator producing one score per sample.

    The input and its mask are concatenated along the channel axis, passed
    through the convolutional stack below, flattened, and reduced to a single
    value per sample by a linear layer.

    Layer stack, in forward order:
      * ``conv1``: (1, 4) kernel with stride (1, 2), so only the width is reduced.
      * ``conv2_<n>`` / ``norm_<n>`` for n in 1..n_layers-1: 3x3 convolutions with
        stride 2; the channel multiplier doubles each time and is capped at 8.
      * ``conv3`` / ``norm3``: 3x3, stride 1, to ``ndf * min(2**n_layers, 8)`` channels.
      * ``conv4``: 3x3, stride 1, down to a single channel.
      * ``fc1``: linear layer from the flattened ``conv4`` map to one value.

    Args:
        input_nc: channel count of the concatenated (input + mask) tensor.
        ndf: number of filters produced by ``conv1``.
        n_layers: depth control; ``n_layers - 1`` stride-2 blocks are created.
        norm_layer: callable that builds a normalisation layer from a channel count.
        use_sigmoid: if True the final value is squashed with a sigmoid; if False
            the raw ``fc1`` output is returned.
        fc_in_features: number of elements in the flattened ``conv4`` output for
            one sample. The default, ``256 * 54``, is the size obtained for
            1024x428 inputs with the default ``n_layers``; pass a different value
            when feeding other input sizes or depths.
    """

    def __init__(self, input_nc=2, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d,
                 use_sigmoid=True, fc_in_features=256 * 54):
        # Zero-argument super() avoids referring to the class by name, which
        # previously pointed at a non-existent ``jj_Discriminator2``.
        super().__init__()
        self.n_layers = n_layers
        # Honour the caller's choice; forward() must only touch self.sig when it exists.
        self.use_sigmoid = use_sigmoid
        # Convolutions get a bias only when the norm layer is exactly InstanceNorm2d.
        use_bias = norm_layer == nn.InstanceNorm2d
        self.relu = nn.LeakyReLU(0.2, True)

        self.conv1 = nn.Conv2d(input_nc, ndf, kernel_size=(1, 4), stride=(1, 2),
                               padding=(0, 1), bias=use_bias)
        self.bn1 = norm_layer(ndf)

        # Stride-2 blocks; module names are kept stable so saved state_dicts still load.
        nf_mult = 1
        for n in range(1, n_layers):
            nf_mult_prev = nf_mult
            nf_mult = min(2 ** n, 8)
            self.add_module('conv2_' + str(n),
                            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
                                      kernel_size=(3, 3), stride=2, padding=1, bias=use_bias))
            self.add_module('norm_' + str(n), norm_layer(ndf * nf_mult))

        nf_mult_prev = nf_mult
        nf_mult = min(2 ** n_layers, 8)
        self.conv3 = nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
                               kernel_size=3, stride=1, padding=1, bias=use_bias)
        self.norm3 = norm_layer(ndf * nf_mult)
        self.conv4 = nn.Conv2d(ndf * nf_mult, 1,
                               kernel_size=3, stride=1, padding=1, bias=use_bias)
        self.fc1 = nn.Linear(fc_in_features, 1)
        if use_sigmoid:
            self.sig = nn.Sigmoid()

    def forward(self, input, mask):
        """Score ``input`` given ``mask``; returns a tensor of shape (batch, 1).

        ``input`` and ``mask`` are concatenated on dim 1, so together they must
        provide ``input_nc`` channels and agree on every other dimension.
        """
        batch_size = input.shape[0]
        net = torch.cat([input, mask], 1)
        net = self.relu(self.bn1(self.conv1(net)))
        for n in range(1, self.n_layers):
            net = getattr(self, 'conv2_' + str(n))(net)
            net = getattr(self, 'norm_' + str(n))(net)
            net = self.relu(net)
        net = self.relu(self.norm3(self.conv3(net)))
        net = self.conv4(net)
        # Flatten the one-channel map so the linear layer sees one row per sample.
        net = net.view(batch_size, -1)
        net = self.fc1(net)
        if self.use_sigmoid:
            net = self.sig(net)
        return net

In [48]:
# Build the two-input discriminator with its default arguments and summarise it
# on CPU; both inputs (image and mask) use the shape (4, 1, 1024, 428).
discriminator = jj_Discriminator()
summary(discriminator, input_size=[(4, 1, 1024, 428)] * 2, device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
jj_Discriminator2                        --                        --
├─Conv2d: 1-1                            [4, 64, 1024, 214]        512
├─BatchNorm2d: 1-2                       [4, 64, 1024, 214]        128
├─LeakyReLU: 1-3                         [4, 64, 1024, 214]        --
├─Conv2d: 1-4                            [4, 128, 512, 107]        73,728
├─BatchNorm2d: 1-5                       [4, 128, 512, 107]        256
├─LeakyReLU: 1-6                         [4, 128, 512, 107]        --
├─Conv2d: 1-7                            [4, 256, 256, 54]         294,912
├─BatchNorm2d: 1-8                       [4, 256, 256, 54]         512
├─LeakyReLU: 1-9                         [4, 256, 256, 54]         --
├─Conv2d: 1-10                           [4, 512, 256, 54]         1,179,648
├─BatchNorm2d: 1-11                      [4, 512, 256, 54]         1,024
├─LeakyReLU: 1-12                        [4, 512, 256, 54]    

In [48]:
# Random (4, 1) tensor used as a scratch value in the next cell.
a = torch.rand(4, 1)

In [50]:
# Display the mean of `a` computed via the function form and the method form.
(torch.mean(a), a.mean())

(tensor(0.5344), tensor(0.5344))

In [35]:

class Discriminator(nn.Module):
    """Four-layer strided-convolution discriminator ending in a single probability."""

    def __init__(self, img_size, dim):
        """
        img_size : (int, int, int)
            (height, width, channels), e.g. (32, 32, 1) or (64, 128, 3). The
            last number is the channel count, e.g. 1 for grayscale or 3 for
            RGB. Height and width must each be at least 16; they no longer
            need to be powers of 2.
        dim : int
            Number of filters of the first convolution; each following
            convolution doubles it, ending at ``8 * dim``.

        Raises
        ------
        ValueError
            If height or width is smaller than 16, because four stride-2
            convolutions would leave no spatial positions.
        """
        super().__init__()

        self.img_size = img_size
        height, width, channels = img_size[0], img_size[1], img_size[2]
        if height < 16 or width < 16:
            raise ValueError(
                "img_size height and width must be >= 16, got %r x %r" % (height, width)
            )

        # NOTE(review): the trailing Sigmoid squashes the features before the
        # linear layer; it is kept as in the original — confirm it is intended.
        self.image_to_features = nn.Sequential(
            nn.Conv2d(channels, dim, 4, 2, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(dim, 2 * dim, 4, 2, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(2 * dim, 4 * dim, 4, 2, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(4 * dim, 8 * dim, 4, 2, 1),
            nn.Sigmoid()
        )

        # Each kernel-4 / stride-2 / padding-1 convolution maps a side of
        # length n to floor(n / 2), so four of them give floor(n / 16).
        # Floor division keeps the flattened size exact for sides that are not
        # multiples of 16 (true division would overestimate it).
        output_size = 8 * dim * (height // 16) * (width // 16)
        self.features_to_prob = nn.Sequential(
            nn.Linear(output_size, 1),
            nn.Sigmoid()
        )

    def forward(self, input_data):
        """Return a (batch, 1) tensor of sigmoid outputs for ``input_data``."""
        batch_size = input_data.size()[0]
        x = self.image_to_features(input_data)
        # One flattened feature row per sample for the linear head.
        x = x.view(batch_size, -1)
        return self.features_to_prob(x)

In [38]:
# 256x256 single-channel images, 32 filters in the first convolution.
discriminator = Discriminator([256, 256, 1], dim=32)

In [41]:
# Summarise the model on CPU for an input of shape (4, 1, 256, 256).
summary(discriminator, input_size=(4, 1, 256, 256), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
Discriminator                            --                        --
├─Sequential: 1-1                        [4, 256, 16, 16]          --
│    └─Conv2d: 2-1                       [4, 32, 128, 128]         544
│    └─LeakyReLU: 2-2                    [4, 32, 128, 128]         --
│    └─Conv2d: 2-3                       [4, 64, 64, 64]           32,832
│    └─LeakyReLU: 2-4                    [4, 64, 64, 64]           --
│    └─Conv2d: 2-5                       [4, 128, 32, 32]          131,200
│    └─LeakyReLU: 2-6                    [4, 128, 32, 32]          --
│    └─Conv2d: 2-7                       [4, 256, 16, 16]          524,544
│    └─Sigmoid: 2-8                      [4, 256, 16, 16]          --
├─Sequential: 1-2                        [4, 1]                    --
│    └─Linear: 2-9                       [4, 1]                    65,537
│    └─Sigmoid: 2-10                     [4, 1]                   