In [92]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# [VGG](https://arxiv.org/pdf/1409.1556.pdf)

Implement VGG16, for that write specific `nn.Module`, `VGGBlock` implementing block of VGG.

In [93]:
class VGGBlock2(nn.Module):
    def __init__(self, in_channels, out_chanels):
        super(VGGBlock2, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_chanels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(out_chanels, out_chanels, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation= 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        return self.pool(x)

In [94]:
class VGGBlock3(nn.Module):
    def __init__(self, in_channels, out_chanels):
        super(VGGBlock3, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_chanels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(out_chanels, out_chanels, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(out_chanels, out_chanels, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation= 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return self.pool(x)

In [95]:
class VGG16(nn.Module):
    def __init__(self, in_channels=3):
        super(VGG16, self).__init__()

        self.main_branch = nn.Sequential(
            VGGBlock2(in_channels, 64),
            VGGBlock2(64, 128),
            VGGBlock3(128, 256),
            VGGBlock3(256, 512),
            VGGBlock3(512, 512),
        )

        self.linLayer1 = nn.Linear(25088, 4096)
        self.linLayer2 = nn.Linear(4096, 4096)
        self.linLayer3 = nn.Linear(4096, 1000)
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        x = self.main_branch(x)
        x = self.dropout(F.relu(self.linLayer1(x)))
        x = self.dropout(F.relu(self.linLayer2(x)))
        x = self.linLayer3(x)
        return F.softmax(x)

In [96]:
model = VGG16()
model

VGG16(
  (main_branch): Sequential(
    (0): VGGBlock2(
      (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): VGGBlock2(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): VGGBlock3(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (3): VGGBlock3(
      (conv1): Conv2d(25

# [GoogLeNet](https://arxiv.org/pdf/1409.4842.pdf)

## Inception module

Write specific `nn.Module` for Inception module.

In [97]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_chanels, **kwargs):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_chanels, **kwargs)
        self.bn = nn.BatchNorm2d(out_chanels)
        
    def forward(self, x):
        return F.relu(self.bn(self.conv(x)))

In [98]:
class InceptionModule(nn.Module):
    def __init__(
        self, 
        in_channels=256, 
        out_1x1=128,
        red_3x3=64,
        out_3x3=192,
        red_5x5=64,
        out_5x5=96,
        out_pool=64,
    ):
        super(InceptionModule, self).__init__()
        self.branch1 = ConvBlock(in_channels, out_1x1, kernel_size=1)
        self.branch2 = nn.Sequential(
            ConvBlock(in_channels, red_3x3, kernel_size=1, padding=0),
            ConvBlock(red_3x3, out_3x3, kernel_size=3, padding=1),
        )
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels, red_5x5, kernel_size=1),
            ConvBlock(red_5x5, out_5x5, kernel_size=5, padding=2),
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
            ConvBlock(in_channels, out_pool, kernel_size=1),
        )
    
    def forward(self, x):
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)

In [99]:
model = InceptionModule()
model

InceptionModule(
  (branch1): ConvBlock(
    (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
    (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (branch2): Sequential(
    (0): ConvBlock(
      (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ConvBlock(
      (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (branch3): Sequential(
    (0): ConvBlock(
      (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ConvBlock(
      (conv): Conv2d(64, 96, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_

## Stem network

Write down, why do we need a Stem network.

In order to quickly and strongly reduce the spatial dimensions (compress the image before parallel processing) in order to minimize the number of elements in the layers.

# [ResNet](https://arxiv.org/pdf/1512.03385.pdf)

Implement ResNet-18, for that write specific `nn.Module`, `ResNetBlock` implementing block of ResNet.

In [100]:
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResNetBlock, self).__init__()
        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
                        nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        return F.relu(out)

In [101]:
class ResNet18(nn.Module):
    
    def init(self, in_channels=3, out_channels=1000):
        super(ResNet18, self).init()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.blocks = nn.Sequential(
            ResNetBlock(64, 64, stride=1), 
            ResNetBlock(64, 64),
            ResNetBlock(64, 128, downsample=self.downsampleF(64, 128), stride=2), 
            ResNetBlock(128, 128),
            ResNetBlock(128, 256, downsample=self.downsampleF(128, 256), stride=2), 
            ResNetBlock(256, 256),
            ResNetBlock(256, 512, downsample=self.downsampleF(256, 512), stride=2), 
            ResNetBlock(512, 512),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, out_channels)
        
    def forward(self, x):
        x = self.conv(x)
        x = self.blocks(x)
        x = self.avgpool(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x 
    
    def downsampleF(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1), 
            nn.BatchNorm2d(out_channels)
        )

In [102]:
model = ResNet18()
print(model)

ResNet18()


# [ResNeXt](https://arxiv.org/pdf/1611.05431.pdf)

Write specific `nn.Module`, `ResNeXtBlock` implementing block of ResNeXt.

In [103]:
class ResNeXtBlock(nn.Module):
    def __init__(self, in_channels, stride = 1, downsample = None):
        super(ResNeXtBlock, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, in_channels//2, kernel_size=1, stride=1),
            nn.BatchNorm2d(in_channels//2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels//2, in_channels//2, kernel_size=3, stride=1, padding=1, groups=32),
            nn.BatchNorm2d(in_channels//2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels//2, in_channels, kernel_size=1, stride=1),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
        )
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        return self.conv3(x)

In [104]:
model = ResNeXtBlock(512)
print(model)

ResNeXtBlock(
  (conv1): Sequential(
    (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv2): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): Sequential(
    (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
)


# [SENet](https://arxiv.org/pdf/1709.01507.pdf)

Write specific `nn.Module`, `SEBlock` implementing block of SENet.

In [105]:
class SEBlock(nn.Module):
    "credits: https://github.com/moskomule/senet.pytorch/blob/master/senet/se_module.py#L4"
    def __init__(self, c, r=16):
        super().__init__()
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        self.excitation = nn.Sequential(
            nn.Linear(c, c // r, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c // r, c, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        bs, c, _, _ = x.shape
        y = self.squeeze(x).view(bs, c)
        y = self.excitation(y).view(bs, c, 1, 1)
        return x * y.expand_as(x)

In [107]:
model = SEBlock(256)
print(model)

SEBlock(
  (squeeze): AdaptiveAvgPool2d(output_size=1)
  (excitation): Sequential(
    (0): Linear(in_features=256, out_features=16, bias=False)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=16, out_features=256, bias=False)
    (3): Sigmoid()
  )
)


# [Neural Architecture Search](https://arxiv.org/pdf/1611.01578.pdf)

For the neural network of your assignment 2, write down the parametrization of the network you would use for the NAS.

I would parametrize the size of the hidden layers. It doesn't seems to be wery usefull in my implementation, since I don't use convolutions in it (work just with set of numbers)