In [22]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
from typing import Iterable
import sys

In [9]:
def pytorch_init():
    device_id = 1
    torch.cuda.set_device(device_id)
    
    # Sanity checks
    assert torch.cuda.current_device() == 1, 'Using wrong GPU'
    assert torch.cuda.device_count() == 2, 'Cannot find both GPUs'
    assert torch.cuda.get_device_name(0) == 'GeForce RTX 2080 Ti', 'Wrong GPU name'
    assert torch.cuda.is_available() == True, 'GPU not available'
    return torch.device('cuda', device_id)
    
device = pytorch_init()
device

device(type='cuda', index=1)

# Main building blocks of PyTorch
- Module
- Sequential
- ModuleList

# Module 
The Module is the main building block, it defines the base class for all neural networks and you **MUST** subclass it. Below we make a simple CNN classifier with an encoding part that uses two layers with $3 \times 3$ convs + batchnorm + ReLU.

In [10]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, n_classes: int):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_c, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        
        self.fc1 = nn.Linear(in_features=32*28*28, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=n_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        
        x = x.view(x.size(0), -1) # Flatten, .size(0) is same as .shape[0]
        
        x = self.fc1(x)
        x = F.sigmoid(x)
        x = self.fc2(x)
        return x

In [11]:
model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=25088, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=10, bias=True)
)


### Problem is that it is not that scalable
If we want to add a layer we have to declare and initialize another layer, then code what to do with it in the forward method. Also if we have some common block that we want to use in another model, we have to write it all over again.

# Sequential
Sequential is a container of Moules that can be stacked togeter and run at the same time. It very similar to Keras' sequential API. It can often yield cleaner code if used correctly

In [12]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, n_classes: int):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_c, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLu()
        )
        
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLu()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classed)
        )
        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(0), -1) # flatten
        
        x = self.decoder(x)
        
        return x

## Pythonic simplification
We can see that some come above is quite repetetive (conv_block). We can further generalize the such that we get even more scalable code.

In [15]:
def conv_block(in_f: int, out_f: int, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(in_f, out_f),
        nn.ReLU()
    )

class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, n_classes: int):
        super().__init__()
        self.conv_block1 = conv_block(in_c, 32, kernel_size=3, padding=1)
        self.conv_block2 = conv_block(32, 64, kernel_size=3, padding=1)
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )
        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(0), -1) # flatten
        
        x = self.decoder(x)
        
        return x

In [16]:
model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(1, eps=32, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=64, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## Even further Pythonic simplification!
Recall that all neural network stuff subclasses nn.Module (meaning nn.Sequential is a type of nn.Module), we can use nn.Sequential inside nn.Sequential!

In [17]:
def conv_block(in_f: int, out_f: int, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(in_f, out_f),
        nn.ReLU()
    )

class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, n_classes: int):
        super().__init__()
        
        self.encoder = nn.Sequential(
            conv_block(in_c, 32, kernel_size=3, padding=1),
            conv_block(32, 64, kernel_size=3, padding=1)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )
        
    def forward(self, x):
        x = self.decoder(x)

        x = x.view(x.size(0), -1) # flatten
        
        x = self.decoder(x)
        
        return x

In [18]:
model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(1, eps=32, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=64, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## Dynamic layers!
Since everything is so Pythonic, you can very easily do Python tricks to make things more dynamic and scalable. We can simply pass the desired size of the network with a constructor parameter 8=====D

In [32]:
def conv_block(in_f: int, out_f: int, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(in_f, out_f),
        nn.ReLU()
    )

class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, enc_sizes: Iterable[int], n_classes: int):
        super().__init__()
        
        self.enc_sizes = [in_c, *enc_sizes]
        
        conv_blocks = [conv_block(in_f, out_f, kernel_size=3, padding=1) 
                       for in_f, out_f in zip(self.enc_sizes, self.enc_sizes[1:])]
        
        self.encoder = nn.Sequential(*conv_blocks)
        
        
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )
        
    def forward(self, x):
        x = self.decoder(x)
        x = x.view(x.size(0), -1) # flatten
        x = self.decoder(x)
        
        return x

In [33]:
model = MyCNNClassifier(1, [32, 64, 128], 10)
print(model)

MyCNNClassifier(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(1, eps=32, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=64, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=128, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


## Refactoring and generalizing decoder
At this point we have worked the encoder part, the idea works for the decoder part as well!

In [47]:
def conv_block(in_f: int, out_f: int, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(in_f, out_f),
        nn.ReLU()
    )


def dec_block(in_f: int, out_f: int):
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )


class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, enc_sizes: Iterable[int], dec_sizes: Iterable[int],
                 n_classes: int):
        super().__init__()
        
        self.enc_sizes = [in_c, *enc_sizes]
        self.dec_sizes = [32 * 28 * 28, *dec_sizes]
        
        conv_blocks = [conv_block(in_f, out_f, kernel_size=3, padding=1) 
                       for in_f, out_f in zip(self.enc_sizes, self.enc_sizes[1:])]
        
        self.encoder = nn.Sequential(*conv_blocks)
        
        
        dec_blocks = [dec_block(in_f, out_f) 
                      for in_f, out_f in zip(self.dec_sizes, self.dec_sizes[1:])]
        self.decoder = nn.Sequential(*dec_blocks)
        
    def forward(self, x):
        x = self.decoder(x)
        x = x.view(x.size(0), -1) # flatten
        x = self.decoder(x)
        
        return x

In [48]:
model = MyCNNClassifier(1, [32, 64], [1024, 512], 10)
print(model)

MyCNNClassifier(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(1, eps=32, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=64, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Sequential(
      (0): Linear(in_features=25088, out_features=1024, bias=True)
      (1): Sigmoid()
    )
    (1): Sequential(
      (0): Linear(in_features=1024, out_features=512, bias=True)
      (1): Sigmoid()
    )
  )
)


## Just make the encoder and decoder their of nn.Modules!

In [64]:
def conv_block(in_f: int, out_f: int, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(in_f, out_f),
        nn.ReLU()
    )


def dec_block(in_f: int, out_f: int):
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )
    

class MyDecoder(nn.Module):
    def __init__(self, dec_sizes: Iterable[int], n_classes: int):
        super().__init__()
        self.dec_blocks = nn.Sequential(*([
            dec_block(in_f, out_f) for in_f, out_f in zip(dec_sizes, dec_sizes[1:])
        ] + [nn.Linear(dec_sizes[-1], n_classes)]))

    def forward(self, x):
        return self.dec_blocks(x)

    
class MyEncoder(nn.Module):
    def __init__(self, enc_sizes: Iterable[int]):
        super().__init__()
        self.conv_blocks = nn.Sequential(*[
            conv_block(in_f, out_f, kernel_size=3, padding=1) 
            for in_f, out_f in zip(enc_sizes, enc_sizes[1:])
        ])

    def forward(self, x):
        return self.conv_blocks(x)
    

class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, enc_sizes: Iterable[int], dec_sizes: Iterable[int],
                 n_classes: int):
        super().__init__()
        
        self.enc_sizes = [in_c, *enc_sizes]
        self.dec_sizes = [32 * 28 * 28, *dec_sizes]
        
        
        self.encoder = MyEncoder(self.enc_sizes)
        self.decoder = MyDecoder(self.dec_sizes, n_classes)
        
    def forward(self, x):
        x = self.decoder(x)
        x = x.view(x.size(0), -1) # flatten
        x = self.decoder(x)
        
        return x

In [65]:
model = MyCNNClassifier(1, [32, 64], [1024, 512], 10)
print(model)

MyCNNClassifier(
  (encoder): MyEncoder(
    (conv_blocks): Sequential(
      (0): Sequential(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(1, eps=32, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (1): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(32, eps=64, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
  )
  (decoder): MyDecoder(
    (dec_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=25088, out_features=1024, bias=True)
        (1): Sigmoid()
      )
      (1): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): Sigmoid()
      )
      (2): Linear(in_features=512, out_features=10, bias=True)
    )
  )
)


# ModuleList: when we need to iterate
ModuleList allows you to store Module as a list. It can be useful when you need to iterate through layers and store/use some informations, like in U-Net.

The main difference between Sequential is that ModuleList have NOT have forward method, so the inner layers are not connected. Assuming we need each output of each layer i nthe decoder we can store it by:

In [76]:
class MyModule(nn.Module):
    def __init__(self, sizes):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(in_f, out_f) for in_f, out_f in zip(sizes, sizes[1:])
        ])
        self.trace = []
    
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
            self.trace.append(x)
        return x

In [77]:
model = MyModule([1,16,32])
print(model)
model(torch.rand((4,1)))
[print(trace.shape) for trace in model.trace]

MyModule(
  (layers): ModuleList(
    (0): Linear(in_features=1, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=32, bias=True)
  )
)
torch.Size([4, 16])
torch.Size([4, 32])


[None, None]

# ModuleDict: when we need to choose
What if we want to switch to LeakyReLU in our conv_block? We can use ModuleDict to create a dictionary of Module and dynamically switch Module when we want

In [82]:
def conv_block(in_f: int, out_f: int, activation: str='relu', *args, **kwargs):
    
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )

In [83]:
print(conv_block(1, 32, 'lrelu', kernel_size=3, padding=1))
print(conv_block(1, 32, 'relu', kernel_size=3, padding=1))

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): LeakyReLU(negative_slope=0.01)
)
Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)


# Final implementation

In [91]:
def conv_block(in_f: int, out_f: int, activation: str='relu', *args, **kwargs):
    
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )


def dec_block(in_f: int, out_f: int):
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )

    
class MyEncoder(nn.Module):
    def __init__(self, enc_sizes: Iterable[int], activation: str):
        super().__init__()
        self.conv_blocks = nn.Sequential(*[
            conv_block(in_f, out_f, kernel_size=3, padding=1, activation=activation) 
            for in_f, out_f in zip(enc_sizes, enc_sizes[1:])
        ])

    def forward(self, x):
        return self.conv_blocks(x)
    
    
class MyDecoder(nn.Module):
    def __init__(self, dec_sizes: Iterable[int], n_classes: int):
        super().__init__()
        self.dec_blocks = nn.Sequential(*([
            dec_block(in_f, out_f) for in_f, out_f in zip(dec_sizes, dec_sizes[1:])
            ] + [nn.Linear(dec_sizes[-1], n_classes)]))

    def forward(self, x):
        return self.dec_blocks(x)
    
    
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c: int, enc_sizes: Iterable[int], dec_sizes: Iterable[int],
                 n_classes: int, activation: str):
        super().__init__()
        
        self.enc_sizes = [in_c, *enc_sizes]
        self.dec_sizes = [32 * 28 * 28, *dec_sizes]
        
        self.encoder = MyEncoder(self.enc_sizes, activation=activation)
        self.decoder = MyDecoder(self.dec_sizes, n_classes)
        
    def forward(self, x):
        x = self.decoder(x)
        x = x.view(x.size(0), -1) # flatten
        x = self.decoder(x)
        
        return x

In [92]:
model = MyCNNClassifier(3, [32,64], [1024, 512], 10, activation='lrelu')
print(model)

MyCNNClassifier(
  (encoder): MyEncoder(
    (conv_blocks): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
      (1): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
    )
  )
  (decoder): MyDecoder(
    (dec_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=25088, out_features=1024, bias=True)
        (1): Sigmoid()
      )
      (1): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): Sigmoid()
      )
      (2): Linear(in_features=512, out_features=10, bias=True)
    )
  )
)
