In [1]:
import math
from inspect import isfunction
from functools import partial

%matplotlib inline
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
#from einops import rearrange

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import numpy as np


In [2]:
# References:
#   https://github.com/g2archie/UNet-MRI-Reconstruction
#   https://amaarora.github.io/2020/09/13/unet.html#understanding-input-and-output-shapes-in-u-net

In [3]:
class Block(nn.Module):
    """Two 3x3 convolutions with padding 1, each followed by a ReLU.

    Args:
        in_ch: number of channels of the incoming feature map.
        out_ch: number of channels produced by both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        # kernel_size=3 with padding=1 leaves height and width unchanged.
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU to ``x`` and return the result."""
        hidden = self.relu(self.conv1(x))
        hidden = self.conv2(hidden)
        return self.relu(hidden)

In [4]:
# Sanity check for Block: a 1-channel 28x28 input should come out with
# 64 channels and the same 28x28 spatial size (the convs use padding=1).
enc_block = Block(1, 64)
x         = torch.randn(1, 1, 28, 28)
enc_block(x).shape


torch.Size([1, 64, 28, 28])

In [5]:
class Encoder(nn.Module):
    """Contracting path: a stack of ``Block`` stages separated by 2x2 max pooling.

    Args:
        chs: channel widths. Each consecutive pair ``(chs[i], chs[i+1])``
            gives the input/output channels of stage ``i``.
    """

    def __init__(self, chs=(1,32,64,128,256)):
        super().__init__()
        self.enc_blocks = nn.ModuleList([Block(chs[i], chs[i+1]) for i in range(len(chs)-1)])
        self.pool       = nn.MaxPool2d(2)

    def forward(self, x):
        """Run every stage and collect its output.

        Args:
            x: input tensor fed to the first stage.

        Returns:
            A list with one feature map per stage, in stage order
            (the first entry is the output of the first block).
        """
        ftrs = []
        last_stage = len(self.enc_blocks) - 1
        for stage, block in enumerate(self.enc_blocks):
            x = block(x)
            ftrs.append(x)
            # Downsample only between stages. Pooling after the final stage
            # produced a tensor that was never used, and MaxPool2d(2) raises
            # when the deepest feature map is already 1 pixel on a side.
            if stage < last_stage:
                x = self.pool(x)
        return ftrs

In [6]:
# Preview of the block stack Encoder builds for its default channel widths:
# each consecutive pair (1->32, 32->64, 64->128, 128->256) becomes one Block.
chs=(1,32,64,128,256)
nn.ModuleList([Block(chs[i], chs[i+1]) for i in range(len(chs)-1)])

ModuleList(
  (0): Block(
    (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (1): Block(
    (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (2): Block(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (3): Block(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu): ReLU()
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
)

In [7]:
# Run the default encoder on a random image and print the shape of every
# stage's feature map. `ftrs` is reused by the decoder demo cell further down.
encoder = Encoder()
# input image: batch of 1, single channel, 256x256
x    = torch.randn(1, 1, 256, 256)
ftrs = encoder(x)
for ftr in ftrs: print(ftr.shape)

torch.Size([1, 32, 256, 256])
torch.Size([1, 64, 128, 128])
torch.Size([1, 128, 64, 64])
torch.Size([1, 256, 32, 32])


In [8]:
class Decoder(nn.Module):
    """Expanding path: upsample, concatenate a cropped skip feature, refine.

    Args:
        chs: channel widths. Stage ``i`` upsamples ``chs[i]`` channels down to
            ``chs[i+1]``, concatenates the skip feature along the channel
            axis, and passes the result through ``Block(chs[i], chs[i+1])``.
    """

    def __init__(self, chs=(256, 128, 64, 32)):
        super().__init__()
        self.chs = chs
        n_stages = len(chs) - 1
        # All transposed convolutions are created before any Block, which
        # keeps the parameter initialisation order stable.
        self.upconvs = nn.ModuleList(
            [nn.ConvTranspose2d(chs[k], chs[k + 1], 2, 2) for k in range(n_stages)]
        )
        self.dec_blocks = nn.ModuleList(
            [Block(chs[k], chs[k + 1]) for k in range(n_stages)]
        )

    def forward(self, x, encoder_features):
        """Decode ``x`` using one entry of ``encoder_features`` per stage.

        Args:
            x: tensor fed to the first transposed convolution.
            encoder_features: indexable collection of skip tensors;
                ``encoder_features[i]`` is used by stage ``i``.

        Returns:
            The output of the last decoder block.
        """
        for stage in range(len(self.chs) - 1):
            upsampled = self.upconvs[stage](x)
            # Match the skip feature's spatial size to the upsampled map
            # before concatenating along the channel axis.
            skip = self.crop(encoder_features[stage], upsampled)
            merged = torch.cat([upsampled, skip], dim=1)
            x = self.dec_blocks[stage](merged)
        return x

    def crop(self, enc_ftrs, x):
        """Center-crop ``enc_ftrs`` to the height and width of the 4-D tensor ``x``."""
        _batch, _channels, height, width = x.shape
        center_crop = torchvision.transforms.CenterCrop([height, width])
        return center_crop(enc_ftrs)

In [9]:
# Preview of the transposed-convolution stack for a wider channel setup than
# the Decoder default of (256, 128, 64, 32).
# Note: this rebinds the notebook-level `chs` defined in an earlier cell.
chs=(1024, 512, 256, 128, 64)
nn.ModuleList([nn.ConvTranspose2d(chs[i], chs[i+1], 2, 2) for i in range(len(chs)-1)])

ModuleList(
  (0): ConvTranspose2d(1024, 512, kernel_size=(2, 2), stride=(2, 2))
  (1): ConvTranspose2d(512, 256, kernel_size=(2, 2), stride=(2, 2))
  (2): ConvTranspose2d(256, 128, kernel_size=(2, 2), stride=(2, 2))
  (3): ConvTranspose2d(128, 64, kernel_size=(2, 2), stride=(2, 2))
)

In [10]:
# Decoder demo: feed a random 256-channel 28x28 bottleneck and use the encoder
# features from the earlier cell as skips. `ftrs[::-1][1:]` reverses them
# (deepest first) and drops the deepest one, leaving one skip per decoder stage.
# The skips are larger than the upsampled maps here, so Decoder.crop
# center-crops them to match.
decoder = Decoder()
x = torch.randn(1, 256, 28, 28)
decoder(x, ftrs[::-1][1:]).shape

torch.Size([1, 32, 224, 224])

In [11]:
class UNet(nn.Module):
    """U-Net assembled from ``Encoder``, ``Decoder`` and a 1x1 convolution head.

    Args:
        enc_chs: channel widths passed to ``Encoder``.
        dec_chs: channel widths passed to ``Decoder``.
        num_class: number of output channels produced by the head.
        retain_dim: when true, the head output is resized to ``out_sz``.
        out_sz: target size handed to ``F.interpolate``; only used when
            ``retain_dim`` is true.
    """

    def __init__(self, enc_chs=(1,32,64,128,256), dec_chs=(256, 128, 64, 32), num_class=1, retain_dim=False, out_sz=(572,572)):
        super().__init__()
        self.encoder     = Encoder(enc_chs)
        self.decoder     = Decoder(dec_chs)
        self.head        = nn.Conv2d(dec_chs[-1], num_class, 1)
        self.retain_dim  = retain_dim
        # Stored so forward() can read it; previously forward() referenced an
        # undefined name and failed whenever retain_dim was enabled.
        self.out_sz      = out_sz

    def forward(self, x):
        """Encode ``x``, decode with skip connections, and apply the head.

        Returns:
            The head output, resized to ``self.out_sz`` if ``retain_dim`` is set.
        """
        enc_ftrs = self.encoder(x)
        # Deepest feature first: it seeds the decoder, the rest act as skips.
        reversed_ftrs = enc_ftrs[::-1]
        out = self.decoder(reversed_ftrs[0], reversed_ftrs[1:])
        out = self.head(out)
        if self.retain_dim:
            out = F.interpolate(out, self.out_sz)
        return out

In [12]:
pip install torchsummary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [13]:
# torchsummary is imported here, not in the top import cell, because it is
# installed by the preceding cell.
import torchsummary
from torchsummary import summary
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0
model = UNet().to(device)

# Per-layer output shapes and parameter counts for a single-channel 128x128 input.
summary(model, (1, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 128, 128]             320
              ReLU-2         [-1, 32, 128, 128]               0
            Conv2d-3         [-1, 32, 128, 128]           9,248
              ReLU-4         [-1, 32, 128, 128]               0
             Block-5         [-1, 32, 128, 128]               0
         MaxPool2d-6           [-1, 32, 64, 64]               0
            Conv2d-7           [-1, 64, 64, 64]          18,496
              ReLU-8           [-1, 64, 64, 64]               0
            Conv2d-9           [-1, 64, 64, 64]          36,928
             ReLU-10           [-1, 64, 64, 64]               0
            Block-11           [-1, 64, 64, 64]               0
        MaxPool2d-12           [-1, 64, 32, 32]               0
           Conv2d-13          [-1, 128, 32, 32]          73,856
             ReLU-14          [-1, 128,

In [14]:
# End-to-end shape check with the default UNet (retain_dim=False): a
# 1-channel 256x256 input yields a 1-channel output of the same spatial size.
unet = UNet()
x    = torch.randn(1, 1, 256, 256)
unet(x).shape

torch.Size([1, 1, 256, 256])