In [2]:
import torch
from torch import nn

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
class Block(nn.Module):
    """Two 3D convolutions with a ReLU in between.

    Both convolutions use a 2x2x2 kernel with padding 1 and the default stride of 1,
    so each convolution makes every spatial dimension one voxel larger (D -> D + 1).
    No activation is applied after the second convolution.
    """

    def __init__(self, in_ch, out_ch):
        """Create the layers.

        Args:
            in_ch: number of channels of the incoming tensor.
            out_ch: number of channels produced by both convolutions.
        """
        super().__init__()
        self.conv1 = nn.Conv3d(in_channels=in_ch, out_channels=out_ch, kernel_size=2, padding=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv3d(in_channels=out_ch, out_channels=out_ch, kernel_size=2, padding=1)

    def forward(self, x):
        """Return conv2(relu(conv1(x)))."""
        hidden = self.conv1(x)
        hidden = self.relu(hidden)
        return self.conv2(hidden)


class Encoder(nn.Module):
    """Contracting path: a chain of `Block`s with 2x max-pooling between consecutive blocks.

    Args:
        chs: channel sizes; block ``i`` maps ``chs[i]`` channels to ``chs[i+1]`` channels,
            so ``len(chs) - 1`` blocks are created.
    """

    def __init__(self, chs=(1,64,128, 256, 512)):
        super().__init__()
        self.enc_blocks = nn.ModuleList(
            [Block(c_in, c_out) for c_in, c_out in zip(chs[:-1], chs[1:])]
        )
        self.pool       = nn.MaxPool3d(2)

    def forward(self, x):
        """Run every block and collect its (un-pooled) output.

        Returns:
            list with one feature map per block, shallowest first.
        """
        ftrs = []
        last_idx = len(self.enc_blocks) - 1
        for idx, block in enumerate(self.enc_blocks):
            x = block(x)
            ftrs.append(x)
            # Pool only between blocks: the pooled output of the last block was never
            # used, and pooling it can fail when the deepest feature map is tiny.
            if idx != last_idx:
                x = self.pool(x)
        return ftrs


class Decoder(nn.Module):
    """Expanding path: transposed convolution, skip concatenation, then a `Block`.

    Args:
        chs: channel sizes; stage ``i`` maps ``chs[i]`` channels to ``chs[i+1]`` channels.
            The stage's `Block` receives the concatenation of the up-convolved tensor
            (``chs[i+1]`` channels) and the skip tensor, and is built for ``chs[i]`` input
            channels, so each skip is expected to carry ``chs[i] - chs[i+1]`` channels.
    """

    def __init__(self, chs=(512, 256, 128, 64)):
        super().__init__()
        self.chs         = chs
        # Kernel 2 with stride 1: each transposed convolution enlarges every spatial
        # dimension by one voxel rather than doubling it.
        self.upconvs    = nn.ModuleList(
            [nn.ConvTranspose3d(chs[i], chs[i+1], kernel_size=2, stride=1) for i in range(len(chs)-1)]
        )
        self.dec_blocks = nn.ModuleList([Block(chs[i], chs[i+1]) for i in range(len(chs)-1)])

    def forward(self, x, encoder_features):
        """Decode `x`, consuming one skip tensor per stage.

        Args:
            x: deepest feature map.
            encoder_features: skip tensors indexed by stage, i.e. ``encoder_features[0]``
                is used first.

        Returns:
            output of the last decoder block.
        """
        for i in range(len(self.chs)-1):
            x        = self.upconvs[i](x)
            enc_ftrs = self.crop(encoder_features[i], x)
            x        = torch.cat([x, enc_ftrs], dim=1)
            x        = self.dec_blocks[i](x)
        return x

    def crop(self, tensor, output_size):
        """Centre-crop the last three dimensions of `tensor` to those of `output_size`.

        Args:
            tensor: tensor to crop.
            output_size: reference *tensor* (not a size tuple) whose last three dimensions
                give the target extent.

        Returns:
            a view of `tensor`. Along any axis where `tensor` is not larger than the
            reference, the offset is clamped to 0 and that axis is returned uncropped.
        """
        window = []
        # Pair each axis with its own offset. The previous implementation applied the
        # offsets in reversed axis order, which is only correct for cubic volumes.
        for src_len, dst_len in zip(tensor.shape[-3:], output_size.shape[-3:]):
            start = max((src_len - dst_len) // 2, 0)
            window.append(slice(start, start + dst_len))
        return tensor[(..., *window)]


class Classification_head(nn.Module):
    """Classification head that fuses the decoder output with an encoder feature map.

    Two branches each produce 64 channels; they are concatenated on the channel axis
    (128 channels) and reduced by `classifier` to `n_classes` sigmoid scores.

    Args:
        n_classes: number of output scores per sample.
        enc_ch: channels of the encoder feature map fed to `b_net` (default 512).
        dec_ch: channels of the decoder output fed to `o_net` (default 64).
    """

    def __init__(self, n_classes, enc_ch=512, dec_ch=64):
        super().__init__()
        # Branch applied to the encoder feature map.
        self.b_net = nn.Sequential(
            nn.Conv3d(enc_ch, 256, kernel_size = (2,2,2), stride = (1,1,1)),
            nn.ReLU(),
            nn.Conv3d(256, 64, kernel_size = (1,1,1), stride = (1,1,1)),
            nn.ReLU(),
            )

        # Branch applied to the decoder output; the stride-2 convolution downsamples it.
        self.o_net = nn.Sequential(
            nn.Conv3d(dec_ch, 64, kernel_size = (4,4,4), stride = (2,2,2)),
            nn.ReLU(),
            nn.Conv3d(64, 64, kernel_size = (3,3,3), stride = (1,1,1)),
            nn.ReLU(),
            )

        # Operates on the 64 + 64 = 128 concatenated channels. A 4x4x4 map is reduced to
        # 1x1x1 (4 -> 2 -> 1), so Flatten then yields (batch, n_classes); a larger map
        # would flatten to more columns.
        self.classifier = nn.Sequential(
            nn.Conv3d(128, 128, kernel_size = (2,2,2), stride = (2,2,2)),
            nn.ReLU(),

            nn.Conv3d(128, 64, kernel_size = (2,2,2), stride = (1,1,1)),
            nn.ReLU(),

            nn.Conv3d(64, 64, kernel_size = (1,1,1), stride = (1,1,1)),
            nn.ReLU(),

            nn.Conv3d(64, 64, kernel_size = (1,1,1), stride = (1,1,1)),
            nn.ReLU(),

            nn.Conv3d(64, n_classes, kernel_size = (1,1,1), stride = (1,1,1)),
            # Sigmoid scores each class independently in [0, 1]; rows do not sum to 1.
            nn.Sigmoid(),
            nn.Flatten()
        )

    def forward(self, x, enc_ftrs):
        """Fuse both inputs and classify.

        Args:
            x: decoder output with `dec_ch` channels.
            enc_ftrs: encoder feature map with `enc_ch` channels.

        Returns:
            flattened sigmoid scores. The two branch outputs must have equal spatial
            sizes, otherwise the channel-wise concatenation fails.
        """
        enc_ftrs = self.b_net(enc_ftrs)
        x = self.o_net(x)
        x = torch.cat([x, enc_ftrs], dim=1)
        x = self.classifier(x)

        return x


class UNet(nn.Module):
    """3D U-Net-style classifier: encoder, decoder and a classification head.

    Args:
        enc_chs: channel sizes passed to `Encoder`.
        dec_chs: channel sizes passed to `Decoder`.
        n_classes: number of scores produced by `Classification_head`.

    The head is created from `n_classes` only and works on 512-channel encoder
    features and 64-channel decoder output, so `enc_chs[-1]` / `dec_chs[-1]` are
    expected to stay at 512 / 64.
    """

    def __init__(self, enc_chs=(1,64,128, 256, 512,), dec_chs=(512, 256, 128, 64), n_classes=3):
        super().__init__()
        self.encoder = Encoder(enc_chs)
        self.decoder = Decoder(dec_chs)
        self.head = Classification_head(n_classes)

    def forward(self, x):
        """Encode `x`, decode with skip connections, then classify."""
        enc_ftrs = self.encoder(x)
        # The deepest feature map seeds the decoder and is also handed to the head.
        deepest = enc_ftrs[-1]
        # The remaining maps serve as skips, ordered deepest-first.
        skips = enc_ftrs[:-1][::-1]
        decoded = self.decoder(deepest, skips)
        return self.head(decoded, deepest)

In [32]:
# Smoke test: build the 3-class model and run one random 1-channel 16x16x16 volume through it.
model = UNet(n_classes=3)
x = torch.randn(1,1,16,16,16)
out = model(x)
# Display the predicted scores for the single sample.
out

tensor([[0.4846, 0.5079, 0.5082]], grad_fn=<ViewBackward0>)

In [33]:
from torchsummary import summary
# torchsummary takes the per-sample input shape (no batch dimension). Pass it as a plain
# tuple instead of allocating a random tensor just to read its .shape.
summary(model, (1, 16, 16, 16), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 64, 17, 17, 17]             576
              ReLU-2       [-1, 64, 17, 17, 17]               0
            Conv3d-3       [-1, 64, 18, 18, 18]          32,832
             Block-4       [-1, 64, 18, 18, 18]               0
         MaxPool3d-5          [-1, 64, 9, 9, 9]               0
            Conv3d-6      [-1, 128, 10, 10, 10]          65,664
              ReLU-7      [-1, 128, 10, 10, 10]               0
            Conv3d-8      [-1, 128, 11, 11, 11]         131,200
             Block-9      [-1, 128, 11, 11, 11]               0
        MaxPool3d-10         [-1, 128, 5, 5, 5]               0
           Conv3d-11         [-1, 256, 6, 6, 6]         262,400
             ReLU-12         [-1, 256, 6, 6, 6]               0
           Conv3d-13         [-1, 256, 7, 7, 7]         524,544
            Block-14         [-1, 256, 

In [34]:
# Move the model to the selected device; as the cell's last expression, the returned module is displayed.
model.to(device)

UNet(
  (encoder): Encoder(
    (enc_blocks): ModuleList(
      (0): Block(
        (conv1): Conv3d(1, 64, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
        (relu): ReLU()
        (conv2): Conv3d(64, 64, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
      )
      (1): Block(
        (conv1): Conv3d(64, 128, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
        (relu): ReLU()
        (conv2): Conv3d(128, 128, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
      )
      (2): Block(
        (conv1): Conv3d(128, 256, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
        (relu): ReLU()
        (conv2): Conv3d(256, 256, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
      )
      (3): Block(
        (conv1): Conv3d(256, 512, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
        (relu): ReLU()
        (conv2): Conv3d(512, 512, kernel_size=(2, 2, 2), stride=(1, 1, 1), padding=(1, 1, 1))
      )


In [35]:
# Forward a batch of 128 random volumes on the selected device.
# torch.randn returns a tensor that does not require grad, so no .detach() is needed.
x = torch.randn(128, 1, 16, 16, 16)
out = model(x.to(device))

In [36]:
# Display the batch predictions computed in the previous cell.
out

tensor([[0.4846, 0.5079, 0.5082],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5080, 0.5082],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5079, 0.5082],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5079, 0.5082],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5082],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5083],
        [0.4846, 0.5080, 0.5083],
        [0.4846, 0.5079, 0.5082],
        [0.484

# Original U-Net (2D)

In [99]:
# Original U-Net

class Block(nn.Module):
    """Two unpadded 3x3 2D convolutions with a ReLU in between.

    Neither convolution pads, so each one trims two pixels from height and width
    (H -> H - 2); the block as a whole shrinks each side by four.
    No activation is applied after the second convolution.
    """

    def __init__(self, in_ch, out_ch):
        """Create the layers.

        Args:
            in_ch: number of channels of the incoming tensor.
            out_ch: number of channels produced by both convolutions.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_ch, out_channels=out_ch, kernel_size=3)

    def forward(self, x):
        """Return conv2(relu(conv1(x)))."""
        hidden = self.conv1(x)
        hidden = self.relu(hidden)
        return self.conv2(hidden)


class Encoder(nn.Module):
    """Contracting path: a chain of `Block`s with 2x max-pooling between consecutive blocks.

    Args:
        chs: channel sizes; block ``i`` maps ``chs[i]`` channels to ``chs[i+1]`` channels,
            so ``len(chs) - 1`` blocks are created.
    """

    def __init__(self, chs=(3,64,128,256,512,1024)):
        super().__init__()
        self.enc_blocks = nn.ModuleList(
            [Block(c_in, c_out) for c_in, c_out in zip(chs[:-1], chs[1:])]
        )
        self.pool       = nn.MaxPool2d(2)

    def forward(self, x):
        """Run every block and collect its (un-pooled) output.

        Returns:
            list with one feature map per block, shallowest first.
        """
        ftrs = []
        last_idx = len(self.enc_blocks) - 1
        for idx, block in enumerate(self.enc_blocks):
            x = block(x)
            ftrs.append(x)
            # Pool only between blocks: the pooled output of the last block was never
            # used, and pooling it can fail when the deepest feature map is tiny.
            if idx != last_idx:
                x = self.pool(x)
        return ftrs


class Decoder(nn.Module):
    """Expanding path: 2x transposed convolution, skip concatenation, then a `Block`.

    Args:
        chs: channel sizes; stage ``i`` maps ``chs[i]`` channels to ``chs[i+1]`` channels.
            The stage's `Block` is built for ``chs[i]`` input channels and receives the
            up-convolved tensor (``chs[i+1]`` channels) concatenated with the skip tensor.
    """

    def __init__(self, chs=(1024, 512, 256, 128, 64)):
        super().__init__()
        self.chs = chs
        n_stages = len(chs) - 1
        # Kernel 2 with stride 2 doubles height and width at every stage.
        self.upconvs = nn.ModuleList(
            [nn.ConvTranspose2d(chs[i], chs[i+1], 2, 2) for i in range(n_stages)]
        )
        self.dec_blocks = nn.ModuleList(
            [Block(chs[i], chs[i+1]) for i in range(n_stages)]
        )

    def forward(self, x, encoder_features):
        """Decode `x`, consuming ``encoder_features[stage]`` as the skip of each stage."""
        n_stages = len(self.chs) - 1
        for stage in range(n_stages):
            upsampled = self.upconvs[stage](x)
            skip = self.crop(encoder_features[stage], upsampled)
            merged = torch.cat([upsampled, skip], dim=1)
            x = self.dec_blocks[stage](merged)
        return x

    def crop(self, enc_ftrs, x):
        """Centre-crop `enc_ftrs` to the height and width of the 4-D tensor `x`."""
        target_hw = list(x.shape[2:]) if x.dim() == 4 else None
        if target_hw is None:
            # Keep the original failure mode for non-4-D inputs (unpacking error).
            _, _, H, W = x.shape
            target_hw = [H, W]
        cropper = torchvision.transforms.CenterCrop(target_hw)
        return cropper(enc_ftrs)


class UNet(nn.Module):
    """2D U-Net: encoder, decoder and a 1x1 convolution producing per-pixel class maps.

    Args:
        enc_chs: channel sizes passed to `Encoder`.
        dec_chs: channel sizes passed to `Decoder`; ``dec_chs[-1]`` feeds the head.
        num_class: number of output channels of the head.
        retain_dim: when True, the output is resized to `out_sz`.
        out_sz: target (height, width) used when `retain_dim` is True.
    """

    def __init__(self, enc_chs=(3,64,128,256,512,1024), dec_chs=(1024, 512, 256, 128, 64), num_class=1, retain_dim=False, out_sz=(572,572)):
        super().__init__()
        self.encoder     = Encoder(enc_chs)
        self.decoder     = Decoder(dec_chs)
        self.head        = nn.Conv2d(dec_chs[-1], num_class, 1)
        self.retain_dim  = retain_dim
        # Stored so forward() can reach it; it was previously read as an undefined
        # bare name, raising NameError whenever retain_dim was True.
        self.out_sz      = out_sz

    def forward(self, x):
        """Return the head's output, optionally resized to `out_sz`."""
        enc_ftrs = self.encoder(x)
        # Deepest map seeds the decoder; the rest are skips, deepest-first.
        out      = self.decoder(enc_ftrs[::-1][0], enc_ftrs[::-1][1:])
        out      = self.head(out)
        if self.retain_dim:
            # Use nn.functional, which is reachable through the notebook's
            # `from torch import nn`, instead of an alias `F` that is never imported.
            out = nn.functional.interpolate(out, self.out_sz)
        return out

In [100]:
# Sanity-check a single block: two unpadded 3x3 convolutions shrink each side by 4 (572 -> 568).
enc_block = Block(1, 64)
x         = torch.randn(1, 1, 572, 572)
enc_block(x).shape

torch.Size([1, 64, 568, 568])

In [101]:
encoder = Encoder()
# Random stand-in for one 3-channel 572x572 input image.
x    = torch.randn(1, 3, 572, 572)
ftrs = encoder(x)
# One feature map per encoder block, shallowest first.
for ftr in ftrs:
    print(ftr.shape)

torch.Size([1, 64, 568, 568])
torch.Size([1, 128, 280, 280])
torch.Size([1, 256, 136, 136])
torch.Size([1, 512, 64, 64])
torch.Size([1, 1024, 28, 28])


In [102]:
# Feed a random 1024-channel 28x28 tensor through the decoder, using the encoder
# features `ftrs` (reversed, with the deepest one dropped) as skip connections.
decoder = Decoder()
x = torch.randn(1, 1024, 28, 28)
decoder(x, ftrs[::-1][1:]).shape

torch.Size([1, 64, 388, 388])

In [103]:
# End-to-end shape check of the full network on one random 3-channel 572x572 image.
unet = UNet()
x    = torch.randn(1, 3, 572, 572)
unet(x).shape

torch.Size([1, 1, 388, 388])

In [104]:
# Pass the per-sample input shape (no batch dimension) as a plain tuple instead of
# allocating a 3x572x572 random tensor just to read its .shape.
summary(unet, (3, 572, 572), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 570, 570]           1,792
              ReLU-2         [-1, 64, 570, 570]               0
            Conv2d-3         [-1, 64, 568, 568]          36,928
             Block-4         [-1, 64, 568, 568]               0
         MaxPool2d-5         [-1, 64, 284, 284]               0
            Conv2d-6        [-1, 128, 282, 282]          73,856
              ReLU-7        [-1, 128, 282, 282]               0
            Conv2d-8        [-1, 128, 280, 280]         147,584
             Block-9        [-1, 128, 280, 280]               0
        MaxPool2d-10        [-1, 128, 140, 140]               0
           Conv2d-11        [-1, 256, 138, 138]         295,168
             ReLU-12        [-1, 256, 138, 138]               0
           Conv2d-13        [-1, 256, 136, 136]         590,080
            Block-14        [-1, 256, 1