<a href="https://colab.research.google.com/github/IANGECHUKI176/deeplearning/blob/main/pytorch/convnets/DenseNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn

DenseNet [paper](https://arxiv.org/pdf/1608.06993.pdf)

# Part 1: In this first part I followed the [Lightning docs](https://lightning.ai/docs/pytorch/stable/notebooks/course_UvA-DL/04-inception-resnet-densenet.html) to implement DenseNet

In [None]:
class DenseLayer(nn.Module):
    """One dense layer: BN -> act -> 1x1 conv -> BN -> act -> 3x3 conv.

    The freshly computed feature maps are concatenated with the layer input
    along the channel dimension (new features first, input after).
    """

    def __init__(self, c_in, bn_size, growth_rate, act_fn):
        """
            Inputs:
                c_in - number of input channels
                bn_size - bottleneck factor; the 1x1 convolution outputs
                          bn_size * growth_rate channels. Typically between 2 and 4
                growth_rate - number of output channels of the 3x3 convolution
                act_fn - activation class constructor (e.g. nn.ReLU), called with no arguments
        """
        super().__init__()
        c_bottleneck = bn_size * growth_rate
        stages = [
            nn.BatchNorm2d(c_in),
            act_fn(),
            # 1x1 convolution: maps the input to the bottleneck width.
            nn.Conv2d(c_in, c_bottleneck, kernel_size=1, bias=False),
            nn.BatchNorm2d(c_bottleneck),
            act_fn(),
            # 3x3 convolution with padding 1 keeps the spatial size unchanged.
            nn.Conv2d(c_bottleneck, growth_rate, kernel_size=3, padding=1, bias=False),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        new_features = self.net(x)
        # Output has growth_rate + c_in channels.
        return torch.cat((new_features, x), dim=1)

In [None]:
class DenseBlock(nn.Module):
    """A stack of `num_layers` DenseLayer modules applied one after another."""

    def __init__(self, c_in, num_layers, bn_size, growth_rate, act_fn):
        """
        Inputs:
            c_in - number of input channels of the block
            num_layers - number of dense layers in the block
            bn_size - bottleneck size passed to every dense layer
            growth_rate - growth rate passed to every dense layer
            act_fn - activation class passed to every dense layer
        """
        super().__init__()
        # Layer i is built with c_in + i * growth_rate input channels, since
        # each preceding layer's output has growth_rate more channels.
        self.block = nn.Sequential(
            *(
                DenseLayer(c_in + idx * growth_rate, bn_size, growth_rate, act_fn)
                for idx in range(num_layers)
            )
        )

    def forward(self, x):
        return self.block(x)

In [None]:
# blk = DenseBlock(32,6,2,16,nn.ReLU)
# blk

Skip connections require the dimensions to match; when they do not match,
this is overcome by linearly projecting the features.

In [None]:
class TransitionLayer(nn.Module):
    """Transition between dense blocks: BN -> act -> 1x1 conv -> 2x2 average pool.

    Inputs:
        c_in - number of input channels
        c_out - number of output channels of the 1x1 convolution
        act_fn - activation class constructor (e.g. nn.ReLU), called with no arguments
    """

    def __init__(self, c_in, c_out, act_fn):
        super().__init__()
        norm = nn.BatchNorm2d(c_in)
        activation = act_fn()
        # The 1x1 convolution changes the channel count from c_in to c_out.
        project = nn.Conv2d(c_in, c_out, kernel_size=1, bias=False)
        # Average pooling with kernel 2 / stride 2 downsamples height and width.
        downsample = nn.AvgPool2d(kernel_size=2, stride=2)
        self.transition = nn.Sequential(norm, activation, project, downsample)

    def forward(self, x):
        out = self.transition(x)
        return out

In [None]:
class DenseNetLightning(nn.Module):
    """DenseNet classifier assembled from DenseBlock and TransitionLayer modules.

    Inputs:
        num_classes - number of outputs of the final linear layer
        num_layers - sequence with the number of dense layers in each dense block
        bn_size - bottleneck factor used in the dense layers; also sets the
                  width of the input convolution (bn_size * growth_rate)
        growth_rate - growth rate used in the dense layers
        act_fn - activation class constructor (e.g. nn.ReLU)
    """

    def __init__(self, num_classes=10, num_layers=(6, 6, 6, 6), bn_size=2, growth_rate=16, act_fn=nn.ReLU):
        super().__init__()

        c_hidden = bn_size * growth_rate
        # Input stem: a single 3x3 convolution on 3-channel input.
        self.input_net = nn.Sequential(
            nn.Conv2d(3, out_channels=c_hidden, kernel_size=3, padding=1)
        )

        blocks = []
        last_index = len(num_layers) - 1
        for blk_index, no_layers in enumerate(num_layers):
            blocks.append(
                DenseBlock(
                    c_in=c_hidden,
                    num_layers=no_layers,
                    bn_size=bn_size,
                    growth_rate=growth_rate,
                    act_fn=act_fn,
                )
            )
            # Every dense layer adds growth_rate channels.
            c_hidden = c_hidden + no_layers * growth_rate
            if blk_index < last_index:  # Don't apply transition layer on last block
                # The transition halves the channel count.
                blocks.append(TransitionLayer(c_in=c_hidden, c_out=c_hidden // 2, act_fn=act_fn))
                c_hidden = c_hidden // 2

        self.blocks = nn.Sequential(*blocks)
        self.output_net = nn.Sequential(
            nn.BatchNorm2d(c_hidden),
            act_fn(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(c_hidden, num_classes),
        )

        # Must run AFTER all submodules are registered: self.modules() only
        # yields modules that already exist, so calling this at the top of
        # __init__ would initialise nothing.
        self._init_parameters()

    def _init_parameters(self):
        """Apply Kaiming-normal init to conv weights and reset BatchNorm to weight 1 / bias 0."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # NOTE(review): nonlinearity is fixed to 'relu' regardless of
                # act_fn - confirm this is intended for non-ReLU activations.
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.input_net(x)
        out = self.blocks(out)
        out = self.output_net(out)
        return out

In [None]:
# Instantiate the Part 1 model with its default constructor arguments.
blk0 = DenseNetLightning()

In [None]:
# from torchsummary import summary
# summary(blk0,(3,224,224))

# Part 2: based on this [GitHub page](https://github.com/Mayurji/Image-Classification-PyTorch/blob/main/DenseNet.py). This is the same thing as Part 1, but implemented in a slightly different way

> In ResNet, we see how the skip connection is added as an identity function from the inputs
to interact with the Conv layers. But in DenseNet, instead of adding the skip
connection to the Conv layers, we append (concat) the output of the identity function
to the output of the Conv layers.

> In ResNet, it is a little tedious to make the dimensions match when adding the skip
connection and the Conv layers, but it is much simpler in DenseNet, as we concat
both X and the Conv's output.

> The key idea, and the reason it's called DenseNet, is that each layer not only gets
its input from the previous layer but also from the layers preceding the previous layer. So
the next layer becomes dense, as it is loaded with the outputs of all previous layers.

>Two blocks comprise DenseNet, one is DenseBlock for concat operation and other is
transition layer for controlling channels meaning dimensions (recall 1x1 Conv).

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [None]:
class Bottleneck(nn.Module):
    """Dense layer with a bottleneck: BN-ReLU-1x1 conv, then BN-ReLU-3x3 conv.

    Inputs:
        in_channels - number of input channels
        growth_rate - number of channels produced by the 3x3 convolution;
                      the 1x1 convolution produces 4 * growth_rate channels
    """

    def __init__(self, in_channels, growth_rate):
        super().__init__()
        bottleneck_width = 4 * growth_rate  # 4 is the bottleneck size
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, bottleneck_width, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(bottleneck_width)
        self.conv2 = nn.Conv2d(bottleneck_width, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        squeezed = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(squeezed)))
        # Input first, new features after: in_channels + growth_rate channels.
        return torch.cat([x, new_features], dim=1)

In [None]:
class SingleLayer(nn.Module):
    """Dense layer without a bottleneck: BN-ReLU-3x3 conv, concatenated with its input.

    Inputs:
        in_channels - number of input channels
        growth_rate - number of channels produced by the 3x3 convolution
    """

    def __init__(self, in_channels, growth_rate):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(
            in_channels,
            growth_rate,
            kernel_size=3,
            padding=1,
            bias=False,
        )

    def forward(self, x):
        activated = F.relu(self.bn1(x))
        new_features = self.conv1(activated)
        # Input first, new features after: in_channels + growth_rate channels.
        return torch.cat([x, new_features], dim=1)

For Part 2 the transition layer is named `Transition`, but it does the same thing as `TransitionLayer` in Part 1.

In [None]:
class Transition(nn.Module):
    """Transition between dense blocks: BN-ReLU-1x1 conv followed by 2x2 average pooling.

    Inputs:
        in_channels - number of input channels
        out_channels - number of output channels of the 1x1 convolution
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)

    def forward(self, x):
        activated = F.relu(self.bn1(x))
        projected = self.conv1(activated)
        # Average pooling with a 2x2 window downsamples height and width.
        return F.avg_pool2d(projected, 2)

In [None]:
class DenseNet(nn.Module):
    """DenseNet with four dense blocks separated by three transition layers.

    Inputs:
        block - layer class used inside the dense blocks; it is called as
                block(in_channels, growth_rate)
        nblocks - number of layers in each dense block. Only the first four
                  entries are used. Here they are predefined; you can also
                  use the formula (depth - 4) // 3 and pass the depth as a
                  parameter
        growth_rate - number of channels each layer adds
        reduction - factor applied to the channel count in every transition
        num_classes - number of outputs of the final linear layer
    """

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate
        num_planes = 2 * growth_rate

        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # No transition after the last dense block.
        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3] * growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack `nblock` layers; each layer's input grows by growth_rate channels."""
        layers = []
        for _ in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)

        out = F.relu(self.bn(out))
        # Global average pooling. The adaptive form always reduces to 1x1,
        # whereas a fixed kernel of out.size(-1) is only correct when the
        # feature map is square.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [None]:
from torchsummary import summary

In [None]:
# def DenseNet121():
#     return DenseNet(Bottleneck, [2,2,2,2], growth_rate=32)
# input = torch.randn(3,224,224)
# blk1 = DenseNet121()

# summary(blk1,(3,224,224))


`DenseNet2` is the same as `DenseNet` above, except that it uses a for loop, which is more concise

In [None]:
class DenseNet2(nn.Module):
    """DenseNet whose dense blocks and transitions are built in a loop.

    Inputs:
        block - layer class used inside the dense blocks; it is called as
                block(in_channels, growth_rate)
        nblocks - number of layers in each dense block. Here they are
                  predefined; you can also use the formula (depth - 4) // 3
                  and pass the depth as a parameter
        growth_rate - number of channels each layer adds
        reduction - factor applied to the channel count in every transition
        num_classes - number of outputs of the final linear layer
    """

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super().__init__()
        self.growth_rate = growth_rate
        channels = 2 * growth_rate

        self.conv1 = nn.Conv2d(3, channels, kernel_size=3, padding=1, bias=False)

        # Alternate dense blocks and transitions in one flat sequence.
        stages = []
        final_index = len(nblocks) - 1
        for index, layer_count in enumerate(nblocks):
            stages.append(self._make_dense_layers(block, channels, layer_count))
            channels += growth_rate * layer_count

            if index == final_index:
                # No transition layer after the last dense block.
                continue
            reduced = int(math.floor(channels * reduction))
            stages.append(Transition(channels, reduced))
            channels = reduced

        self.dense_blocks = nn.Sequential(*stages)
        self.output_net = nn.Sequential(
            nn.BatchNorm2d(channels),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(channels, num_classes),
        )

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack `nblock` layers; layer i receives in_planes + i * growth_rate channels."""
        return nn.Sequential(
            *[
                block(in_planes + offset * self.growth_rate, self.growth_rate)
                for offset in range(nblock)
            ]
        )

    def forward(self, x):
        features = self.dense_blocks(self.conv1(x))
        return self.output_net(features)

In [None]:
# def DenseNet121():
#     return DenseNet2(Bottleneck, [2,2,2,2], growth_rate=32)

# input = torch.randn(3,224,224)
# blk2 = DenseNet121()
# summary(blk2,(3,224,224))

In [None]:
def DenseNet121():
    """Build a DenseNet of Bottleneck layers with block sizes [6, 12, 24, 16] and growth rate 32."""
    layers_per_block = [6, 12, 24, 16]
    model = DenseNet(Bottleneck, layers_per_block, growth_rate=32)
    return model

def DenseNet169():
    """Build a DenseNet of Bottleneck layers with block sizes [6, 12, 32, 32] and growth rate 32."""
    layers_per_block = [6, 12, 32, 32]
    model = DenseNet(Bottleneck, layers_per_block, growth_rate=32)
    return model

def DenseNet201():
    """Build a DenseNet of Bottleneck layers with block sizes [6, 12, 48, 32] and growth rate 32."""
    layers_per_block = [6, 12, 48, 32]
    model = DenseNet(Bottleneck, layers_per_block, growth_rate=32)
    return model

def DenseNet161():
    """Build a DenseNet of Bottleneck layers with block sizes [6, 12, 36, 24] and growth rate 48."""
    layers_per_block = [6, 12, 36, 24]
    model = DenseNet(Bottleneck, layers_per_block, growth_rate=48)
    return model

def densenet_cifar():
    """Build a DenseNet of Bottleneck layers with block sizes [6, 12, 24, 16] and growth rate 12."""
    layers_per_block = [6, 12, 24, 16]
    model = DenseNet(Bottleneck, layers_per_block, growth_rate=12)
    return model