In [6]:
import torch
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.nn.functional as F # All functions that don't have any parameters
from collections import OrderedDict # For saving the model

In [2]:
# Fix the global RNG seed so that weight initialisation is repeatable between runs
torch.manual_seed(101)

# Record whether a CUDA device is usable; later cells can branch on this flag
train_on_gpu = torch.cuda.is_available()

# Report which device type was detected
print('CUDA is available! Training on GPU ...' if train_on_gpu
      else 'CUDA is not available. Training on CPU ...')

# Defining the transition layers for the DenseNet which do the downsampling (1x1 convolution and pooling)
class _Transition(nn.Sequential):
    """
    Transition stage placed between dense blocks.

    Runs, in order: batch normalisation -> ReLU -> 1x1 convolution (changes the
    channel count from in_channels to out_channels) -> 2x2 average pooling with
    stride 2 (halves the spatial resolution).

    Args:
        in_channels (int) - number of channels of the incoming feature map
        out_channels (int) - number of channels produced by the 1x1 convolution
    """

    def __init__(self, in_channels, out_channels):
        super(_Transition, self).__init__()
        # Registration order matters: nn.Sequential executes children in the order they were added
        stages = (
            ('norm', nn.BatchNorm2d(in_channels)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False)),
            ('pool', nn.AvgPool2d(kernel_size=2, stride=2)),
        )
        for stage_name, stage in stages:
            self.add_module(stage_name, stage)

CUDA is not available. Training on CPU ...


In [3]:
# implementing the dense layer inside the dense block
class _DenseLayer(nn.Sequential):
    """
    One layer of a dense block: BN -> ReLU -> 1x1 conv (bottleneck) followed by
    BN -> ReLU -> 3x3 conv, with optional dropout on the result.

    The layer consumes the concatenation of all previously produced feature maps
    and emits `growth_rate` new channels with unchanged spatial size.
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient = False ):
        """
        Function for initializing the dense layer
        Args:
            num_input_features (int) - how many input features
            growth_rate (int) - how many filters to add each layer (k in paper)
            bn_size (int) - multiplicative factor for number of bottle neck layers
                (i.e. bn_size * k features in the bottleneck layer)
            drop_rate (float) - dropout rate after each dense layer
            memory_efficient (bool) - stored on the layer as `self.memory_efficient`.
                NOTE: this implementation does not apply checkpointing; the flag is
                kept only so callers can pass it through.
        """
        super(_DenseLayer, self).__init__()
        bottleneck_channels = bn_size * growth_rate

        # Bottleneck stage: 1x1 convolution reduces the concatenated input to bn_size * k channels
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features,
                                           bottleneck_channels,
                                           kernel_size=1,
                                           stride=1, bias=False))

        # Second stage (previously missing, which made forward() raise AttributeError):
        # 3x3 convolution with padding=1 keeps the spatial size and outputs k channels
        self.add_module('norm2', nn.BatchNorm2d(bottleneck_channels))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bottleneck_channels,
                                           growth_rate,
                                           kernel_size=3,
                                           stride=1, padding=1, bias=False))

        # forward() reads these attributes, so they must be stored on the instance
        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs):
        """
        Function for bottleneck layer
        Args:
            inputs (list of torch.Tensor) - feature maps to concatenate along the channel axis
        Returns:
            torch.Tensor - output of norm1 -> relu1 -> conv1 applied to the concatenation
        """
        concated_features = torch.cat(inputs, 1) # concatenate the input features along dim 1 (channels)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # apply the 1x1 convolution
        return bottleneck_output

    def forward(self, input):
        """
        Function for forward pass of the dense layer
        Args:
            input (torch.Tensor or list of torch.Tensor) - a single feature map, or the
                list of all feature maps produced so far in the enclosing dense block
        Returns:
            torch.Tensor - the `growth_rate` new feature channels produced by this layer
        """
        if isinstance(input, torch.Tensor): # a bare tensor is wrapped so bn_function can always concatenate
            prev_features = [input]
        else:
            prev_features = input # already a sequence of tensors

        bottleneck_output = self.bn_function(prev_features) # apply the bottleneck layer
        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output))) # apply the 3x3 convolution

        if self.drop_rate > 0:
            # dropout is only active while the module is in training mode
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)

        return new_features

In [4]:
# Implementing a DenseBlock which consists of multiple DenseLayers

class _DenseBlock(nn.ModuleDict):
    """
    A stack of `_DenseLayer`s in which every layer receives the list of all
    feature maps produced before it (the block input plus each earlier layer's
    output), and the block returns their channel-wise concatenation.
    """
    _version = 2 # version of the DenseBlock

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
        """
        Function for initializing the DenseBlock
        Args:
            num_layers (int) - number of layers in the block
            num_input_features (int) - number of input features
            bn_size (int) - multiplicative factor for number of bottle neck layers
                (i.e. bn_size * k features in the bottleneck layer)
            growth_rate (int) - how many filters to add each layer (k in paper)
            drop_rate (float) - dropout rate after each dense layer
            memory_efficient (bool) - forwarded unchanged to every _DenseLayer
        """
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            # layer i sees the block input plus the growth_rate channels of each of the i earlier layers
            layer = _DenseLayer(num_input_features + i * growth_rate,
                                growth_rate=growth_rate,
                                bn_size=bn_size,
                                drop_rate=drop_rate,
                                memory_efficient=memory_efficient) # create a dense layer
            self.add_module('denselayer%d' % (i + 1), layer) # add the dense layer to the DenseBlock

    # NOTE: forward must be a method of the class. It was previously nested inside
    # __init__, so the block had no forward() and could not be called.
    def forward(self, init_features):
        """
        Function for forward pass of the DenseBlock
        Args:
            init_features (torch.Tensor) - input feature map of the block
        Returns:
            torch.Tensor - the input and every layer's output concatenated along dim 1
        """
        features = [init_features] # running list of all feature maps seen so far
        for name, layer in self.items():
            new_features = layer(features) # each layer consumes every earlier feature map
            features.append(new_features) # make this layer's output visible to the following layers
        return torch.cat(features, 1) # concatenate along the channel dimension

In [9]:
# Implementing DenseNet which is a combination of DenseBlocks and Transition Layers

class DenseNet(nn.Module):
    """
    DenseNet classifier: an initial convolution/pooling stem, a sequence of dense
    blocks separated by transition layers, a final batch normalisation, global
    average pooling and a linear classification layer.
    """

    def __init__(self, growth_rate = 32, block_config = (6,12,24,16),num_init_features=64, bn_size = 4, drop_rate = 0, num_classes = 1000,memory_efficient = False):
        """
        Function for initializing the DenseNet
        Args:
            growth_rate (int) - how many filters to add each layer (k in paper)
            block_config (tuple of ints) - how many layers in each dense block; one
                dense block is created per entry
            num_init_features (int) - the number of filters to learn in the first convolution layer
            bn_size (int) - multiplicative factor for number of bottle neck layers
                (i.e. bn_size * k features in the bottleneck layer)
            drop_rate (float) - dropout rate after each dense layer
            num_classes (int) - number of classification classes
            memory_efficient (bool) - forwarded unchanged to every _DenseBlock
        """
        super(DenseNet, self).__init__()

        # Stem: 7x7 stride-2 convolution on 3-channel input, then BN, ReLU and 3x3 stride-2 max pooling
        self.features = nn.Sequential(OrderedDict([
            ('conv0',nn.Conv2d(3,num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0',nn.BatchNorm2d(num_init_features)),
            ('relu0',nn.ReLU(inplace=True)),
            ('pool0',nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]
        ))

        # Add multiple dense blocks based on block configuration array (block_config)
        num_features = num_init_features
        last_block_index = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers,
                                num_input_features=num_features,
                                bn_size=bn_size,
                                growth_rate=growth_rate,
                                drop_rate=drop_rate,
                                memory_efficient=memory_efficient)
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate  # every layer of the block appends growth_rate channels

            if i != last_block_index: # no transition after the final dense block
                # Transition layer halves both the channel count and the spatial resolution.
                # Arguments are passed positionally because _Transition names its
                # parameters in_channels/out_channels; the former keyword call raised TypeError.
                trans = _Transition(num_features, num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2 # channel count after the transition layer

        # Final batch normalisation
        self.features.add_module('norm5',nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') # Kaiming-normal init for convolution weights
            elif isinstance(m,nn.BatchNorm2d):
                nn.init.constant_(m.weight,1) # batch-norm scale starts at 1
                nn.init.constant_(m.bias,0) # batch-norm shift starts at 0
            elif isinstance(m,nn.Linear):
                nn.init.constant_(m.bias,0) # only the bias is reset; the linear weight keeps its default init

    def forward(self,x):
        """
        Function for forward pass of the DenseNet
        Args:
            x (torch.Tensor) - input batch; the first convolution expects 3 input channels
        Returns:
            torch.Tensor - classifier output with one row per input sample and
                num_classes columns
        """
        features = self.features(x) # stem, dense blocks, transitions and final batch norm
        out = F.relu(features,inplace=True) # apply relu activation
        out = F.adaptive_avg_pool2d(out,(1,1)) # global average pooling to a 1x1 map per channel
        out = torch.flatten(out, 1) # drop the 1x1 spatial dims -> (batch, channels)
        out = self.classifier(out) # apply the linear layer
        return out

In [10]:
# Implementing DenseNet 121 using the DenseNet class

def _densenet(arch, growth_rate, block_config, num_init_features, pretrained, progress, **kwargs):
    """
    Function for building a DenseNet variant
    Args:
        arch (str) - name of the architecture (only used in the warning message below)
        growth_rate (int) - how many filters to add each layer (k in paper)
        block_config (tuple of ints) - how many layers in each dense block
        num_init_features (int) - the number of filters to learn in the first convolution layer
        pretrained (bool) - accepted for API compatibility. No pretrained weights are
            loaded here; if True, a UserWarning is emitted and the returned model is
            randomly initialised.
        progress (bool) - accepted for API compatibility; currently unused
        **kwargs (dict) - additional keyword arguments forwarded to DenseNet
    Returns:
        DenseNet - a freshly constructed model
    """
    if pretrained:
        # Previously this flag was ignored silently, so callers believed they had
        # ImageNet weights while actually receiving a randomly initialised network.
        import warnings  # local import: keeps this cell self-contained
        warnings.warn(
            "pretrained=True was requested for %s, but no pretrained weights are "
            "loaded by this implementation; returning a randomly initialised model." % arch,
            UserWarning,
            stacklevel=2,
        )
    model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
    return model

def densenet121(pretrained=False, progress=True, **kwargs):
    """
    Build the DenseNet-121 configuration.
    Args:
        pretrained (bool) - passed through to _densenet
        progress (bool) - passed through to _densenet
        **kwargs (dict) - additional keyword arguments passed through to _densenet
    Returns:
        whatever _densenet returns for this configuration
    """
    growth_rate = 32                  # k: channels added by each dense layer
    block_config = (6, 12, 24, 16)    # layers per dense block
    num_init_features = 64            # channels produced by the first convolution
    return _densenet("DenseNet121", growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)