[Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf)

[Implementation guide](https://amaarora.github.io/posts/2020-08-02-densenets.html)

[Easy to understand blog post](https://chatbotslife.com/resnets-highwaynets-and-densenets-oh-my-9bb15918ee32)

In [1]:
# Colab only: mount Google Drive at /content/drive so the files stored there
# can be accessed through the local file system.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# Location of the assignment folder, relative to the "My Drive" root of the mounted Drive.
FOLDERNAME = "cs231n/assignments/assignment2/"
# Guard against the folder name being left unset.
assert FOLDERNAME is not None

In [3]:
import sys
# Add the assignment folder on Drive to the module search path so that
# Python files stored there can be imported.
sys.path.append("/content/drive/My Drive/{}".format(FOLDERNAME))

In [5]:
# Run get_datasets.sh from inside the datasets folder (presumably it downloads
# the data into the current directory - verify against the script), then move
# back to the assignment root so the relative paths used below resolve.
%cd /content/drive/My\ Drive/$FOLDERNAME/cs231n/datasets/
!bash get_datasets.sh
%cd /content/drive/My\ Drive/$FOLDERNAME

/content/drive/My Drive/cs231n/assignments/assignment2/cs231n/datasets
/content/drive/My Drive/cs231n/assignments/assignment2


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np
import torch.nn.functional as F

# Runtime configuration shared by the cells below.
USE_GPU = True          # set to False to stay on the CPU even when CUDA is available
dtype = torch.float32   # dtype the input batches are cast to before the forward pass
SEED = 0                # fixed seed for weight init, sampling order and dropout

# Seed PyTorch's global random number generator so that a fresh
# "Restart & Run All" starts from the same random state every time.
torch.manual_seed(SEED)

# Use the GPU only when it is both requested and actually present.
device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")

# Number of training iterations between two progress reports.
print_every = 100

print("using device:", device)

using device: cuda


In [7]:
# Images [0, NUM_TRAIN) of the CIFAR-10 training split are used for training,
# images [NUM_TRAIN, 50000) of the same split are held out for validation.
NUM_TRAIN = 49000

# Convert each image to a tensor, then normalise every channel with the given
# mean / std (presumably the CIFAR-10 per-channel statistics - TODO confirm).
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

# Training data: random batches drawn from the first NUM_TRAIN images.
cifar10_train = dset.CIFAR10(root="./cs231n/datasets", train=True, download=True, transform=transform)
loader_train = DataLoader(
    cifar10_train,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)),
)

# Validation data: the remaining images of the training split.
cifar10_val = dset.CIFAR10(root="./cs231n/datasets", train=True, download=True, transform=transform)
loader_val = DataLoader(
    cifar10_val,
    batch_size=64,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)),
)

# Test data: the separate test split, iterated in its stored order.
cifar10_test = dset.CIFAR10(root="./cs231n/datasets", train=False, download=True, transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [8]:
from collections import OrderedDict

In [57]:
class TransitionLayer(nn.Sequential):
  """
  Transition stage placed between two dense blocks.

  Runs NORM->ReLU->CONV(1x1)->POOL: the 1x1 convolution changes the number of
  activation maps and the 2x2 average pooling halves the spatial resolution.
  """

  def __init__(self, in_channels, out_channels):
    """
    Args:
      in_channels (int): number of activation maps entering the layer
      out_channels (int): number of activation maps produced by the 1x1 convolution
    """
    super().__init__()

    # Registration order is the execution order of nn.Sequential.
    stages = (
        ('norm', nn.BatchNorm2d(in_channels)),
        ('relu', nn.ReLU(inplace=True)),
        ('conv', nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)),
        ('pool', nn.AvgPool2d(kernel_size=2)),
    )
    for stage_name, stage in stages:
      self.add_module(stage_name, stage)

class DenseLayer(nn.Module):
  """
  Single bottleneck layer of a DenseBlock: NORM->ReLU->CONV(1x1)->NORM->ReLU->CONV(3x3).
  """

  def __init__(self, in_channels, growth_rate, bottleneck_size, drop_rate):
    """
    The 1x1 convolution first brings the input depth to growth_rate*bottleneck_size
    so that the 3x3 convolution that follows works on a fixed number of maps,
    whatever the depth of the concatenated input.

    Args:
      in_channels (int): total number of channels of the (concatenated) input
      growth_rate (int): number of activation maps this layer produces
      bottleneck_size (int): multiplier giving the depth after the 1x1 convolution
      drop_rate (float): dropout probability applied to the output; 0 disables dropout
    """
    super().__init__()

    bottleneck_channels = growth_rate*bottleneck_size

    # 1x1 bottleneck
    self.norm1 = nn.BatchNorm2d(in_channels)
    self.relu1 = nn.ReLU(inplace=True)
    self.conv1 = nn.Conv2d(in_channels, bottleneck_channels, kernel_size=1, bias=False)

    # 3x3 convolution; padding=1 keeps H and W unchanged
    self.norm2 = nn.BatchNorm2d(bottleneck_channels)
    self.relu2 = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(bottleneck_channels, growth_rate, kernel_size=3, padding=1, bias=False)

    self.drop_rate = float(drop_rate)

  def forward(self, x):
    """
    Args:
          x (Tensor or list of Tensors): either one tensor of dim (N, in_channels, H, W)
                      or a list of tensors of dim (N, C_i, H, W) whose channel counts
                      sum to in_channels; list entries are concatenated along dim 1

    Returns:
          output (Tensor): Output data of dim (N, growth_rate, H, W)
    """
    feature_maps = x if not torch.is_tensor(x) else [x]
    stacked = torch.cat(feature_maps, 1)

    bottleneck = self.conv1(self.relu1(self.norm1(stacked)))
    output = self.conv2(self.relu2(self.norm2(bottleneck)))

    if self.drop_rate <= 0:
      return output
    return F.dropout(output, p=self.drop_rate, training=self.training)

class DenseBlock(nn.ModuleDict):
  """
  Stack of DenseLayers in which every layer sees the block input together with
  the outputs of all layers before it.
  """

  def __init__(self, num_layers, in_channels, growth_rate, bottleneck_size, drop_rate):
    """
    Creates num_layers DenseLayers that share growth_rate, bottleneck_size and drop_rate.
    Layer k (1-based) takes in_channels + (k-1)*growth_rate input channels, because
    each preceding layer contributes growth_rate extra activation maps.

    Args:
      num_layers (int): number of DenseLayers in the block
      in_channels (int): number of channels of the block input
      growth_rate (int): number of activation maps each layer adds
      bottleneck_size (int): multiplier giving the depth after the 1x1 convolution of each layer
      drop_rate (float): dropout probability passed to every layer
    """

    super().__init__()

    for position in range(1, num_layers + 1):
      layer_inputs = in_channels + (position - 1)*growth_rate
      self.add_module(
          f"danselayer{position}",
          DenseLayer(layer_inputs, growth_rate, bottleneck_size, drop_rate),
      )

  def forward(self, x):
    """
    Args:
          x (Tensor): Input data of dim (N, in_channels, H, W).

    Returns:
          output (Tensor): the block input concatenated with the output of every
                           layer along dim 1, i.e. data of dim
                           (N, in_channels + num_layers*growth_rate, H, W)
    """
    collected = [x]

    # Each layer receives everything gathered so far; its result joins the list
    # only after the layer has run.
    for layer in self.values():
      collected.append(layer(collected))

    return torch.cat(collected, 1)

class DenseNet(nn.Module):
  """
  DenseNet image classifier.

  Architecture:
    1. NORM->ReLU->CONV
    2. DenseBlock->[TransitionLayer->DenseBlock] * N
    3. NORM->ReLU->POOL->LINEAR
  """

  def __init__(self, block_config=(12,12,12), in_channels=32, growth_rate=16, bottleneck_size=4, drop_rate=0, num_classes=10):
    """
    The first sequence NORM->ReLU->CONV produces in_channels activation maps that are fed into the sequence of blocks,
    each with the number of layers given by block_config, separated by transition layers.
    Every transition layer halves the number of activation maps (integer division by 2).
    After the last dense block, global average pooling is applied before the linear classifier.

    Args:
        block_config (tuple): number of layers for each consecutive dense block;
                              may be empty, in which case the first convolution feeds the final NORM->ReLU directly
        in_channels (int): number of activation maps produced by the first convolution
        growth_rate (int): number of activation maps each dense layer adds
        bottleneck_size (int): multiplier giving the depth after the 1x1 convolution of each dense layer
        drop_rate (float): dropout probability passed to every dense layer
        num_classes (int): number of output scores
    """
    super().__init__()

    self.features = nn.Sequential(OrderedDict([
        ('norm0', nn.BatchNorm2d(3)),
        ('relu0', nn.ReLU(inplace=True)),
        ('conv0', nn.Conv2d(3, in_channels, kernel_size=7, padding=3, bias=False))
    ]))

    # Running count of activation maps entering the next stage.
    num_features = in_channels
    num_blocks = len(block_config)

    for i, num_layers in enumerate(block_config):
      block = DenseBlock(num_layers, num_features, growth_rate, bottleneck_size, drop_rate)
      self.features.add_module(f'denseblock{i+1}', block)
      num_features += num_layers*growth_rate

      # No transition after the last block.
      if i != num_blocks - 1:
        trans = TransitionLayer(num_features, num_features//2)
        self.features.add_module(f'transition{i+1}', trans)
        num_features = num_features//2

    # The suffix is derived from the number of blocks instead of the loop variable,
    # which is undefined when block_config is empty. For a non-empty block_config
    # this yields the same names as before (e.g. norm4/relu4 for three blocks).
    final_index = num_blocks + 1
    self.features.add_module(f'norm{final_index}', nn.BatchNorm2d(num_features))
    self.features.add_module(f'relu{final_index}', nn.ReLU(inplace=True))

    self.classifier = nn.Linear(num_features, num_classes)

    # Weight initialisation: Kaiming-normal for convolution and linear weights,
    # scale 1 / shift 0 for batch norm. The linear bias keeps its default init.
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)

  def forward(self, x):
    """
    Args:
      x(Tensor): input data of dim (N, 3, H, W)

    Returns:
      Out(Tensor): class scores of dim (N, num_classes)
    """
    # Global average pooling collapses each activation map to a single value.
    out = F.adaptive_avg_pool2d(self.features(x), (1,1))
    out = self.classifier(torch.flatten(out, start_dim=1))

    return out


In [62]:
# Step size handed to Adam.
# NOTE(review): 0.02 is well above the optimizer's default of 1e-3 - confirm this is intentional.
learning_rate = 0.02

# DenseNet with its default hyperparameters; Adam updates all of its parameters.
model = DenseNet()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [63]:
def check_accuracy(loader, model):
  """
  Measure the classification accuracy of `model` over every batch of `loader`
  and print a one-line summary.

  The model is moved to the global `device` and switched to eval mode. It is
  left in eval mode on return, so callers have to call model.train() again
  before they continue training.

  Args:
    loader (DataLoader): yields (x, y) batches. Its dataset must expose a boolean
                         `train` attribute, which is only used to choose the printed
                         label (a dataset built from the training split is reported
                         as "validation set")
    model (nn.Module): returns per-class scores of dim (N, num_classes)

  Returns:
    acc (float): fraction of correctly classified samples, between 0 and 1

  Raises:
    ZeroDivisionError: if the loader yields no samples
  """
  if loader.dataset.train:
    print("Checking accuracy on validation set")
  else:
    print("Checking accuracy on test set")

  num_correct, num_samples = 0, 0
  model.to(device=device)
  model.eval()
  with torch.no_grad():
    for x, y in loader:
      x = x.to(device=device, dtype=dtype)
      y = y.to(device=device, dtype=torch.long)
      scores = model(x)
      _, preds = scores.max(1)
      # .item() keeps the running count a plain Python int, so the ratio below
      # is computed in double precision instead of as a float32 tensor division.
      num_correct += (y==preds).sum().item()
      num_samples += preds.size(0)

  acc = num_correct/num_samples
  print(("Got %d / %d correct (%.2f)") % (num_correct, num_samples, acc*100))
  return acc

In [64]:
def train(model, optimizer, epochs=1):
  """
  Train `model` on the batches of the global `loader_train` with cross-entropy loss.

  Every `print_every` iterations (counted from 0 within each epoch) the current
  loss is printed and the accuracy on `loader_val` is checked.

  Args:
    model (nn.Module): network to train; it is moved to the global `device`
    optimizer (torch.optim.Optimizer): optimizer that updates the parameters of the model
    epochs (int): number of passes over loader_train

  Returns:
    None
  """
  model = model.to(device=device)

  for _ in range(epochs):
    for step, (images, labels) in enumerate(loader_train):
      # check_accuracy switches the model to eval mode, so training mode is
      # restored at the start of every iteration.
      model.train()
      images = images.to(device=device, dtype=dtype)
      labels = labels.to(device=device, dtype=torch.long)

      loss = F.cross_entropy(model(images), labels)

      # Clear the gradients of the previous step before computing new ones.
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if step % print_every != 0:
        continue

      print("Iteration %d, loss = %.4f" % (step, loss.item()))
      check_accuracy(loader_val, model)

In [65]:
# Ten passes over loader_train; loss and validation accuracy are printed every print_every iterations.
train(model, optimizer, epochs=10)

Iteration 0, loss = 2.5389
Checking accuracy on validation set
Got 87 / 1000 correct (8.70)
Iteration 100, loss = 2.0260
Checking accuracy on validation set
Got 219 / 1000 correct (21.90)
Iteration 200, loss = 1.8031
Checking accuracy on validation set
Got 283 / 1000 correct (28.30)
Iteration 300, loss = 1.7538
Checking accuracy on validation set
Got 281 / 1000 correct (28.10)
Iteration 400, loss = 1.6534
Checking accuracy on validation set
Got 355 / 1000 correct (35.50)
Iteration 500, loss = 1.4788
Checking accuracy on validation set
Got 407 / 1000 correct (40.70)
Iteration 600, loss = 1.5120
Checking accuracy on validation set
Got 402 / 1000 correct (40.20)
Iteration 700, loss = 1.5767
Checking accuracy on validation set
Got 407 / 1000 correct (40.70)
Iteration 0, loss = 1.7096
Checking accuracy on validation set
Got 291 / 1000 correct (29.10)
Iteration 100, loss = 1.2818
Checking accuracy on validation set
Got 455 / 1000 correct (45.50)
Iteration 200, loss = 1.2541
Checking accuracy

In [66]:
# NOTE: best_model is only another name for the model as it stands after the last
# training step; no selection based on validation accuracy happens here.
best_model = model
# Final evaluation on the held-out test split.
check_accuracy(loader_test, best_model)

Checking accuracy on test set
Got 8220 / 10000 correct (82.20)


0.8219999670982361