In [1]:
import torch
from torch import nn, optim
from torch.utils.data import random_split, DataLoader
from torchinfo import summary
from torchvision import datasets, transforms, models

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(DEVICE)

cuda


## Implementation

In [2]:
class BottleNeckBlock(nn.Module):
    """Pre-activation bottleneck unit: BN -> ReLU -> 1x1 conv -> BN -> ReLU -> 3x3 conv.

    The 1x1 convolution maps ``in_channels`` to ``4 * growth_rate`` channels; the
    3x3 convolution (padding 1, so the spatial size is preserved) then produces
    ``growth_rate`` output channels. Neither convolution has a bias term.

    Args:
        in_channels: number of channels of the incoming feature map.
        growth_rate: number of channels this unit outputs.
    """

    def __init__(self, in_channels, growth_rate):
        super().__init__()
        bottleneck_width = 4 * growth_rate

        # First stage: normalise, activate, widen/narrow to the bottleneck width.
        self.norm1 = nn.BatchNorm2d(in_channels)
        self.relu1 = nn.ReLU()
        self.conv1 = nn.Conv2d(in_channels, bottleneck_width, kernel_size=1, bias=False)

        # Second stage: normalise, activate, 3x3 conv down to `growth_rate` channels.
        self.norm2 = nn.BatchNorm2d(bottleneck_width)
        self.relu2 = nn.ReLU()
        self.conv2 = nn.Conv2d(bottleneck_width, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return the ``growth_rate``-channel feature map computed from ``x``."""
        bottleneck = self.conv1(self.relu1(self.norm1(x)))
        return self.conv2(self.relu2(self.norm2(bottleneck)))


class DenseBlock(nn.Module):
    """Stack of bottleneck units with dense (concatenative) connectivity.

    Unit ``i`` receives the block input concatenated, along the channel
    dimension, with the outputs of all earlier units, i.e.
    ``input_channels + i * growth_rate`` channels, and contributes
    ``growth_rate`` new channels. The block output therefore has
    ``input_channels + n_blocks * growth_rate`` channels.

    This is a plain ``nn.Module`` rather than an ``nn.Sequential``: the forward
    pass is not a simple chain, and the units live in ``self.blocks``, so the
    Sequential container API (len/indexing/iteration) would not describe them.
    Parameter names (``blocks.<i>. ...``) are the same either way.

    Args:
        input_channels: channels of the feature map entering the block.
        growth_rate: channels added by each bottleneck unit.
        n_blocks: number of bottleneck units.
    """

    def __init__(self, input_channels, growth_rate, n_blocks):
        super().__init__()
        self.blocks = nn.ModuleList(
            [BottleNeckBlock(input_channels + growth_rate * i, growth_rate) for i in range(n_blocks)])

    def forward(self, x):
        """Return ``x`` concatenated with every unit's output along dim 1."""
        featuremap_list = [x]
        for block in self.blocks:
            # Each unit sees everything produced so far, not just the previous output.
            block_in = torch.concat(featuremap_list, dim=1)
            block_out = block(block_in)
            featuremap_list.append(block_out)
        dense_out = torch.concat(featuremap_list, dim=1)
        return dense_out


class DenseNet(nn.Module):
    """DenseNet-style image classifier with four dense blocks of 6, 12, 24 and 16 units.

    Layout: 7x7 stride-2 convolution stem (64 channels) and stride-2 max-pool,
    then dense blocks separated by transition layers (BatchNorm, 1x1 conv,
    2x2 average pool), global average pooling and a linear classifier.

    All intermediate channel widths are derived from ``growth_rate`` and
    ``compression_factor``. For ``growth_rate=32`` and ``compression_factor=0.5``
    they are 256/128, 512/256, 1024/512 and a 1024-wide classifier input.

    Args:
        n_channels: number of channels of the input images.
        n_classes: number of output logits.
        growth_rate: channels added by every bottleneck unit.
        compression_factor: fraction of channels a transition layer keeps;
            the kept width is ``round(in_channels * compression_factor)``.
    """

    def __init__(self, n_channels, n_classes, growth_rate, compression_factor):
        super().__init__()
        stem_channels = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(n_channels, stem_channels, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(stem_channels),
            nn.ReLU(),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Track the running channel width instead of hard-coding it, so that
        # other growth rates / compression factors produce matching layers.
        channels = stem_channels
        self.dense_block1 = DenseBlock(channels, growth_rate, 6)
        channels += 6 * growth_rate
        self.transition1 = self.make_transition(channels, compression_factor)
        channels = self._compressed_channels(channels, compression_factor)

        self.dense_block2 = DenseBlock(channels, growth_rate, 12)
        channels += 12 * growth_rate
        self.transition2 = self.make_transition(channels, compression_factor)
        channels = self._compressed_channels(channels, compression_factor)

        self.dense_block3 = DenseBlock(channels, growth_rate, 24)
        channels += 24 * growth_rate
        self.transition3 = self.make_transition(channels, compression_factor)
        channels = self._compressed_channels(channels, compression_factor)

        self.dense_block4 = DenseBlock(channels, growth_rate, 16)
        channels += 16 * growth_rate

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(channels, n_classes)
        )

    @staticmethod
    def _compressed_channels(in_channels, compression_factor):
        """Channel count produced by a transition layer fed ``in_channels`` channels."""
        return round(in_channels * compression_factor)

    def make_transition(self, in_channels, compression_factor):
        """Build a transition layer: BatchNorm, 1x1 conv that compresses channels, 2x2 average pool."""
        layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.Conv2d(in_channels, self._compressed_channels(in_channels, compression_factor),
                      kernel_size=1, bias=False),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        return layer

    def forward(self, x):
        """Map a batch of images to class logits of shape ``(batch, n_classes)``."""
        # Stem: stride-2 convolution followed by stride-2 max-pool.
        x = self.conv1(x)
        x = self.maxpool(x)
        # Dense blocks widen the feature map; each transition compresses channels
        # and halves the spatial size.
        x = self.dense_block1(x)
        x = self.transition1(x)
        x = self.dense_block2(x)
        x = self.transition2(x)
        x = self.dense_block3(x)
        x = self.transition3(x)
        x = self.dense_block4(x)
        # Global average pool to 1x1, then flatten + linear head.
        x = self.avgpool(x)
        x = self.classifier(x)
        return x

In [3]:
# Smoke test: one random 224x224 RGB image must map to 1000 logits.
x_batch = torch.randn(1, 3, 224, 224)

model = DenseNet(3, 1000, 32, 0.5)
with torch.no_grad():  # shape check only, so skip building the autograd graph
    logits = model(x_batch)
assert logits.shape == (1, 1000), logits.shape
print(logits.shape)


torch.Size([1, 1000])


In [4]:
# Layer-by-layer report of the hand-written model, for comparison with the
# torchvision reference summarised in the next cell.
densenet_model = DenseNet(n_channels=3, n_classes=1000, growth_rate=32, compression_factor=0.5)
summary(
    densenet_model,
    input_size=(1, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'mult_adds'],
    depth=3,
    device='cpu',
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Mult-Adds
DenseNet                                 [1, 3, 224, 224]          [1, 1000]                 --                        --
├─Sequential: 1-1                        [1, 3, 224, 224]          [1, 64, 112, 112]         --                        --
│    └─Conv2d: 2-1                       [1, 3, 224, 224]          [1, 64, 112, 112]         9,408                     118,013,952
│    └─BatchNorm2d: 2-2                  [1, 64, 112, 112]         [1, 64, 112, 112]         128                       128
│    └─ReLU: 2-3                         [1, 64, 112, 112]         [1, 64, 112, 112]         --                        --
├─MaxPool2d: 1-2                         [1, 64, 112, 112]         [1, 64, 56, 56]           --                        --
├─DenseBlock: 1-3                        [1, 64, 56, 56]           [1, 256, 56, 56]          --                        --
│    └─

In [5]:
# Same report for torchvision's densenet121, using identical summary settings
# so the two tables can be compared line by line.
densenet_torch_model = models.densenet121()
summary(
    densenet_torch_model,
    input_size=(1, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'mult_adds'],
    depth=3,
    device='cpu',
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Mult-Adds
DenseNet                                 [1, 3, 224, 224]          [1, 1000]                 --                        --
├─Sequential: 1-1                        [1, 3, 224, 224]          [1, 1024, 7, 7]           --                        --
│    └─Conv2d: 2-1                       [1, 3, 224, 224]          [1, 64, 112, 112]         9,408                     118,013,952
│    └─BatchNorm2d: 2-2                  [1, 64, 112, 112]         [1, 64, 112, 112]         128                       128
│    └─ReLU: 2-3                         [1, 64, 112, 112]         [1, 64, 112, 112]         --                        --
│    └─MaxPool2d: 2-4                    [1, 64, 112, 112]         [1, 64, 56, 56]           --                        --
│    └─_DenseBlock: 2-5                  [1, 64, 56, 56]           [1, 256, 56, 56]          --                        --
│    │ 

## Training

In [1]:
from pathlib import Path

TRAIN_RATIO = 0.8
SPLIT_SEED = 42  # keeps the train/validation partition identical across kernel restarts
data_dir = Path('./data/')

# Resize to 224 so the images match the input resolution the model was checked with.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
])

full_train_ds = datasets.CIFAR100(data_dir, train=True, download=True, transform=transform)

# Use explicit integer lengths: fractional lengths are subject to float rounding
# (1 - 0.8 is not exactly 0.2), which can shift a sample between the two splits.
n_train = round(TRAIN_RATIO * len(full_train_ds))
n_val = len(full_train_ds) - n_train
train_ds, val_ds = random_split(
    full_train_ds,
    (n_train, n_val),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
assert len(train_ds) + len(val_ds) == len(full_train_ds)
# Both splits are Subset views that read samples through `full_train_ds`, so they
# already receive `transform`; assigning `val_ds.transform` would have no effect.

test_ds = datasets.CIFAR100(data_dir, train=False, download=True, transform=transform)

NameError: name 'transforms' is not defined

In [7]:
import wandb
# NOTE(review): `train` below presumably comes from this wildcard import; switch to an
# explicit `from src.engine import train` once no other names from it are needed.
from src.engine import *

config = dict(batch_size=64, lr=5e-4, epochs=20, dataset='CIFAR100')
with wandb.init(project='pytorch-study', name='DenseNet', config=config) as run:
    # Read hyper-parameters back from the run so the logged config is what is actually used.
    w_config = run.config
    train_dl = DataLoader(train_ds, batch_size=w_config.batch_size, shuffle=True)
    # Validation is not shuffled: a fixed order keeps evaluation repeatable.
    # Assumes train() only evaluates on val_dl - TODO confirm.
    val_dl = DataLoader(val_ds, batch_size=w_config.batch_size, shuffle=False)

    # train_ds is a Subset; the class list lives on the wrapped dataset.
    n_classes = len(train_ds.dataset.classes)
    densenet_model = DenseNet(3, n_classes, 32, 0.5).to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(densenet_model.parameters(), lr=w_config.lr)

    loss_history, acc_history = train(densenet_model, train_dl, val_dl, criterion, optimizer, w_config.epochs, DEVICE, run)

Epoch=20: 100%|██████████| 20/20 [1:41:46<00:00, 305.32s/it, train_loss=0.318, train_acc=89.43%, val_loss=2.413, val_acc=55.56%]
