In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F 
import torchviz
import sys; sys.path.insert(0, '../')
from exp import nb_d2l_utils

In [2]:
torch.__version__

'1.3.1'

In [3]:
class DenseLayer(nn.Module):
    """One DenseNet layer: BN -> ReLU -> 1x1 conv (bottleneck) -> BN -> ReLU -> 3x3 conv.

    The layer consumes the channel-wise concatenation of all feature maps
    handed to it and emits ``growth_rate`` new channels at the same spatial
    resolution (the 3x3 conv uses stride 1 and padding 1).
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        """
        num_input_features (int) - channels of the concatenated input
        growth_rate (int) - number of channels this layer produces
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout probability applied to the output;
          values <= 0 disable dropout
        """
        super().__init__()
        bottleneck_features = bn_size * growth_rate
        # bottle neck: 1x1 conv reduces the concatenated input to bn_size * growth_rate channels
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features, bottleneck_features,
                                           kernel_size=1, stride=1,
                                           bias=False))
        # 3x3 conv produces the growth_rate new feature maps
        self.add_module('norm2', nn.BatchNorm2d(bottleneck_features))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bottleneck_features, growth_rate,
                                           kernel_size=3, stride=1, padding=1,
                                           bias=False))
        self.drop_rate = float(drop_rate)

    def forward(self, x):
        """Run the layer on a tensor, or on a list of tensors that is concatenated on dim 1 first."""
        if isinstance(x, torch.Tensor):
            prev_features = [x]
        else:
            prev_features = x
        x = torch.cat(prev_features, 1)
        x = self.conv1(self.relu1(self.norm1(x)))
        x = self.conv2(self.relu2(self.norm2(x)))
        if self.drop_rate > 0:
            # F.dropout defaults to training=True; tie it to the module's mode
            # so that dropout is switched off after model.eval().
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        return x
    
class DenseBlock(nn.Module):
    """A stack of ``num_layers`` dense layers with dense connectivity.

    Layer ``k`` (1-based) is registered as ``denselayer<k>`` and receives
    ``num_input_features + (k - 1) * growth_rate`` input channels. The block
    returns its input and every layer's output concatenated along dim 1.
    """

    def __init__(self, num_layers, num_input_features, growth_rate, bn_size=4, drop_rate=0):
        super().__init__()
        for position in range(1, num_layers + 1):
            # Each earlier layer contributed growth_rate extra channels.
            in_channels = num_input_features + (position - 1) * growth_rate
            self.add_module(
                'denselayer%d' % position,
                DenseLayer(
                    in_channels,
                    growth_rate=growth_rate,
                    bn_size=bn_size,
                    drop_rate=drop_rate,
                ),
            )

    def forward(self, x):
        """Feed the growing list of feature maps through every child in registration order."""
        collected = [x]
        for dense_layer in self.children():
            new_features = dense_layer(collected)
            collected.append(new_features)
        return torch.cat(collected, 1)

class Transition(nn.Sequential):
    """Transition between dense blocks: BN -> ReLU -> 1x1 conv -> 2x2 average pool.

    The 1x1 conv maps ``num_in_feat`` channels to ``num_out_feat``; the
    stride-2 average pool halves the spatial resolution.
    """

    def __init__(self, num_in_feat, num_out_feat):
        super().__init__()
        stages = (
            ('norm', nn.BatchNorm2d(num_in_feat)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_in_feat, num_out_feat, kernel_size=1, stride=1)),
            ('avgpool', nn.AvgPool2d(kernel_size=2, stride=2)),
        )
        # Register in order so the Sequential executes norm -> relu -> conv -> avgpool.
        for label, stage in stages:
            self.add_module(label, stage)

In [4]:
# Smoke test: one DenseLayer on a 3-channel 224x224 input yields growth_rate (16) channels at the same spatial size.
DenseLayer(3, 16, 4, 0)(torch.randn((1,3,224,224))).shape

torch.Size([1, 16, 224, 224])

In [5]:
# Smoke test: a 2-layer block returns its input plus each layer's output concatenated on dim 1 (3 + 2*16 = 35 channels).
DenseBlock(2, 3, 16, 4, 0)(torch.randn((1,3,224,224))).shape

torch.Size([1, 35, 224, 224])

In [6]:
from collections import OrderedDict

class DenseNet(nn.Module):
    """DenseNet classifier for 3-channel images.

    ``features`` holds a stem (7x7 stride-2 conv, BN, ReLU, 3x3 stride-2 max
    pool) followed by one DenseBlock per entry of ``block_config``; every
    block except the last is followed by a Transition that halves the channel
    count. ``final`` applies BN, ReLU, global average pooling, flatten and a
    linear layer producing ``num_classes`` outputs.

    num_init_features (int) - channels produced by the stem conv
    block_config (sequence of int) - number of dense layers in each block
    growth_rate (int) - channels added by each dense layer
    num_classes (int) - size of the final linear output
    bn_size (int) - bottleneck width multiplier forwarded to each DenseBlock
    drop_rate (float) - dropout rate forwarded to each DenseBlock
    """

    def __init__(self, num_init_features, block_config, growth_rate, num_classes=10, bn_size=4, drop_rate=0):
        super().__init__()

        # Stem
        stem = OrderedDict()
        stem['conv0'] = nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)
        stem['norm0'] = nn.BatchNorm2d(num_init_features)
        stem['relu0'] = nn.ReLU(inplace=True)
        # kernel=3, padding=1; AdaptiveMaxPool2d cannot be used here
        stem['max_pool0'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.features = nn.Sequential(stem)

        channels = num_init_features
        total_stages = len(block_config)
        for stage, depth in enumerate(block_config, start=1):
            # dense block: adds depth * growth_rate channels
            block = DenseBlock(depth, channels, growth_rate, bn_size, drop_rate)
            self.features.add_module('dense_block%d' % stage, block)
            channels = channels + depth * growth_rate
            # transition layer after every block but the last: halves the channels
            if stage != total_stages:
                reduced = channels // 2
                self.features.add_module('transition%d' % stage, Transition(channels, reduced))
                channels = reduced

        # Final normalization and classification head
        head = OrderedDict()
        head['norm5'] = nn.BatchNorm2d(channels)
        head['relu5'] = nn.ReLU(inplace=True)
        head['avgpool5'] = nn.AdaptiveAvgPool2d(1)
        head['flatten'] = nn.Flatten(1)
        head['linear'] = nn.Linear(channels, num_classes)
        self.final = nn.Sequential(head)

    def forward(self, x):
        """Apply the feature extractor and then the classification head."""
        return self.final(self.features(x))

In [7]:
# 64 stem channels, four dense blocks of 6/12/24/16 layers, growth rate 32; num_classes keeps its default of 10.
model = DenseNet(64, (6, 12, 24, 16), 32)

In [8]:
model

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (max_pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (dense_block1): DenseBlock(
      (denselayer1): DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (re

In [9]:
x = torch.randn((1,3,224,224))

In [10]:
# End-to-end forward pass: the single input image maps to one score per class (10 by default).
model(x).shape

torch.Size([1, 10])

In [11]:
def remove_sequential(network, all_layers=None):
    """Flatten ``network`` into an ordered list of modules to apply one after another.

    Plain ``nn.Sequential`` containers are expanded recursively. DenseBlock
    and Transition modules are kept whole, and modules without children are
    kept as leaves. Any other container that has children is skipped.

    network - module whose children are walked
    all_layers (list or None) - accumulator used by the recursion; a new list
      is created when omitted

    Returns the accumulator list.
    """
    if all_layers is None:
        all_layers = []
    for layer in network.children():
        # The branches are mutually exclusive so that a module is never added
        # twice (e.g. a DenseBlock with zero layers is also childless).
        if type(layer) in (DenseBlock, Transition):
            # composite units that are reported as a single step
            all_layers.append(layer)
        elif not list(layer.children()):
            # leaf node, add it to list
            all_layers.append(layer)
        elif type(layer) is nn.Sequential:
            # exact-type check on purpose: Transition subclasses nn.Sequential
            # but must not be expanded
            remove_sequential(layer, all_layers)
    return all_layers

def layer_description(model, x):
    """Push ``x`` through the flattened layers of ``model`` and print each layer's class name and output shape."""
    flattened = remove_sequential(model)
    for module in flattened:
        x = module(x)
        print(type(module).__name__,'Output shape:\t',x.shape)

In [12]:
# Print the output shape after each flattened stage of the network for one random 3x224x224 input.
layer_description(model, x = torch.randn((1,3,224,224)))

Conv2d Output shape:	 torch.Size([1, 64, 112, 112])
BatchNorm2d Output shape:	 torch.Size([1, 64, 112, 112])
ReLU Output shape:	 torch.Size([1, 64, 112, 112])
MaxPool2d Output shape:	 torch.Size([1, 64, 56, 56])
DenseBlock Output shape:	 torch.Size([1, 256, 56, 56])
Transition Output shape:	 torch.Size([1, 128, 28, 28])
DenseBlock Output shape:	 torch.Size([1, 512, 28, 28])
Transition Output shape:	 torch.Size([1, 256, 14, 14])
DenseBlock Output shape:	 torch.Size([1, 1024, 14, 14])
Transition Output shape:	 torch.Size([1, 512, 7, 7])
DenseBlock Output shape:	 torch.Size([1, 1024, 7, 7])
BatchNorm2d Output shape:	 torch.Size([1, 1024, 7, 7])
ReLU Output shape:	 torch.Size([1, 1024, 7, 7])
AdaptiveAvgPool2d Output shape:	 torch.Size([1, 1024, 1, 1])
Flatten Output shape:	 torch.Size([1, 1024])
Linear Output shape:	 torch.Size([1, 10])


In [13]:
# x = torch.randn((1,3,224,224))
# torchviz.make_dot(model(x), params=dict(list(model.named_parameters()) + [('x', x)]))

In [14]:
# x = torch.randn((1,1,28,28))
# model = LeNet()

# with torch.onnx.set_training(model, False):
#     trace, _ = torch.jit.get_trace_graph(model, args=(x,))
# torchviz.make_dot_from_trace(trace)


In [15]:
# Persist whole module objects (not just state_dicts) at three granularities:
# the full network, its first dense block, and that block's first dense layer.
# NOTE(review): the 'models/' directory must already exist; nothing in this cell creates it.
# NOTE(review): whole-module pickles presumably need these class definitions importable at load time,
# which may be what the warnings printed below refer to; confirm before relying on these files elsewhere.
torch.save(model, 'models/densenet.pt')
torch.save(model.features.dense_block1, 'models/denseblock.pt')
torch.save(model.features.dense_block1.denselayer1, 'models/denselayer.pt')

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [16]:
# Export the network to ONNX using a fixed-size dummy input (1x3x224x224);
# verbose=True prints the exported graph, which is the output shown below.
dummy_input = torch.randn((1,3,224,224))
torch.onnx.export(model, dummy_input, "models/densenet.onnx", verbose=True)

graph(%input.1 : Float(1, 3, 224, 224),
      %features.conv0.weight : Float(64, 3, 7, 7),
      %features.norm0.weight : Float(64),
      %features.norm0.bias : Float(64),
      %features.norm0.running_mean : Float(64),
      %features.norm0.running_var : Float(64),
      %features.norm0.num_batches_tracked : Long(),
      %features.dense_block1.denselayer1.norm1.weight : Float(64),
      %features.dense_block1.denselayer1.norm1.bias : Float(64),
      %features.dense_block1.denselayer1.norm1.running_mean : Float(64),
      %features.dense_block1.denselayer1.norm1.running_var : Float(64),
      %features.dense_block1.denselayer1.norm1.num_batches_tracked : Long(),
      %features.dense_block1.denselayer1.conv1.weight : Float(128, 64, 1, 1),
      %features.dense_block1.denselayer1.norm2.weight : Float(128),
      %features.dense_block1.denselayer1.norm2.bias : Float(128),
      %features.dense_block1.denselayer1.norm2.running_mean : Float(128),
      %features.dense_block1.denselayer1