In [1]:
import torch
import torch.nn as nn

In [3]:
class WiderBlock(nn.Module):
    """Pre-activation residual unit with dropout between its two 3x3 convolutions.

    The residual branch is
    BN -> ReLU -> Conv3x3(stride) -> BN -> ReLU -> Dropout -> Conv3x3,
    and its output is added to a shortcut that is either the untouched input
    or ``projection(x)``.

    Args:
        in_channels: Channel count of the incoming tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        projection: Optional module applied to the input on the shortcut path.
            ``None`` means the input itself is used, so it must already be
            addable to the residual branch output.
        drop_p: Dropout probability (author's note: 0.3 for CIFAR, 0.4 for SVHN).
    """

    def __init__(self, in_channels, out_channels, stride=1, projection=None, drop_p=0.3):
        super().__init__()

        # Built in the same order as they run; the positions in the Sequential
        # (0..6) determine the parameter names in the state dict.
        layers = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout(p=drop_p),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
        ]
        self.residual = nn.Sequential(*layers)
        self.projection = projection

    def forward(self, x):
        """Return ``residual(x)`` plus the (optionally projected) shortcut."""
        branch = self.residual(x)
        skip = x if self.projection is None else self.projection(x)
        return branch + skip

class WRN(nn.Module):
    """Wide residual network assembled from ``WiderBlock`` units.

    Layout: a 3x3 stem convolution (3 -> 16 channels), three stages of ``N``
    blocks with widths ``16*k``, ``32*k`` and ``64*k`` (stages 2 and 3 start
    with a stride-2 block), followed by BatchNorm -> ReLU -> global average
    pooling -> a linear classifier.

    Args:
        depth: Nominal depth; the number of blocks per stage is
            ``N = int((depth - 4) / 6)``. Depths that are not of the form
            ``6*N + 4`` are truncated, and every stage always contains at
            least one block even when ``N < 1``.
        k: Widening factor; stage widths are ``16*k``, ``32*k``, ``64*k``, so
            it must be an integer.
        num_classes: Size of the classifier output.
        init_weights: If true, re-initialise convolutions with Kaiming-normal
            (fan-out, ReLU) and the linear layer with N(0, 0.01) weights and
            zero bias.
        drop_p: Dropout probability forwarded to every ``WiderBlock``.
            Defaults to 0.3, the value the blocks used before this was
            configurable.
    """

    def __init__(self, depth, k, num_classes=1000, init_weights=True, drop_p=0.3):
        super().__init__()
        N = int((depth-4)/3/2)

        # Running channel count; make_stage reads it and advances it.
        self.in_channels = 16
        # Must be set before the make_stage calls below, which read it.
        self.drop_p = drop_p

        self.conv1 = nn.Conv2d(3, 16, 3, padding=1, bias = False)
        self.stage1 = self.make_stage(16*k, N, stride = 1)
        self.stage2 = self.make_stage(32*k, N, stride = 2)
        self.stage3 = self.make_stage(64*k, N, stride = 2)
        self.bn = nn.BatchNorm2d(64*k)
        self.relu = nn.ReLU(inplace=True)
        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(64*k, num_classes)

        # weight initialization
        if init_weights:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Run the stem, the three stages and the classifier head on ``x``."""
        x = self.conv1(x)
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        # The blocks are pre-activation, so the final BN/ReLU is applied here.
        x = self.bn(x)
        x = self.relu(x)
        x = self.avg_pool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return x

    def make_stage(self, out_channels, num_blocks, stride):
        """Build one stage and advance ``self.in_channels`` to ``out_channels``.

        The first block takes the stride and, when the stride or the channel
        count changes, a 1x1 convolution on its shortcut. The remaining
        ``num_blocks - 1`` blocks keep the shape and use an identity shortcut.
        All blocks use ``self.drop_p`` as their dropout probability.
        """

        if stride != 1 or self.in_channels != out_channels:
            projection = nn.Conv2d(self.in_channels, out_channels, 1, stride=stride, bias = False)
        else:
            projection = None

        layers = []
        layers += [WiderBlock(self.in_channels, out_channels, stride, projection, drop_p=self.drop_p)]
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers += [WiderBlock(self.in_channels, out_channels, drop_p=self.drop_p)]

        return nn.Sequential(*layers)

In [5]:
from torchinfo import summary

# Build the depth-28, k=10 network with a 10-way classifier, then print a
# per-layer summary for a batch of two 3x224x224 inputs on the CPU.
model = WRN(depth=28, k=10, num_classes=10)
summary(model, input_size=(2, 3, 224, 224), device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
WRN                                      [2, 10]                   --
├─Conv2d: 1-1                            [2, 16, 224, 224]         432
├─Sequential: 1-2                        [2, 160, 224, 224]        --
│    └─WiderBlock: 2-1                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-1              [2, 160, 224, 224]        253,792
│    │    └─Conv2d: 3-2                  [2, 160, 224, 224]        2,560
│    └─WiderBlock: 2-2                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-3              [2, 160, 224, 224]        461,440
│    └─WiderBlock: 2-3                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-4              [2, 160, 224, 224]        461,440
│    └─WiderBlock: 2-4                   [2, 160, 224, 224]        --
│    │    └─Sequential: 3-5              [2, 160, 224, 224]        461,440
├─Sequential: 1-3                        [2, 320, 112, 112]  

In [7]:
# Smoke test: push a random batch of two 3x32x32 inputs through the model and
# print the shape of what comes out.
x = torch.randn(2, 3, 32, 32)
print(model(x).size())

torch.Size([2, 10])
