In [1]:
import torch
import torch.nn as nn
from torchinfo import summary

In [3]:
class Bottleneck(nn.Module):
    """Residual bottleneck with a grouped 3x3 convolution in the middle.

    The residual branch is 1x1 conv -> BN -> ReLU -> grouped 3x3 conv -> BN ->
    ReLU -> 1x1 conv -> BN. Its result is added to the shortcut and passed
    through a final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        inner_channels: Width of the first 1x1 conv and of the grouped 3x3 conv.
            The block emits ``inner_channels * expansion`` channels.
        cardinality: Forwarded as ``groups`` to the 3x3 convolution.
        stride: Stride of the 3x3 convolution.
        projection: Optional module applied to the input to produce the
            shortcut. When ``None`` the input itself is the shortcut.
    """

    # Ratio between the block's output channels and ``inner_channels``.
    expansion = 2

    def __init__(self, in_channels, inner_channels, cardinality, stride = 1, projection = None):
        super().__init__()

        out_channels = inner_channels * self.expansion
        branch_layers = [
            # 1x1 convolution bringing the input to the inner width.
            nn.Conv2d(in_channels, inner_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(inner_channels),
            nn.ReLU(inplace=True),
            # Grouped 3x3 convolution; this is the only layer that applies `stride`.
            nn.Conv2d(
                inner_channels,
                inner_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=cardinality,
                bias=False,
            ),
            nn.BatchNorm2d(inner_channels),
            nn.ReLU(inplace=True),
            # 1x1 convolution widening to the block's output channels.
            nn.Conv2d(inner_channels, out_channels, kernel_size=1, bias=False),
            # No ReLU here: the activation is applied after the shortcut is added.
            nn.BatchNorm2d(out_channels),
        ]
        self.residual = nn.Sequential(*branch_layers)
        self.projection = projection
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut)``, where shortcut is ``projection(x)`` or ``x``."""
        branch = self.residual(x)
        shortcut = x if self.projection is None else self.projection(x)
        return self.relu(branch + shortcut)

class ResNeXt(nn.Module):
    """Image classifier built from a conv stem, four stages of `block`, and a linear head.

    Args:
        block: Block class used in every stage. It is called as
            ``block(in_channels, inner_channels, cardinality, stride, projection)``
            and must expose an ``expansion`` class attribute. When
            ``zero_init_residual`` is set, it must also expose a ``residual``
            sequence whose last layer has a ``weight``.
        num_block_list: Number of blocks per stage; entries 0..3 are read.
        cardinality: Forwarded to every block.
        num_classes: Output size of the final linear layer.
        zero_init_residual: If true, zero the weight of the last layer of each
            block's residual branch after the convolution initialisation.
    """

    def __init__(self, block, num_block_list, cardinality, num_classes = 1000, zero_init_residual = True):
        super().__init__()

        # Channel count fed to the next block; make_stage advances it as stages are built.
        self.in_channels = 64
        self.cardinality = cardinality

        # Stem: strided 7x7 convolution, BN, ReLU, then a strided 3x3 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (inner width, stride of the first block) for stage1..stage4.
        stage_plan = ((128, 1), (256, 2), (512, 2), (1024, 2))
        built_stages = [
            self.make_stage(block, width, num_block_list[idx], stride=first_stride)
            for idx, (width, first_stride) in enumerate(stage_plan)
        ]
        self.stage1, self.stage2, self.stage3, self.stage4 = built_stages

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(stage_plan[-1][0] * block.expansion, num_classes)

        # First pass: Kaiming-normal (fan_out, ReLU gain) for every convolution.
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")

        # Second pass: zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for blk in filter(lambda mod: isinstance(mod, block), self.modules()):
                nn.init.constant_(blk.residual[-1].weight, 0)

    def make_stage(self, block, inner_channels, num_blocks, stride = 1):
        """Build one stage and advance ``self.in_channels`` to the stage's output width.

        The first block receives `stride` and, when the stride is not 1 or the
        channel count changes, a 1x1 conv + BN projection for its shortcut.
        The remaining ``num_blocks - 1`` blocks use the defaults (stride 1, no
        projection).
        """
        out_channels = inner_channels * block.expansion

        projection = None
        if not (stride == 1 and self.in_channels == out_channels):
            # The shortcut must match the residual branch in both resolution and width.
            projection = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        first = block(self.in_channels, inner_channels, self.cardinality, stride, projection)
        self.in_channels = out_channels
        rest = [
            block(self.in_channels, inner_channels, self.cardinality)
            for _ in range(1, num_blocks)
        ]

        return nn.Sequential(first, *rest)

    def forward(self, x):
        """Run stem, the four stages, global average pooling and the linear head."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.stage1, self.stage2, self.stage3, self.stage4):
            x = stage(x)

        pooled = self.avgpool(x)
        return self.fc(torch.flatten(pooled, 1))

In [5]:
def resnext50(**kwargs):
    """Build a ResNeXt from Bottleneck blocks with stage depths [3, 4, 6, 3] and cardinality 32.

    Any keyword arguments are forwarded unchanged to the ResNeXt constructor.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNeXt(Bottleneck, stage_depths, cardinality=32, **kwargs)

def resnext101(**kwargs):
    """Build a ResNeXt from Bottleneck blocks with stage depths [3, 4, 23, 3] and cardinality 32.

    Any keyword arguments are forwarded unchanged to the ResNeXt constructor.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNeXt(Bottleneck, stage_depths, cardinality=32, **kwargs)

In [7]:
# Instantiate the resnext50 model and show torchinfo's per-layer table of output
# shapes and parameter counts for a (2, 3, 224, 224) input, evaluated on the CPU.
# `summary` is imported in the notebook's import cell so that all dependencies
# are visible at the top.
model = resnext50()
summary(model, input_size=(2, 3, 224, 224), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
ResNeXt                                  [2, 1000]                 --
├─Conv2d: 1-1                            [2, 64, 112, 112]         9,408
├─BatchNorm2d: 1-2                       [2, 64, 112, 112]         128
├─ReLU: 1-3                              [2, 64, 112, 112]         --
├─MaxPool2d: 1-4                         [2, 64, 56, 56]           --
├─Sequential: 1-5                        [2, 256, 56, 56]          --
│    └─Bottleneck: 2-1                   [2, 256, 56, 56]          --
│    │    └─Sequential: 3-1              [2, 256, 56, 56]          46,592
│    │    └─Sequential: 3-2              [2, 256, 56, 56]          16,896
│    │    └─ReLU: 3-3                    [2, 256, 56, 56]          --
│    └─Bottleneck: 2-2                   [2, 256, 56, 56]          --
│    │    └─Sequential: 3-4              [2, 256, 56, 56]          71,168
│    │    └─ReLU: 3-5                    [2, 256, 56, 56]          --

In [9]:
# Smoke test: push a random (2, 3, 224, 224) batch through the model and print the output shape.
x = torch.randn((2, 3, 224, 224))
print(model(x).size())

torch.Size([2, 1000])


## Grouped convolution

In [12]:
# Compare the weight shape of an ordinary convolution (groups=1) with a grouped
# one (groups=32): each filter only spans in_channels / groups input channels.
print(
    *(nn.Conv2d(128, 256, 3, groups=g).weight.shape for g in (1, 32)),
    sep="\n",
)

torch.Size([256, 128, 3, 3])
torch.Size([256, 4, 3, 3])


In [16]:
# Demonstration of the grouping constraint: 50 output channels cannot be split
# evenly across 32 groups, so nn.Conv2d refuses to construct the layer.
# The expected error is caught and printed so the notebook still runs top to
# bottom (Restart & Run All) instead of aborting on this cell.
try:
    print(nn.Conv2d(128, 50, 3, groups=32).weight.shape)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: out_channels must be divisible by groups