In [None]:
class Stem(nn.Module):
    """Convolutional stem that turns a 3-channel image into a 384-channel map.

    The layout resembles the Inception-v4 / Inception-ResNet-v2 stem
    (NOTE(review): inferred from the layer shapes -- confirm against the
    intended reference). Stages, as executed by ``forward``:

    1. Three 3x3 convolutions with ReLU (3 -> 32 -> 32 -> 64 channels); the
       first one has stride 2.
    2. A stride-2 3x3 convolution (96 channels, no activation) and a stride-2
       3x3 max-pool (64 channels) run in parallel and are concatenated
       (160 channels).
    3. Two parallel towers, each ending in an unpadded 3x3 convolution with
       96 output channels, are concatenated (192 channels). One tower is
       1x1 -> 3x3, the other is 1x1 -> 7x1 -> 1x7 -> 3x3.
    4. A stride-2 3x3 max-pool and a stride-2 3x3 convolution (no activation)
       run in parallel and are concatenated (384 channels).
    """

    def __init__(self):
        super(Stem, self).__init__()

        # Stage 1: plain convolutions, 3 -> 32 -> 32 -> 64 channels.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=0)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # Stage 2: strided convolution that runs in parallel with max_pool1.
        self.mixed_conv1 = nn.Conv2d(64, 96, kernel_size=3, stride=2, padding=0)

        # Stage 4: strided convolution that runs in parallel with max_pool2.
        # It is registered at this position (rather than next to max_pool2) so
        # that the module/parameter order -- and therefore optimizer-state
        # indexing -- stays stable. This attribute used to be assigned twice;
        # only this 192 -> 192 layer was ever kept and used.
        self.mixed_conv2 = nn.Conv2d(192, 192, kernel_size=3, stride=2, padding=0)

        # NOTE: not referenced by forward(). Retained only so the state_dict
        # keys of this module do not change.
        self.mixed_conv3 = nn.Conv2d(64, 96, kernel_size=3, padding=0)

        # Stage 2: pooling branch.
        self.max_pool1 = nn.MaxPool2d(3, stride=2, padding=0)

        # Stage 3, short tower: 1x1 -> 3x3.
        self.conv4 = nn.Conv2d(160, 64, kernel_size=1, padding=0)
        self.conv5 = nn.Conv2d(64, 96, kernel_size=3, padding=0)
        # Stage 3, long tower: 1x1 -> 7x1 -> 1x7 -> 3x3.
        self.conv6 = nn.Conv2d(160, 64, kernel_size=1, padding=0)
        self.conv7 = nn.Conv2d(64, 64, kernel_size=(7, 1), padding=(3, 0))
        self.conv8 = nn.Conv2d(64, 64, kernel_size=(1, 7), padding=(0, 3))
        self.conv9 = nn.Conv2d(64, 96, kernel_size=3, padding=0)

        # Stage 4: pooling branch.
        self.max_pool2 = nn.MaxPool2d(3, stride=2, padding=0)

    def forward(self, x):
        """Run the stem.

        Args:
            x: Input tensor with 3 channels on dim 1 (required by ``conv1``).

        Returns:
            Tensor with 384 channels on dim 1 (192 pooled + 192 convolved).
        """
        # Stage 1: initial convolutions.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        # Stage 2: two parallel stride-2 operations, joined on the channel dim.
        # The convolution branch is deliberately left without an activation,
        # matching the existing behaviour.
        strided = self.mixed_conv1(x)
        pooled = self.max_pool1(x)
        x = torch.cat((strided, pooled), dim=1)

        # Stage 3, short tower.
        short = F.relu(self.conv4(x))
        short = F.relu(self.conv5(short))

        # Stage 3, long tower with factorised 7x7 (7x1 followed by 1x7).
        long = F.relu(self.conv6(x))
        long = F.relu(self.conv7(long))
        long = F.relu(self.conv8(long))
        long = F.relu(self.conv9(long))

        x = torch.cat((short, long), dim=1)

        # Stage 4: final parallel downsampling (pool first in the concat).
        pooled = self.max_pool2(x)
        strided = self.mixed_conv2(x)

        return torch.cat((pooled, strided), dim=1)

class InceptionResNetA(nn.Module):
    """Inception-ResNet-A style residual block.

    Three parallel towers (32, 32 and 64 output channels) are concatenated
    into 128 channels, projected back to ``in_channels`` by a 1x1
    convolution + BatchNorm + ReLU, scaled by 0.1 and added to the input.
    Spatial size is preserved (all 3x3 convolutions use padding 1).

    The projection width follows ``in_channels`` so the residual addition is
    shape-compatible for any input width; with ``in_channels=384`` the layer
    shapes are identical to the previous hard-coded version.

    NOTE(review): the projection ends in a ReLU and no activation is applied
    after the residual sum. The original source had a ``####relu`` marker at
    that point -- confirm whether a post-sum ReLU was intended.

    Args:
        in_channels: Number of channels of the input (and of the output).
    """

    def __init__(self, in_channels):
        super(InceptionResNetA, self).__init__()

        # Tower 0: 1x1.
        self.branch0 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )

        # Tower 1: 1x1 -> 3x3.
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )

        # Tower 2: 1x1 -> 3x3 -> 3x3.
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 48, kernel_size=3, padding=1),
            nn.BatchNorm2d(48),
            nn.ReLU(inplace=True),
            nn.Conv2d(48, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )

        # 128 = 32 + 32 + 64 concatenated tower channels. The output width
        # must equal in_channels for the residual addition in forward().
        self.reduction1x1 = nn.Sequential(
            nn.Conv2d(128, in_channels, kernel_size=1),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Return ``0.1 * projection(concat(towers(x))) + x``."""
        branches = [self.branch0(x), self.branch1(x), self.branch2(x)]
        mixed = torch.cat(branches, dim=1)

        up = self.reduction1x1(mixed)

        # Scale the residual branch before adding it to the shortcut.
        out = up * 0.1 + x
        return out

class ReductionA(nn.Module):
    """Downsampling block with three parallel stride-2 branches.

    Branches, concatenated along dim 1 in this order:

    * ``branch1``: 3x3 stride-2 convolution -> ``n`` channels.
    * ``branch2``: 1x1 (``k``) -> 3x3 padded (``l``) -> 3x3 stride-2 (``m``).
    * ``branch3``: 3x3 stride-2 max-pool (keeps ``in_channels``).

    Every convolution is bias-free and followed by BatchNorm and in-place
    ReLU. The output has ``n + m + in_channels`` channels.

    Args:
        in_channels: Channels of the input tensor.
        k: Output channels of the 1x1 convolution in ``branch2``.
        l: Output channels of the stride-1 3x3 convolution in ``branch2``.
        m: Output channels of the stride-2 3x3 convolution in ``branch2``.
        n: Output channels of the stride-2 3x3 convolution in ``branch1``.
    """

    def __init__(self, in_channels: int, k: int, l: int, m: int, n: int):
        super().__init__()

        def conv_bn_relu(c_in, c_out, kernel, stride, pad):
            # One bias-free convolution followed by BatchNorm and ReLU.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel, stride=stride,
                          padding=pad, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        self.branch1 = nn.Sequential(*conv_bn_relu(in_channels, n, 3, 2, 0))

        self.branch2 = nn.Sequential(
            *conv_bn_relu(in_channels, k, 1, 1, 0),
            *conv_bn_relu(k, l, 3, 1, 1),
            *conv_bn_relu(l, m, 3, 2, 0),
        )

        self.branch3 = nn.MaxPool2d(3, stride=2, padding=0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply all three branches to ``x`` and concatenate on dim 1."""
        towers = (self.branch1, self.branch2, self.branch3)
        return torch.cat([tower(x) for tower in towers], 1)

class InceptionResNetB(nn.Module):
    """Inception-ResNet-B style residual block.

    Two towers are concatenated (192 + 192 = 384 channels), projected back to
    ``in_channels`` with a 1x1 convolution + BatchNorm + ReLU, scaled by 0.1
    and added to the input. Spatial size is preserved.

    * ``branch0``: 1x1 -> 192 channels.
    * ``branch1``: 1x1 (128) -> 1x7 (160) -> 7x1 (192).

    Args:
        in_channels: Number of channels of the input (and of the output).
    """

    def __init__(self, in_channels: int):
        super().__init__()

        def unit(c_in, c_out, inplace=True, **conv_args):
            # Convolution (with bias) + BatchNorm + ReLU, as a flat tuple.
            return (
                nn.Conv2d(c_in, c_out, **conv_args),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=inplace),
            )

        self.branch0 = nn.Sequential(*unit(in_channels, 192, kernel_size=1))

        self.branch1 = nn.Sequential(
            *unit(in_channels, 128, kernel_size=1),
            *unit(128, 160, kernel_size=(1, 7), padding=(0, 3)),
            *unit(160, 192, kernel_size=(7, 1), padding=(3, 0)),
        )

        # Projection back to the input width; its ReLU is not in-place.
        self.reduction1x1 = nn.Sequential(
            *unit(384, in_channels, inplace=False, kernel_size=1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``0.1 * projection(concat(branch0(x), branch1(x))) + x``."""
        towers = [self.branch0(x), self.branch1(x)]
        projected = self.reduction1x1(torch.cat(towers, dim=1))

        # Scaled residual branch plus identity shortcut.
        return projected * 0.1 + x

class ReductionB(nn.Module):
    """Downsampling block with four parallel stride-2 branches.

    Branches, concatenated along dim 1 in this order:

    * ``branch1``: 1x1 (256) -> 3x3 stride-2 (384).
    * ``branch2``: 1x1 (256) -> 3x3 stride-2 (288).
    * ``branch3``: 1x1 (256) -> 3x3 padded (288) -> 3x3 stride-2 (320).
    * ``branch4``: 3x3 stride-2 max-pool (keeps ``in_channels``).

    Every convolution is bias-free and followed by BatchNorm and in-place
    ReLU. The output has ``384 + 288 + 320 + in_channels`` channels.

    Args:
        in_channels: Channels of the input tensor.
    """

    def __init__(self, in_channels: int):
        super().__init__()

        def conv_bn_relu(c_in, c_out, kernel, stride, pad):
            # One bias-free convolution followed by BatchNorm and ReLU.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel, stride=stride,
                          padding=pad, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        self.branch1 = nn.Sequential(
            *conv_bn_relu(in_channels, 256, 1, 1, 0),
            *conv_bn_relu(256, 384, 3, 2, 0),
        )

        self.branch2 = nn.Sequential(
            *conv_bn_relu(in_channels, 256, 1, 1, 0),
            *conv_bn_relu(256, 288, 3, 2, 0),
        )

        self.branch3 = nn.Sequential(
            *conv_bn_relu(in_channels, 256, 1, 1, 0),
            *conv_bn_relu(256, 288, 3, 1, 1),
            *conv_bn_relu(288, 320, 3, 2, 0),
        )

        self.branch4 = nn.MaxPool2d(3, stride=2, padding=0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply all four branches to ``x`` and concatenate on dim 1."""
        towers = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([tower(x) for tower in towers], 1)

class InceptionResNetC(nn.Module):
    """Inception-ResNet-C style residual block.

    Two towers are concatenated (192 + 256 = 448 channels), projected back to
    ``in_channels`` with a 1x1 convolution + BatchNorm + ReLU, scaled by 0.1
    and added to the input. Spatial size is preserved.

    * ``branch0``: a bare 1x1 convolution to 192 channels (no BatchNorm or
      activation).
    * ``branch1``: 1x1 (192) -> 1x3 (224) -> 3x1 (256), each followed by
      BatchNorm and in-place ReLU.

    Args:
        in_channels: Number of channels of the input (and of the output).
    """

    def __init__(self, in_channels: int):
        super().__init__()

        def unit(c_in, c_out, **conv_args):
            # Convolution (with bias) + BatchNorm + in-place ReLU.
            return (
                nn.Conv2d(c_in, c_out, **conv_args),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            )

        self.branch0 = nn.Conv2d(in_channels, 192, kernel_size=1)

        self.branch1 = nn.Sequential(
            *unit(in_channels, 192, kernel_size=1),
            *unit(192, 224, kernel_size=(1, 3), padding=(0, 1)),
            *unit(224, 256, kernel_size=(3, 1), padding=(1, 0)),
        )

        # 448 = 192 (branch0) + 256 (branch1).
        self.reduction1x1 = nn.Sequential(*unit(448, in_channels, kernel_size=1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``0.1 * projection(concat(branch0(x), branch1(x))) + x``."""
        towers = [self.branch0(x), self.branch1(x)]
        projected = self.reduction1x1(torch.cat(towers, dim=1))

        # Scaled residual branch plus identity shortcut.
        return projected * 0.1 + x

class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 conv -> BN -> ReLU -> 3x3 conv -> BN).

    The shortcut is the identity when the input and output shapes already
    match; otherwise it is a bias-free 1x1 convolution (with the same stride)
    followed by BatchNorm. A ReLU is applied after the residual sum.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution and of the shortcut
            projection. Defaults to 1.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # An empty Sequential returns its input unchanged, i.e. an identity
        # shortcut. It is replaced by a projection only when the main path
        # changes the spatial size or channel count.
        self.downsample = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        # Use the functional ReLU instead of building a throw-away nn.ReLU
        # module on every call.
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out = out + self.downsample(x)
        return nn.functional.relu(out)
