In [1]:
import torch.nn as nn
from torchsummary import summary

In [2]:
class EntryflowConv(nn.Module):
    """
    First part of the entry flow: two plain 3x3 convolutions, each followed by ReLU.

    In Xception:
        in_channel = 3
        out_channel = 64
        mid_channel = 32

    Args:
        in_channel: number of channels of the input tensor.
        out_channel: number of channels produced by the second convolution.
        mid_channel: number of channels produced by the first convolution
            (and consumed by the second). Defaults to 32, the value that was
            previously hard-coded, so existing callers are unaffected.
    """
    def __init__(self, in_channel, out_channel, mid_channel=32):
        super(EntryflowConv, self).__init__()

        # No padding on either convolution; the first one also halves the
        # spatial resolution through stride=2.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=mid_channel, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=mid_channel, out_channels=out_channel, kernel_size=3)

        # A single stateless ReLU instance is shared by both activations.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU and return the result."""
        # Shapes below are for the Xception setting (299x299 input, 3 channels).
        # 299x299x3
        x = self.conv1(x)
        x = self.relu(x)
        # 149x149x32

        x = self.conv2(x)
        x = self.relu(x)
        # 147x147x64

        return x

In [3]:
# Sanity check: build the stem with the Xception channel sizes and print the
# per-layer output shapes / parameter counts for a 299x299 RGB input.
sample = EntryflowConv(in_channel=3, out_channel=64)
summary(sample, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
              ReLU-2         [-1, 32, 149, 149]               0
            Conv2d-3         [-1, 64, 147, 147]          18,496
              ReLU-4         [-1, 64, 147, 147]               0
Total params: 19,392
Trainable params: 19,392
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.02
Forward/backward pass size (MB): 31.94
Params size (MB): 0.07
Estimated Total Size (MB): 33.04
----------------------------------------------------------------


In [4]:
class DepthwiseSeparable(nn.Module):
    """
    Separable convolution built from a pointwise and a depthwise convolution.

        Depthwise convolution : each channel is convolved on its own.
            Implemented with ``groups`` equal to the number of channels, so
            no information is mixed across the depth dimension.
        Pointwise convolution : an ordinary convolution with a (1,1) kernel.
            Only mixes information across the depth dimension.

    The usual definition applies depthwise first and pointwise second; this
    module follows the order described for the Xception architecture instead:
        pointwise convolution -> depthwise convolution

    Args:
        in_channel: channels of the input tensor.
        out_channel: channels produced by the pointwise step; the depthwise
            step keeps this channel count (groups = out_channel).
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution (default 1).
        padding: padding of the depthwise convolution (default 1, which keeps
            the spatial size unchanged when kernel_size = 3 and stride = 1).
    """
    def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=1):
        super().__init__()

        # Channel mixing: 1x1 kernel, changes the depth to out_channel.
        self.pointwise = nn.Conv2d(in_channel, out_channel, kernel_size=1)

        # Spatial filtering: one group per channel, depth stays out_channel.
        self.depthwise = nn.Conv2d(
            out_channel,
            out_channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=out_channel,
        )

    def forward(self, x):
        """Run the pointwise convolution, then the depthwise convolution."""
        return self.depthwise(self.pointwise(x))

In [5]:
# The depthwise step is left at its default padding of 1, so with the 3x3
# kernel used here the spatial size is preserved ('same' padding).
sample = DepthwiseSeparable(in_channel=64, out_channel=128, kernel_size=3)
summary(sample, input_size=(64, 147, 147))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 128, 147, 147]           8,320
            Conv2d-2        [-1, 128, 147, 147]           1,280
Total params: 9,600
Trainable params: 9,600
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 5.28
Forward/backward pass size (MB): 42.21
Params size (MB): 0.04
Estimated Total Size (MB): 47.52
----------------------------------------------------------------


In [6]:
class EntryflowSeparable(nn.Module):
    """
    Residual block of the entry flow built from depthwise separable convolutions.
    The original implementation repeats this block 3 times.

    Two branches are computed from the same input and summed:
        main branch     : [ReLU] -> sepconv -> ReLU -> sepconv -> max-pool (stride 2)
        shortcut branch : 1x1 convolution with stride 2

    Args:
        in_channel, out_channel: differ for each repetition of the block.
        pool_padding: padding passed to the max-pool layer (default 1).
        relu_extra: bool (default False); when True a ReLU is applied to the
            input of the main branch before the first separable convolution.

    The separable convolutions always use kernel_size = 3.
    """
    def __init__(self, in_channel, out_channel, pool_padding=1, relu_extra=False):
        super().__init__()

        # Main branch
        self.sepconv1 = DepthwiseSeparable(in_channel, out_channel, kernel_size=3)
        self.sepconv2 = DepthwiseSeparable(out_channel, out_channel, kernel_size=3)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=pool_padding)
        self.relu = nn.ReLU()
        self.relu_extra = relu_extra

        # Shortcut branch: strided 1x1 convolution so that channel count and
        # spatial size line up with the pooled main branch.
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=2)

    def forward(self, x):
        """Return main_branch(x) + shortcut(x)."""
        # The shortcut always sees the raw input (before any optional ReLU).
        shortcut = self.conv(x)

        main = self.relu(x) if self.relu_extra else x
        main = self.sepconv1(main)
        main = self.relu(main)
        main = self.sepconv2(main)
        main = self.maxpool(main)

        return main + shortcut

In [7]:
# First residual block of the entry flow (no leading ReLU): 64 -> 128 channels.
sample = EntryflowSeparable(in_channel=64, out_channel=128)
summary(sample, input_size=(64, 147, 147))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 74, 74]           8,320
            Conv2d-2        [-1, 128, 147, 147]           8,320
            Conv2d-3        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-4        [-1, 128, 147, 147]               0
              ReLU-5        [-1, 128, 147, 147]               0
            Conv2d-6        [-1, 128, 147, 147]          16,512
            Conv2d-7        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-8        [-1, 128, 147, 147]               0
         MaxPool2d-9          [-1, 128, 74, 74]               0
Total params: 35,712
Trainable params: 35,712
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 5.28
Forward/backward pass size (MB): 158.41
Params size (MB): 0.14
Estimated Total Size (MB): 163.82
-----------------------------------------

In [8]:
# Second residual block: 128 -> 256 channels, with relu_extra=True so the main
# branch starts with a ReLU on its input.
sample = EntryflowSeparable(in_channel=128, out_channel=256, relu_extra=True)
summary(sample, input_size=(128, 74, 74))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 37, 37]          33,024
              ReLU-2          [-1, 128, 74, 74]               0
            Conv2d-3          [-1, 256, 74, 74]          33,024
            Conv2d-4          [-1, 256, 74, 74]           2,560
DepthwiseSeparable-5          [-1, 256, 74, 74]               0
              ReLU-6          [-1, 256, 74, 74]               0
            Conv2d-7          [-1, 256, 74, 74]          65,792
            Conv2d-8          [-1, 256, 74, 74]           2,560
DepthwiseSeparable-9          [-1, 256, 74, 74]               0
        MaxPool2d-10          [-1, 256, 37, 37]               0
Total params: 136,960
Trainable params: 136,960
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 2.67
Forward/backward pass size (MB): 85.56
Params size (MB): 0.52
Estimated 

In [9]:
class EntryFlow(nn.Module):
    """
    Complete entry flow of Xception:

        EntryflowConv followed by 3 x EntryflowSeparable

    Channel progression: 3 -> 64 -> 128 -> 256 -> 728
        in_channel = 3
        out_channel = 728
    """
    def __init__(self):
        super().__init__()

        # Plain convolution stem.
        self.conv = EntryflowConv(in_channel=3, out_channel=64)

        # Three residual separable blocks; only the first one skips the
        # leading ReLU on its main branch.
        self.sep1 = EntryflowSeparable(in_channel=64, out_channel=128)
        self.sep2 = EntryflowSeparable(in_channel=128, out_channel=256, relu_extra=True)
        self.sep3 = EntryflowSeparable(in_channel=256, out_channel=728, relu_extra=True)

    def forward(self, x):
        """Pass the input through the stem and the three residual blocks in order."""
        for stage in (self.conv, self.sep1, self.sep2, self.sep3):
            x = stage(x)
        return x

In [10]:
# Build the full entry flow and print its layer-by-layer summary for a
# 299x299 RGB input.
xception_entry = EntryFlow()
summary(xception_entry, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
              ReLU-2         [-1, 32, 149, 149]               0
            Conv2d-3         [-1, 64, 147, 147]          18,496
              ReLU-4         [-1, 64, 147, 147]               0
     EntryflowConv-5         [-1, 64, 147, 147]               0
            Conv2d-6          [-1, 128, 74, 74]           8,320
            Conv2d-7        [-1, 128, 147, 147]           8,320
            Conv2d-8        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-9        [-1, 128, 147, 147]               0
             ReLU-10        [-1, 128, 147, 147]               0
           Conv2d-11        [-1, 128, 147, 147]          16,512
           Conv2d-12        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-13        [-1, 128, 147, 147]               0
        MaxPool2d-14          [-1, 128