In [1]:
import torch.nn as nn
from torchsummary import summary

In [2]:
class EntryflowConv(nn.Module):
    """
    Stem of the entry flow: two unpadded 3x3 convolutions, each followed by
    batch normalization and ReLU.

    The first convolution always produces 32 channels and uses stride 2; the
    second maps those 32 channels to ``out_channel`` with stride 1.

    In Xception:
        in_channel = 3
        out_channel = 64
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()

        # Width of the intermediate feature map between the two convolutions.
        stem_width = 32

        self.conv1 = nn.Conv2d(in_channel, stem_width, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(stem_width, out_channel, kernel_size=3)
        self.bnm1 = nn.BatchNorm2d(stem_width)
        self.bnm2 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()

    def forward(self, x):
        # In Xception: 299x299x3 -> 149x149x32 (stride 2, no padding)
        x = self.relu(self.bnm1(self.conv1(x)))
        # In Xception: 149x149x32 -> 147x147x64 (stride 1, no padding)
        return self.relu(self.bnm2(self.conv2(x)))

In [3]:
# Shape check of the stem on the standard 299x299 RGB Xception input.
sample = EntryflowConv(in_channel=3, out_channel=64)
summary(model=sample, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
       BatchNorm2d-2         [-1, 32, 149, 149]              64
              ReLU-3         [-1, 32, 149, 149]               0
            Conv2d-4         [-1, 64, 147, 147]          18,496
       BatchNorm2d-5         [-1, 64, 147, 147]             128
              ReLU-6         [-1, 64, 147, 147]               0
Total params: 19,584
Trainable params: 19,584
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.02
Forward/backward pass size (MB): 47.91
Params size (MB): 0.07
Estimated Total Size (MB): 49.01
----------------------------------------------------------------


In [4]:
class DepthwiseSeparable(nn.Module):
    """
    Separable convolution built from a pointwise and a depthwise convolution.

        Pointwise Convolution : ordinary convolution with a (1,1) kernel; it mixes
            channels only and maps in_channel -> out_channel.
        Depthwise Convolution : one spatial filter per channel, obtained by setting
            groups equal to the number of channels so that no information is mixed
            across channels.

    Order used by this module: pointwise first, then depthwise (the usual
    formulation applies them the other way round).

    in_channel  : channels of the input
    out_channel : channels of the output (also the number of depthwise groups)
    kernel_size : spatial kernel size of the depthwise convolution
    stride      : stride of the depthwise convolution, default 1
    padding     : padding of the depthwise convolution, default 1
                  (preserves spatial size when kernel_size = 3 and stride = 1)
    """

    def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=1):
        super().__init__()

        # 1x1 convolution: changes the channel count, leaves the spatial size untouched.
        self.pointwise = nn.Conv2d(in_channel, out_channel, kernel_size=1)
        # groups == channels: every channel is filtered independently.
        self.depthwise = nn.Conv2d(
            out_channel,
            out_channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=out_channel,
        )

    def forward(self, x):
        return self.depthwise(self.pointwise(x))

In [5]:
# The default padding=1 keeps the spatial size unchanged for kernel_size=3 ('same').
sample = DepthwiseSeparable(in_channel=64, out_channel=128, kernel_size=3)
summary(model=sample, input_size=(64, 147, 147))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 128, 147, 147]           8,320
            Conv2d-2        [-1, 128, 147, 147]           1,280
Total params: 9,600
Trainable params: 9,600
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 5.28
Forward/backward pass size (MB): 42.21
Params size (MB): 0.04
Estimated Total Size (MB): 47.52
----------------------------------------------------------------


In [6]:
class EntryflowSeparable(nn.Module):
    """
    Residual block of the entry flow (used 3 times in the original architecture).

    Main branch     : [ReLU] -> SepConv -> BN -> ReLU -> SepConv -> BN -> MaxPool(3, stride 2)
    Shortcut branch : 1x1 Conv (stride 2) -> BN
    The two branches are summed element-wise.

        in_channel, out_channel : Different for each repetition
        pool_padding : default 1, padding value for the max-pool layer
        kernel_size = 3 : For all separable convolutions
        relu_extra : bool, default False : whether to apply a ReLU to the input
                     of the main branch before the first separable convolution
    """

    def __init__(self, in_channel, out_channel, pool_padding=1, relu_extra=False):
        super().__init__()

        # Main branch
        self.sepconv1 = DepthwiseSeparable(in_channel=in_channel, out_channel=out_channel, kernel_size=3)
        self.bnm1 = nn.BatchNorm2d(out_channel)
        self.sepconv2 = DepthwiseSeparable(in_channel=out_channel, out_channel=out_channel, kernel_size=3)
        self.bnm2 = nn.BatchNorm2d(out_channel)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=pool_padding)
        self.relu = nn.ReLU()
        self.relu_extra = relu_extra

        # Shortcut branch: strided 1x1 projection so it matches the pooled main branch
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=2)
        self.bnmy = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        # Shortcut branch is computed from the raw input (before the optional ReLU).
        shortcut = self.bnmy(self.conv(x))

        # Main branch
        main = self.relu(x) if self.relu_extra else x
        main = self.relu(self.bnm1(self.sepconv1(main)))
        main = self.bnm2(self.sepconv2(main))
        main = self.maxpool(main)

        # Residual sum of the two branches
        return main + shortcut

In [7]:
# First entry-flow residual block: 64 -> 128 channels, no leading ReLU.
sample = EntryflowSeparable(in_channel=64, out_channel=128)
summary(model=sample, input_size=(64, 147, 147))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 74, 74]           8,320
       BatchNorm2d-2          [-1, 128, 74, 74]             256
            Conv2d-3        [-1, 128, 147, 147]           8,320
            Conv2d-4        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-5        [-1, 128, 147, 147]               0
       BatchNorm2d-6        [-1, 128, 147, 147]             256
              ReLU-7        [-1, 128, 147, 147]               0
            Conv2d-8        [-1, 128, 147, 147]          16,512
            Conv2d-9        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-10        [-1, 128, 147, 147]               0
      BatchNorm2d-11        [-1, 128, 147, 147]             256
        MaxPool2d-12          [-1, 128, 74, 74]               0
Total params: 36,480
Trainable params: 36,480
Non-trainable params: 0
--------------------------------

In [8]:
# Second entry-flow residual block: 128 -> 256 channels, with the leading ReLU enabled.
sample = EntryflowSeparable(in_channel=128, out_channel=256, relu_extra=True)
summary(model=sample, input_size=(128, 74, 74))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 37, 37]          33,024
       BatchNorm2d-2          [-1, 256, 37, 37]             512
              ReLU-3          [-1, 128, 74, 74]               0
            Conv2d-4          [-1, 256, 74, 74]          33,024
            Conv2d-5          [-1, 256, 74, 74]           2,560
DepthwiseSeparable-6          [-1, 256, 74, 74]               0
       BatchNorm2d-7          [-1, 256, 74, 74]             512
              ReLU-8          [-1, 256, 74, 74]               0
            Conv2d-9          [-1, 256, 74, 74]          65,792
           Conv2d-10          [-1, 256, 74, 74]           2,560
DepthwiseSeparable-11          [-1, 256, 74, 74]               0
      BatchNorm2d-12          [-1, 256, 74, 74]             512
        MaxPool2d-13          [-1, 256, 37, 37]               0
Total params: 138,496
Trainable params

In [9]:
class EntryFlow(nn.Module):
    """
    Entry Flow part of Xception:

        EntryflowConv followed by 3 x EntryflowSeparable
        (64 -> 128 -> 256 -> 728 channels)

        in_channel = 3
        out_channel = 728
    """

    def __init__(self):
        super().__init__()
        self.conv = EntryflowConv(in_channel=3, out_channel=64)
        self.sep1 = EntryflowSeparable(in_channel=64, out_channel=128)
        # Only the second and third residual blocks start with a ReLU.
        self.sep2 = EntryflowSeparable(in_channel=128, out_channel=256, relu_extra=True)
        self.sep3 = EntryflowSeparable(in_channel=256, out_channel=728, relu_extra=True)

    def forward(self, x):
        # Apply the stem and the three residual blocks in order.
        for stage in (self.conv, self.sep1, self.sep2, self.sep3):
            x = stage(x)
        return x

In [10]:
# Full entry flow on the standard 299x299 RGB input.
xception_entry = EntryFlow()
summary(model=xception_entry, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
       BatchNorm2d-2         [-1, 32, 149, 149]              64
              ReLU-3         [-1, 32, 149, 149]               0
            Conv2d-4         [-1, 64, 147, 147]          18,496
       BatchNorm2d-5         [-1, 64, 147, 147]             128
              ReLU-6         [-1, 64, 147, 147]               0
     EntryflowConv-7         [-1, 64, 147, 147]               0
            Conv2d-8          [-1, 128, 74, 74]           8,320
       BatchNorm2d-9          [-1, 128, 74, 74]             256
           Conv2d-10        [-1, 128, 147, 147]           8,320
           Conv2d-11        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-12        [-1, 128, 147, 147]               0
      BatchNorm2d-13        [-1, 128, 147, 147]             256
             ReLU-14        [-1, 128, 

In [11]:
class MiddleflowSeperable(nn.Module):
    """
    Residual block of the middle flow.

    Main branch : 3 x (ReLU -> SepConv -> BN)
    Shortcut    : identity
    The two branches are summed element-wise, so in_channel and out_channel
    must be equal (728 in Xception) for the addition to be valid.

        in_channel, out_channel : Both of them are actually equal!
        kernel_size = 3 : For all separable convolutions

    Each separable convolution has its own BatchNorm2d layer (bnm1, bnm2, bnm3).
    A single shared BatchNorm2d would force the three layers to share one set of
    affine parameters and one set of running statistics.
    """
    def __init__(self,in_channel,out_channel):
        super(MiddleflowSeperable,self).__init__()

        # 1st branch: every separable convolution gets its own batch-norm layer
        self.sep1 = DepthwiseSeparable(in_channel=in_channel,out_channel=out_channel,kernel_size=3)
        self.bnm1 = nn.BatchNorm2d(out_channel)
        self.sep2 = DepthwiseSeparable(in_channel=out_channel,out_channel=out_channel,kernel_size=3)
        self.bnm2 = nn.BatchNorm2d(out_channel)
        self.sep3 = DepthwiseSeparable(in_channel=out_channel,out_channel=out_channel,kernel_size=3)
        self.bnm3 = nn.BatchNorm2d(out_channel)
        # ReLU holds no parameters or state, so one instance can be reused.
        self.relu = nn.ReLU()

    def forward(self,x):
        # 2nd branch (identity shortcut)
        y = x

        # 1st branch
        x = self.relu(x)
        x = self.sep1(x)
        x = self.bnm1(x)

        x = self.relu(x)
        x = self.sep2(x)
        x = self.bnm2(x)

        x = self.relu(x)
        x = self.sep3(x)
        x = self.bnm3(x)

        # Add two branch
        x = x + y
        return x

In [12]:
# One middle-flow residual block; input and output both have 728 channels.
model = MiddleflowSeperable(in_channel=728, out_channel=728)
summary(model=model, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
              ReLU-1          [-1, 728, 19, 19]               0
            Conv2d-2          [-1, 728, 19, 19]         530,712
            Conv2d-3          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-4          [-1, 728, 19, 19]               0
       BatchNorm2d-5          [-1, 728, 19, 19]           1,456
              ReLU-6          [-1, 728, 19, 19]               0
            Conv2d-7          [-1, 728, 19, 19]         530,712
            Conv2d-8          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-9          [-1, 728, 19, 19]               0
      BatchNorm2d-10          [-1, 728, 19, 19]           1,456
             ReLU-11          [-1, 728, 19, 19]               0
           Conv2d-12          [-1, 728, 19, 19]         530,712
           Conv2d-13          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-14          [-1, 728

In [13]:
class MiddleFlow(nn.Module):
    """
    This is the Middle Flow part -
        MiddleflowSeperable is repeated num_blocks times (8 in Xception)

    Every repetition is a separate MiddleflowSeperable instance with its own
    weights. Calling one instance repeatedly would tie the weights of all
    repetitions together.

    num_blocks : number of residual blocks to stack, default 8

    input_size = (728,19,19)
    output_size = (728,19,19)
    """
    def __init__(self,num_blocks=8):
        super(MiddleFlow,self).__init__()
        # Independent blocks, applied one after another by nn.Sequential.
        self.blocks = nn.Sequential(
            *[MiddleflowSeperable(in_channel=728,out_channel=728) for _ in range(num_blocks)]
        )

    def forward(self,x):
        return self.blocks(x)

In [14]:
# Whole middle flow; the feature-map shape (728, 19, 19) is preserved end to end.
sample = MiddleFlow()
summary(model=sample, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
              ReLU-1          [-1, 728, 19, 19]               0
            Conv2d-2          [-1, 728, 19, 19]         530,712
            Conv2d-3          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-4          [-1, 728, 19, 19]               0
       BatchNorm2d-5          [-1, 728, 19, 19]           1,456
              ReLU-6          [-1, 728, 19, 19]               0
            Conv2d-7          [-1, 728, 19, 19]         530,712
            Conv2d-8          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-9          [-1, 728, 19, 19]               0
      BatchNorm2d-10          [-1, 728, 19, 19]           1,456
             ReLU-11          [-1, 728, 19, 19]               0
           Conv2d-12          [-1, 728, 19, 19]         530,712
           Conv2d-13          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-14          [-1, 728

In [15]:
class ExitflowSeperable(nn.Module):
    """
    Residual block that opens the exit flow.

    Main branch     : ReLU -> SepConv(in -> in) -> BN -> ReLU -> SepConv(in -> out) -> BN -> MaxPool
    Shortcut branch : 1x1 Conv (stride 2) -> BN
    The two branches are summed element-wise.

        in_channel, out_channel : Both of them are different (728 and 1024 in Xception)
        kernel_size = 3 : For all separable convolutions
        max pool kernel_size : 3 with stride : 2
        padding : default 1, padding value for the max-pool layer

    The shortcut has its own BatchNorm2d (bnmy) so that it does not share affine
    parameters and running statistics with the BatchNorm2d of the main branch.
    """
    def __init__(self,in_channel,out_channel,padding=1):
        super(ExitflowSeperable,self).__init__()

        #1st branch
        self.sep1 = DepthwiseSeparable(in_channel=in_channel,out_channel=in_channel,kernel_size=3)
        self.sep2 = DepthwiseSeparable(in_channel=in_channel,out_channel=out_channel,kernel_size=3)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=3,stride=2,padding=padding)
        self.bnm1 = nn.BatchNorm2d(in_channel)
        self.bnm2 = nn.BatchNorm2d(out_channel)

        #2nd branch: strided 1x1 projection with a dedicated batch-norm layer
        self.conv = nn.Conv2d(in_channels=in_channel,out_channels=out_channel,kernel_size=1,stride=2)
        self.bnmy = nn.BatchNorm2d(out_channel)

    def forward(self,x):

        #2nd branch
        y = self.conv(x)
        y = self.bnmy(y)

        #1st branch
        x = self.relu(x)
        x = self.sep1(x)
        x = self.bnm1(x)
        x = self.relu(x)
        x = self.sep2(x)
        x = self.bnm2(x)
        x = self.pool(x)

        return x+y

In [16]:
# Exit-flow residual block: 728 -> 1024 channels, spatial size halved by the pooling.
sample = ExitflowSeperable(in_channel=728, out_channel=1024)
summary(model=sample, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 1024, 10, 10]         746,496
       BatchNorm2d-2         [-1, 1024, 10, 10]           2,048
              ReLU-3          [-1, 728, 19, 19]               0
            Conv2d-4          [-1, 728, 19, 19]         530,712
            Conv2d-5          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-6          [-1, 728, 19, 19]               0
       BatchNorm2d-7          [-1, 728, 19, 19]           1,456
              ReLU-8          [-1, 728, 19, 19]               0
            Conv2d-9         [-1, 1024, 19, 19]         746,496
           Conv2d-10         [-1, 1024, 19, 19]          10,240
DepthwiseSeparable-11         [-1, 1024, 19, 19]               0
      BatchNorm2d-12         [-1, 1024, 19, 19]           2,048
        MaxPool2d-13         [-1, 1024, 10, 10]               0
Total params: 2,046,776
Trainable para

In [17]:
class ExitFlow(nn.Module):
    """
    Exit flow of Xception.

    Pipeline: ExitflowSeperable block -> SepConv + BN + ReLU (first_layer channels)
    -> SepConv + BN + ReLU (second_layer channels) -> global average pool
    -> flatten -> linear output layer.

    in_channel   : channels of the incoming feature map, default 728
    out_channel  : channels produced by the ExitflowSeperable block, default 1024
    first_layer  : channels after the first separable convolution, default 1536
    second_layer : channels after the second separable convolution, default 2048
    output_layer : number of output features of the final linear layer, default 1000

    input_size  :(728,19,19) with the default arguments
    output_size :(output_layer)
    """
    def __init__(self,in_channel=728,out_channel=1024,first_layer=1536,second_layer=2048,output_layer=1000):
        super(ExitFlow,self).__init__()
        self.block = ExitflowSeperable(in_channel=in_channel,out_channel=out_channel)
        # Consumes the block's output, so its input width must follow out_channel
        # rather than a fixed 1024.
        self.sep1 = DepthwiseSeparable(in_channel=out_channel,out_channel=first_layer,kernel_size=3)
        self.bnm1 = nn.BatchNorm2d(first_layer)
        self.sep2 = DepthwiseSeparable(in_channel=first_layer,out_channel=second_layer,kernel_size=3)
        self.bnm2 = nn.BatchNorm2d(second_layer)
        self.relu = nn.ReLU()
        # Global average pool: reduces every channel to one value for any spatial size.
        # On the 10x10 map obtained from a 19x19 input this equals AvgPool2d(kernel_size=10).
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()
        self.output = nn.Linear(in_features=second_layer,out_features=output_layer)

    def forward(self,x):
        x = self.block(x)
        x = self.sep1(x)
        x = self.bnm1(x)
        x = self.relu(x)
        x = self.sep2(x)
        x = self.bnm2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.output(x)
        return x

In [18]:
# Whole exit flow with its default widths, fed the (728, 19, 19) middle-flow output.
sample = ExitFlow()
summary(model=sample, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 1024, 10, 10]         746,496
       BatchNorm2d-2         [-1, 1024, 10, 10]           2,048
              ReLU-3          [-1, 728, 19, 19]               0
            Conv2d-4          [-1, 728, 19, 19]         530,712
            Conv2d-5          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-6          [-1, 728, 19, 19]               0
       BatchNorm2d-7          [-1, 728, 19, 19]           1,456
              ReLU-8          [-1, 728, 19, 19]               0
            Conv2d-9         [-1, 1024, 19, 19]         746,496
           Conv2d-10         [-1, 1024, 19, 19]          10,240
DepthwiseSeparable-11         [-1, 1024, 19, 19]               0
      BatchNorm2d-12         [-1, 1024, 19, 19]           2,048
        MaxPool2d-13         [-1, 1024, 10, 10]               0
ExitflowSeperable-14         [-1, 1024

In [19]:
class Xception(nn.Module):
    """
    Complete Xception model: the entry, middle and exit flows applied one after
    another, each constructed with its default arguments.
    """

    def __init__(self):
        super().__init__()
        self.entry = EntryFlow()
        self.mid = MiddleFlow()
        self.exit = ExitFlow()

    def forward(self, x):
        # entry -> middle -> exit
        return self.exit(self.mid(self.entry(x)))

In [20]:
# End-to-end model on the standard 299x299 RGB input.
xception = Xception()
summary(model=xception, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
       BatchNorm2d-2         [-1, 32, 149, 149]              64
              ReLU-3         [-1, 32, 149, 149]               0
            Conv2d-4         [-1, 64, 147, 147]          18,496
       BatchNorm2d-5         [-1, 64, 147, 147]             128
              ReLU-6         [-1, 64, 147, 147]               0
     EntryflowConv-7         [-1, 64, 147, 147]               0
            Conv2d-8          [-1, 128, 74, 74]           8,320
       BatchNorm2d-9          [-1, 128, 74, 74]             256
           Conv2d-10        [-1, 128, 147, 147]           8,320
           Conv2d-11        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-12        [-1, 128, 147, 147]               0
      BatchNorm2d-13        [-1, 128, 147, 147]             256
             ReLU-14        [-1, 128, 