In [1]:
import torch.nn as nn
from torchsummary import summary

In [2]:
class EntryflowConv(nn.Module):
    """
    Convolutional stem that opens the entry flow (plain convolutions only).

    Two unpadded 3x3 convolutions, each followed by a ReLU:
        conv1 : in_channel -> 32, stride 2
        conv2 : 32 -> out_channel, stride 1

    In Xception:
        in_channel = 3
        out_channel = 64
    """
    def __init__(self, in_channel, out_channel):
        super().__init__()

        stem_width = 32  # channel count between the two convolutions
        self.conv1 = nn.Conv2d(in_channel, stem_width, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(stem_width, out_channel, kernel_size=3)

        self.relu = nn.ReLU()

    def forward(self, x):
        # Xception sizes: 3x299x299 -> 32x149x149
        x = self.relu(self.conv1(x))
        # Xception sizes: 32x149x149 -> 64x147x147
        return self.relu(self.conv2(x))

In [3]:
# Inspect the stem on an Xception-sized input (3 channels, 299x299)
sample = EntryflowConv(3, 64)
summary(model=sample, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
              ReLU-2         [-1, 32, 149, 149]               0
            Conv2d-3         [-1, 64, 147, 147]          18,496
              ReLU-4         [-1, 64, 147, 147]               0
Total params: 19,392
Trainable params: 19,392
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.02
Forward/backward pass size (MB): 31.94
Params size (MB): 0.07
Estimated Total Size (MB): 33.04
----------------------------------------------------------------


In [4]:
class DepthwiseSeparable(nn.Module):
    """
    Separable convolution built from two nn.Conv2d layers.

        Pointwise convolution : 1x1 kernel, mixes channels (in_channel -> out_channel)
        Depthwise convolution : kernel_size x kernel_size, one filter per channel
            (groups = out_channel, so every channel is convolved independently)

    The usual depthwise separable convolution is depthwise followed by pointwise.
    This module follows the Xception ordering instead:
        Pointwise Convolution + Depthwise Convolution

    in_channel, out_channel : channel counts before / after the pointwise step
    kernel_size : spatial kernel of the depthwise step
    stride : default 1, stride of the depthwise step
    padding : default 1, padding of the depthwise step ("same" for kernel_size = 3)
    """
    def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=1):
        super().__init__()

        # channel mixing only, no spatial extent
        self.pointwise = nn.Conv2d(in_channel, out_channel, kernel_size=1)
        # spatial filtering only: one group per channel
        self.depthwise = nn.Conv2d(
            out_channel,
            out_channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=out_channel,
        )

    def forward(self, x):
        return self.depthwise(self.pointwise(x))

In [5]:
# The default padding of 1 keeps the spatial size for kernel_size = 3 ('same');
# every separable layer in this notebook relies on that default.
sample = DepthwiseSeparable(64, 128, kernel_size=3)
summary(model=sample, input_size=(64, 147, 147))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 128, 147, 147]           8,320
            Conv2d-2        [-1, 128, 147, 147]           1,280
Total params: 9,600
Trainable params: 9,600
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 5.28
Forward/backward pass size (MB): 42.21
Params size (MB): 0.04
Estimated Total Size (MB): 47.52
----------------------------------------------------------------


In [6]:
class EntryflowSeparable(nn.Module):
    """
    Downsampling residual block of the entry flow (EntryFlow stacks three of them).

    Main branch : [ReLU] -> sepconv1 -> ReLU -> sepconv2 -> 3x3 max pool, stride 2
    Shortcut    : 1x1 convolution with stride 2
    The two branches are summed.

        in_channel, out_channel : Different for each repetition
        pool_padding : default 1, padding of the max pool layer
        relu_extra : bool, default False, apply a ReLU before sepconv1
        kernel_size = 3 for both separable convolutions
    """
    def __init__(self, in_channel, out_channel, pool_padding=1, relu_extra=False):
        super().__init__()

        # main branch
        self.sepconv1 = DepthwiseSeparable(in_channel, out_channel, kernel_size=3)
        self.sepconv2 = DepthwiseSeparable(out_channel, out_channel, kernel_size=3)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=pool_padding)
        self.relu = nn.ReLU()
        self.relu_extra = relu_extra

        # shortcut branch: stride 2 matches the max pool's downsampling
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=2)

    def forward(self, x):
        # shortcut reads the raw block input (before any ReLU)
        shortcut = self.conv(x)

        main = self.relu(x) if self.relu_extra else x
        main = self.relu(self.sepconv1(main))
        main = self.maxpool(self.sepconv2(main))

        return main + shortcut

In [7]:
# First entry-flow block: 64 -> 128 channels, no leading ReLU
sample = EntryflowSeparable(64, 128)
summary(model=sample, input_size=(64, 147, 147))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 74, 74]           8,320
            Conv2d-2        [-1, 128, 147, 147]           8,320
            Conv2d-3        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-4        [-1, 128, 147, 147]               0
              ReLU-5        [-1, 128, 147, 147]               0
            Conv2d-6        [-1, 128, 147, 147]          16,512
            Conv2d-7        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-8        [-1, 128, 147, 147]               0
         MaxPool2d-9          [-1, 128, 74, 74]               0
Total params: 35,712
Trainable params: 35,712
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 5.28
Forward/backward pass size (MB): 158.41
Params size (MB): 0.14
Estimated Total Size (MB): 163.82
-----------------------------------------

In [8]:
# Second entry-flow block: 128 -> 256 channels, with the leading ReLU enabled
sample = EntryflowSeparable(128, 256, relu_extra=True)
summary(model=sample, input_size=(128, 74, 74))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 37, 37]          33,024
              ReLU-2          [-1, 128, 74, 74]               0
            Conv2d-3          [-1, 256, 74, 74]          33,024
            Conv2d-4          [-1, 256, 74, 74]           2,560
DepthwiseSeparable-5          [-1, 256, 74, 74]               0
              ReLU-6          [-1, 256, 74, 74]               0
            Conv2d-7          [-1, 256, 74, 74]          65,792
            Conv2d-8          [-1, 256, 74, 74]           2,560
DepthwiseSeparable-9          [-1, 256, 74, 74]               0
        MaxPool2d-10          [-1, 256, 37, 37]               0
Total params: 136,960
Trainable params: 136,960
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 2.67
Forward/backward pass size (MB): 85.56
Params size (MB): 0.52
Estimated 

In [9]:
class EntryFlow(nn.Module):
    """
    Entry flow of Xception:

        EntryflowConv followed by 3 x EntryflowSeparable
        channels : 3 -> 64 -> 128 -> 256 -> 728
        in_channel = 3
        out_channel = 728
    """
    def __init__(self):
        super().__init__()
        self.conv = EntryflowConv(3, 64)
        self.sep1 = EntryflowSeparable(64, 128)
        # from the second block on, the main branch starts with a ReLU
        self.sep2 = EntryflowSeparable(128, 256, relu_extra=True)
        self.sep3 = EntryflowSeparable(256, 728, relu_extra=True)

    def forward(self, x):
        for stage in (self.conv, self.sep1, self.sep2, self.sep3):
            x = stage(x)
        return x

In [10]:
# Summary of the complete entry flow on a 3x299x299 input
xception_entry = EntryFlow()
summary(model=xception_entry, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
              ReLU-2         [-1, 32, 149, 149]               0
            Conv2d-3         [-1, 64, 147, 147]          18,496
              ReLU-4         [-1, 64, 147, 147]               0
     EntryflowConv-5         [-1, 64, 147, 147]               0
            Conv2d-6          [-1, 128, 74, 74]           8,320
            Conv2d-7        [-1, 128, 147, 147]           8,320
            Conv2d-8        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-9        [-1, 128, 147, 147]               0
             ReLU-10        [-1, 128, 147, 147]               0
           Conv2d-11        [-1, 128, 147, 147]          16,512
           Conv2d-12        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-13        [-1, 128, 147, 147]               0
        MaxPool2d-14          [-1, 128

In [11]:
class MiddleflowSeperable(nn.Module):
    """
    Residual block of the middle flow.

    Main branch : three times (ReLU -> separable convolution, kernel_size = 3)
    Shortcut    : 1x1 convolution with stride 1
    The two branches are summed; the spatial size is unchanged.

        in_channel, out_channel : equal in the middle flow (MiddleFlow uses 728 for both)
    """
    def __init__(self, in_channel, out_channel):
        super().__init__()

        # main branch
        self.sepconv1 = DepthwiseSeparable(in_channel, out_channel, kernel_size=3)
        self.sepconv2 = DepthwiseSeparable(out_channel, out_channel, kernel_size=3)
        self.sepconv3 = DepthwiseSeparable(out_channel, out_channel, kernel_size=3)
        self.relu = nn.ReLU()

        # shortcut branch
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1)

    def forward(self, x):
        # shortcut reads the raw block input (before the first ReLU)
        shortcut = self.conv(x)

        out = x
        for sepconv in (self.sepconv1, self.sepconv2, self.sepconv3):
            out = sepconv(self.relu(out))

        return out + shortcut

In [12]:
# One middle-flow block at the middle-flow resolution (728 channels, 19x19)
model = MiddleflowSeperable(728, 728)
summary(model, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 728, 19, 19]         530,712
              ReLU-2          [-1, 728, 19, 19]               0
            Conv2d-3          [-1, 728, 19, 19]         530,712
            Conv2d-4          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-5          [-1, 728, 19, 19]               0
              ReLU-6          [-1, 728, 19, 19]               0
            Conv2d-7          [-1, 728, 19, 19]         530,712
            Conv2d-8          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-9          [-1, 728, 19, 19]               0
             ReLU-10          [-1, 728, 19, 19]               0
           Conv2d-11          [-1, 728, 19, 19]         530,712
           Conv2d-12          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-13          [-1, 728, 19, 19]               0
Total params: 2,144,688
Trainable para

In [13]:
class MiddleFlow(nn.Module):
    """
    Middle flow of Xception: a chain of MiddleflowSeperable blocks.

    Every repetition is a separate MiddleflowSeperable instance, so each one
    has its own parameters. (Calling a single instance several times in a loop
    would tie all repetitions to one shared set of weights.)

        repeats : number of chained blocks, default 8

    input_size = (728, 19, 19)
    output_size = (728, 19, 19)
    """
    def __init__(self, repeats=8):
        super(MiddleFlow,self).__init__()
        # nn.Sequential registers each block as its own submodule (blocks.0 ... blocks.N-1)
        self.blocks = nn.Sequential(
            *[MiddleflowSeperable(in_channel=728,out_channel=728) for _ in range(repeats)]
        )

    def forward(self,x):
        return self.blocks(x)

In [14]:
# Summary of the whole middle flow at 728 channels, 19x19
sample = MiddleFlow()
summary(sample, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 728, 19, 19]         530,712
              ReLU-2          [-1, 728, 19, 19]               0
            Conv2d-3          [-1, 728, 19, 19]         530,712
            Conv2d-4          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-5          [-1, 728, 19, 19]               0
              ReLU-6          [-1, 728, 19, 19]               0
            Conv2d-7          [-1, 728, 19, 19]         530,712
            Conv2d-8          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-9          [-1, 728, 19, 19]               0
             ReLU-10          [-1, 728, 19, 19]               0
           Conv2d-11          [-1, 728, 19, 19]         530,712
           Conv2d-12          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-13          [-1, 728, 19, 19]               0
MiddleflowSeperable-14          [-1, 7

In [15]:
class ExitflowSeperable(nn.Module):
    """
    Downsampling residual block that opens the exit flow.

    Main branch : ReLU -> sep1 (in_channel -> in_channel) -> ReLU
                  -> sep2 (in_channel -> out_channel) -> 3x3 max pool, stride 2
    Shortcut    : 1x1 convolution with stride 2 (in_channel -> out_channel)
    The two branches are summed.

        in_channel, out_channel : input / output channel counts (they differ)
        padding : default 1, padding of the max pool layer
        kernel_size = 3 for both separable convolutions
    """
    def __init__(self, in_channel, out_channel, padding=1):
        super().__init__()

        # main branch
        self.sep1 = DepthwiseSeparable(in_channel, in_channel, kernel_size=3)
        self.sep2 = DepthwiseSeparable(in_channel, out_channel, kernel_size=3)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(3, stride=2, padding=padding)

        # shortcut branch: stride 2 matches the max pool's downsampling
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=2)

    def forward(self, x):
        # shortcut reads the raw block input (before the first ReLU)
        shortcut = self.conv(x)

        main = self.sep1(self.relu(x))
        main = self.sep2(self.relu(main))
        main = self.pool(main)

        return main + shortcut

In [16]:
# Exit-flow residual block: 728 -> 1024 channels, 19x19 input
sample = ExitflowSeperable(728, 1024)
summary(sample, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 1024, 10, 10]         746,496
              ReLU-2          [-1, 728, 19, 19]               0
            Conv2d-3          [-1, 728, 19, 19]         530,712
            Conv2d-4          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-5          [-1, 728, 19, 19]               0
              ReLU-6          [-1, 728, 19, 19]               0
            Conv2d-7         [-1, 1024, 19, 19]         746,496
            Conv2d-8         [-1, 1024, 19, 19]          10,240
DepthwiseSeparable-9         [-1, 1024, 19, 19]               0
        MaxPool2d-10         [-1, 1024, 10, 10]               0
Total params: 2,041,224
Trainable params: 2,041,224
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.00
Forward/backward pass size (MB): 20.05
Params size (MB): 7.79
Estima

In [17]:
class ExitFlow(nn.Module):
    """
    Exit flow: an ExitflowSeperable block, two separable convolutions (each
    followed by a ReLU), global average pooling, flatten and the linear output layer.

        in_channel : channels entering the flow, default 728
        out_channel : channels produced by the ExitflowSeperable block, default 1024
        first_layer : output channels of the first separable convolution, default 1536
        second_layer : output channels of the second separable convolution, default 2048
        output_layer : number of features produced by the linear layer, default 1000

    input_size  : (in_channel, H, W), e.g. (728, 19, 19)
    output_size : (output_layer,)
    """
    def __init__(self,in_channel=728,out_channel=1024,first_layer=1536,second_layer=2048,output_layer=1000):
        super(ExitFlow,self).__init__()
        self.block = ExitflowSeperable(in_channel=in_channel,out_channel=out_channel)
        # sep1 consumes the block's output, so its input width has to follow out_channel
        # (a fixed 1024 here breaks every non-default out_channel)
        self.sep1 = DepthwiseSeparable(in_channel=out_channel,out_channel=first_layer,kernel_size=3)
        self.sep2 = DepthwiseSeparable(in_channel=first_layer,out_channel=second_layer,kernel_size=3)
        self.relu = nn.ReLU()
        # Global average pooling: reduce whatever spatial size remains to 1x1.
        # Same result as a 10x10 average pool on the 10x10 map of the default setup.
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.flatten = nn.Flatten()
        self.output = nn.Linear(in_features=second_layer,out_features=output_layer)

    def forward(self,x):
        x = self.block(x)
        x = self.sep1(x)
        x = self.relu(x)
        x = self.sep2(x)
        x = self.relu(x)
        x = self.pool(x)
        # (N, second_layer, 1, 1) -> (N, second_layer)
        x = self.flatten(x)
        x = self.output(x)
        return x

In [18]:
# Summary of the whole exit flow with its default channel configuration
sample = ExitFlow()
summary(sample, input_size=(728, 19, 19))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 1024, 10, 10]         746,496
              ReLU-2          [-1, 728, 19, 19]               0
            Conv2d-3          [-1, 728, 19, 19]         530,712
            Conv2d-4          [-1, 728, 19, 19]           7,280
DepthwiseSeparable-5          [-1, 728, 19, 19]               0
              ReLU-6          [-1, 728, 19, 19]               0
            Conv2d-7         [-1, 1024, 19, 19]         746,496
            Conv2d-8         [-1, 1024, 19, 19]          10,240
DepthwiseSeparable-9         [-1, 1024, 19, 19]               0
        MaxPool2d-10         [-1, 1024, 10, 10]               0
ExitflowSeperable-11         [-1, 1024, 10, 10]               0
           Conv2d-12         [-1, 1536, 10, 10]       1,574,400
           Conv2d-13         [-1, 1536, 10, 10]          15,360
DepthwiseSeparable-14         [-1, 1536

In [19]:
class Xception(nn.Module):
    """
        Complete Xception model: the entry, middle and exit flows applied in sequence.
    """
    def __init__(self):
        super().__init__()
        self.entry = EntryFlow()
        self.mid = MiddleFlow()
        self.exit = ExitFlow()

    def forward(self, x):
        return self.exit(self.mid(self.entry(x)))

In [20]:
# Summary of the full network on a 3x299x299 input
xception = Xception()
summary(xception, input_size=(3, 299, 299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             896
              ReLU-2         [-1, 32, 149, 149]               0
            Conv2d-3         [-1, 64, 147, 147]          18,496
              ReLU-4         [-1, 64, 147, 147]               0
     EntryflowConv-5         [-1, 64, 147, 147]               0
            Conv2d-6          [-1, 128, 74, 74]           8,320
            Conv2d-7        [-1, 128, 147, 147]           8,320
            Conv2d-8        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-9        [-1, 128, 147, 147]               0
             ReLU-10        [-1, 128, 147, 147]               0
           Conv2d-11        [-1, 128, 147, 147]          16,512
           Conv2d-12        [-1, 128, 147, 147]           1,280
DepthwiseSeparable-13        [-1, 128, 147, 147]               0
        MaxPool2d-14          [-1, 128