## We Will Train Two Neural Networks; One a Seperable Depthwise Convolution and the Other an Ordinary Convolution

This code is adapted from an official [PyTorch tutorial](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html).

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [3]:
class Depth_sc(nn.Module):
    def __init__(self,nin,nout,k):
        super(Depth_sc, self).__init__()
        self.depth = nn.Conv2d(nin, nin*8, (1,k),groups=nin)
        self.point = nn.Conv2d(nin*8, nout, (k,1))
        self.act = nn.ReLU6()

    def forward(self, x):
        x = self.act(self.point(self.depth(x)))
        return x
    
class OrdConv(nn.Module):
    def __init__(self,nin,nout,k):
        super(OrdConv, self).__init__()
        self.layer = nn.Conv2d(nin, nout, k)
        self.act = nn.ReLU6()

    def forward(self, x):
        x = self.act(self.layer(x))
        return x
    
    
class Net_SC(nn.Module):
    def __init__(self):
        super(Net_SC,self).__init__()
        self.l1 = OrdConv(3,16,3)
        self.l2 = OrdConv(16,32,5)
        self.pool =  nn.MaxPool2d(2,2)
        self.bn = nn.BatchNorm2d(32)
        self.out = nn.Linear(800,10)
        
    def forward(self,x):
        x = self.pool(self.l1(x))
        x = self.pool(self.l2(x))
        x = self.bn(x)
        x = x.view(4,-1)
        x = self.out(x)
        return x
    
class Net_Ord(nn.Module):
    def __init__(self):
        super(Net_Ord,self).__init__()
        self.l1 = Depth_sc(3,16,3)
        self.l2 = Depth_sc(16,32,5)
        self.pool =  nn.MaxPool2d(2,2)
        self.bn = nn.BatchNorm2d(32)
        self.out = nn.Linear(800,10)
        
    def forward(self,x):
        x = self.pool(self.l1(x))
        x = self.pool(self.l2(x))
        x = self.bn(x)
        x = x.view(4,-1)
        x = self.out(x)
        return x

### Depthwise Seperable Test

In [4]:
net = Net_SC()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [5]:
for epoch in range(3):

    running_loss = 0
    for i, data in enumerate(trainloader, 0):
        optimizer.zero_grad()
        inputs, labels = data

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0

[1,  2000] loss: 1.837
[1,  4000] loss: 1.573
[1,  6000] loss: 1.444
[1,  8000] loss: 1.377
[1, 10000] loss: 1.366
[1, 12000] loss: 1.318
[2,  2000] loss: 1.280
[2,  4000] loss: 1.251
[2,  6000] loss: 1.229
[2,  8000] loss: 1.220
[2, 10000] loss: 1.209
[2, 12000] loss: 1.179
[3,  2000] loss: 1.131
[3,  4000] loss: 1.136
[3,  6000] loss: 1.152
[3,  8000] loss: 1.151
[3, 10000] loss: 1.106
[3, 12000] loss: 1.129


### Ordinary Test

In [6]:
net = Net_Ord()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [7]:
for epoch in range(3):

    running_loss = 0
    for i, data in enumerate(trainloader, 0):
        optimizer.zero_grad()
        inputs, labels = data

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0

[1,  2000] loss: 1.836
[1,  4000] loss: 1.601
[1,  6000] loss: 1.493
[1,  8000] loss: 1.426
[1, 10000] loss: 1.413
[1, 12000] loss: 1.339
[2,  2000] loss: 1.279
[2,  4000] loss: 1.271
[2,  6000] loss: 1.264
[2,  8000] loss: 1.200
[2, 10000] loss: 1.204
[2, 12000] loss: 1.193
[3,  2000] loss: 1.143
[3,  4000] loss: 1.142
[3,  6000] loss: 1.127
[3,  8000] loss: 1.121
[3, 10000] loss: 1.135
[3, 12000] loss: 1.126


According to the Jupyter extension ExecuteTime, the network using seperable convolutions takes about 54 seconds per epoch, while the network using an ordinary convolution takes about 93 seconds per epoch.