## Code for implementing scale invariant CNNs.

Here are the results of scale invariance CNNs applied on MNIST. The train images are taken from MNIST directly where as test images are scaled by a factor sampled uniformly from [0.5,2].

```
RESULTS OF MULTISCALE CNN (bilinear)
Train accuracy of the model: 99.160 %
tensor(59497, device='cuda:0') 60000
Test accuracy of the model: 82.702 %
tensor(8271, device='cuda:0') 10000
```

Here are the results of scale invariance CNNs applied on MNIST. The train images are taken from MNIST directly where as test images are scaled by a factor sampled uniformly from [0.5,2.5].

```
RESULTS OF MULTISCALE CNN (bilinear)
Train accuracy of the model: 99.210 %
tensor(59527, device='cuda:0') 60000
Test accuracy of the model: 72.533 %
tensor(7254, device='cuda:0') 10000
```




### Importing required libraries and setting things up

In [10]:
import torch.nn as nn
import torch.nn.functional as F
import torch

!pip install torchviz



### Rewriting Conv2d to implement the scale invariant convolutions


#### Loading the base class

In [31]:
from torch.nn.modules.utils import _single, _pair, _triple
from torch.nn.modules.conv import *

def _reverse_repeat_tuple(t, n):
    """Reverse the order of `t` and repeat each element for `n` times.
    This can be used to translate padding arg used by Conv and Pooling modules
    to the ones used by `F.pad`.
    """
    return tuple(x for x in reversed(t) for _ in range(n))

class _ConvNd(Module):

    __constants__ = ['stride', 'padding', 'dilation', 'groups',
                     'padding_mode', 'output_padding', 'in_channels',
                     'out_channels', 'kernel_size']
    __annotations__ = {'bias': Optional[torch.Tensor]}

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, transposed, output_padding,
                 groups, bias, padding_mode):
        super(_ConvNd, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'}
        if padding_mode not in valid_padding_modes:
            raise ValueError("padding_mode must be one of {}, but got padding_mode='{}'".format(
                valid_padding_modes, padding_mode))
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        self.padding_mode = padding_mode
        # `_reversed_padding_repeated_twice` is the padding to be passed to
        # `F.pad` if needed (e.g., for non-zero padding types that are
        # implemented as two ops: padding + conv). `F.pad` accepts paddings in
        # reverse order than the dimension.
        self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2)
        if transposed:
            self.weight = Parameter(torch.Tensor(
                in_channels, out_channels // groups, *kernel_size))
        else:
            self.weight = Parameter(torch.Tensor(
                out_channels, in_channels // groups, *kernel_size))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        init.normal_(self.weight)
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def extra_repr(self):
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        if self.padding_mode != 'zeros':
            s += ', padding_mode={padding_mode}'
        return s.format(**self.__dict__)

    def __setstate__(self, state):
        super(_ConvNd, self).__setstate__(state)
        if not hasattr(self, 'padding_mode'):
            self.padding_mode = 'zeros'

#### Writing out new convolutional filter. Same number of parameters but convolutions at multiple scales.

In [32]:
#
import random


class Conv2dMultiScale(_ConvNd):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', levels=2, pooling_mode='max'):
        cur_size = kernel_size
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(Conv2dMultiScale, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)
        # self.scale = nn.UpsamplingBilinear2d(size=(8,8))
        self.scale = []
        for i in range(levels - 1):
          self.scale.append(nn.UpsamplingBilinear2d(size=(cur_size + 2, cur_size + 2)))
        # self.scale = nn.functional.interpolate(size=(5,5), mode='bicubic')
        self.pooling_mode = pooling_mode

    def _conv_forward(self, input, weight_para):

        # Typically this is the only thing that done
        out1 = F.conv2d(input, weight_para, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
        # print(type(weight_para))
        weight = weight_para[:]
        # print(type(weight))
        for scale in self.scale:
          # Upscaling the weights
          weight = scale(weight)
          # Adjusting padding to keep same output side
          padding = tuple(x+1 for x in self.padding)
          # Running convolution on the bigger scale
          out2 = F.conv2d(input, weight, self.bias, self.stride,
                        padding, self.dilation, self.groups)
          #crop the output feature map
          out2 = F.interpolate(out2, size=out1.shape[2:], mode='bilinear', align_corners=True)
          # print(type(out1))
          # print(type(out2))
          if (self.pooling_mode == 'avg'):
            out1 = out1 + out2
          elif (self.pooling_mode == 'max'):
            out1 = torch.max(out1, out2)
            
        return out1
    def forward(self, input):
        return self._conv_forward(input, self.weight)

In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Understanding sizes of CNNs, ensuring it works
kernelSize = 3
m = Conv2dMultiScale(1, 32, kernelSize, stride=1, padding=0, levels=3, pooling_mode='max')
input = torch.randn(20, 1, 32, 32)
print(input.shape)
output = m(input)
print(output.shape)
# output = F.max_pool2d(output, 2)
# print(output.shape)
# m2 = Conv2dMultiScale(32, 64, kernelSize, stride=1, padding=0, levels=3)
# output = m2(output)
# print(output.shape)
# output = F.max_pool2d(output, 2)
# print(output.shape)

torch.Size([20, 1, 32, 32])
torch.Size([20, 32, 30, 30])


###MNIST Test

In [15]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

batch_size=64

train_transform = transforms.Compose([
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.RandomAffine(degrees=0, scale=(1,2)),
    transforms.ToTensor(),
])

train_data = dsets. CIFAR10(root = './data', train = True,
                        transform = train_transform, download = True)

test_data = dsets.CIFAR10(root = './data', train = False,
                       transform = test_transform)

train_gen = torch.utils.data.DataLoader(dataset = train_data,
                                             batch_size = batch_size,
                                             shuffle = True)

test_gen = torch.utils.data.DataLoader(dataset = test_data,
                                      batch_size = batch_size, 
                                      shuffle = False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data


###Train and Test Function

In [16]:
def train(net, lr=0.001, num_epochs=10, batch_size=64):  
  if torch.cuda.is_available():
    net.cuda()
  loss_function = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam( net.parameters(), lr=lr)

  for epoch in range(num_epochs):
    for i ,(images,labels) in enumerate(train_gen):
      if torch.cuda.is_available():
        images = images.cuda()
        labels = labels.cuda()
      
      optimizer.zero_grad()
      outputs = net(images)
      loss = loss_function(outputs, labels)
      loss.backward()
      optimizer.step()
      
      if (i+1) % 100 == 0:
        print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  %(epoch+1, num_epochs, i+1, len(train_data)//batch_size, loss.item()))
        
def test(net):  
  if(net.multiScale):
    print('RESULTS OF MULTISCALE CNN')
  else:
    print('RESULTS OF STANDARD CNN')

  correct = 0
  total = 0
  # loss_function = nn.CrossEntropyLoss()
  for images,labels in train_gen:
    if torch.cuda.is_available():
      images = images.cuda()
      labels = labels.cuda()
    
    output = net(images)
    # loss = loss_function(outputs, labels)
    _, predicted = torch.max(output,1)
    correct += (predicted == labels).sum()
    total += labels.size(0)
  train_acc = (100*correct.cpu().numpy())/(total+1)
  print('Train accuracy of the model: %.3f %%' %(train_acc))
  print(correct, total)

  correct = 0
  total = 0
  for images,labels in test_gen:
    if torch.cuda.is_available():
      images = images.cuda()
      labels = labels.cuda()
    
    output = net(images)
    # loss = loss_function(outputs, labels)
    _, predicted = torch.max(output,1)
    correct += (predicted == labels).sum()
    total += labels.size(0)
  test_acc = (100*correct.cpu().numpy())/(total+1)
  print('Test accuracy of the model: %.3f %%' %(test_acc))
  print(correct, total)

###Define the Model

In [41]:
class Net(nn.Module):
    def __init__(self, multiScale=True, cat = True, level_1=3, level_2=2):
        super(Net, self).__init__()
        self.multiScale = multiScale
        self.cat = cat
        self.level_1 = level_1
        self.level_2 = level_2
        if(multiScale):
          if (not cat):
            self.conv1 = Conv2dMultiScale(3, 64, 3, 1, pooling_mode='avg',levels=level_1)
            self.conv2 = Conv2dMultiScale(64, 64, 3, 1, pooling_mode='avg',levels=level_2)
            self.fc = nn.Linear(64 * 6 * 6, 384)
          else:
            self.conv1 = Conv2dMultiScale(3, 64, 3, 1, pooling_mode='cat',levels=level_1)
            self.conv2 = Conv2dMultiScale(64, 64, 3, 1, pooling_mode='cat',levels=level_2)
            self.fc = nn.Linear(64 * 6 * 6, 384)
        else:
          self.conv1 = nn.Conv2d(3, 64, 5, 1)
          self.conv2 = nn.Conv2d(64, 64, 5, 1)
          self.fc = nn.Linear(64 * 5 * 5, 384)
        self.fc1 = nn.Linear(384, 192)
        self.fc2 = nn.Linear(192, 10)

    def forward(self, x):
        if (not self.cat or not self.multiScale):
          #pooling operation:
          x = self.conv1(x)        
          x = F.relu(x)    
          x = F.max_pool2d(x, 2)  
          x = self.conv2(x)       
          x = F.relu(x)      
          x = F.max_pool2d(x, 2)     
          x = torch.flatten(x, 1)       
          x = self.fc(x)
          x = F.relu(x)      
          x = self.fc1(x)
          x = F.relu(x)
          x = self.fc2(x)
          output = F.log_softmax(x, dim=1)
        else:
          pass
        return output


net = Net(multiScale=True, cat=False)

In [None]:
# net(torch.randn(20, 1, 28, 28))

###Train, Test and Results

In [42]:
train(net, lr=0.001, num_epochs=10)
# train(net, lr=0.003, num_epochs=5)
train(net, lr=0.0001, num_epochs=20)

Epoch [1/10], Step [100/781], Loss: 5.7623
Epoch [1/10], Step [200/781], Loss: 7.1643
Epoch [1/10], Step [300/781], Loss: 2.5556
Epoch [1/10], Step [400/781], Loss: 1.7007
Epoch [1/10], Step [500/781], Loss: 2.0281
Epoch [1/10], Step [600/781], Loss: 1.6484
Epoch [1/10], Step [700/781], Loss: 1.4830
Epoch [2/10], Step [100/781], Loss: 1.6040
Epoch [2/10], Step [200/781], Loss: 2.0097
Epoch [2/10], Step [300/781], Loss: 1.3310
Epoch [2/10], Step [400/781], Loss: 1.5470
Epoch [2/10], Step [500/781], Loss: 1.4550
Epoch [2/10], Step [600/781], Loss: 1.4153
Epoch [2/10], Step [700/781], Loss: 1.4469
Epoch [3/10], Step [100/781], Loss: 1.1666
Epoch [3/10], Step [200/781], Loss: 1.2905
Epoch [3/10], Step [300/781], Loss: 1.3657
Epoch [3/10], Step [400/781], Loss: 1.3757
Epoch [3/10], Step [500/781], Loss: 1.2555
Epoch [3/10], Step [600/781], Loss: 1.3657
Epoch [3/10], Step [700/781], Loss: 1.5478
Epoch [4/10], Step [100/781], Loss: 1.3361
Epoch [4/10], Step [200/781], Loss: 1.2477
Epoch [4/10

In [40]:
test(net)

RESULTS OF MULTISCALE CNN
Train accuracy of the model: 95.422 %
tensor(47712, device='cuda:0') 50000
Test accuracy of the model: 46.955 %
tensor(4696, device='cuda:0') 10000
