## Code for implementing scale invariant CNNs.

Here are the results of scale invariance CNNs applied on MNIST. The train images are taken from MNIST directly where as test images are scaled by a factor sampled uniformly from [0.5,2].

```
RESULTS OF MULTISCALE CNN (bilinear)
Train accuracy of the model: 99.160 %
tensor(59497, device='cuda:0') 60000
Test accuracy of the model: 82.702 %
tensor(8271, device='cuda:0') 10000
```

```
RESULTS OF MULTISCALE CNN (bicubic)
Train accuracy of the model: 99.160 %
tensor(59497, device='cuda:0') 60000
Test accuracy of the model: 82.702 %
tensor(8271, device='cuda:0') 10000
```

```
RESULTS OF STANDARD CNN
Train accuracy of the model: 98.982 %
tensor(59390, device='cuda:0') 60000
Test accuracy of the model: 77.052 %
tensor(7706, device='cuda:0') 10000
```


Here are the results of scale invariance CNNs applied on MNIST. The train images are taken from MNIST directly where as test images are scaled by a factor sampled uniformly from [0.5,2.5].

```
RESULTS OF MULTISCALE CNN (bilinear)
Train accuracy of the model: 99.210 %
tensor(59527, device='cuda:0') 60000
Test accuracy of the model: 72.533 %
tensor(7254, device='cuda:0') 10000
```

```
RESULTS OF MULTISCALE CNN (bicubic)
Train accuracy of the model: 99.160 %
tensor(59497, device='cuda:0') 60000
Test accuracy of the model: 82.702 %
tensor(8271, device='cuda:0') 10000
```

```
RESULTS OF STANDARD CNN
Train accuracy of the model: 99.090 %
tensor(59455, device='cuda:0') 60000
Test accuracy of the model: 66.873 %
tensor(6688, device='cuda:0') 10000
```




### Importing required libraries and setting things up

In [188]:
import torch.nn as nn
import torch.nn.functional as F
import torch

!pip install torchviz



### Rewriting Conv2d to implement the scale invariant convolutions


#### Loading the base class

In [189]:
from torch.nn.modules.utils import _single, _pair, _triple
from torch.nn.modules.conv import *

def _reverse_repeat_tuple(t, n):
    """Reverse the order of `t` and repeat each element for `n` times.
    This can be used to translate padding arg used by Conv and Pooling modules
    to the ones used by `F.pad`.
    """
    return tuple(x for x in reversed(t) for _ in range(n))

class _ConvNd(Module):

    __constants__ = ['stride', 'padding', 'dilation', 'groups',
                     'padding_mode', 'output_padding', 'in_channels',
                     'out_channels', 'kernel_size']
    __annotations__ = {'bias': Optional[torch.Tensor]}

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, transposed, output_padding,
                 groups, bias, padding_mode):
        super(_ConvNd, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'}
        if padding_mode not in valid_padding_modes:
            raise ValueError("padding_mode must be one of {}, but got padding_mode='{}'".format(
                valid_padding_modes, padding_mode))
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        self.padding_mode = padding_mode
        # `_reversed_padding_repeated_twice` is the padding to be passed to
        # `F.pad` if needed (e.g., for non-zero padding types that are
        # implemented as two ops: padding + conv). `F.pad` accepts paddings in
        # reverse order than the dimension.
        self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2)
        if transposed:
            self.weight = Parameter(torch.Tensor(
                in_channels, out_channels // groups, *kernel_size))
        else:
            self.weight = Parameter(torch.Tensor(
                out_channels, in_channels // groups, *kernel_size))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def extra_repr(self):
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        if self.padding_mode != 'zeros':
            s += ', padding_mode={padding_mode}'
        return s.format(**self.__dict__)

    def __setstate__(self, state):
        super(_ConvNd, self).__setstate__(state)
        if not hasattr(self, 'padding_mode'):
            self.padding_mode = 'zeros'

#### Writing out new convolutional filter. Same number of parameters but convolutions at multiple scales.

In [190]:
#
import random

class Conv2dMultiScale(_ConvNd):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', bicubic=False):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(Conv2dMultiScale, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)
        self.scale = nn.UpsamplingBilinear2d(size=(8,8))

        self.bicubic = bicubic
        # self.scale = nn.functional.interpolate(size=(5,5), mode='bicubic')

    def _conv_forward(self, input, weight):

        # Typically this is the only thing that done
        out1 = F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
        # print(out1.shape, "out1", weight.shape)
        
        # Upscaling the weights
        # We try bilinear and bicubic and test their performance
        
        if (self.bicubic):
          weight = F.interpolate(weight, size=(5,5), mode='bicubic')
        else:
          norm1 = torch.norm(weight)
          weight = self.scale(weight)
          norm2 = torch.norm(weight)
        # Adjusting padding to keep same output side
        padding = tuple(x+1 for x in self.padding)
        
        # Running convolution on the bigger scale
        out2 = F.conv2d(input, weight, self.bias, self.stride,
                        padding, self.dilation, self.groups)
        # print(out2.shape, "out2", weight.shape)
        
        
        # Returning the result
        out2 = F.interpolate(out2, size=out1.shape[2:], mode='bicubic')
        # print(out1.shape)
        # print(out2.shape)
        return out1 + out2

    def forward(self, input):
        return self._conv_forward(input, self.weight)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [191]:
# Understanding sizes of CNNs, ensuring it works
kernelSize = 3
m = Conv2dMultiScale(5, 3, kernelSize, stride=1, padding=(kernelSize - 1) // 2)
input = torch.randn(20, 5, 28, 28)
print(input.shape)
output = m(input)
output.shape

torch.Size([20, 5, 28, 28])


  "See the documentation of nn.Upsample for details.".format(mode))


torch.Size([20, 3, 28, 28])

### Testing it on MNIST

In [192]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

num_classes = 10 # number of output classes discrete range [0,9]
num_epochs = 2 # number of times which the entire dataset is passed throughout the model
batch_size = 64  # the size of input data took for one iteration
lr = 1e-3 # size of step

train_transform = transforms.Compose([
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.RandomAffine(degrees=0, scale=(0.5,2)),
    transforms.ToTensor(),
])

train_data = dsets.MNIST(root = './data', train = True,
                        transform = train_transform, download = True)

test_data = dsets.MNIST(root = './data', train = False,
                       transform = _test_transform())

train_gen = torch.utils.data.DataLoader(dataset = train_data,
                                             batch_size = batch_size,
                                             shuffle = True)

test_gen = torch.utils.data.DataLoader(dataset = test_data,
                                      batch_size = batch_size, 
                                      shuffle = False)



In [193]:
class Net(nn.Module):
    def __init__(self, multiScale=True):
        super(Net, self).__init__()
        self.multiScale = multiScale
        if(multiScale):
          self.conv1 = Conv2dMultiScale(1, 32, 3, 1)
          self.conv2 = Conv2dMultiScale(32, 64, 3, 1)
        else:
          self.conv1 = nn.Conv2d(1, 32, 3, 1)
          self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc = nn.Linear(9216, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        nn.Dropout()
        x = self.conv2(x)
        nn.Dropout()
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        nn.Dropout()
        output = F.log_softmax(x, dim=1)
        return output


net = Net(multiScale=True)

In [194]:
if torch.cuda.is_available():
  net.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam( net.parameters(), lr=lr)

for epoch in range(num_epochs):
  for i ,(images,labels) in enumerate(train_gen):
    if torch.cuda.is_available():
      images = images.cuda()
      labels = labels.cuda()
    
    optimizer.zero_grad()
    outputs = net(images)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()
    
    if (i+1) % 100 == 0:
      print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, len(train_data)//batch_size, loss.item()))

  "See the documentation of nn.Upsample for details.".format(mode))


Epoch [1/2], Step [100/937], Loss: 0.1299
Epoch [1/2], Step [200/937], Loss: 0.1726
Epoch [1/2], Step [300/937], Loss: 0.0964
Epoch [1/2], Step [400/937], Loss: 0.2244
Epoch [1/2], Step [500/937], Loss: 0.1703
Epoch [1/2], Step [600/937], Loss: 0.1206
Epoch [1/2], Step [700/937], Loss: 0.0563
Epoch [1/2], Step [800/937], Loss: 0.0726
Epoch [1/2], Step [900/937], Loss: 0.1046
Epoch [2/2], Step [100/937], Loss: 0.1755
Epoch [2/2], Step [200/937], Loss: 0.0946
Epoch [2/2], Step [300/937], Loss: 0.0389
Epoch [2/2], Step [400/937], Loss: 0.0427
Epoch [2/2], Step [500/937], Loss: 0.0435
Epoch [2/2], Step [600/937], Loss: 0.0792
Epoch [2/2], Step [700/937], Loss: 0.0173
Epoch [2/2], Step [800/937], Loss: 0.0894
Epoch [2/2], Step [900/937], Loss: 0.0208


In [195]:
if(net.multiScale):
  print('RESULTS OF MULTISCALE CNN')
else:
  print('RESULTS OF STANDARD CNN')

correct = 0
total = 0

for images,labels in train_gen:
  if torch.cuda.is_available():
    images = images.cuda()
    labels = labels.cuda()
  
  output = net(images)
  _, predicted = torch.max(output,1)
  correct += (predicted == labels).sum()
  total += labels.size(0)
train_acc = (100*correct.cpu().numpy())/(total+1)
print('Train accuracy of the model: %.3f %%' %(train_acc))
print(correct, total)

correct = 0
total = 0
for images,labels in test_gen:
  if torch.cuda.is_available():
    images = images.cuda()
    labels = labels.cuda()
  
  output = net(images)
  _, predicted = torch.max(output,1)
  correct += (predicted == labels).sum()
  total += labels.size(0)
test_acc = (100*correct.cpu().numpy())/(total+1)
print('Test accuracy of the model: %.3f %%' %(test_acc))
print(correct, total)

RESULTS OF MULTISCALE CNN


  "See the documentation of nn.Upsample for details.".format(mode))


Train accuracy of the model: 98.562 %
tensor(59138, device='cuda:0') 60000
Test accuracy of the model: 80.402 %
tensor(8041, device='cuda:0') 10000
