## 1. Standard Import

In [1]:
from IPython import display

import matplotlib_inline.backend_inline

matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional  as F

## Sample problem

### Convolve an image of size 1x256x256 to produce a 1x252x84 result

In [6]:
# Parameters
inChans  = 1         # RGB channel (1 indicating grey-scale channel)
imsize   = [256,256] # (heightxwidth)
outChans = 1
krnSize  = 5         # should be an odd number
stride   = (1,3)     # (1 : controls the heigth  and 3 :controls the  width)
padding  = 0

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg = c(img)
empSize = torch.squeeze(resimg).shape

# compute the size of the result according to the formula
expectSize = np.array([outChans,0,0],dtype=int)
expectSize[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print(f'Obtained size: {list(empSize)}')
print(f'Expected size: {expectSize}')

Obtained size: [252, 84]
Expected size: [  1 252  84]


## Real problems

#### Convolve an image of size 3x196x96 to produce a 5x66x49 result

In [15]:
# Parameters
# empirical formula for stride  : 
# (num of pixel along dimension//output pixel along dimension ) +1 

inChans  = 3
imsize   = [196,96]  # height x width
outChans = 5
krnSize  = 7
stride   = (3,2)  # 3 : controls height and 2 : controls width
padding  = 4

# create the instance
c = nn.Conv2d(inChans,outChans,krnSize,stride,padding)

# create an image
img = torch.rand(1,inChans,imsize[0],imsize[1])

# run convolution and compute its shape
resimg2 = c(img)
empSize2 = torch.squeeze(resimg2).shape

# compute the size of the result according to the formula
expectSize2 = np.array([outChans,0,0],dtype=int)
expectSize2[1] = np.floor( (imsize[0]+2*padding-krnSize)/stride[0] ) + 1
expectSize2[2] = np.floor( (imsize[1]+2*padding-krnSize)/stride[1] ) + 1

# check the size of the output
print(f'obtained size: {list(empSize2)}')
print(f'Expected size: {expectSize2}')


obtained size: [5, 66, 49]
Expected size: [ 5 66 49]
