# Practice Prior to Implementing the CNN

In [1]:
import torch
import torch.nn as nn

There are multiple ways to create a 2D convolution layer. The unnamed (positional) integer parameters are: **1)** the number of input channels (e.g. the color channels of a given input image), **2)** the number of output channels (how many filters we want from the given layer), **3)** the kernel size.

In [7]:
# Baseline layer: 3 input channels, 16 output channels, 3x3 kernel.
nn.Conv2d(3, 16, 3)

# Same layer with a padding of one pixel around the input.
nn.Conv2d(3, 16, 3, padding=1)

# A tuple kernel size gives a non-square (3x4) kernel.
nn.Conv2d(3, 16, (3, 4), padding=1)

# Move the kernel two pixels per step instead of one.
nn.Conv2d(3, 16, 3, stride=2)

# Stride and padding may also be given per dimension as tuples. This is the
# cell's last expression, so its repr is what the notebook displays.
nn.Conv2d(3, 16, (3, 4), padding=(1, 2), stride=(3, 3))

Conv2d(3, 16, kernel_size=(3, 4), stride=(3, 3), padding=(1, 2))

Reasons pooling layers are used: reduction in the number of computations, prevention of overfitting, and positional invariance (i.e. recognition of a feature regardless of its position in the image).

The integer parameter passed to a pooling layer is the size of the square kernel used.

In [16]:
# Both pooling layers use a 3x3 window that moves one element per step.
max_pool = nn.MaxPool2d(3, stride=1)            # define a max pool
avg_pool = nn.AvgPool2d(3, stride=1)            # define average pool

# Tensor to perform on: shape (3, 5, 5), random whole numbers in [0, 10)
# stored as float32. torch.randint replaces the legacy
# torch.FloatTensor(sizes) constructor, and the tensor is built only once.
a = torch.randint(0, 10, (3, 5, 5)).float()

print(a)
avg_pool(a)   # displayed as the cell result; use max_pool(a) to compare

tensor([[[2., 8., 7., 1., 1.],
         [0., 6., 8., 2., 6.],
         [8., 1., 1., 4., 3.],
         [2., 9., 1., 0., 4.],
         [8., 3., 0., 8., 8.]],

        [[8., 4., 2., 7., 0.],
         [2., 9., 4., 9., 2.],
         [8., 0., 9., 6., 8.],
         [3., 5., 2., 9., 8.],
         [3., 2., 4., 1., 3.]],

        [[8., 9., 8., 0., 0.],
         [4., 1., 5., 5., 1.],
         [7., 7., 2., 4., 2.],
         [0., 8., 0., 7., 7.],
         [6., 3., 8., 4., 7.]]])


tensor([[[4.5556, 4.2222, 3.6667],
         [4.0000, 3.5556, 3.2222],
         [3.6667, 3.0000, 3.2222]],

        [[5.1111, 5.5556, 5.2222],
         [4.6667, 5.8889, 6.3333],
         [4.0000, 4.2222, 5.5556]],

        [[5.6667, 4.5556, 3.0000],
         [3.7778, 4.3333, 3.6667],
         [4.5556, 4.7778, 4.5556]]])

Exploring various transforms.

In [17]:
from torchvision import transforms

# Every line below only constructs a transform object; nothing is applied to
# an image in this cell.
transforms.ToTensor()                   # tensor from the image

# mean and std are per-channel sequences, so a single channel needs a
# one-element tuple. Plain (0.5) is just the float 0.5, not a tuple.
# NOTE(review): older torchvision releases appear to index mean/std per
# channel, which would fail on a scalar -- confirm against the installed version.
transforms.Normalize((0.5,), (0.5,))    # normalization

transforms.Resize((10, 10))             # resizing to 10x10
transforms.CenterCrop(10)               # cropping
transforms.Pad(1, 0)                    # padding of 1 on every side, fill value 0
transforms.Pad((1, 2, 2, 3), padding_mode='reflect')  # per-side padding, reflected

# Chaining transforms; as the last expression, this Compose is what the
# notebook displays.
transforms.Compose([
    transforms.CenterCrop(10),
    transforms.ToTensor(),
])

Compose(
    CenterCrop(size=(10, 10))
    ToTensor()
)

Exploring data augmentation for computer vision.

In [18]:
# Random crops: an int requests a square patch, a tuple a rectangular one.
transforms.RandomCrop(size=10)
transforms.RandomCrop(size=(10, 20))

# Random flips, each constructed with probability p=0.3.
transforms.RandomHorizontalFlip(p=0.3)
transforms.RandomVerticalFlip(p=0.3)

# Colour jitter; the four arguments are brightness, contrast, saturation, hue.
transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.25)

# Random rotation; a single number is treated as the range (-10, 10) degrees.
transforms.RandomRotation(degrees=10)

# Augmentations chain like any other transform. This Compose is the cell's
# last expression, so its repr is what the notebook displays.
transforms.Compose([
    transforms.RandomRotation(degrees=10),
    transforms.ToTensor(),
])

Compose(
    RandomRotation(degrees=(-10, 10), resample=False, expand=False)
    ToTensor()
)