<a href="https://colab.research.google.com/github/DavoodSZ1993/Dive_into_Deep_Learning/blob/main/07_CNNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 7 Convolutional Neural Networks

## 7.2 Convolutions for Images



### 7.2.1 The Cross-Correlation Operation

In [1]:
!pip install d2l==1.0.0-alpha1.post0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.0/93.0 KB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.0/121.0 KB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.6/83.6 KB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from torch import nn
from d2l import torch as d2l



In [3]:
def corr2d(X, K):
  """Compute the 2-D cross-correlation of X with kernel K (no padding, stride 1).

  Args:
    X: 2-D input tensor of shape (H, W).
    K: 2-D kernel tensor of shape (h, w).

  Returns:
    Tensor of shape (H - h + 1, W - w + 1) whose (i, j) entry is the sum of
    the element-wise product of K with the window X[i:i + h, j:j + w].
    The result is allocated on X's device. Its dtype is the promoted dtype of
    X and K when that is floating point or complex; otherwise (e.g. integer
    inputs) it is the default floating dtype.
  """
  h, w = K.shape

  # Keep the inputs' precision instead of always falling back to the default
  # dtype, so float64 inputs are not silently downcast.
  dtype = torch.result_type(X, K)
  if not (dtype.is_floating_point or dtype.is_complex):
    dtype = torch.get_default_dtype()

  Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1),
                  dtype=dtype, device=X.device)

  for i in range(Y.shape[0]):
    for j in range(Y.shape[1]):
      # Slide the kernel window over X and reduce the element-wise product.
      Y[i, j] = (X[i:i + h, j:j + w] * K).sum()

  return Y

In [4]:
# 3x3 input holding 0..8 row by row, and a 2x2 kernel holding 0..3.
X = torch.arange(9.0).reshape(3, 3)
K = torch.arange(4.0).reshape(2, 2)

# Each output entry is the sum of the kernel times one 2x2 window of X.
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

### 7.2.2 Convolutional Layers

In [5]:
class Conv2D(nn.Module):
  """2-D cross-correlation layer with a learnable kernel and a learnable scalar bias."""

  def __init__(self, kernel_size):
    """Create the layer's parameters.

    Args:
      kernel_size: shape of the kernel; it is passed to torch.rand to draw the
        initial weights. The bias starts as a single zero.
    """
    super(Conv2D, self).__init__()

    initial_kernel = torch.rand(kernel_size)
    initial_bias = torch.zeros(1)
    self.weight = nn.Parameter(initial_kernel)
    self.bias = nn.Parameter(initial_bias)

  def forward(self, X):
    """Cross-correlate X with the kernel, then add the bias to every entry."""
    response = corr2d(X, self.weight)
    return response + self.bias

### 7.2.3 Object Edge Detection in Images

In [8]:
# 6x8 image of ones with columns 2..5 set to zero, giving one vertical edge
# between columns 1|2 and another between columns 5|6.
X = torch.ones(6, 8)
X[:, 2:6] = 0.0
X, X.shape

(tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.],
         [1., 1., 0., 0., 0., 0., 1., 1.]]), torch.Size([6, 8]))

In [7]:
# 1x2 kernel that subtracts each element's right-hand neighbour from it.
K = torch.tensor([1.0, -1.0]).reshape(1, 2)
K.shape

torch.Size([1, 2])

In [10]:
# With K = [[1, -1]], Y[i, j] = X[i, j] - X[i, j + 1]: the result is 1 where a
# row of X steps from 1 to 0, -1 where it steps from 0 to 1, and 0 elsewhere.
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [11]:
# K only compares horizontally adjacent elements. Every row of X.t() is
# constant (each column of X is constant), so all differences are zero.
corr2d(X.t(), K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

### 7.2.4 Learning a Kernel

In [14]:
# Learn the 1x2 kernel that maps X to Y by plain gradient descent on the
# squared error. A single output channel and no bias, so the kernel is the
# only learnable parameter.
conv2d = nn.LazyConv2d(1, kernel_size=(1, 2), bias=False)

# The built-in layer works on 4-D (batch, channel, height, width) tensors;
# add a batch axis and a channel axis of size 1 to the input and the target.
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

lr = 3e-2  # Learning rate
num_epochs = 10

for i in range(num_epochs):
  Y_hat = conv2d(X)
  l = (Y_hat - Y) ** 2
  conv2d.zero_grad()
  l.sum().backward()

  # Update the kernel in place. The step itself must not be recorded by
  # autograd, hence no_grad() rather than writing through `.data`.
  with torch.no_grad():
    conv2d.weight -= lr * conv2d.weight.grad

  # Report the summed squared error every second epoch.
  if (i + 1) % 2 == 0:
    print(f'epoch {i + 1}, loss {l.sum(): .3f}')

epoch 2, loss  9.068
epoch 4, loss  3.089
epoch 6, loss  1.160
epoch 8, loss  0.458
epoch 10, loss  0.185


In [15]:
# Show the learned weights as a 1x2 tensor, for comparison with the kernel
# K = [[1, -1]] that was used to generate the target Y.
conv2d.weight.data.reshape((1, 2))

tensor([[ 1.0358, -0.9475]])

## 7.3 Padding and Stride

### 7.3.1 Padding

In [16]:
import torch
from torch import nn

In [17]:
def comp_conv2d(conv2d, X):
  """Apply `conv2d` to the tensor X and return the result without the two leading axes.

  X is reshaped to (1, 1, *X.shape) before the call, and the first two
  dimensions of the output are dropped again afterwards. The final reshape
  therefore only succeeds when those two output dimensions both have size 1.
  """
  batched = X.reshape(1, 1, *X.shape)
  out = conv2d(batched)
  trailing_shape = out.shape[2:]
  return out.reshape(trailing_shape)

In [20]:
# 3x3 kernel with 1 pixel of padding on every side: 8 + 2*1 - 3 + 1 = 8,
# so the 8x8 input keeps its height and width.
conv2d = nn.LazyConv2d(out_channels=1, kernel_size=3, padding=1)
X = torch.rand(8, 8)

comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [19]:
# 5x3 kernel: pad 2 rows and 1 column on each side so the 8x8 input keeps its
# size: 8 + 2*2 - 5 + 1 = 8 (height) and 8 + 2*1 - 3 + 1 = 8 (width).
conv2d = nn.LazyConv2d(out_channels=1, kernel_size=(5, 3), padding=(2, 1))

comp_conv2d(conv2d, X).shape



torch.Size([8, 8])

### 7.3.2 Stride

In [21]:
# Stride 2 in both directions halves the 8x8 input:
# floor((8 + 2*1 - 3) / 2) + 1 = 4 along each axis.
conv2d = nn.LazyConv2d(out_channels=1, kernel_size=3, padding=1, stride=2)

comp_conv2d(conv2d, X).shape



torch.Size([4, 4])

In [23]:
# Different kernel size, padding and stride per axis on the 8x8 input:
#   height: floor((8 + 2*0 - 3) / 3) + 1 = 2
#   width:  floor((8 + 2*1 - 5) / 4) + 1 = 2
conv2d = nn.LazyConv2d(out_channels=1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))

comp_conv2d(conv2d, X).shape



torch.Size([2, 2])