# 6.2.1. The Cross-Correlation Operation

In [4]:
import torch
from torch import nn
from d2l import torch as d2l

def corr2d(X, K):  # X: input, K: kernel
    """Compute the 2D cross-correlation of input ``X`` with kernel ``K``.

    Args:
        X: 2D input tensor of shape (H, W).
        K: 2D kernel tensor of shape (h, w).

    Returns:
        Tensor of shape (H - h + 1, W - w + 1). Entry (i, j) is the sum of the
        element-wise product of ``K`` with the window ``X[i:i+h, j:j+w]``.
        The result lives on ``X``'s device and uses the promoted floating-point
        dtype of ``X`` and ``K``; non-floating inputs yield the default float
        dtype.
    """
    h, w = K.shape
    # Preserve the inputs' floating-point precision instead of always falling
    # back to float32; integer inputs keep producing the default float dtype.
    out_dtype = torch.result_type(X, K)
    if not out_dtype.is_floating_point:
        out_dtype = torch.get_default_dtype()
    # Y is the activation map; allocate it next to X so later arithmetic with
    # parameters on the same device does not hit a device mismatch.
    Y = torch.zeros(
        (X.shape[0] - h + 1, X.shape[1] - w + 1),
        dtype=out_dtype,
        device=X.device,
    )
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Slide the kernel: multiply it with the current window and sum.
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

# Worked example: a 3x3 input and a 2x2 kernel give a 2x2 output.
X = torch.tensor([
    [0.0, 1.0, 2.0],
    [3.0, 4.0, 5.0],
    [6.0, 7.0, 8.0],
])
K = torch.tensor([
    [0.0, 1.0],
    [2.0, 3.0],
])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

# 6.2.2. Convolution Layers

In [6]:
class Conv2D(nn.Module):
    """2D layer that cross-correlates its input with a learnable kernel and adds a learnable bias."""

    def __init__(self, kernel_size):
        """Create the layer's parameters.

        Args:
            kernel_size: shape of the kernel, passed to ``torch.rand``.
        """
        super().__init__()
        # The kernel is drawn uniformly from [0, 1); the bias starts at zero.
        initial_kernel = torch.rand(kernel_size)
        initial_bias = torch.zeros(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return ``corr2d(x, self.weight)`` with the bias added to every element."""
        activation = corr2d(x, self.weight)
        return activation + self.bias

# 6.2.3. Object Edge Detection in Images

A simple application of a convolutional layer:

-> Detect the edge of an object in an image by finding the locations where the pixel values change.

-> We construct an "image" of shape (6, 8). The middle four columns are black (0) and the rest are white (1).

In [7]:
# 6x8 "image": white (1) everywhere, then columns 2-5 are painted black (0).
X = torch.ones(6, 8)
X[:, 2:6] = 0.0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

Then, construct a kernel of shape (1, 2).

-> Perform cross-correlation operation with the input.

-> If horizontally adjacent elements are the same, the output is 0.

-> Otherwise, the output is non-zero: 1 at a white-to-black edge and -1 at a black-to-white edge.

In [8]:
# Horizontal difference kernel of shape (1, 2): left pixel minus right pixel.
K = torch.tensor([1.0, -1.0]).reshape(1, 2)
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

# 6.2.4. Learning a Kernel

우리가 지금 확인하려는 이미지가 0과 1로 구성되어 있다는 것을 알기 때문에, kernel 값을 [1, -1]로 직접 지정해서 쉽게 결과를 얻었다.

그러나 더 큰 kernel이나 연속된 여러 convolution layer를 고려해야 한다면, 각 kernel의 최적 값을 사람이 직접 정하는 것은 사실상 불가능하다.

따라서, kernel을 학습해야한다.


처음 kernel 값은 random하게 초기화하고, iteration마다 layer의 출력을 Y와 비교한 squared error를 사용하여 kernel을 갱신한다.

In [9]:
# Built-in convolution layer with one input channel, one output channel and a
# (1, 2) kernel. The bias is disabled so the kernel is the only thing learned.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d works on (batch, channel, height, width) tensors, so give the
# input and the target a batch dimension and a channel dimension of size 1.
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2  # learning rate

for i in range(10):
    Y_hat = conv2d(X)
    l = (Y_hat - Y) ** 2  # element-wise squared error
    loss = l.sum()  # reduce once; reused for backward() and for logging
    conv2d.zero_grad()
    loss.backward()

    # Manual gradient-descent step. The in-place update runs under no_grad()
    # so autograd does not record it; this replaces mutating `.data`, which
    # bypasses autograd's safety checks.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'batch {i+1}, loss {loss:.3f}')

batch 2, loss 11.046
batch 4, loss 2.169
batch 6, loss 0.493
batch 8, loss 0.136
batch 10, loss 0.044


[W NNPACK.cpp:79] Could not initialize NNPACK! Reason: Unsupported hardware.


In [10]:
# Show the learned kernel as a (1, 2) tensor. detach() gives a view without
# gradient tracking and is the supported replacement for the legacy `.data`.
conv2d.weight.detach().reshape((1, 2))

tensor([[ 1.0036, -0.9640]])