In [1]:
import torch 

In [2]:
# 2D correlation function 

def corr2d(X, K):
    """Compute the 2D cross-correlation of an input with a kernel.

    The kernel is slid over the input with stride 1 and no padding; each
    output entry is the sum of the element-wise product between the kernel
    and the input window it currently covers.

    X : 2D input tensor
    K : 2D kernel tensor

    Returns a tensor of shape
    (X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1).
    """
    kernel_h, kernel_w = K.shape
    out_rows = X.shape[0] - kernel_h + 1
    out_cols = X.shape[1] - kernel_w + 1
    Y = torch.zeros((out_rows, out_cols))

    for row in range(out_rows):
        # All input rows covered by the kernel at this vertical offset.
        row_band = X[row: row + kernel_h]
        for col in range(out_cols):
            window = row_band[:, col: col + kernel_w]
            Y[row, col] = torch.sum(window * K)  # sum of element-wise product
    return Y

In [3]:
# Small worked example: a 3x3 input cross-correlated with a 2x2 kernel.
X = torch.tensor([[0.0, 1.0, 2.0],
                  [3.0, 4.0, 5.0],
                  [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0],
                  [2.0, 3.0]])

corr2d(X, K)  # 2x2 cross-correlation result

tensor([[19., 25.],
        [37., 43.]])

* import ```torch.nn``` package for convolution layer 
* The parameters of the convolutional layer are precisely the values that constitute the kernel and the scalar bias. 
* The forward computation function ```forward``` calls the ```corr2d``` function and adds the bias.

In [4]:
import torch.nn as nn 

*  we declare ```weight``` and ```bias``` as the two model parameters.

In [5]:
class Conv2D(nn.Module):
    """Minimal 2D convolutional layer built on top of ``corr2d``.

    The layer owns two learnable parameters:

    * ``weight`` -- the kernel, of shape ``kernel_size``, initialised
      uniformly at random in [0, 1).
    * ``bias``   -- a single scalar bias, initialised to zero.

    Both are wrapped in ``nn.Parameter`` so that they are registered with
    the module: they then show up in ``parameters()`` / ``state_dict()``,
    follow ``.to(device)`` and can be handed to an optimizer. Plain tensors
    assigned as attributes are not tracked by ``nn.Module``.
    """

    def __init__(self, kernel_size, **kwargs):
        """Create the layer.

        kernel_size : shape of the kernel, e.g. ``(1, 2)``
        **kwargs    : forwarded unchanged to ``nn.Module.__init__``
        """
        super().__init__(**kwargs)

        # nn.Parameter sets requires_grad=True by default.
        self.weight = nn.Parameter(torch.rand(kernel_size, dtype=torch.float32))
        self.bias = nn.Parameter(torch.zeros((1,), dtype=torch.float32))

    def forward(self, x):
        """Cross-correlate the 2D input ``x`` with the kernel and add the bias."""
        return corr2d(x, self.weight) + self.bias

***

* Object Edge Detection in Images 
* ***Conv layer***: detecting the edge of an object in an image by finding the location of the pixel change.

In [6]:
# 6x8 image made of vertical stripes: [white, black, white].
# Columns 0-1 and 6-7 are 1 (white), columns 2-5 are 0 (black).
stripe_row = torch.tensor([1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0])
X = stripe_row.repeat(6, 1)   # stack the same row 6 times -> 6x8 shape

X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

* kernel K with a height of 1 and width of 2.

In [7]:
K = torch.tensor([[1.0, -1.0]])  # kernel = [1, -1], shape (1, 2)

* Using ```corr2d``` function 
* Using ```Conv2D``` layer 
* Compare them 

In [8]:
# Cross-correlate the striped image with the kernel [1, -1]: the result is
# non-zero only where two horizontally adjacent pixels differ.
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [9]:
# Transposed image: the stripes now run horizontally.
X.t()     # [white,
          #  black,
          #  white]

tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]])

* Let's apply the kernel to the transposed image. As expected, it vanishes. The kernel K only detects vertical edges.

In [10]:
# Every row of the transposed image is constant, so differencing horizontally
# adjacent pixels with K = [1, -1] yields zeros everywhere.
corr2d(X.t(), K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

***

* Learning a Kernel 
* First, construct a convolutional layer and initialize its kernel as a random array
* In each iteration, we will use the squared error to compare Y and the output of the convolutional layer
* Then, calculate the gradient to update the weight

We ignore the bias this time for simplicity.

In [11]:
"""
- Construct a convolutional layer with 1 output channel
(channels will be introduced in the following section)
- and a kernel array shape of (1, 2)
"""

# Bias is disabled for the sake of simplicity: only the kernel is learned.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1, 2), bias=False)


# The two-dimensional convolutional layer uses four-dimensional input and
# output in the format of (example, channel, height, width), where the batch
# size (number of examples in the batch) and the number of channels are both 1.
X = X.reshape((1, 1, 6, 8))   # [B, C, H, W] = [1, 1, 6, 8]
Y = Y.reshape((1, 1, 6, 7))

LEARNING_RATE = 3e-2   # step size of the manual gradient-descent update
NUM_ITERATIONS = 10    # number of full passes over the single example

for i in range(NUM_ITERATIONS):
    Y_hat = conv2d(X)      # prediction
    l = (Y_hat - Y) ** 2   # element-wise squared error
    loss = l.sum()         # scalar loss that is back-propagated

    conv2d.zero_grad()     # clear the gradient left over from the previous step
    loss.backward()

    # Manual gradient-descent step: w <- w - a * (dLoss/dw).
    # The update runs under no_grad() so that it is not recorded by autograd;
    # this is the supported replacement for mutating `weight.data` directly.
    with torch.no_grad():
        conv2d.weight -= LEARNING_RATE * conv2d.weight.grad

    if (i + 1) % 2 == 0:
        print('batch %d, loss %.3f' % (i + 1, loss.item()))

batch 2, loss 0.992
batch 4, loss 0.197
batch 6, loss 0.045
batch 8, loss 0.013
batch 10, loss 0.004


* the error has dropped to a small value after 10 iterations. 
* Now, we will take a look at the kernel array we learned.


Indeed, the learned kernel array is remarkably close to the kernel array K=\[1, -1\] we defined earlier.

In [12]:
# Reshape the learned weight to (1, 2) so it can be compared directly with K = [1, -1].
conv2d.weight.data.reshape((1, 2))

tensor([[ 1.0013, -0.9890]])