## 6.2 Convolutions for Images

In [1]:
from d2l import mxnet as d2l
from mxnet import autograd,np,npx
from mxnet.gluon import nn
npx.set_np()

In [2]:
# 6.2.1 The Cross-Correlation Operation
def corr2d(X,K):
    """Return the 2D cross-correlation of input X with kernel K.

    The kernel is slid over every position where it fits entirely inside X;
    each output entry is the sum of the element-wise product between the
    kernel and the input window under it.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = np.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[r:r + k_rows, c:c + k_cols]
            Y[r, c] = d2l.reduce_sum(window * K)
    return Y

# Small worked example: a 3x3 input (values 0..8) and a 2x2 kernel (values 0..3).
X = np.arange(9).reshape((3, 3))
K = np.arange(4).reshape((2, 2))
corr2d(X, K)

array([[19., 25.],
       [37., 43.]])

In [3]:
# 6.2.2 Convolutional Layers
class Conv2D(nn.Block):
    """Minimal 2D convolutional layer built on top of `corr2d`.

    The layer registers two parameters through `self.params`: a kernel
    (`weight`) of shape `kernel_size` and a `bias` of shape `(1,)`.
    """

    def __init__(self, kernel_size, **kwargs):
        """Register the kernel and bias parameters.

        Args:
            kernel_size: Shape used for the `weight` parameter, e.g. `(h, w)`.
            **kwargs: Forwarded unchanged to `nn.Block.__init__`.
        """
        super().__init__(**kwargs)
        self.weight = self.params.get('weight', shape=kernel_size)
        self.bias = self.params.get('bias', shape=(1,))

    def forward(self, x):
        """Cross-correlate `x` with the kernel, then add the bias.

        The bias is added to the correlation *output* (broadcast over every
        entry). Adding it to the kernel before correlating would instead
        scale it by the sum of each input window.
        """
        return corr2d(x, self.weight.data()) + self.bias.data()

In [4]:
# 6.2.3 Object Edge Detection in Images
# Build a 6x8 image of ones with a band of zeros in columns 2 through 5,
# giving two vertical edges (1 -> 0 and 0 -> 1).
X = np.ones((6, 8))
X[:, 2:6] = 0
X

array([[1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.],
       [1., 1., 0., 0., 0., 0., 1., 1.]])

In [5]:
# 1x2 difference kernel: each output entry is (left pixel - right pixel),
# so it is non-zero only where horizontally adjacent pixels differ.
K = np.array([[1.0, -1.0]])
Y = corr2d(X, K)
Y

array([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
       [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [6]:
# K is the kernel; it can only detect vertical edges. Applied to the
# transposed image (whose edges are horizontal), the edges vanish from the output.
corr2d(X.T,K)

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [9]:
# 6.2.4 Learning a Kernel
# Built-in 2D convolution layer: 1 output channel, a 1x2 kernel, no bias.
conv2d = nn.Conv2D(1, kernel_size=(1, 2), use_bias=False)
conv2d.initialize()

# conv2d needs 4-D input and output data: (example, channel, height, width),
# where example = number of samples in each batch and
# channel = number of image channels.
X = X.reshape(1, 1, 6, 8)
Y = Y.reshape(1, 1, 6, 7)

for i in range(10):
    with autograd.record():
        Y_hat = conv2d(X)
        l = (Y_hat - Y) ** 2
    l.backward()
    # Update the convolution kernel's parameters with a plain gradient step.
    conv2d.weight.data()[:] -= 3e-2 * conv2d.weight.grad()
    # Report the summed squared error on every second iteration.
    if i % 2 == 1:
        print(f'batch {i+1}, loss {float(l.sum()):.3f}')

batch 2, loss 5.063
batch 4, loss 0.864
batch 6, loss 0.151
batch 8, loss 0.028
batch 10, loss 0.006


In [None]:
# 6.2.5 Cross-Correlation and Convolution
# 6.2.6 Feature Map and Receptive Field