In [2]:
import torch
from torch import nn

# cross-correlation
def corr2d(X,K):
    """Compute the 2-D cross-correlation of X with kernel K.

    Args:
        X: 2-D input tensor.
        K: 2-D kernel tensor.

    Returns:
        A tensor of shape (X.shape[0] - kh + 1, X.shape[1] - kw + 1), where
        (kh, kw) is the kernel shape. Each entry is the sum of the
        element-wise product of K with the input window at that position.
    """
    kernel_rows, kernel_cols = K.shape
    out_rows = X.shape[0] - kernel_rows + 1
    out_cols = X.shape[1] - kernel_cols + 1
    Y = torch.zeros(out_rows, out_cols)
    for row in range(out_rows):
        for col in range(out_cols):
            # Window of the input currently covered by the kernel.
            window = X[row:row + kernel_rows, col:col + kernel_cols]
            Y[row, col] = (window * K).sum()
    return Y

In [3]:
# conv_layer
class Conv2D(nn.Module):
    """Minimal 2-D convolutional layer built on top of `corr2d`.

    The layer owns a learnable kernel (`weight`) and a learnable scalar
    `bias` that is added to every output element.
    """

    def __init__(self,kernel_size):
        """Create the layer.

        Args:
            kernel_size: shape of the kernel, passed straight to `torch.rand`.
        """
        super().__init__()
        initial_weight = torch.rand(kernel_size)  # uniform random start
        initial_bias = torch.zeros(1)
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(initial_bias)

    def forward(self,x):
        """Cross-correlate `x` with the kernel and add the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

In [4]:
# Edge detection: an 8x8 image of ones with a 4x4 block of zeros in the middle.
X = torch.ones((8,8))
X[2:6,2:6] = 0

K = torch.tensor([[1.0,-1.0]]) # conv kernel h = 1; w = 2
# Next, we construct a kernel K with height 1 and width 2.
# During cross-correlation, if two horizontally adjacent elements are equal
# the output is zero; otherwise the output is non-zero.
Y = corr2d(X,K)
# Same kernel applied to the transposed image, then transposed back,
# so the differences are taken along the other axis.
Y2 = corr2d(X.t(),K).t()

In [1]:
# Learn a kernel from data.
# NOTE: this cell uses `nn`, `X` and `Y` without defining them, so the cells
# above must run first; on a fresh kernel it fails with
# "NameError: name 'nn' is not defined".
# Initialize a conv layer whose kernel has random elements.
conv2d = nn.Conv2d(1,1,kernel_size=(1,2),bias=False)
print(conv2d.weight.data)
X = X.reshape(1,1,8,8) # set batch number:1 and channel:1
Y = Y.reshape(1,1,8,7)
lr = 1e-2  # learning rate for the manual gradient step

for i in range(101):
    Y_hat = conv2d(X) #kernel size: 1 x 1 x 1 x 2
    l = (Y_hat-Y)**2  # element-wise squared error
    conv2d.zero_grad()
    l.sum().backward()
    # Manual gradient-descent update applied in place to the kernel.
    conv2d.weight.data[:] -= lr*conv2d.weight.grad
    if i % 10 == 0:
        # Report the summed loss and the current kernel every 10 iterations.
        print(f'epoch {i+1}:loss:{l.sum()}')
        print(conv2d.weight.data)

NameError: name 'nn' is not defined

In [12]:
# padding and stride

# padding
def comp_conv2d(conv2d,X):
    """Run a convolution layer on a tensor without batch/channel axes.

    Args:
        conv2d: callable layer applied to the reshaped input.
        X: input tensor; a batch axis and a channel axis (both of size 1)
            are prepended before the layer is applied.

    Returns:
        The layer output reshaped to its dimensions from index 2 onward
        (height and width), i.e. with the two leading axes dropped.
    """
    # Prepend batch=1 and channel=1 to the input shape.
    batched = X.reshape((1, 1, *X.shape))
    out = conv2d(batched)
    spatial_shape = out.shape[2:]
    return out.reshape(spatial_shape)
# padding=(1,1) adds one element on each side of every row and every column,
# because the convolution would otherwise shrink the input matrix
# 3x3 kernel with padding 1 on each side: the output keeps the input's 8x8 size.
conv2d = nn.Conv2d(1,1,kernel_size=3,padding=(1,1))
X = torch.rand(size=(8,8))
comp_conv2d(conv2d,X).shape
# stride
conv2d = nn.Conv2d(1,1,kernel_size=3,padding=(1,1),stride=(2,2)) # total padding:2 stride = 2
# if input width is 8 -> output width: floor((8 - 3 + 2)/2) + 1 = 4; same for the height
X = torch.rand(size = (8,8))
comp_conv2d(conv2d,X).shape # out: torch.Size([4, 4])

torch.Size([4, 4])

In [41]:
#multiple input channel
def corr2d_mult_in(X,K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    X and K are iterated together along their first (channel) axis; each
    channel pair is passed to `corr2d` and the per-channel results are
    summed into a single output.
    """
    total = 0
    for channel, kernel in zip(X, K):
        total = total + corr2d(channel, kernel)
    return total
# Random input with 2 channels of 8x8.
X = torch.randn(size=(2,8,8))
# k = nn.Conv2d(kernel_size=(3,3),in_channels=2,out_channels=1)
k = torch.randn(size=(2,3,3)) # kernel: one 3x3 slice per input channel
corr2d_mult_in(X,k)

#multiple output channel
def corr2d_mult_in_out(X,K):
    """Cross-correlate X with several multi-channel kernels.

    K is iterated along its first axis; each slice is passed with X to
    `corr2d_mult_in`, and the results are stacked along a new axis 0 so the
    output has one channel per slice of K.
    """
    per_output_channel = []
    for kernel in K:
        per_output_channel.append(corr2d_mult_in(X, kernel))
    return torch.stack(per_output_channel, 0)  # stack along dim 0
# Build a 4-D kernel by stacking three shifted copies of k along a new axis 0.
K = torch.stack([k,k+1,k+2],0) # 3*2*3*3
K.shape
corr2d_mult_in_out(X,K)

# 1x1 kernel: computes each output value from the values at a single spatial
# position across all input channels (a weighted sum over the channels);
# equivalent to a fully connected layer applied at every pixel
def corr2d_multi_in_out_1x1(X,K):
    """Multi-input, multi-output cross-correlation with a 1x1 kernel.

    Implemented as a single matrix multiplication: every pixel is treated as
    a sample whose features are its input-channel values.

    Args:
        X: input tensor of shape (channel_i, h, w).
        K: kernel tensor whose first axis is the number of output channels
            and which holds channel_o * channel_i elements in total
            (e.g. shape (channel_o, channel_i, 1, 1)).

    Returns:
        Tensor of shape (channel_o, h, w); a 1x1 kernel changes only the
        number of channels, not the height or width.
    """
    channel_i, h, w = X.shape
    # Read the output-channel count from the K argument itself, not from a
    # same-named global.
    channel_o = K.shape[0]
    X = X.reshape((channel_i, h * w))
    K = K.reshape((channel_o, channel_i))
    Y = torch.matmul(K, X)
    return Y.reshape((channel_o, h, w))

In [20]:
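# pooling layer
# Reduces the convolutional layer's sensitivity to position, and also reduces
# the sensitivity to spatially down-sampled representations.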
import torch
from torch import nn

# Maximum pooling layer:
#   outputs the maximum value in each input region.
# Average pooling layer:
#   outputs the average value in each input region.
# Either way, a small change in the position of a pixel barely affects the final output.
# Like the cross-correlation, a pooling layer can also take stride and padding
# (see the nn.MaxPool2d examples with padding/stride later in this cell).
def pool2d(X, pool_size, mode='max'):
    """Apply 2-D pooling to X with a sliding window (stride 1, no padding).

    Args:
        X: 2-D input tensor.
        pool_size: (height, width) of the pooling window.
        mode: 'max' for maximum pooling or 'avg' for average pooling.

    Returns:
        Tensor of shape (X.shape[0] - p_h + 1, X.shape[1] - p_w + 1) holding
        the max or mean of each window.

    Raises:
        ValueError: if `mode` is neither 'max' nor 'avg'. Without this check
            an unknown mode would return an all-zero tensor with no warning.
    """
    if mode not in ('max', 'avg'):
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + p_h, j:j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

# 3x3 example input; 2x2 max pooling over it yields [[4, 5], [7, 8]].
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
PX = pool2d(X, (2, 2))
# print(X)
# print(PX)

# Use the built-in pooling layer from torch.
pool2d_max_torch = nn.MaxPool2d((3,3))
pool2d_max_torch(X.reshape((1,1,3,3))) # note the input shape: batch and channel axes are added
# Add padding and stride to the pooling layer.
pool2d_max_torch = nn.MaxPool2d(kernel_size=(3,3),padding=1,stride=1)
pool2d_max_torch(X.reshape((1,1,3,3))) # note the input shape: batch and channel axes are added

pool2d_max_torch = nn.MaxPool2d(kernel_size=(3,3),padding=1,stride=2)
pool2d_max_torch(X.reshape((1,1,3,3))) # note the input shape: batch and channel axes are added

# Unlike a conv kernel, pooling is applied to each input channel separately,
# so the number of output channels equals the number of input channels.
# Multiple input channels:
X = X.reshape((1,1,3,3)) # batch size: 1, channel: 1, height: 3, width: 3
X = torch.cat((X,X+1),1) # concatenate along the channel axis to get 2 channels
pool2d_max_torch = nn.MaxPool2d(kernel_size=(3,3), padding=1, stride=1)
print(X)
print(pool2d_max_torch(X))

tensor([[[[0., 1., 2.],
          [3., 4., 5.],
          [6., 7., 8.]],

         [[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]])
tensor([[[[4., 5., 5.],
          [7., 8., 8.],
          [7., 8., 8.]],

         [[5., 6., 6.],
          [8., 9., 9.],
          [8., 9., 9.]]]])


In [1]:
# LeNet
# input: nx1x28x28
import torch
from torch import nn
from d2l import load_data_fashion_mnist
from d2l import train_ch6_gpu

# LeNet-style network: two conv + sigmoid + average-pool stages, followed by
# three linear layers; the final layer has 10 outputs.
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),  # nx6x28x28
    nn.AvgPool2d(kernel_size=2, stride=2),  # nx6x14x14
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),  # nx16x10x10
    nn.AvgPool2d(kernel_size=2, stride=2),  # nx16x5x5
    nn.Flatten(),  # nx400
    # fully connected block
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10)
)

# Random single-image batch, used only to trace the output shape of each layer.
X = torch.rand((1,1,28,28),dtype=torch.float32)

net(X)  # full forward pass; the result is not used
for layer in net:
    # Feed the running activation through one layer at a time and report its shape.
    X = layer(X)
    print(layer.__class__.__name__,'output shape: \t\t',X.shape)

batch_size = 256
# load_data_fashion_mnist and train_ch6_gpu are imported from `d2l`; their
# behaviour is not visible in this notebook.
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

lr, num_epochs = 0.9, 10
# NOTE(review): the device is hard-coded to 'cuda:0' — presumably this needs a GPU; confirm.
train_ch6_gpu(net,train_iter=train_iter,test_iter=test_iter,num_epochs=num_epochs,lr=lr,device='cuda:0')

Conv2d output shape: 		 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 		 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 		 torch.Size([1, 6, 14, 14])
Conv2d output shape: 		 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 		 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 		 torch.Size([1, 16, 5, 5])
Flatten output shape: 		 torch.Size([1, 400])
Linear output shape: 		 torch.Size([1, 120])
Sigmoid output shape: 		 torch.Size([1, 120])
Linear output shape: 		 torch.Size([1, 84])
Sigmoid output shape: 		 torch.Size([1, 84])
Linear output shape: 		 torch.Size([1, 10])
net will be trained on cuda:0
batch_0: train_l:2.439532995223999, train_accuracy:0.12890625, time_count:0.2802743911743164
batch_10: train_l:2.5029625459150835, train_accuracy:0.10014204545454546, time_count:0.004795074462890625
batch_20: train_l:2.4219164167131697, train_accuracy:0.09691220238095238, time_count:0.004835844039916992
batch_30: train_l:2.3912743137728785, train_accuracy:0.09954637096774194, time_c

In [1]:
# Lenet with BatchNormal in Conv
# LeNet
# input: nx1x28x28
import torch
from torch import nn
from d2l import load_data_fashion_mnist
from d2l import train_ch6_gpu

# Same LeNet-style network, with a BatchNorm2d layer inserted between each
# convolution and its sigmoid activation.
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),nn.BatchNorm2d(6), nn.Sigmoid(),  # nx6x28x28
    nn.AvgPool2d(kernel_size=2, stride=2),  # nx6x14x14
    nn.Conv2d(6, 16, kernel_size=5),nn.BatchNorm2d(16), nn.Sigmoid(),  # nx16x10x10
    nn.AvgPool2d(kernel_size=2, stride=2),  # nx16x5x5
    nn.Flatten(),  # nx400
    # fully connected block
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10)
)

# Random single-image batch, used only to trace the output shape of each layer.
X = torch.rand((1,1,28,28),dtype=torch.float32)

net(X)  # full forward pass; the result is not used
for layer in net:
    # Feed the running activation through one layer at a time and report its shape.
    X = layer(X)
    print(layer.__class__.__name__,'output shape: \t\t',X.shape)

batch_size = 256
# load_data_fashion_mnist and train_ch6_gpu are imported from `d2l`; their
# behaviour is not visible in this notebook.
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

lr, num_epochs = 0.9, 10
# NOTE(review): the device is hard-coded to 'cuda:0' — presumably this needs a GPU; confirm.
train_ch6_gpu(net,train_iter=train_iter,test_iter=test_iter,num_epochs=num_epochs,lr=lr,device='cuda:0')

Conv2d output shape: 		 torch.Size([1, 6, 28, 28])
BatchNorm2d output shape: 		 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 		 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 		 torch.Size([1, 6, 14, 14])
Conv2d output shape: 		 torch.Size([1, 16, 10, 10])
BatchNorm2d output shape: 		 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 		 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 		 torch.Size([1, 16, 5, 5])
Flatten output shape: 		 torch.Size([1, 400])
Linear output shape: 		 torch.Size([1, 120])
Sigmoid output shape: 		 torch.Size([1, 120])
Linear output shape: 		 torch.Size([1, 84])
Sigmoid output shape: 		 torch.Size([1, 84])
Linear output shape: 		 torch.Size([1, 10])
net will be trained on cuda:0
batch_0: train_l:2.4651753902435303, train_accuracy:0.125, time_count:0.245985746383667
batch_10: train_l:2.441270524805242, train_accuracy:0.10653409090909091, time_count:0.004595518112182617
batch_20: train_l:2.333327259336199, train_accuracy:0.13392857142857142, time_c

In [3]:
# Lenet with BatchNormal in Linear
# LeNet
# input: nx1x28x28
import torch
from torch import nn
from d2l import load_data_fashion_mnist
from d2l import train_ch6_gpu

# LeNet-style network with batch normalization after every convolution
# (BatchNorm2d) and after the first two linear layers (BatchNorm1d).
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),nn.BatchNorm2d(6),nn.Sigmoid(),  # nx6x28x28
    nn.AvgPool2d(kernel_size=2, stride=2),  # nx6x14x14
    nn.Conv2d(6, 16, kernel_size=5),nn.BatchNorm2d(16),nn.Sigmoid(),  # nx16x10x10
    nn.AvgPool2d(kernel_size=2, stride=2),  # nx16x5x5
    nn.Flatten(),  # nx400
    # fully connected block
    nn.Linear(16 * 5 * 5, 120),nn.BatchNorm1d(120) ,nn.Sigmoid(),
    nn.Linear(120, 84), nn.BatchNorm1d(84),nn.Sigmoid(),
    nn.Linear(84, 10)
)

# Random batch of 256 images, used only to trace the output shape of each layer.
# NOTE(review): presumably a batch larger than 1 is used here because
# BatchNorm1d cannot compute batch statistics from a single sample in
# training mode — confirm.
X = torch.rand((256,1,28,28),dtype=torch.float32)

net(X)  # full forward pass; the result is not used
for layer in net:
    # Feed the running activation through one layer at a time and report its shape.
    X = layer(X)
    print(layer.__class__.__name__,'output shape: \t\t',X.shape)

batch_size = 256
# load_data_fashion_mnist and train_ch6_gpu are imported from `d2l`; their
# behaviour is not visible in this notebook.
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)


lr, num_epochs = 1, 15
# NOTE(review): the device is hard-coded to 'cuda:0' — presumably this needs a GPU; confirm.
train_ch6_gpu(net,train_iter=train_iter,test_iter=test_iter,num_epochs=num_epochs,lr=lr,device='cuda:0')

Conv2d output shape: 		 torch.Size([256, 6, 28, 28])
BatchNorm2d output shape: 		 torch.Size([256, 6, 28, 28])
Sigmoid output shape: 		 torch.Size([256, 6, 28, 28])
AvgPool2d output shape: 		 torch.Size([256, 6, 14, 14])
Conv2d output shape: 		 torch.Size([256, 16, 10, 10])
BatchNorm2d output shape: 		 torch.Size([256, 16, 10, 10])
Sigmoid output shape: 		 torch.Size([256, 16, 10, 10])
AvgPool2d output shape: 		 torch.Size([256, 16, 5, 5])
Flatten output shape: 		 torch.Size([256, 400])
Linear output shape: 		 torch.Size([256, 120])
BatchNorm1d output shape: 		 torch.Size([256, 120])
Sigmoid output shape: 		 torch.Size([256, 120])
Linear output shape: 		 torch.Size([256, 84])
BatchNorm1d output shape: 		 torch.Size([256, 84])
Sigmoid output shape: 		 torch.Size([256, 84])
Linear output shape: 		 torch.Size([256, 10])
net will be trained on cuda:0
batch_0: train_l:2.6358017921447754, train_accuracy:0.0390625, time_count:0.007390022277832031
batch_10: train_l:1.8872987140308728, train_ac