# Module

In [2]:
import torch
from torch import nn

In [3]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear(784 -> 256), ReLU, Linear(256 -> 10)."""

    def __init__(self, **kwargs):
        # Any keyword arguments are handed straight to nn.Module.
        super().__init__(**kwargs)
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Run the hidden layer, the activation and then the output layer on ``x``."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        return self.output(activated)

In [4]:
# Instantiate the MLP and feed it a random batch of two 784-feature rows.
net = MLP()
X = torch.randn((2, 784))
print(net)  # module summary listing the registered sub-layers
print(net(X)) # Forward pass: calling the module invokes forward()

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)
tensor([[ 0.0359,  0.5077,  0.2475,  0.2759, -0.0408,  0.1040, -0.0623, -0.0326,
          0.0038,  0.2356],
        [ 0.2110,  0.2071, -0.1685, -0.0423, -0.0872,  0.4703,  0.0632,  0.0918,
          0.0154,  0.3298]], grad_fn=<AddmmBackward0>)


## Common layers

In [19]:
# A layer that holds no model parameters: forward() only transforms its input.
class MinusMean(nn.Module):
    """Centre a tensor by subtracting the mean taken over all of its elements."""

    def __init__(self, **kargs):
        super().__init__(**kargs)

    def forward(self, x):
        """Return ``x`` minus its overall mean."""
        return x - x.mean()

# Try the layer on [0, 2, 4]: the mean is 2, so the result is [-2, 0, 2].
net = MinusMean()
X = torch.arange(0, 6, 2, dtype=torch.float32)
print(X)
print(net(X))

tensor([0., 2., 4.])
tensor([-2.,  0.,  2.])


In [6]:
# This class shows a layer that owns model parameters, kept in a ParameterList.
class ParamLayer(nn.Module):
    """Multiply the input by a chain of learnable matrices."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Create a random 4x4 matrix three times, each wrapped as a Parameter,
        # and collect them into a ParameterList.
        square_weights = [nn.Parameter(torch.randn(4, 4)) for _ in range(3)]
        self.params = nn.ParameterList(square_weights)
        # params: [<4x4>, <4x4>, <4x4>]
        # Append one more entry, this time a 4x1 matrix.
        self.params.append(nn.Parameter(torch.randn(4, 1)))
        # params: [<4x4>, <4x4>, <4x4>, <4x1>]

    def forward(self, x):
        """Right-multiply ``x`` by every stored matrix, in list order."""
        for weight in self.params:
            x = torch.mm(x, weight)  # mm(matrix, matrix) -> matrix product
        return x
# Build the layer and print it to list the four registered parameters.
net = ParamLayer()
print(net)

ParamLayer(
  (params): ParameterList(
      (0): Parameter containing: [torch.float32 of size 4x4]
      (1): Parameter containing: [torch.float32 of size 4x4]
      (2): Parameter containing: [torch.float32 of size 4x4]
      (3): Parameter containing: [torch.float32 of size 4x1]
  )
)


In [7]:
# The example below gives an implementation that uses a dictionary instead.
class DictLayer(nn.Module):
    """Hold several named weight matrices and apply the one chosen per call."""

    def __init__(self):
        super().__init__()
        initial = {
            'linear1': nn.Parameter(torch.randn(4, 4)),
            'linear2': nn.Parameter(torch.randn(4, 1)),
        }
        self.params = nn.ParameterDict(initial)
        # params = {
        #     'linear1': <4, 4>,
        #     'linear2': <4, 1>
        # }
        extra = {'linear3': nn.Parameter(torch.randn(4, 2))}
        self.params.update(extra)
        # params = {
        #     'linear1': <4, 4>,
        #     'linear2': <4, 1>,
        #     'linear3': <4, 2>
        # }

    def forward(self, x, choice='linear1'):
        """Return the matrix product of ``x`` with the parameter named ``choice``."""
        selected = self.params[choice]
        return torch.mm(x, selected)

# Build the layer and print it to list the three named parameters.
net = DictLayer()
print(net)

DictLayer(
  (params): ParameterDict(
      (linear1): Parameter containing: [torch.FloatTensor of size 4x4]
      (linear2): Parameter containing: [torch.FloatTensor of size 4x1]
      (linear3): Parameter containing: [torch.FloatTensor of size 4x2]
  )
)


## Conv2D

Cross-correlation layer

![image.png](img/corr2d.png)

In [10]:
# 2-D convolution layer

# Cross-correlation operation
# X is the input, K is the kernel
def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with the kernel ``K``.

    Both arguments are cast to float and are unpacked as two-dimensional.
    The kernel moves with a step of 1 and no padding, so the result has
    shape ``(X_h - K_h + 1, X_w - K_w + 1)``.
    """
    k_h, k_w = K.shape
    X, K = X.float(), K.float()

    in_h, in_w = X.shape
    out_h, out_w = in_h - k_h + 1, in_w - k_w + 1  # output size for a step of 1
    Y = torch.zeros(out_h, out_w)
    for row in range(out_h):  # walk over the output rows
        for col in range(out_w):  # walk over the output columns
            # Multiply the current window by the kernel element-wise and sum.
            window = X[row:row + k_h, col:col + k_w]
            Y[row, col] = (window * K).sum()
    return Y


class Conv2D(nn.Module):
    """Learnable 2-D cross-correlation layer with one kernel and one bias value."""

    def __init__(self, kern_size):
        super().__init__()
        kernel_init = torch.randn(kern_size)  # kernel entries from a standard normal
        bias_init = torch.rand(1)  # single bias value, uniform on [0, 1)
        self.weight = nn.Parameter(kernel_init)
        self.bias = nn.Parameter(bias_init)

    def forward(self, x):
        """Cross-correlate ``x`` with the kernel, then add the bias."""
        correlated = corr2d(x, self.weight)
        return correlated + self.bias

In [10]:
def comp_conv2d(conv2d, X):
    """Apply ``conv2d`` to ``X`` and return the result without its first two dims.

    Two leading singleton dimensions are added to ``X`` before it is passed to
    ``conv2d``; the first two dimensions of the output are dropped again on
    return. Intermediate values are printed along the way.
    """
    print("X =", X)
    print("X_shape =", X.shape)
    expanded_shape = (1, 1) + X.shape
    print("X_shape_added =", expanded_shape)
    X = X.view(expanded_shape)  # e.g. 8x8 -> 1x1x8x8
    print("X_V =", X)
    Y = conv2d(X)
    print("Y =", Y)
    out_shape = Y.shape
    print("Y_shape =", out_shape)
    return Y.view(out_shape[2:])

# 3x3 kernel with padding=1: the 8x8 input below comes back as 8x8.
conv2dx = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1) # padding is applied on both sides, so 2 rows and 2 columns are added in total

X = torch.rand(8, 8)
computed = comp_conv2d(conv2d=conv2dx, X=X)
computed.shape

X = tensor([[0.8524, 0.8213, 0.8802, 0.9012, 0.8227, 0.8512, 0.2370, 0.8938],
        [0.8969, 0.3820, 0.4716, 0.5227, 0.4212, 0.1732, 0.3654, 0.5197],
        [0.2489, 0.3316, 0.9528, 0.4103, 0.8053, 0.7263, 0.6859, 0.2286],
        [0.1610, 0.7672, 0.9025, 0.6521, 0.7532, 0.8518, 0.1074, 0.1946],
        [0.1487, 0.1424, 0.1636, 0.0298, 0.8794, 0.1225, 0.1049, 0.1791],
        [0.9035, 0.9541, 0.1282, 0.3487, 0.0459, 0.3504, 0.9695, 0.7530],
        [0.9452, 0.1585, 0.7824, 0.3253, 0.4599, 0.8345, 0.6125, 0.1951],
        [0.0355, 0.0646, 0.2585, 0.0912, 0.4140, 0.2836, 0.4103, 0.2219]])
X_shape = torch.Size([8, 8])
X_shape_added = (1, 1, 8, 8)
X_V = tensor([[[[0.8524, 0.8213, 0.8802, 0.9012, 0.8227, 0.8512, 0.2370, 0.8938],
          [0.8969, 0.3820, 0.4716, 0.5227, 0.4212, 0.1732, 0.3654, 0.5197],
          [0.2489, 0.3316, 0.9528, 0.4103, 0.8053, 0.7263, 0.6859, 0.2286],
          [0.1610, 0.7672, 0.9025, 0.6521, 0.7532, 0.8518, 0.1074, 0.1946],
          [0.1487, 0.1424, 0.1636, 

torch.Size([8, 8])

In [11]:
# Use a kernel of height 5 and width 3; the padding on each side of the height and width is 2 and 1 respectively
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape

X = tensor([[0.8524, 0.8213, 0.8802, 0.9012, 0.8227, 0.8512, 0.2370, 0.8938],
        [0.8969, 0.3820, 0.4716, 0.5227, 0.4212, 0.1732, 0.3654, 0.5197],
        [0.2489, 0.3316, 0.9528, 0.4103, 0.8053, 0.7263, 0.6859, 0.2286],
        [0.1610, 0.7672, 0.9025, 0.6521, 0.7532, 0.8518, 0.1074, 0.1946],
        [0.1487, 0.1424, 0.1636, 0.0298, 0.8794, 0.1225, 0.1049, 0.1791],
        [0.9035, 0.9541, 0.1282, 0.3487, 0.0459, 0.3504, 0.9695, 0.7530],
        [0.9452, 0.1585, 0.7824, 0.3253, 0.4599, 0.8345, 0.6125, 0.1951],
        [0.0355, 0.0646, 0.2585, 0.0912, 0.4140, 0.2836, 0.4103, 0.2219]])
X_shape = torch.Size([8, 8])
X_shape_added = (1, 1, 8, 8)
X_V = tensor([[[[0.8524, 0.8213, 0.8802, 0.9012, 0.8227, 0.8512, 0.2370, 0.8938],
          [0.8969, 0.3820, 0.4716, 0.5227, 0.4212, 0.1732, 0.3654, 0.5197],
          [0.2489, 0.3316, 0.9528, 0.4103, 0.8053, 0.7263, 0.6859, 0.2286],
          [0.1610, 0.7672, 0.9025, 0.6521, 0.7532, 0.8518, 0.1074, 0.1946],
          [0.1487, 0.1424, 0.1636, 

torch.Size([8, 8])

## Pooling layer (池化层)

In [12]:
def pool2d(X, pool_size, mode='max'):
    """Apply 2-D max or average pooling with a step of 1 and no padding.

    Args:
        X: two-dimensional input tensor.
        pool_size: ``(height, width)`` of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        A tensor of shape ``(X_h - p_h + 1, X_w - p_w + 1)`` holding the
        pooled value of every window.

    Raises:
        ValueError: if ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Reject unknown modes up front; otherwise the loop below would match no
    # branch and silently return an all-zero tensor.
    if mode not in ('max', 'avg'):
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")

    p_h, p_w = pool_size  # get the pooling window size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + p_h, j:j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

In [19]:
# Pool a 3x3 input with a 2x2 window, once with max and once with average.
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=torch.float)
print('X =\n', X)
Y = pool2d(X, (2, 2))  # mode defaults to 'max'
print('pool_max =\n', Y)
Y = pool2d(X, (2, 2), 'avg')
print('pool_avg =\n', Y)

X =
 tensor([[0., 1., 2.],
        [3., 4., 5.],
        [6., 7., 8.]])
pool_max =
 tensor([[4., 5.],
        [7., 8.]])
pool_avg =
 tensor([[2., 3.],
        [5., 6.]])
