In [1]:
import torch as t
from torch.utils.data import DataLoader, Dataset
import torchvision as tv
import numpy as np

In [2]:
import torch.nn as nn
import torch.optim as optim

In [3]:
import torch.nn.functional as F

In [9]:
# LeNet
class LeNet(nn.Module):
    def __init__(self) -> None:
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool1 = nn.MaxPool2d((2, 2), 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d((2, 2), 2)

        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

lenet = LeNet()
print(lenet)

LeNet(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [11]:
a = t.randn(4, 3, 32, 32)
print(lenet(a))
print(lenet(a).size())

tensor([[ 0.1799,  0.1288, -0.1012, -0.0370, -0.0976,  0.0205,  0.0730, -0.0452,
         -0.0518,  0.1802],
        [ 0.1684,  0.0957, -0.0824, -0.0627, -0.1079,  0.0107,  0.1005, -0.0478,
         -0.0744,  0.1521],
        [ 0.1737,  0.0926, -0.1028, -0.0461, -0.1016, -0.0061,  0.0869, -0.0634,
         -0.0472,  0.1456],
        [ 0.1836,  0.0970, -0.0927, -0.0479, -0.0869, -0.0044,  0.0805, -0.0489,
         -0.0618,  0.1692]], grad_fn=<AddmmBackward>)
torch.Size([4, 10])


In [4]:
# help(nn.Conv2d)
help(nn.MaxPool2d)

Help on class MaxPool2d in module torch.nn.modules.pooling:

class MaxPool2d(_MaxPoolNd)
 |  MaxPool2d(kernel_size: Union[int, Tuple[int, ...]], stride: Union[int, Tuple[int, ...], NoneType] = None, padding: Union[int, Tuple[int, ...]] = 0, dilation: Union[int, Tuple[int, ...]] = 1, return_indices: bool = False, ceil_mode: bool = False) -> None
 |  
 |  Applies a 2D max pooling over an input signal composed of several input
 |  planes.
 |  
 |  In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
 |  output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
 |  can be precisely described as:
 |  
 |  .. math::
 |      \begin{aligned}
 |          out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
 |                                  & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
 |                                                 \text{stride[1]} \times w + n)
 |      \end{aligned}
 |  
 |  If

In [26]:
# AlexNet
class AlexNet(nn.Module):
    def __init__(self) -> None:
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, 11, 4, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d((3, 3), 2),
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.MaxPool2d((3, 3), 2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 1000)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


alexnet = AlexNet()
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU()
    (6): Dropou

In [37]:
# 查看卷积后形成的张量形状可以这么查看
x = t.rand(size=(1,3,224,224),dtype=t.float32)
for layer in alexnet.features:
  x = layer(x)
  print(layer.__class__.__name__, '\t' , f'{x.shape}')

Conv2d 	 torch.Size([1, 96, 55, 55])
ReLU 	 torch.Size([1, 96, 55, 55])
MaxPool2d 	 torch.Size([1, 96, 27, 27])
Conv2d 	 torch.Size([1, 256, 27, 27])
ReLU 	 torch.Size([1, 256, 27, 27])
MaxPool2d 	 torch.Size([1, 256, 13, 13])
Conv2d 	 torch.Size([1, 384, 13, 13])
Conv2d 	 torch.Size([1, 384, 13, 13])
Conv2d 	 torch.Size([1, 256, 13, 13])
MaxPool2d 	 torch.Size([1, 256, 6, 6])


In [27]:
x = t.randn(4, 3, 224, 224)
print(alexnet(x).shape)

torch.Size([4, 1000])


In [28]:
# help(nn.MaxPool2d)
# help(nn.Conv2d)
# 先定义一个VGG块
def vgg_block(num_conv, ch_in, ch_out):
    '''
    input: 
    num_conv: 卷积层的数目
    ch_in:卷积层的输入通道数
    ch_out:卷积层输出通道数
    output:
    返回VGG模块，格式为nn.Sequential
    ''' 
    layer = []
    for i in range(num_conv):
        layer.append(nn.Conv2d(ch_in, ch_out, 3, 1, 2))
        
    layer.append(nn.MaxPool2d((2, 2), 1))
    return nn.Sequential(*layer)


In [12]:
# VGG

v11 = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
v13 = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
v16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
v19 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512 , 'M']

VGG_list = {'vgg11':v11, 'vgg13':v13, 'vgg16':v16, 'vgg19':v19}


class VGG(nn.Module):
    def __init__(self, model) -> None:
        super(VGG, self).__init__()
        
        model = VGG_list[model]
        layers = []
        in_cha = 3
        out_cha = 0
        for channel in model:
            if channel == 'M':
                layers.append(nn.MaxPool2d((2, 2), 2))
            else:
                out_cha = channel
                layers.append(nn.Conv2d(in_cha, out_cha, 3, 1, 1))
                layers.append(nn.ReLU())
                in_cha = out_cha
        
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(7 * 7 * 512, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 1000)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

def VGG11():
    return VGG('vgg11')

def VGG13():
    return VGG('vgg13')
def VGG16():
    return VGG('vgg16')
def VGG19():
    return VGG('vgg19')

vgg11 = VGG11()
print(vgg11)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(512, 512, kernel_size=(3, 3), stride

In [16]:
# 查看卷积后形成的张量形状可以这么查看
x = t.rand(size=(1,3,224,224),dtype=t.float32)
for layer in vgg11.features:
  x = layer(x)
  print(layer.__class__.__name__, '\t' , f'{x.shape}')

for layer in vgg11.classifier:
  x = layer(x)
  print(layer.__class__.__name__, '\t' , f'{x.shape}')

Conv2d 	 torch.Size([1, 64, 224, 224])
ReLU 	 torch.Size([1, 64, 224, 224])
MaxPool2d 	 torch.Size([1, 64, 112, 112])
Conv2d 	 torch.Size([1, 128, 112, 112])
ReLU 	 torch.Size([1, 128, 112, 112])
MaxPool2d 	 torch.Size([1, 128, 56, 56])
Conv2d 	 torch.Size([1, 256, 56, 56])
ReLU 	 torch.Size([1, 256, 56, 56])
Conv2d 	 torch.Size([1, 256, 56, 56])
ReLU 	 torch.Size([1, 256, 56, 56])
MaxPool2d 	 torch.Size([1, 256, 28, 28])
Conv2d 	 torch.Size([1, 512, 28, 28])
ReLU 	 torch.Size([1, 512, 28, 28])
Conv2d 	 torch.Size([1, 512, 28, 28])
ReLU 	 torch.Size([1, 512, 28, 28])
MaxPool2d 	 torch.Size([1, 512, 14, 14])
Conv2d 	 torch.Size([1, 512, 14, 14])
ReLU 	 torch.Size([1, 512, 14, 14])
Conv2d 	 torch.Size([1, 512, 14, 14])
ReLU 	 torch.Size([1, 512, 14, 14])
MaxPool2d 	 torch.Size([1, 512, 7, 7])
Flatten 	 torch.Size([1, 25088])
Linear 	 torch.Size([1, 4096])
ReLU 	 torch.Size([1, 4096])
Dropout 	 torch.Size([1, 4096])
Linear 	 torch.Size([1, 4096])
ReLU 	 torch.Size([1, 4096])
Dropout 	 tor

In [14]:
x = t.randn(3, 3, 224, 224)
print(vgg11(x).shape)

torch.Size([3, 1000])


In [11]:
# help(nn.Conv2d)
help(nn.MaxPool2d)

Help on class MaxPool2d in module torch.nn.modules.pooling:

class MaxPool2d(_MaxPoolNd)
 |  MaxPool2d(kernel_size: Union[int, Tuple[int, ...]], stride: Union[int, Tuple[int, ...], NoneType] = None, padding: Union[int, Tuple[int, ...]] = 0, dilation: Union[int, Tuple[int, ...]] = 1, return_indices: bool = False, ceil_mode: bool = False) -> None
 |  
 |  Applies a 2D max pooling over an input signal composed of several input
 |  planes.
 |  
 |  In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
 |  output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
 |  can be precisely described as:
 |  
 |  .. math::
 |      \begin{aligned}
 |          out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
 |                                  & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
 |                                                 \text{stride[1]} \times w + n)
 |      \end{aligned}
 |  
 |  If

In [1]:
len([64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'])

21

In [2]:
print(len([1, 2, 3]))

3


In [11]:
# NiN

conv_list = [(11, 96, 4, 0), (5, 256, 1, 2), (3, 384, 1, 1), (3, 10, 1, 1)]

class NiN(nn.Module):
    def __init__(self) -> None:
        super(NiN, self).__init__()
        
        layer = []
        out_c = 0
        in_c = 3

        for item in conv_list:
            H, out_c, stride, pad = item
            layer.append(nn.Conv2d(in_c, out_c, H, stride, pad))
            layer.append(nn.ReLU())
            layer.append(nn.Conv2d(out_c, out_c, 1, 1))
            layer.append(nn.ReLU())
            layer.append(nn.Conv2d(out_c, out_c, 1, 1))
            layer.append(nn.ReLU())
            layer.append(nn.MaxPool2d((3, 3), 2))

            in_c = out_c
            # in_c = out_c / 2

        layer.append(nn.AdaptiveAvgPool2d((1, 1)))
        layer.append(nn.Flatten())
        self.net = nn.Sequential(*layer)
        
    def forward(self, x):
        x = self.net(x)
        return x

nin = NiN()
print(nin)

NiN(
  (net): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (8): ReLU()
    (9): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (10): ReLU()
    (11): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU()
    (16): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
    (17): ReLU()
    (18): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
    (19): ReLU()
    (20): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilati

In [12]:
# 查看卷积后形成的张量形状可以这么查看
x = t.rand(size=(1,3,224,224),dtype=t.float32)
for layer in nin.net:
  x = layer(x)
  print(layer.__class__.__name__, '\t' , f'{x.shape}')

Conv2d 	 torch.Size([1, 96, 54, 54])
ReLU 	 torch.Size([1, 96, 54, 54])
Conv2d 	 torch.Size([1, 96, 54, 54])
ReLU 	 torch.Size([1, 96, 54, 54])
Conv2d 	 torch.Size([1, 96, 54, 54])
ReLU 	 torch.Size([1, 96, 54, 54])
MaxPool2d 	 torch.Size([1, 96, 26, 26])
Conv2d 	 torch.Size([1, 256, 26, 26])
ReLU 	 torch.Size([1, 256, 26, 26])
Conv2d 	 torch.Size([1, 256, 26, 26])
ReLU 	 torch.Size([1, 256, 26, 26])
Conv2d 	 torch.Size([1, 256, 26, 26])
ReLU 	 torch.Size([1, 256, 26, 26])
MaxPool2d 	 torch.Size([1, 256, 12, 12])
Conv2d 	 torch.Size([1, 384, 12, 12])
ReLU 	 torch.Size([1, 384, 12, 12])
Conv2d 	 torch.Size([1, 384, 12, 12])
ReLU 	 torch.Size([1, 384, 12, 12])
Conv2d 	 torch.Size([1, 384, 12, 12])
ReLU 	 torch.Size([1, 384, 12, 12])
MaxPool2d 	 torch.Size([1, 384, 5, 5])
Conv2d 	 torch.Size([1, 10, 5, 5])
ReLU 	 torch.Size([1, 10, 5, 5])
Conv2d 	 torch.Size([1, 10, 5, 5])
ReLU 	 torch.Size([1, 10, 5, 5])
Conv2d 	 torch.Size([1, 10, 5, 5])
ReLU 	 torch.Size([1, 10, 5, 5])
MaxPool2d 	 torc