In [None]:
# 1. AlexNet
import torch
from torch import nn
from d2l import torch as d2l

# AlexNet adapted to single-channel images and 10 output classes.
# The spatial sizes quoted below assume a 1x224x224 input.
net = nn.Sequential(
    # 1 input channel -> 96 output channels, 11x11 kernel, stride 4, padding 1
    # (224 -> 54), then a non-linearity like every other conv layer below.
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),  # 54 -> 26

    # Smaller 5x5 window; padding 2 keeps height/width unchanged while the
    # number of output channels grows.
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),  # 26 -> 12

    # Three consecutive 3x3 convolutions. The channel count increases except
    # for the last one, which is followed by a pooling layer.
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),  # 12 -> 5

    # Flatten (batch, 256, 5, 5) into (batch, 6400).
    nn.Flatten(),

    # Two fully connected layers; dropout is used to reduce overfitting.
    nn.Linear(6400, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),

    # Output layer: 10 classes.
    nn.Linear(4096, 10)
)

# Sanity check: feed one random single-channel 224x224 image through the
# network and print the output shape produced by every layer.
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(type(layer).__name__, 'output shape:\t', X.shape)

# Load the dataset through the d2l helper, asking for images resized to 224.
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# Train AlexNet on the device returned by d2l.try_gpu().
lr = 0.01
num_epochs = 10
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

In [None]:
# 2. VGG
'''
原始VGG网络有5个卷积块，其中前两个块各有一个卷积层，后三个块各包含两个卷积层。 
第一个模块有64个输出通道，每个后续模块将输出通道数量翻倍，直到该数字达到512。
由于该网络使用8个卷积层和3个全连接层，因此它通常被称为VGG-11。
'''

import torch
from torch import nn
from d2l import torch as d2l

#VGG块
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` (3x3 conv + ReLU) pairs, then a 2x2 max-pool.

    Args:
        num_convs: number of convolutional layers in the block.
        in_channels: number of channels of the block's input.
        out_channels: number of channels produced by every conv in the block.

    Returns:
        An `nn.Sequential` holding the layers. The convolutions use padding 1,
        so only the final stride-2 pooling changes height and width (halving
        them).
    """
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        # Inside a block, each conv's output feeds the next conv's input.
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

# VGG-11 layout: one (num_convs, out_channels) pair per VGG block,
# 8 convolutional layers in total.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

#VGG-11网络
def vgg(conv_arch):
    """Assemble a VGG network for single-channel input and 10 output classes.

    Args:
        conv_arch: iterable of (num_convs, out_channels) pairs, one per VGG
            block, in order.

    Returns:
        An `nn.Sequential` made of the VGG blocks, a flatten layer and three
        fully connected layers.

    Raises:
        ValueError: if `conv_arch` yields no blocks.

    Note:
        The first fully connected layer expects a 7x7 feature map, e.g. a
        224x224 input after five blocks that each halve height and width.
    """
    conv_blks = []
    in_channels = 1
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        # Between blocks, the previous block's output channels become the
        # next block's input channels.
        in_channels = out_channels

    if not conv_blks:
        raise ValueError('conv_arch must contain at least one (num_convs, out_channels) pair')

    return nn.Sequential(*conv_blks, nn.Flatten(),
                         # Fully connected part.
                         nn.Linear(out_channels*7*7, 4096), nn.ReLU(), nn.Dropout(0.5),
                         nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
                         nn.Linear(4096, 10))

# Instantiate the network described by conv_arch.
net = vgg(conv_arch)


In [None]:
# 3. NiN
import torch
from torch import nn
from d2l import torch as d2l

# NiN块
def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    """Build one NiN block: a regular convolution followed by two 1x1 convolutions.

    Args:
        in_channels: number of channels of the block's input.
        out_channels: number of channels produced by all three convolutions.
        kernel_size: kernel size of the first convolution.
        strides: stride of the first convolution.
        padding: padding of the first convolution.

    Returns:
        An `nn.Sequential` of three convolutions, each followed by a ReLU.
    """
    # The regular convolution that looks at a spatial neighbourhood.
    layers = [
        nn.Conv2d(in_channels, out_channels,
                  kernel_size=kernel_size, stride=strides, padding=padding),
        nn.ReLU(),
    ]
    # Two 1x1 convolutions that mix channels at each pixel independently.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)

# NiN网络
# NiN network for single-channel input and 10 classes.
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, strides=4, padding=0),
    nn.MaxPool2d(3, stride=2),
    nin_block(96, 256, kernel_size=5, strides=1, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(256, 384, kernel_size=3, strides=1, padding=1),
    nn.MaxPool2d(3, stride=2),
    nn.Dropout(0.5),
    # There are 10 class labels, so the last block outputs 10 channels.
    nin_block(384, 10, kernel_size=3, strides=1, padding=1),
    # Global average pooling: one value per channel.
    nn.AdaptiveAvgPool2d((1, 1)),
    # Flatten the 4-D output into a 2-D tensor of shape (batch_size, 10).
    nn.Flatten())

In [None]:
# 4. GoogLeNet
import torch
from  torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

#Inception块
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Every branch preserves height and width (kernel sizes are matched by their
    padding and the pooling uses stride 1), so the output has
    `c1 + c2[1] + c3[1] + c4` channels at the input's spatial size.

    Args:
        in_channels: number of channels of the input feature map.
        c1: output channels of branch 1 (a single 1x1 convolution).
        c2: pair of channel counts for branch 2 (1x1 convolution, then 3x3).
        c3: pair of channel counts for branch 3 (1x1 convolution, then 5x5).
        c4: output channels of branch 4 (3x3 max-pool, then 1x1 convolution).
        **kwargs: forwarded to `nn.Module.__init__`.
    """

    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)
        # Branch 1: a single 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Branch 2: 1x1 convolution followed by a 3x3 convolution.
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Branch 3: 1x1 convolution followed by a 5x5 convolution.
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # Branch 4: 3x3 max-pooling followed by a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run all four branches on `x` and concatenate their outputs on dim 1."""
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the branch outputs along the channel dimension.
        return torch.cat((p1, p2, p3, p4), dim=1)

#GoogLeNet网络
# Stage 1: 7x7 convolution on the single-channel input, then max-pooling.
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# Stage 2: 1x1 convolution, 3x3 convolution that triples the channels, pooling.
b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# Stage 3: two Inception blocks (192 -> 256 -> 480 channels), then pooling.
b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# Stage 4: five Inception blocks (480 -> 512 -> 512 -> 512 -> 528 -> 832), then pooling.
b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# Stage 5: two Inception blocks (832 -> 832 -> 1024), global average pooling
# down to 1x1, and flattening to (batch_size, 1024).
b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1, 1)),
                   nn.Flatten())

# Full network: the five stages followed by a 10-way linear classifier.
net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, 10))

In [None]:
# 5. batch-norm 批量规范化

# 5.1 从零实现批量规范化层
import torch
from torch import nn
from d2l import torch as d2l

#批量规范化算法
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    """Apply batch normalization to `X` and return updated running statistics.

    When autograd is disabled (e.g. inside `torch.no_grad()`), the call is
    treated as prediction and the supplied moving statistics are used.
    Otherwise the call is treated as training: the statistics of the current
    batch are used and the moving averages are updated.

    Args:
        X: 2-D input (fully connected layer) or 4-D input (convolutional layer).
        gamma: scale applied to the normalized input.
        beta: shift applied after scaling.
        moving_mean: running mean used in prediction mode.
        moving_var: running variance used in prediction mode.
        eps: small constant added to the variance before the square root.
        momentum: weight given to the old moving statistics when updating.

    Returns:
        A tuple `(Y, moving_mean, moving_var)` with the normalized, scaled and
        shifted output and the (possibly updated) moving statistics, detached
        via `.data`.

    Raises:
        AssertionError: in training mode, if `X` is neither 2-D nor 4-D.
    """
    if not torch.is_grad_enabled():
        # Prediction mode: normalize with the moving averages passed in.
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        # The rank of X tells whether it comes from a fully connected layer
        # (2-D) or a convolutional layer (4-D).
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: per-feature statistics over the batch.
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Convolutional layer: per-channel (dim 1) statistics. keepdim=True
            # keeps the result broadcastable against X.
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        # Training mode: normalize with the current batch statistics.
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the moving averages of mean and variance.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean.data, moving_var.data

