# 卷积神经网络Resnet

In [1]:
# 导入必要的工具包
import torch 
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import matplotlib.pyplot as plt
%matplotlib inline
import re
import collections
import random
import math

## 实践

接下来，我们要借助深度学习框架PyTorch实现一个残差网络，并基于此完成图像分类任务。\
残差网络是典型的卷积神经网络，由许多卷积层和汇聚层堆叠而成；其独特的思想在于，将卷积神经网络的输入与输出相加，即残差连接。所以，在实现残差网络时，需要注意保证输入和输出的维度（宽、高和通道数）是相同的，零填充和1×1卷积会帮我们完成这些任务！

In [2]:
# A residual connection adds a block's input to its output. The two sometimes
# have the same dimensions and sometimes do not, so `use_1x1conv` selects
# whether the input is passed through a 1x1 convolution before the addition.
class Residual(nn.Module):
    """Two 3x3 convolution + batch-norm layers wrapped by a skip connection.

    Args:
        input_channels: number of channels of the incoming feature map.
        num_channels: number of channels produced by both 3x3 convolutions.
        use_1x1conv: when True, the skip path goes through a 1x1 convolution
            (using the same stride) instead of being added unchanged.
        strides: stride of the first 3x3 convolution and of the 1x1
            convolution on the skip path.
    """

    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        # Registration order fixes parameter ordering and state-dict layout.
        self.conv1 = nn.Conv2d(
            input_channels, num_channels, kernel_size=3, padding=1, stride=strides
        )
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
            if use_1x1conv
            else None
        )
        # Batch-normalisation layers (covered in chapter 7).
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_path(X) + skip(X))``."""
        main = self.conv1(X)
        main = F.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        # Without a 1x1 convolution the input itself is the skip value.
        shortcut = X if self.conv3 is None else self.conv3(X)
        main += shortcut
        return F.relu(main)
    
# A residual network is assembled from several stages of residual blocks.
def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    """Build the list of ``Residual`` blocks that form one stage.

    Args:
        input_channels: channels of the feature map entering the stage.
        num_channels: channels produced by every block of the stage.
        num_residuals: how many ``Residual`` blocks to create.
        first_block: True for the stage that directly follows the stem. That
            stage keeps the spatial resolution (stride 1); every other stage
            starts with a stride-2 block that uses a 1x1 convolution on the
            skip path.

    Returns:
        A list with ``num_residuals`` ``Residual`` modules, in forward order.
    """
    blk = []
    for i in range(num_residuals):
        if i > 0:
            # Later blocks of a stage always map num_channels -> num_channels.
            blk.append(Residual(num_channels, num_channels))
        elif not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            # The first stage does not downsample; a 1x1 projection is only
            # needed when the channel count changes, otherwise the identity
            # skip connection is kept.
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=input_channels != num_channels))
    return blk


class ResNet(nn.Module):
    """Residual image classifier: stem, four residual stages and a linear head.

    The stem is a 7x7 stride-2 convolution followed by batch norm, ReLU and a
    3x3 stride-2 max pooling. Each of the four stages holds two residual
    blocks (64, 128, 256 and 512 channels). The head applies global average
    pooling, flattening and a linear layer.

    Args:
        num_classes: size of the output logits vector.
        in_channels: number of channels of the input images (default 3).
    """

    def __init__(self, num_classes, in_channels=3):
        super().__init__()

        self.b1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

        self.b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
        self.b3 = nn.Sequential(*resnet_block(64, 128, 2))
        self.b4 = nn.Sequential(*resnet_block(128, 256, 2))
        self.b5 = nn.Sequential(*resnet_block(256, 512, 2))
        self.head = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
                                  nn.Linear(512, num_classes))

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        # Chain the registered stages directly rather than allocating a new
        # container module on every forward pass.
        for stage in (self.b1, self.b2, self.b3, self.b4, self.b5, self.head):
            x = stage(x)
        return x

现在, 让我们构建用于图片分类任务的训练数据集。我们使用CIFAR-10数据集，该数据集包含10个种类、共60000幅图像，我们仅使用1000张图像

In [3]:
import os

def read_csv_labels(fname):
    """Read a two-column CSV file and return a ``{name: label}`` dictionary.

    The first line is treated as the header (column names) and skipped.
    Blank lines, such as an empty line at the end of the file, are ignored.

    Args:
        fname: path of the CSV file; each data row must be ``name,label``.

    Returns:
        dict mapping the first column to the second column, both as ``str``.

    Raises:
        ValueError: if a non-blank data row does not contain exactly two
            comma-separated fields.
    """
    labels = {}
    with open(fname, 'r', encoding='utf-8') as f:
        # Skip the header row; the default keeps an empty file from raising.
        next(f, None)
        for line in f:
            line = line.rstrip()
            if not line:
                continue
            name, label = line.split(',')
            labels[name] = label
    return labels

class CIFAR10Dataset(Dataset):
    """Dataset over the PNG images stored under ``<folder_path>/train``.

    Labels are loaded from the CSV file ``fname`` located in ``folder_path``.
    Images are looked up as ``<id>.png`` where ``id = idx + 1``, i.e. dataset
    index 0 corresponds to the file ``1.png``.
    """

    def __init__(self, folder_path, fname):
        label_file = os.path.join(folder_path, fname)
        self.labels = read_csv_labels(label_file)
        self.folder_path = os.path.join(folder_path, 'train')

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image tensor, label tensor)`` for dataset index ``idx``."""
        sample_id = str(idx + 1)
        img = read_image(f"{self.folder_path}/{sample_id}.png")
        target = torch.tensor(int(self.labels[sample_id]))
        return img, target
    

实例化CIFAR10数据集，并指定批量大小

In [4]:
# Number of samples per mini-batch.
batch_size = 20
# Labels come from cifar10_tiny/trainLabels.csv, images from cifar10_tiny/train.
train_data = CIFAR10Dataset('cifar10_tiny', 'trainLabels.csv')
# NOTE(review): no `shuffle` argument is passed, so batches follow the
# dataset index order — confirm this is intended for training.
train_iter = DataLoader(train_data, batch_size=batch_size)

In [5]:
import torch

class Runner(object):
    """Small training / evaluation / prediction harness around one model.

    Args:
        model: the ``torch.nn.Module`` to train.
        optimizer: optimizer already bound to the model's parameters.
        loss_fn: callable ``loss_fn(logits, targets)`` returning a scalar
            tensor.
        metric: optional object exposing ``reset()``, ``update(logits, y)``
            and ``accumulate()``; required by :meth:`evaluate`.
    """

    def __init__(self, model, optimizer, loss_fn, metric=None):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        # Used to compute the evaluation metric.
        self.metric = metric

        # Metric value of every evaluation run.
        self.dev_scores = []
        # (global_step, mean loss) pairs for training epochs / evaluations.
        self.train_epoch_losses = []
        self.dev_losses = []
        # Best metric value seen so far.
        self.best_score = 0

    # Training stage
    def train(self, train_loader, dev_loader=None, **kwargs):
        """Train the model and save its parameters when training finishes.

        Args:
            train_loader: iterable of ``(x, y)`` batches that supports
                ``len()``.
            dev_loader: iterable of ``(x, y)`` batches used for evaluation;
                required when ``eval_steps`` is non-zero.

        Keyword Args:
            num_epochs: number of passes over ``train_loader`` (default 0).
            log_steps: print the batch loss every ``log_steps`` batches
                (default 100).
            save_path: where the final parameters are written; ``.pth`` is
                appended unless the path already ends with it
                (default ``'best_model.pth'``).
            eval_steps: evaluate every ``eval_steps`` epochs; 0 disables
                evaluation (default 0).

        Raises:
            RuntimeError: if ``eval_steps`` is non-zero while ``dev_loader``
                or the metric is missing.
        """
        # Training mode: layers such as batch norm use batch statistics.
        self.model.train()

        num_epochs = kwargs.get('num_epochs', 0)
        log_steps = kwargs.get('log_steps', 100)
        save_path = kwargs.get('save_path', 'best_model.pth')
        eval_steps = kwargs.get('eval_steps', 0)
        # Incremented once per finished epoch (see the end of the epoch loop).
        global_step = 0

        if eval_steps:
            if dev_loader is None:
                raise RuntimeError('Error: dev_loader can not be None!')
            if self.metric is None:
                raise RuntimeError('Error: Metric can not be None')

        for epoch in range(num_epochs):
            # Accumulate plain floats: summing the loss tensors themselves
            # would keep every batch's autograd graph alive until epoch end.
            total_loss = 0.0
            for step, data in enumerate(train_loader):
                x, y = data
                logits = self.model(x.float())
                loss = self.loss_fn(logits, y.long())
                loss_value = loss.item()
                total_loss += loss_value
                if step % log_steps == 0:
                    print(f'loss:{loss_value:.5f}')

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

            # Evaluate every `eval_steps` epochs.
            if eval_steps != 0 and (epoch + 1) % eval_steps == 0:
                dev_score, dev_loss = self.evaluate(dev_loader, global_step=global_step)
                print(f'[Evalute] dev score:{dev_score:.5f}, dev loss:{dev_loss:.5f}')

                if dev_score > self.best_score:
                    self.save_model(f'model_{epoch+1}.pth')

                    print(f'[Evaluate]best accuracy performance has been updated: {self.best_score:.5f}-->{dev_score:.5f}')
                    self.best_score = dev_score

                # evaluate() switched to eval mode; go back to training mode.
                self.model.train()

            global_step += 1
            # Mean training loss of this epoch.
            train_loss = total_loss / len(train_loader)
            self.train_epoch_losses.append((global_step, train_loss))

        # Append the extension only when it is missing, so that the default
        # 'best_model.pth' is not written as 'best_model.pth.pth'.
        final_path = str(save_path)
        if not final_path.endswith('.pth'):
            final_path = f'{final_path}.pth'
        self.save_model(final_path)
        print('[Train] Train done')

    # Evaluation stage
    def evaluate(self, dev_loader, **kwargs):
        """Compute the metric and the mean loss over ``dev_loader``.

        Keyword Args:
            global_step: value stored next to the loss in ``dev_losses``
                (default -1).

        Returns:
            ``(dev_score, dev_loss)``: the accumulated metric value and the
            mean batch loss as a float.
        """
        assert self.metric is not None
        # Evaluation mode; parameters are not updated here.
        self.model.eval()
        global_step = kwargs.get('global_step', -1)
        total_loss = 0
        self.metric.reset()

        # No backward pass happens during evaluation, so gradient tracking
        # is switched off.
        with torch.no_grad():
            for x, y in dev_loader:
                logits = self.model(x.float())
                total_loss += self.loss_fn(logits, y.long()).item()
                self.metric.update(logits, y)

        dev_loss = (total_loss / len(dev_loader))
        self.dev_losses.append((global_step, dev_loss))
        dev_score = self.metric.accumulate()
        self.dev_scores.append(dev_score)
        return dev_score, dev_loss

    # Prediction stage
    def predict(self, x, **kwargs):
        """Switch the model to eval mode and return its logits for ``x``."""
        self.model.eval()
        logits = self.model(x)
        return logits

    # Save the model parameters
    def save_model(self, save_path):
        """Write the model's ``state_dict`` to ``save_path``."""
        torch.save(self.model.state_dict(), save_path)

    # Load the model parameters
    def load_model(self, model_path):
        """Load a ``state_dict`` from ``model_path``, mapping tensors to CPU."""
        self.model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

## 模型训练

In [6]:
num_classes = 10
# Build the model.
model = ResNet(num_classes)
# Loss function.
loss_fn = F.cross_entropy
# Optimizer.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# No metric is supplied, so evaluation stays disabled during training.
runner = Runner(model, optimizer, loss_fn, metric=None)
runner.train(train_iter, num_epochs=10, save_path='chapter_5')

loss:2.30708
loss:3.13586
loss:2.84148
loss:2.90006
loss:2.60240
loss:2.37752
loss:1.88666
loss:2.02436
loss:1.90529
loss:1.76597
[Train] Train done


## 模型预测
    
   模型预测的方法与上一章完全相同，请大家自己尝试。
   在Cifar-10中，十个类的对应关系为 0: 飞机、1: 汽车、2: 鸟类、3: 猫、4: 鹿、5: 狗、6: 青蛙、7: 马、8: 船、9:卡车

## 循环神经网络
   与之前的模型有所不同，循环神经网络引入了隐藏状态和时间步两个新概念。当前时间步的隐藏状态由当前时间的输入与上一个时间步的隐藏状态一起计算出。
   根据隐藏状态的计算公式，需要计算两次矩阵乘法和两次加法才能得到当前时刻的隐藏状态。我们通过代码说明：该计算公式等价于将当前时刻的输入与上一个时间步的隐藏状态做拼接，将两个权重矩阵做拼接，然后对两个拼接后的结果做矩阵乘法。在此处的演示中，我们省略了偏置项。

In [7]:
# X is a simulated input and H a simulated hidden state; real cases are more
# complex. Shapes: X (3, 1), W_xh (1, 4), H (3, 4), W_hh (4, 4).
X, W_xh = torch.normal(0, 1, (3, 1)), torch.normal(0, 1, (1, 4))
H, W_hh = torch.normal(0, 1, (3, 4)), torch.normal(0, 1, (4, 4))
# Two separate matrix products added together (bias omitted).
torch.matmul(X, W_xh) + torch.matmul(H, W_hh)

tensor([[-3.3222,  2.6943,  1.1887,  0.6252],
        [-0.8495, -0.1767,  0.0372, -0.2811],
        [ 8.3495, -2.9154, -0.2664,  2.7573]])

上面是按照公式计算得到的结果，下面是拼接后计算得到的结果，可以看出两个结果完全相同

In [8]:
# Concatenate X and H along columns and the two weight matrices along rows,
# then perform a single matrix product.
torch.matmul(torch.cat((X, H), 1), torch.cat((W_xh, W_hh), 0))

tensor([[-3.3222,  2.6943,  1.1887,  0.6252],
        [-0.8495, -0.1767,  0.0372, -0.2811],
        [ 8.3495, -2.9154, -0.2664,  2.7573]])

现在，让我们实现一个简单的循环神经网络的例子。
从下面的函数中，我们可以看到，在循环神经网络中，需要遍历时间步，并保存每一步的输出。

In [9]:
def rnn(inputs, state, params):
    """Run a tanh recurrent layer over a sequence, one time step at a time.

    Args:
        inputs: sequence of per-step inputs with shape
            (number of time steps, batch size, vocabulary size).
        state: initial hidden state, a 2-D tensor.
        params: ``(W_xh, W_hh, b_h, W_hq, b_q)`` — input-to-hidden weights,
            hidden-to-hidden weights, hidden bias, hidden-to-output weights
            and output bias.

    Returns:
        ``(outputs, (H,))`` where ``outputs`` is every step's output
        concatenated along dim 0 and ``H`` is the final hidden state.
    """
    W_xh, W_hh, b_h, W_hq, b_q = params
    hidden = state
    step_outputs = []
    # Each x_t has shape (batch size, vocabulary size).
    for x_t in inputs:
        pre_activation = torch.mm(x_t, W_xh) + torch.mm(hidden, W_hh) + b_h
        hidden = torch.tanh(pre_activation)
        step_outputs.append(torch.mm(hidden, W_hq) + b_q)
    return torch.cat(step_outputs, dim=0), (hidden,)

In [10]:
# Random sequence: 10 time steps, batch of 3, feature size 50.
inputs=torch.rand(10,3,50)
# Parameters in the order (W_xh, W_hh, b_h, W_hq, b_q) that rnn() unpacks.
params=[torch.rand((50,50)),torch.rand((50,50)),torch.rand((3,50)),torch.rand((50,60)),torch.rand((3,60))]
# Initial hidden state for the batch of 3.
state=torch.rand((3,50))
# rnn() returns (stacked per-step outputs, (final hidden state,)).
output=rnn(inputs,state,params)
print(output)

(tensor([[23.5445, 29.6079, 23.8511,  ..., 27.4512, 22.2955, 26.7673],
        [23.6436, 29.7690, 23.3254,  ..., 27.7294, 22.4751, 27.1660],
        [23.6796, 29.2678, 23.2866,  ..., 27.4755, 21.6524, 27.6663],
        ...,
        [23.5445, 29.6079, 23.8511,  ..., 27.4512, 22.2955, 26.7673],
        [23.6436, 29.7690, 23.3254,  ..., 27.7294, 22.4751, 27.1660],
        [23.6796, 29.2678, 23.2866,  ..., 27.4755, 21.6524, 27.6663]]), (tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1

   在循环神经网络的训练中，当时间步较大时，可能导致数值不稳定，例如梯度爆炸或梯度消失，所以一个很重要的步骤是梯度裁剪。通过下面的函数，梯度范数永远不会超过给定的阈值，并且更新后的梯度完全与原始梯度的方向对齐。

In [11]:
def grad_clipping(net, theta):
    """Rescale gradients in place so that their global L2 norm is <= ``theta``.

    All gradients are multiplied by the same factor, so the direction of the
    overall gradient is preserved.

    Args:
        net: an ``nn.Module`` (its trainable parameters are used) or any
            object exposing a ``params`` iterable of tensors.
        theta: maximum allowed L2 norm of the concatenated gradients.
    """
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    # Parameters that did not take part in the last backward pass have no
    # gradient; they contribute nothing to the norm and are left alone.
    grads = [p.grad for p in params if p.grad is not None]
    if not grads:
        return
    norm = torch.sqrt(sum(torch.sum(g ** 2) for g in grads))
    if norm > theta:
        for g in grads:
            g[:] *= theta / norm

使用pytorch构建简单的RNN网络并训练

In [12]:
# Define the RNN model
class Rnn(nn.Module):
    """Recurrent layer followed by a linear read-out applied at every step.

    Args:
        input_size: feature dimension of each sequence element.
        hidden_size: size of the recurrent hidden state (default 32; values
            such as 32, 64 or 128 are typical choices).
        num_layers: number of stacked recurrent layers (default 1).
    """

    def __init__(self, input_size, hidden_size=32, num_layers=1):
        super().__init__()
        # Recurrent part of the network.
        self.rnn = nn.RNN(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True  # input shape: [batch size, sequence length, feature size]
                )
        # Fully connected read-out producing one value per time step.
        self.out = nn.Linear(hidden_size, 1)

    # Forward pass
    def forward(self, x, h_0):
        """Return ``(predictions, h_n)`` for the input sequence ``x``.

        Args:
            x: input of shape [batch size, sequence length, input_size].
            h_0: initial hidden state, or None.

        Returns:
            predictions of shape [batch size, sequence length, 1] and the
            final hidden state ``h_n`` returned by the recurrent layer.
        """
        r_out, h_n = self.rnn(x, h_0)
        # nn.Linear acts on the last dimension, so one call maps every time
        # step: [batch, seq, hidden] -> [batch, seq, 1].
        return self.out(r_out), h_n

In [13]:
# Number of points sampled per training sequence.
TIME_STEP = 10
# Feature dimension of each sequence element.
INPUT_SIZE = 1
# Learning rate.
LR = 0.02
model = Rnn(INPUT_SIZE)
print(model)

Rnn(
  (rnn): RNN(1, 32, batch_first=True)
  (out): Linear(in_features=32, out_features=1, bias=True)
)


In [14]:
# Mean-squared-error loss is used here.
import numpy as np
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

h_state = None  # no hidden state exists before the first window

for step in range(300):
    # Build one synthetic window: input x and target y both have shape [1, 10, 1].
    start, end = step * np.pi, (step + 1) * np.pi
    # TIME_STEP evenly spaced sample points covering [start, end].
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32)
    x_np = np.sin(steps)
    y_np = np.cos(steps)
    # np.newaxis adds the batch and feature dimensions before wrapping the
    # arrays as tensors.
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    # Feed the window through the network: prediction has shape [1, 10, 1]
    # and h_state is the hidden state after the last time step.
    prediction, h_state = model(x, h_state)
    # Carry the hidden state's values into the next window but cut it off
    # from this window's graph, so backward() never reaches earlier windows.
    h_state = h_state.detach()

    # Backpropagation
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    # Update the model parameters.
    optimizer.step()

In [15]:
# Plot the last training window to inspect the prediction quality:
# red is the target (cos), blue is the network output.
plt.plot(steps, y_np.flatten(), 'r-')
plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
plt.show()

: 