## 张量

In [15]:
# 简单来说，tensor可以看做是一个多维数组，进行数据的存储和运算。常用的张量类型有FloatTensor(实数型)和LongTensor(长整数型)，可以分别存储数据金额索引。初始化张量既可以将数组转化为tensor，也可以直接利用PyTorch自带的函数进行随机初始化。

In [8]:
import torch
import numpy as np

In [5]:
# 一个大小为2*3的实数型张量

In [9]:
a = torch.FloatTensor([[1.2, 3.4, 5], [3, 6, 7.4]]);a.shape

torch.Size([2, 3])

In [10]:
# 一个大小为5*6的实数型张量，每个元素根据正态分布N(0, 1)随机采样

In [11]:
b = torch.randn(5, 6);b

tensor([[ 0.9777,  0.0945,  2.2145, -0.9826,  0.7154, -0.1066],
        [ 0.4646,  0.9623, -0.6247,  0.5775, -0.0253,  0.4498],
        [-1.2845, -0.4911,  0.0126,  0.2117, -0.4164,  0.3978],
        [-0.2474,  0.4484, -0.7149, -0.8894,  2.7722,  0.2696],
        [-0.5786,  0.2264,  0.6081,  0.0043,  0.7896,  1.2771]])

In [12]:
# 将a第0行第2列的元素改为5.6

In [13]:
a[0, 2] = 5.6

In [14]:
a

tensor([[1.2000, 3.4000, 5.6000],
        [3.0000, 6.0000, 7.4000]])

## 计算与求导

In [3]:
# PyTorch框架最大的好处在于可以根据张量的运算动态生成计算图(computation graph)，从而进行自动微分和求导。在PyTorch中，如果一个张量中的参数要计算导数，需要设置该张量的requires_grad属性为True。

In [13]:
x = np.array([1.3, 0.5, 1.9, 2.45])
print('np.array:', x)
t = torch.tensor(x, requires_grad=True)
print('tensor:', t)

np.array: [1.3  0.5  1.9  2.45]
tensor: tensor([1.3000, 0.5000, 1.9000, 2.4500], dtype=torch.float64, requires_grad=True)


In [32]:
a = torch.ones(1);a

tensor([1.])

In [33]:
a = a.cuda()  # 将a放入GPU，本机无N卡

RuntimeError: Cannot initialize CUDA without ATen_cuda library. PyTorch splits its backend into two shared libraries: a CPU library and a CUDA library; this error has occurred because you are trying to use some CUDA functionality, but the CUDA library has not been loaded by the dynamic linker for some reason.  The CUDA library MUST be loaded, EVEN IF you don't directly use any symbols from the CUDA library! One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many dynamic linkers will delete dynamic library dependencies if you don't depend on any of their symbols.  You can check if this has occurred by using ldd on your binary to see if there is a dependency on *_cuda.so library.

In [34]:
a.requires_grad

False

In [35]:
# 设置a需要计算导数

In [36]:
a.requires_grad = True

In [37]:
a.requires_grad

True

In [38]:
b = torch.ones(1)

In [39]:
x = 3 * a + b

In [40]:
x.requires_grad  # 因为a需要计算导数，所以x需要计算导数

True

In [41]:
# 计算所有参数的导数

In [42]:
x.backward()

In [43]:
# 计算a的导数，为3

In [44]:
a.grad

tensor([3.])

## 网络层

In [45]:
# PyTorch的软件包nn中包含了绝大部分常用的网络结构。需要注意的是，在实际网络计算中需要将数据分批进行处理，一个批次的数据可以统一计算并求导，从而大大加快运算速度。因此，PyTorch框架中许多网络的输入默认有一维是批次大小，即batch_size。

### 全连接层

In [46]:
# PyTorch中使用nn.Linear命令实现两层神经元之间的全连接。nn.Linear(in_feature, out_feature, bias=True)表示前一层有in_feature个神经元，下一层有out_feature个神经元，bias表示是否需要截距（默认为True）。一个nn.Linear中有in_feature*out_feature+out_feature个权重（包括截距）。全连接层的输入张量需要最后一维大小是in_feature，输出张量最后一维大小是out_feature。

In [49]:
import torch
from torch import nn

In [48]:
# 四层神经网络，输入层大小为30，两个隐藏层大小分别为50和70，输出层大小为1

In [50]:
linear1 = nn.Linear(in_features=30, out_features=50)
linear2 = nn.Linear(in_features=50, out_features=70)
linear3 = nn.Linear(in_features=70, out_features=1)

In [51]:
# 10组输入数据作为一批次（batch），每个输入为30维

In [55]:
x = torch.randn(10, 30);x.shape

torch.Size([10, 30])

In [56]:
# 10组输出数据，每一个输出为1维

In [57]:
res = linear3(linear2(linear1(x)))

In [58]:
res

tensor([[ 0.0313],
        [ 0.3675],
        [ 0.2525],
        [ 0.3356],
        [-0.0732],
        [ 0.7092],
        [ 0.4141],
        [ 0.1981],
        [ 0.2590],
        [ 0.1702]], grad_fn=<ThAddmmBackward>)

### 丢弃

In [59]:
# PyTorch中使用nn.Dropout命令实现Dropout层。nn.Dropout(p=0.3)表示Dropout置零概率为p=0.3（默认为0.5）。其输入可以是任意维的tensor。

In [60]:
layer = nn.Dropout(p=0.3)  # Dropout层，置零概率为0.1
input = torch.randn(5, 2)
print('input: {}'.format(input))
output = layer(input)
print('input_dropout: {}'.format(output))  # 可以看到有的数被置为0了

input: tensor([[-0.8905, -0.7364],
        [-0.0502, -0.7841],
        [-2.2181,  1.4047],
        [-0.1035,  0.7474],
        [-0.4884, -1.3764]])
input_dropout: tensor([[-1.2721, -1.0520],
        [-0.0000, -1.1201],
        [-3.1687,  2.0067],
        [-0.1479,  1.0678],
        [-0.6977, -0.0000]])


## CNN

In [61]:
# PyTorch中使用nn.Conv2d命令实现卷积神经网络。nn.Conv2d(in_channels, out_channels, kernel_size, bias=True)表示有in_channels个输入通道、out_channels个输出通道，过滤器大小为kernel_size*kernel_size，bise表示卷积中是否需要加截距。输入维度batch*in_channels*height*width的张量，输出为维度是batch*out_channels*height_out_width_out的张量。

In [62]:
# 卷积神经网络，输入通道有1个，输出通道有3个，过滤器大小为5

In [63]:
conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=5)

In [64]:
# 10个输入数据作为一批次(batch)，每一个输入为单通道32*32矩阵

In [68]:
x = torch.randn(10, 1, 32, 32);x.shape

torch.Size([10, 1, 32, 32])

In [69]:
# y维度为10*3*28*28，表示输出10组数据，每一个输出为3通道28*28矩阵（28=32-5+1）。

In [72]:
y = conv(x); y.shape

torch.Size([10, 3, 28, 28])

## RNN

In [73]:
# PyTorch中使用nn.GRU命令可以实现基于GRU的循环神经网络，使用nn.LSTM命令可以实现基于LSTM的循环神经网络。
# 以GRU为例，nn.GRU(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False)，
# 表示输入的每个元素xt的维度是input_size，状态ht的维度是hidden_size，一共有num_layers层（默认为1层），bias表示是否需要加截距（默认为True），batch_first表示batch的维度是否为第0维（默认为第1维）、Dropout的概率（默认是0）、是否是双向RNN（默认是False）。

In [77]:
# 双层GRU输入元素维度是10，状态维度是20，batch是第1维

In [79]:
rnn = nn.GRU(input_size=10, hidden_size=20, num_layers=2)

In [80]:
# 一批次共3个序列，每个序列长度为5，维度为10，注意batch是第1维的

In [83]:
x = torch.randn(5, 3, 10); x

tensor([[[ 1.6564,  0.8530, -1.7247,  1.3092,  1.4854, -0.1342, -0.2576,
           0.2223, -0.3561,  1.0511],
         [ 0.0946, -0.0584, -0.8242, -0.5478, -1.1778, -0.2225, -1.2146,
          -1.2610,  0.7019,  0.9740],
         [-0.5392,  0.4369,  1.0690,  1.8458,  1.1952,  0.3555,  0.5510,
          -0.0762, -2.2188, -0.7653]],

        [[ 1.5738, -1.2145,  0.0223, -0.3202,  1.3587,  0.1080, -1.7904,
          -1.0863, -1.0665,  1.2561],
         [ 0.4089, -0.9534, -0.1992,  1.5218, -0.9697,  1.1563, -0.2080,
           1.4709,  0.3636,  0.3639],
         [ 0.0107, -1.4033,  0.5778,  0.4441, -0.0658, -0.8724, -0.6508,
          -1.0449,  0.0564,  0.2961]],

        [[-1.8203, -0.0227,  0.6892,  0.0112,  0.6261,  0.0274, -0.3538,
          -1.4725, -0.3154,  1.0137],
         [ 0.4392, -0.4874,  0.1536,  0.5325,  1.7367, -0.5537,  1.2015,
           0.4060, -0.2801,  0.3332],
         [-1.7493, -0.4187, -0.5030,  0.6437,  0.3462, -0.7813, -0.8125,
           0.8259,  0.0360,  0.0008

In [84]:
# 初始状态，共3个序列，2层，维度是20

In [86]:
h0 = torch.randn(2, 3, 20); h0

tensor([[[-0.1467, -0.6165,  0.2578, -0.3161, -0.1763, -0.1667,  1.3993,
          -0.0647,  0.2494, -0.2049,  0.1499,  1.3912, -0.2295, -0.2568,
          -0.8759,  0.7645, -0.2468,  0.6890,  0.6483,  1.3278],
         [-1.0142, -1.5736,  0.5107,  2.0680,  1.2704,  0.6716,  0.1626,
          -0.8759, -0.6459, -0.9180, -0.2725, -0.7175,  0.5239, -0.2173,
          -1.3671,  0.5428,  0.0473,  0.0568,  0.9350,  1.0221],
         [ 1.2724,  0.9895,  1.1697,  1.1275,  0.3325, -0.3914, -0.0722,
           0.4492, -1.0444, -1.1348,  1.1292, -1.3776,  0.3336, -0.5393,
          -0.9897,  0.0647,  1.1800, -0.3506, -0.8704, -1.6343]],

        [[-0.5512,  1.7977,  0.9653,  0.3691, -0.4722,  1.8211,  0.2733,
           0.0794, -1.1279, -0.2973, -0.0541, -1.4686,  1.1699,  0.5713,
          -0.2022, -1.0224, -0.1765,  1.0786, -0.6800,  0.2373],
         [-0.5898, -1.2643,  0.2995,  0.5907,  0.9595, -0.8869, -0.6807,
          -0.7182,  0.4525, -1.4235,  1.8818, -0.4123, -0.0281, -1.1159,
        

In [87]:
# output是所有的RNN状态，大小为5*3*20；hn大小为2*3*20，为RNN最后一个状态

In [88]:
output, hn = rnn(x, h0)

In [89]:
output

tensor([[[-0.5584,  0.7450,  0.7515,  0.2207, -0.3389,  0.5826, -0.0202,
          -0.0991, -0.3450,  0.1980,  0.1280, -0.5844,  0.7615,  0.2036,
           0.2498, -0.5843,  0.0876,  0.6018, -0.1724,  0.3312],
         [-0.4281, -1.1675,  0.0453,  0.2297,  0.6998, -0.4277, -0.5196,
          -0.2244,  0.1568, -0.9400,  0.8813, -0.0868, -0.1761, -0.4002,
           0.6112,  1.0781, -0.2811,  0.6485,  0.8611,  0.3393],
         [-0.3270, -0.5938,  0.4756, -0.2269,  0.6389,  0.2558,  0.0614,
           0.9865, -0.0110,  0.3368,  0.5804, -0.3555,  0.4066, -0.3482,
           0.6340, -0.7100, -0.6215,  0.9279, -0.2670, -0.9842]],

        [[-0.4218,  0.2798,  0.5486,  0.1766, -0.1472,  0.1510, -0.1905,
          -0.2109,  0.0078,  0.2845,  0.1491, -0.1828,  0.5647,  0.0087,
           0.3208, -0.3091,  0.1983,  0.4715,  0.2120,  0.3195],
         [-0.3029, -0.8928,  0.0128,  0.0824,  0.4381, -0.2034, -0.3533,
           0.0441,  0.1389, -0.5956,  0.4432,  0.0450, -0.2927, -0.2549,
        

In [90]:
hn

tensor([[[-0.0857,  0.1412, -0.2785,  0.1061,  0.1241, -0.1227, -0.0866,
           0.0898,  0.0458,  0.0079, -0.0340,  0.1254, -0.1609, -0.2855,
           0.1338, -0.2518,  0.0936, -0.2039,  0.1216, -0.1918],
         [ 0.2979, -0.3138,  0.2738,  0.0807, -0.0743,  0.3784, -0.0882,
           0.0910,  0.5830,  0.3327, -0.4801,  0.1771, -0.5598,  0.2996,
           0.3533,  0.0559,  0.5862, -0.2444, -0.0503, -0.5810],
         [ 0.1657,  0.2654, -0.1810,  0.0502,  0.0726, -0.0892,  0.1264,
           0.0524, -0.0699,  0.0268, -0.0855, -0.2725,  0.0741, -0.0909,
          -0.1576, -0.0796, -0.0048, -0.1826,  0.0671,  0.0258]],

        [[-0.1765, -0.0334,  0.2632,  0.2015,  0.0668, -0.0052, -0.0894,
          -0.2180,  0.1517,  0.2934,  0.1580,  0.0670, -0.0667,  0.0823,
           0.1841,  0.0103,  0.0151,  0.4317,  0.2388,  0.0480],
         [ 0.0221,  0.0946,  0.2801,  0.0931,  0.3072,  0.0321,  0.0091,
           0.1027,  0.1712,  0.1340,  0.0630,  0.1989, -0.3528, -0.0484,
        

## 自定义网络模块

In [91]:
# 在PyTorch开发中，我们经常需要实现一个新的网络结构，例如全连接层后接RNN层再接全连接层。我们希望所有的计算、求导都能在定制的网络上进行。为此PyTorch提供了一个简洁的工具————将定制的网络写成一个类，它需要继承基类nn.Module并实现其中的构造函数和前向计算函数forward。PyTorch将根据前向计算得到网络的结构，然后自动实现反向传播求导的过程backward。

### 定义网络结构

In [92]:
# 在下面的代码中，我们实现了一个简单的网络FirstNet，它由一个全连接层、Dropout层和RNN层组成。

In [2]:
import torch
import torch.nn as nn

In [3]:
# 自定制网络为一个class，继承nn.Module基类

In [4]:
class FirstNet(nn.Module):
    # 构造函数，输入数据的维度input_dim，全连接后的维度rnn_dim，RNN状态的维度state_dim
    def __init__(self, input_dim, rnn_dim, state_dim):
        super(FirstNet, self).__init__()  # 调用父类nn.Module.__init__()方法
        self.linear = nn.Linear(in_features=input_dim, out_features=rnn_dim)  # 全连接层
        self.dropout = nn.Dropout(p=0.3)  # Dropout层，置零概率为0.3
        self.rnn = nn.GRU(input_size=rnn_dim, hidden_size=state_dim, batch_first=True)  # 单层单向GRU

    # 前向计算函数，x大小为batch*seq_len*input_dim，为长度是seq_len的输入序列
    def forward(self, x):
        rnn_input = self.dropout(self.linear(x))  # 对全连接层的输出进行dropout，结果维度为batch*seq_len*rnn_dim
        _, hn = self.rnn(rnn_input)  # GRU的最后一个状态，大小为1*batch*state_dim
        # 交换第0、1维，输出维度为batch*1*state_dim
        return hn.transpose(0, 1)

In [6]:
# 获取网络实例
net = FirstNet(input_dim=10, rnn_dim=20, state_dim=15)
x = torch.randn(3, 5, 10)  # batch是第0维，每个序列长度5，维度为10。具体为：共3个batch，每个batch中有5个长度为10的序列
res = net(x); res

tensor([[[ 0.5904,  0.3055,  0.4063,  0.0102, -0.3069, -0.0033,  0.2001,
           0.0980,  0.0308,  0.2591, -0.3474, -0.0509,  0.4294, -0.3988,
          -0.1944]],

        [[ 0.2186,  0.5173,  0.4278,  0.2601,  0.1865,  0.2927, -0.1015,
          -0.0396,  0.6598,  0.1779,  0.4690,  0.1746, -0.3549,  0.2339,
           0.5734]],

        [[-0.2472,  0.1883, -0.0074, -0.2397, -0.1368,  0.0363, -0.2424,
          -0.1632, -0.0043,  0.1990, -0.1810,  0.0441, -0.5758,  0.2696,
          -0.1659]]], grad_fn=<TransposeBackward0>)

In [7]:
res.shape

torch.Size([3, 1, 15])

In [9]:
# 现在给FirstNet加入训练任务和损失函数。这个机器学习任务是进行回归，即对每个序列输出一个实数预测它的某种性质。损失函数时均方差。训练数据有n个序列及对应的真值y。我们不需要在代码中实现backward函数，因为PyTorch会自动根据当前批次动态地得到计算图，并计算出每个参数的导数。每次计算导数前必须执行导数清零函数zero_grad()，因为PyTorch中张量的导数不会在一个批次计算后自动归零。此外，PyTorch的train()和eval()函数可以将网络设置成训练模式和测试模型，并根据设定的模式对Dropout进行处理。

In [10]:
import torch.optim as optim

In [11]:
net.train()  # 将FirstNet置为训练模式（启用Dropout）

FirstNet(
  (linear): Linear(in_features=10, out_features=20, bias=True)
  (dropout): Dropout(p=0.3)
  (rnn): GRU(20, 15, batch_first=True)
)

In [12]:
# net.cuda()  # 如果有GPU，执行此语句将FirstNet的参数放入GPU

In [13]:
# 随机定义训练数据

In [14]:
x = torch.randn(30, 5, 10)  # 共30个序列，每个序列长度为5，维度为10

In [16]:
y = torch.randn(30, 1)

In [20]:
# 随机梯度下降SGD优化器，学习率0.01

In [22]:
optimizer = optim.SGD(net.parameters(), lr=0.01)
for batch_id in range(10):
    # 获取当前批次的数据，batch_size=3
    x_now = x[batch_id * 3: (batch_id + 1) * 3]
    y_now = y[batch_id * 3: (batch_id + 1) * 3]
    res = net(x_now)  # RNN结果res，维度为3*1*15
    y_hat, _ = torch.max(res, dim=2)  # Max-pooling预测张量y_hat，维度3*1
    # 均方差损失函数
    loss = torch.sum(((y_now - y_hat) ** 2.0)) / 3
    optimizer.zero_grad()  # 将net中的所有张量的导数清零
    loss.backward()  # 自动实现反向传播
    optimizer.step()  # 按优化器的规则沿导数反方向移动每个参数
net.eval()  # 训练完成后，将FirstNet置为测试模式（即Dropout不置零，也不删除神经元）
y_pred = net(x)  # 获得测试模式下的输出