Practice with PyTorch: implement a two-layer network by hand and fit it to random data.

## Pytorch manual

In [2]:
import torch
# Problem sizes for the two-layer network.
N = 64        # batch size
D_in = 1000   # input dimension
H = 100       # hidden dimension
D_out = 10    # output dimension

# Device handle, currently pinned to the CPU. To pick up a GPU when one is
# available, use instead:
#   torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

In [4]:
# Fix the RNG seed so the random data and the initial weights are the same
# after every kernel restart.
torch.manual_seed(0)

# torch.randn draws from the standard normal distribution N(0, 1); it is
# torch.rand, not randn, that produces uniform values in [0, 1). The leading
# arguments give the shape of the returned tensor.
# All tensors are created on the configured `device` so that switching the
# device setting actually moves the computation.
# x: random inputs, N rows by D_in columns
x = torch.randn(N, D_in, device=device)
# y: random targets, N rows by D_out columns
y = torch.randn(N, D_out, device=device)
# Randomly initialize weights
# w1: D_in rows by H columns (input -> hidden)
# w2: H rows by D_out columns (hidden -> output)
w1 = torch.randn(D_in, H, device=device)
w2 = torch.randn(H, D_out, device=device)
# Step size for the plain gradient-descent updates
learning_rate = 1e-6

In [6]:
for t in range(500):
    # ---- Forward pass ----
    # Hidden pre-activation: (N x D_in) times (D_in x H) -> N x H
    h = torch.mm(x, w1)
    # ReLU written as a clamp with lower bound 0: negative entries become 0,
    # everything else passes through unchanged.
    # (clamp limits values to [min, max]; e.g. clamping torch.arange(12) to
    # [2, 10] gives [2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10].)
    h_relu = torch.clamp(h, min=0)
    # Network output: (N x H) times (H x D_out) -> N x D_out
    y_pred = torch.mm(h_relu, w2)

    # ---- Loss ----
    # Sum of squared errors. `loss` is a tensor; .item() extracts the plain
    # Python number for printing.
    residual = y_pred - y
    loss = (residual ** 2).sum()
    print(t, loss.item())

    # ---- Backward pass (manual chain rule) ----
    # d(loss)/d(y_pred) for the sum-of-squares loss
    grad_y_pred = 2.0 * residual
    # d(loss)/d(w2): H x D_out
    grad_w2 = torch.mm(h_relu.T, grad_y_pred)
    # Propagate back through the second matmul to the ReLU output
    grad_h_relu = torch.mm(grad_y_pred, w2.T)
    # Back through the ReLU: no gradient flows where the pre-activation
    # was negative
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    # d(loss)/d(w1): D_in x H
    grad_w1 = torch.mm(x.T, grad_h)

    # ---- Gradient-descent step (updates the weights in place) ----
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 28941504.0
1 26217942.0
2 26532180.0
3 26028582.0
4 22966868.0
5 17257316.0
6 11317957.0
7 6731106.0
8 3953307.0
9 2426489.0
10 1620892.875
11 1178934.375
12 918631.375
13 750203.0
14 631000.4375
15 540444.5
16 468327.6875
17 409083.34375
18 359472.375
19 317405.375
20 281360.03125
21 250275.109375
22 223332.90625
23 199865.0
24 179349.8125
25 161342.625
26 145485.375
27 131477.0
28 119083.046875
29 108097.6953125
30 98307.15625
31 89554.5859375
32 81713.6796875
33 74673.8984375
34 68343.125
35 62642.6328125
36 57487.66796875
37 52823.07421875
38 48592.9453125
39 44752.8828125
40 41261.18359375
41 38081.171875
42 35181.53125
43 32532.7265625
44 30110.94921875
45 27894.390625
46 25863.625
47 24002.466796875
48 22294.517578125
49 20723.48046875
50 19277.478515625
51 17942.265625
52 16710.498046875
53 15576.8671875
54 14533.6396484375
55 13569.1015625
56 12675.849609375
57 11848.8662109375
58 11082.025390625
59 10370.3251953125
60 9709.5732421875
61 9095.5703125
62 8524.9130859375
63 79

## Pytorch Autograd