导入 pytorch 库

In [12]:
import torch

生成张量矩阵

In [28]:
# Tensor dimensions; later cells reuse `height` and `width`.
height = 4
width = 5
# Named `dtype` rather than `type` so the builtin `type()` stays usable
# for the rest of the session.
dtype = torch.float

# Zero-filled tensor with an explicit dtype (creation demo only) ...
x = torch.zeros(height, width, dtype=dtype)
# ... immediately replaced by uniform random samples from [0, 1).
x = torch.rand(height, width)

# new_ones: all-ones tensor that inherits x's dtype and device.
print(x.new_ones(height, width))
# randn_like: standard-normal samples with the same shape as x.
print(torch.randn_like(x, dtype=dtype))
# size() and shape both report torch.Size([height, width]).
print(x.size(), x.shape)

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([[ 0.0547,  0.5870, -0.5169, -1.3087, -0.7692],
        [ 0.4217,  0.1433, -1.5285, -0.1153, -0.0694],
        [-1.9810, -1.5888, -0.7454, -0.6610, -1.0191],
        [ 2.1525,  0.4409,  0.3594, -1.9272,  1.5251]])
torch.Size([4, 5]) torch.Size([4, 5])


利用 view 函数重塑张量

In [29]:
# view() exposes the same 20 elements under a new shape:
# first as a flat vector, then as a 2 x 10 matrix.
for target_shape in ((20,), (2, 10)):
    print(x.view(*target_shape))

tensor([0.5915, 0.8367, 0.9751, 0.4737, 0.1089, 0.5773, 0.0857, 0.8377, 0.7627,
        0.7098, 0.1919, 0.6099, 0.2173, 0.6880, 0.1656, 0.7736, 0.1788, 0.1442,
        0.7041, 0.6251])
tensor([[0.5915, 0.8367, 0.9751, 0.4737, 0.1089, 0.5773, 0.0857, 0.8377, 0.7627,
         0.7098],
        [0.1919, 0.6099, 0.2173, 0.6880, 0.1656, 0.7736, 0.1788, 0.1442, 0.7041,
         0.6251]])


ndarray 和 tensor 相互转化

In [None]:
import numpy as np

# Small concrete array so both conversion directions can be run as-is.
array = np.arange(6.0).reshape(2, 3)
tensor = torch.from_numpy(array)    # ndarray -> tensor (shares memory with `array`)
restored = tensor.numpy()           # tensor -> ndarray (works for CPU tensors)

使用 CUDA (CPU 和 GPU 操作)

In [31]:
# Start from a fresh CPU tensor of uniform random values.
x = torch.rand(height, width)
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Two ways to get a tensor onto the GPU:
    y = torch.ones_like(x, device=device)   # 1) create it directly on the device
    x = x.to(device)                        # 2) copy an existing tensor over
    z = torch.add(x, y)
    print(z)
    # .to() can change device and dtype in one call; here the data goes
    # back to the CPU as float64.
    print(x.to("cpu", torch.double))

tensor([[1.5470, 1.8688, 1.7033, 1.3549, 1.4193],
        [1.7112, 1.2792, 1.8050, 1.4491, 1.0816],
        [1.7271, 1.6421, 1.4443, 1.5189, 1.5889],
        [1.5171, 1.7733, 1.2525, 1.9881, 1.1150]], device='cuda:0')
tensor([[0.5470, 0.8688, 0.7033, 0.3549, 0.4193],
        [0.7112, 0.2792, 0.8050, 0.4491, 0.0816],
        [0.7271, 0.6421, 0.4443, 0.5189, 0.5889],
        [0.5171, 0.7733, 0.2525, 0.9881, 0.1150]], dtype=torch.float64)


实现一个简单的 model
- $ h = X W_1 $
- $ h_{relu} = \max(0, h) $
- $ y_{out} = h_{relu} W_2 $

遵循 3 项:

  1. **forward pass**
  2. **loss**
  3. **backward pass**

In [41]:
# Two-layer ReLU network trained with hand-written NumPy gradients.
import numpy as np

# Batch size, input width, hidden width, output width.
N, D_in, H, D_out = 32, 300, 100, 10
times = 300

# Random inputs/targets and randomly initialised weights.
X = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for i in range(times):
    # Forward pass: linear -> ReLU -> linear.
    h = X @ w1
    h_relu = np.maximum(h, 0)
    y_pred = h_relu @ w2

    # Loss: sum of squared errors over the whole batch.
    residual = y_pred - y
    loss = np.square(residual).sum()
    print(i, loss)

    # Backward pass: chain rule applied from the output back to the input.
    grad_y_pred = 2.0 * residual
    grad_w2 = h_relu.T @ grad_y_pred
    grad_h_relu = grad_y_pred @ w2.T
    # ReLU passes no gradient where its input was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = X.T @ grad_h

    # Gradient-descent step, applied in place.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 5825537.79399452
1 13025095.888369191
2 26405999.43522036
3 1455481.6157880486
4 610692.0475780653
5 364821.18168139213
6 244382.677261854
7 174770.89513128804
8 130124.80237483399
9 99765.83192699074
10 78265.95633724422
11 62547.98607320874
12 50748.345429807945
13 41729.73629864013
14 34725.18272322697
15 29202.493229743945
16 24799.94836869903
17 21253.9026531974
18 18368.218801317886
19 15998.17045942978
20 14034.937311317832
21 12395.62400570916
22 11017.667822834504
23 9849.18223258675
24 8851.94895462026
25 7993.592290459195
26 7250.633480528266
27 6603.269429166647
28 6034.418399233932
29 5533.6857348923495
30 5092.548730993804
31 4702.310094637323
32 4353.712690533133
33 4041.05693507614
34 3759.707513177027
35 3505.352324100504
36 3274.8793425048743
37 3065.510223138877
38 2874.7271382204963
39 2700.347798781125
40 2540.6324022038652
41 2394.128450718817
42 2259.2552234851737
43 2134.9367842027323
44 2020.1713198889356
45 1914.0120249637039
46 1815.6434996649796
47 1724.38

In [40]:
import torch

# Two-layer ReLU network again, this time with autograd deriving the gradients.
dtype = torch.float
device = torch.device("cpu")
N, D_in, H, D_out = 32, 300, 100, 10
times = 300

x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)
# requires_grad=True asks autograd to track operations on the weights.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-5
for t in range(times):
    # Forward pass; clamp(min=0) plays the role of the ReLU.
    hidden = x.mm(w1).clamp(min=0)
    y_pred = hidden.mm(w2)

    # Loss: sum of squared errors.
    loss = torch.sum((y_pred - y) ** 2)
    print(t, loss.item())

    # Backward pass: fills w1.grad and w2.grad.
    loss.backward()

    # Update the weights outside the autograd graph, then clear each
    # gradient so the next backward() does not accumulate onto it.
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= learning_rate * weight.grad
            weight.grad.zero_()

0 3776156.5
1 5528263.5
2 14867354.0
3 11923359.0
4 168510.578125
5 91333.40625
6 57133.10546875
7 39268.58203125
8 28661.833984375
9 21774.65234375
10 17014.2265625
11 13559.427734375
12 10964.2236328125
13 8971.068359375
14 7412.0517578125
15 6177.02783203125
16 5184.91455078125
17 4382.7607421875
18 3727.31396484375
19 3187.298095703125
20 2739.06396484375
21 2364.88037109375
22 2050.697509765625
23 1785.76611328125
24 1561.045654296875
25 1369.5546875
26 1205.6883544921875
27 1064.982666015625
28 943.83935546875
29 839.0029296875
30 747.9459228515625
31 668.59033203125
32 599.287109375
33 538.52978515625
34 485.0826416015625
35 437.9480895996094
36 396.2491455078125
37 359.2742614746094
38 326.3955993652344
39 297.0975341796875
40 270.92645263671875
41 247.4972381591797
42 226.4735107421875
43 207.57180786132812
44 190.539306640625
45 175.17869567871094
46 161.28057861328125
47 148.68836975097656
48 137.25848388671875
49 126.86995697021484
50 117.41140747070312
51 108.7853393554687

In [42]:
import torch

N, D_in, H, D_out = 32, 300, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear -> ReLU -> Linear, assembled from torch.nn building blocks.
layers = [
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
]
model = torch.nn.Sequential(*layers)

# Summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction="sum")

learning_rate = 1e-4
for t in range(500):
    # Forward pass and loss.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Clear stale gradients, then backpropagate.
    model.zero_grad()
    loss.backward()

    # Plain gradient-descent step on every parameter, outside autograd.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

0 286.2662048339844
1 277.2135009765625
2 268.703369140625
3 260.7464294433594
4 253.2162628173828
5 246.04745483398438
6 239.2170867919922
7 232.6998291015625
8 226.4536590576172
9 220.44154357910156
10 214.64041137695312
11 209.0576171875
12 203.70843505859375
13 198.5474853515625
14 193.55746459960938
15 188.7219696044922
16 184.05282592773438
17 179.54685974121094
18 175.1686248779297
19 170.89974975585938
20 166.7569580078125
21 162.7478790283203
22 158.85679626464844
23 155.05438232421875
24 151.32957458496094
25 147.70286560058594
26 144.1607208251953
27 140.71640014648438
28 137.3572235107422
29 134.08804321289062
30 130.8970947265625
31 127.7657241821289
32 124.70851135253906
33 121.71721649169922
34 118.7957534790039
35 115.94145202636719
36 113.14124298095703
37 110.39445495605469
38 107.7040023803711
39 105.06673431396484
40 102.48284912109375
41 99.94824981689453
42 97.45711517333984
43 95.01776885986328
44 92.6284408569336
45 90.27799987792969
46 87.97378540039062
47 85.7