In [27]:
import torch
import numpy as np

# 5.3 减少损失是我们想要的

In [28]:
def model(t_u, w, b):  # 创建模型函数
    return w * t_u + b

In [29]:
def loss_fn(t_p, t_c):  # 创建损失函数
    squared_diffs = (t_p - t_c) ** 2
    print(squared_diffs)
    return squared_diffs.mean()

In [30]:
w = torch.ones(())  # 创建一个标量张量
b = torch.zeros(())

t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])

t_p = model(t_u, w, b)
t_p, t_p.size(0)

(tensor([35.7000, 55.9000, 58.2000, 81.9000, 56.3000, 48.9000, 33.9000, 21.8000,
         48.4000, 60.4000, 68.4000]),
 11)

In [31]:
loss = loss_fn(t_p, t_c)
loss

tensor([1239.0400, 1755.6101, 1866.2401, 2905.2102, 2052.0898, 1672.8102,
         954.8101,  665.6400, 1797.7601, 2246.7603, 2246.7603])


tensor(1763.8848)

# Pytorch中的广播机制

In [32]:
# 张量之间相乘，会将最后一位对齐
x = torch.ones(())
y = torch.ones(3, 1)
z = torch.ones(1, 3)
a = torch.ones(2, 1, 1)
print(f"shapes: x: {x.shape}, y: {y.shape}")
print(f"        z: {z.shape}, a: {a.shape}")
print("x * y:", (x * y).shape)
print("y * z:", (y * z).shape)
print("y * z * a:", (y * z * a).shape)

shapes: x: torch.Size([]), y: torch.Size([3, 1])
        z: torch.Size([1, 3]), a: torch.Size([2, 1, 1])
x * y: torch.Size([3, 1])
y * z: torch.Size([3, 3])
y * z * a: torch.Size([2, 3, 3])


# 5.4 沿着梯度下降

## 5.4.1 减少损失

In [33]:
delta = 0.1

loss_rate_of_change_w = \
    (loss_fn(model(t_u, w + delta, b), t_c)  -
     loss_fn(model(t_u, w - delta, b), t_c)) / (2.0 * delta)

learning_rate = 1e-2
w = w - learning_rate * loss_rate_of_change_w

tensor([1503.1129, 2255.3003, 2402.9609, 3855.1687, 2593.8650, 2096.7246,
        1175.8042,  782.8804, 2231.6177, 2855.8337, 2941.9783])
tensor([1000.4570, 1318.4163, 1397.2645, 2089.4041, 1573.7087, 1296.7200,
         756.8001,  557.9044, 1410.7537, 1710.6497, 1645.1138])


In [34]:
loss_rate_of_change_b = \
    (loss_fn(model(t_u, w + delta, b), t_c)  -
     loss_fn(model(t_u, w - delta, b), t_c)) / (2.0 * delta)

b = b - learning_rate * loss_rate_of_change_b

tensor([ 2477177.7500,  6138895.5000,  6656643.0000, 13231964.0000,
         6211601.0000,  4679298.0000,  2241230.5000,   915448.6875,
         4575886.0000,  7155667.0000,  9214816.0000])
tensor([ 2499703.7500,  6194420.0000,  6716841.5000, 13351398.0000,
         6267854.5000,  4721705.0000,  2261576.5000,   923810.7500,
         4617393.0000,  7220440.0000,  9298055.0000])


## 5.4.2 进行分析

In [53]:
# 1.计算导数
def dloss_fn(t_p, t_c):
    dsq_diffs = 2 * (t_p - t_c) / t_p.size(0)  # t_p中有若干数据，相当于是计算给定数据的平均值
    return dsq_diffs

# 2.将导数应用到模型中
def dmodel_dw(t_u, w, b):
    return t_u

def dmodel_db(t_u, w, b):
    return 1.0

# 3.定义梯度函数
def grad_fn(t_u, t_c, t_p, w, b):
    # dloss_dtp = dloss_fn(t_p, t_c)
    # dloss_dw = dloss_dtp * dmodel_dw(t_u, w, b)
    # dloss_db = dloss_dtp * dmodel_db(t_u, w, b)
    dloss_dw = (2 * t_u * (t_u * w + b - t_c)) / t_u.size(0)
    dloss_db = (2 * (t_u * w + b - t_c)) / t_u.size(0)
    return torch.stack([dloss_dw.sum(), dloss_db.sum()])

In [54]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    for epoch in range(1, n_epochs + 1):
        w, b = params
        
        # 正向传播
        t_p = model(t_u, w, b)
        loss = loss_fn(t_p, t_c)

        # 反向传播
        grad = grad_fn(t_u, t_c, t_p, w, b)

        params -= learning_rate * grad

        print(f"Epoch {epoch}, Loss {float(loss)}")
    return params

In [55]:
training_loop(n_epochs=5, learning_rate=1e-4, params=torch.tensor([1.0, 0.0]), t_u=t_u, t_c=t_c)

tensor([1239.0400, 1755.6101, 1866.2401, 2905.2102, 2052.0898, 1672.8102,
         954.8101,  665.6400, 1797.7601, 2246.7603, 2246.7603])
Epoch 1, Loss 1763.884765625
tensor([363.4739, 276.8914, 285.6463, 285.4439, 394.3942, 353.5212, 242.6773,
        254.2122, 421.3998, 404.3023, 272.0334])
Epoch 2, Loss 323.09051513671875
tensor([154.3265,  38.9575,  36.9046,   2.7616,  88.1067,  94.1945,  85.9451,
        141.2921, 132.8078,  78.7126,  14.2169])
Epoch 3, Loss 78.92963409423828
tensor([ 93.8661,   3.8455,   2.6187,  21.2431,  25.7586,  35.5288,  44.5432,
        104.3721,  61.1167,  18.0365,   2.1518])
Epoch 4, Loss 37.5528450012207
tensor([7.3321e+01, 3.9535e-02, 4.6814e-02, 5.1702e+01, 1.0894e+01, 1.9528e+01,
        3.1417e+01, 9.0793e+01, 3.9588e+01, 5.4897e+00, 1.3125e+01])
Epoch 5, Loss 30.540283203125


tensor([ 0.2413, -0.0149])

## 5.4.4 归一化输入

In [56]:
t_un = 0.1 * t_u

In [59]:
training_loop(n_epochs=5000, learning_rate=1e-2, params=torch.tensor([1.0, 0.0]), t_u=t_un, t_c=t_c)

tensor([9.4249e+00, 7.0728e+01, 8.4272e+01, 3.9244e+02, 2.8837e+01, 9.6721e+00,
        1.5210e-01, 3.8192e+01, 1.3456e+00, 4.8442e+01, 2.0051e+02])
Epoch 1, Loss 80.36434173583984
tensor([ 35.3694,  15.7210,  20.7613, 178.1427,   0.7991,   0.6268,   9.7814,
         63.6546,   7.3058,   4.6903,  76.4715])
Epoch 2, Loss 37.574913024902344
tensor([5.0025e+01, 4.9103e+00, 7.4880e+00, 1.1654e+02, 7.5253e-01, 5.4045e+00,
        1.7620e+01, 7.5256e+01, 1.7813e+01, 7.7153e-02, 4.3692e+01])
Epoch 3, Loss 30.871076583862305
tensor([56.3883,  2.3342,  4.0790, 95.7146,  2.4354,  8.5592, 21.2661, 79.8906,
        23.1856,  0.2175, 33.2475])
Epoch 4, Loss 29.756193161010742
tensor([58.8950,  1.5806,  3.0174, 87.9290,  3.3607,  9.9833, 22.7263, 81.5529,
        25.4677,  0.5783, 29.4873])
Epoch 5, Loss 29.507152557373047
tensor([59.7940,  1.3248,  2.6418, 84.8081,  3.7653, 10.5579, 23.2449, 82.0121,
        26.3685,  0.7699, 28.0299])
Epoch 6, Loss 29.3924560546875
tensor([60.0472,  1.2307,  2.499

tensor([  5.3671, -17.3012])