In [1]:
# Automatic differentiation (autograd) and optimizers

import numpy as np
import torch
# Compact tensor printing: edgeitems=2 and a 75-column line width.
torch.set_printoptions(edgeitems=2, linewidth=75)

import matplotlib.pyplot as plt

In [7]:
# Targets (y) and inputs (x); values at the same position belong together.
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)  # y
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)  # x

# Inputs rescaled by a factor of 0.1.
t_un = t_u * 0.1

In [8]:
def model(input_tensor, w, b):
    """Apply the linear model ``input_tensor * w + b``.

    Args:
        input_tensor: Input value(s) x.
        w: Weight multiplied with the input.
        b: Bias added to the scaled input.

    Returns:
        The prediction ``input_tensor * w + b``.
    """
    scaled = input_tensor * w
    return scaled + b

In [10]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions and targets.

    Args:
        t_p: Predicted values.
        t_c: Target values.

    Returns:
        The mean of the element-wise squared differences ``(t_p - t_c)**2``.
    """
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [6]:
# Initial values for w and b
params = torch.tensor([1.0, 0.0], requires_grad=True) # requires_grad=True enables automatic gradient computation for this tensor
# No backward pass has run yet, so there is no gradient stored (the cell output is True).
params.grad is None

True

In [7]:
# Compute the gradient once: start with a forward pass on the rescaled
# inputs t_un, unpacking params into the w and b arguments of model.
z = model(t_un, *params)
loss = loss_fn(z, t_c)
# Display the loss; its grad_fn in the output shows it is tracked by autograd.
loss

tensor(80.3643, grad_fn=<MeanBackward0>)

In [8]:
loss.backward() # computes gradients for the tensors that require grad (here: params)
# Display the gradient that backward() stored on params.
params.grad

tensor([-77.6140, -10.6400])

In [23]:
# Clear the stored gradient, if there is one, before any further backward pass.
if params.grad is not None:
    params.grad.zero_() # reset the gradient to zero in place
params.grad

tensor([0., 0.])

In [32]:
# 训练函数
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    """Fit ``params`` by gradient descent, applying the update by hand.

    Every epoch clears any gradient already stored on ``params``, evaluates
    ``loss_fn(model(t_u, *params), t_c)``, backpropagates, and then moves
    ``params`` against its gradient scaled by ``learning_rate``. The loss is
    printed every 500 epochs.

    Args:
        n_epochs: Number of epochs to run.
        learning_rate: Factor multiplied with the gradient in each update.
        params: Tensor unpacked into the ``w`` and ``b`` arguments of
            ``model``; it is updated in place.
        t_u: Inputs passed to ``model``.
        t_c: Targets passed to ``loss_fn``.

    Returns:
        The same ``params`` object after the last update.
    """
    for epoch in range(1, n_epochs + 1):
        # Reset the gradient first so this epoch's backward pass starts clean.
        grad = params.grad
        if grad is not None:
            grad.zero_()

        predictions = model(t_u, *params)
        loss = loss_fn(predictions, t_c)
        loss.backward()

        # Inside this context no gradient tracking happens; the update is
        # plain arithmetic on the current values.
        with torch.no_grad():
            # Update in place. Rebinding with
            # `params = params - learning_rate * params.grad` would create a
            # new tensor that carries over only the values, not the
            # requires_grad=True setting of the original params.
            params -= learning_rate * params.grad

        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params

In [33]:
# Start from fresh parameters; nothing has been backpropagated yet,
# so the printed check shows that no gradient is stored.
params = torch.tensor([1.0, 0.0], requires_grad=True)
print(params.grad is None)

# Train on the rescaled inputs t_un; the returned params are the cell output.
training_loop(
    n_epochs=5000,
    learning_rate=1e-2,
    params=params,
    t_u=t_un,
    t_c=t_c,
)

True
Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [4]:
from torch import optim
# List the names exposed by torch.optim; optimizer classes such as SGD and
# Adam appear in the output.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [6]:
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)
# SGD optimizer managing the single parameter tensor.
optimizer = optim.SGD([params], lr=learning_rate)

In [11]:
# Forward pass and loss on the un-rescaled inputs t_u.
z = model(t_u, *params)
loss = loss_fn(z, t_c)

# params and optimizer are created in a separate cell, so a gradient from an
# earlier run of this cell may still be stored on params. backward()
# accumulates into .grad; clear it first so the step uses only the gradient
# of the loss computed above.
optimizer.zero_grad()
loss.backward()

# Equivalent to the manual update used earlier:
#     params -= learning_rate * params.grad
optimizer.step()
params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [12]:
# Try a much larger learning rate on the same un-rescaled inputs t_u.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

# params is created fresh in this cell, so no gradient is stored before backward().
z = model(t_u, *params)
loss = loss_fn(z, t_c)
loss.backward()

# One step at this rate moves params far from the starting values
# (see the cell output).
optimizer.step()
params

tensor([-44.1730,  -0.8260], requires_grad=True)

In [13]:
# 使用optimizer的训练函数

def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit ``params`` by letting ``optimizer`` apply the updates.

    Every epoch evaluates ``loss_fn(model(t_u, *params), t_c)``, clears the
    gradients through the optimizer, backpropagates, and calls
    ``optimizer.step()``. The loss is printed every 500 epochs.

    Note:
        This definition has the same name as the earlier manual-update
        ``training_loop`` in this notebook and replaces it once this cell
        has run.

    Args:
        n_epochs: Number of epochs to run.
        optimizer: Optimizer object providing ``zero_grad()`` and ``step()``;
            presumably constructed over ``params`` by the caller, as the
            calling cells in this notebook do.
        params: Tensor unpacked into the ``w`` and ``b`` arguments of
            ``model``.
        t_u: Inputs passed to ``model``.
        t_c: Targets passed to ``loss_fn``.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        loss = loss_fn(model(t_u, *params), t_c)

        # zero_grad() clears the gradient of every parameter the optimizer
        # manages, so backward() below starts from a clean state.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        should_report = epoch % 500 == 0
        if should_report:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [14]:
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
# The optimizer is built over the very same tensor that is handed to
# training_loop below.
optimizer = optim.SGD([params], lr=learning_rate)

# Train on the rescaled inputs t_un; the returned params are the cell output.
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    t_u=t_un,
    t_c=t_c,
)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [15]:
# Try a different optimizer
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

# With Adam the un-rescaled inputs t_u are passed instead of t_un; the
# returned params are the cell output.
training_loop(
    n_epochs=2000,
    optimizer=optimizer,
    params=params,
    t_u=t_u,
    t_c=t_c,
)

Epoch 500, Loss 7.612900
Epoch 1000, Loss 3.086700
Epoch 1500, Loss 2.928579
Epoch 2000, Loss 2.927644


tensor([  0.5367, -17.3021], requires_grad=True)