In [1]:
%matplotlib inline
import numpy as np
import torch
# Keep tensor reprs compact: show 2 items at each edge of a summarized
# dimension and wrap printed lines at 75 characters.
torch.set_printoptions(edgeitems=2, linewidth=75)

In [2]:
# Target values (t_c) and the matching raw input readings (t_u),
# 11 samples each.
t_c = torch.tensor([
    0.5, 14.0, 15.0, 28.0, 11.0, 8.0,
    3.0, -4.0, 6.0, 13.0, 21.0,
])
t_u = torch.tensor([
    35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
    33.9, 21.8, 48.4, 60.4, 68.4,
])
# Rescaled copy of the inputs: every reading multiplied by 0.1.
t_un = t_u * 0.1

In [3]:
# The model defines how the parameters relate the input to the output.
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``."""
    scaled = w * t_u
    return scaled + b


In [4]:
# The loss function measures how far the predictions are from the actual values.
def loss_fn(t_p, t_c):
    """Return the mean squared difference between ``t_p`` and ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()


In [5]:
# torch.optim collects the ready-made optimizers; list what it exposes.
from torch import optim

dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [6]:
# Start from w = 1.0, b = 0.0 with gradient tracking enabled, and hand the
# parameter tensor to a plain SGD optimizer.
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

In [7]:
# What is an optimizer?
# An optimizer is given the parameters (w, b) and a learning rate; it reads the
# gradient stored on each parameter and writes back the updated values.
# It is the equivalent of this line from slf1:
#     params = params - learning_rate * grad
# With autograd the gradient lives in the `params.grad` attribute (filled in by
# `backward()`), so it does not have to be defined or passed separately:
# constructing the optimizer only requires `params` and `lr`.
# `optimizer.step()` applies one update to the parameters using the current
# gradient.
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
# backward() accumulates into params.grad instead of overwriting it, so clear
# any gradient left over from an earlier run of this cell first.
optimizer.zero_grad()
loss.backward()
optimizer.step()

params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [9]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Run ``n_epochs`` optimizer updates of ``params`` on ``(t_u, t_c)``.

    Each epoch does a forward pass through ``model``, evaluates ``loss_fn``,
    backpropagates and lets ``optimizer`` update ``params`` in place. The
    loss is printed every 500 epochs.

    Returns:
        The same ``params`` tensor that was passed in.
    """
    for step in range(n_epochs):
        epoch = step + 1

        # Forward pass with the current parameters.
        prediction = model(t_u, *params)
        loss = loss_fn(prediction, t_c)

        # Drop stale gradients, backpropagate, then apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        report_due = epoch % 500 == 0
        if report_due:
            print('epoch %d ,loss %f' % (epoch, float(loss)))

    return params

In [10]:
# Fresh parameters and a plain SGD optimizer with a larger learning rate;
# training uses the rescaled inputs t_un.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(n_epochs=5000, optimizer=optimizer, params=params,
              t_u=t_un, t_c=t_c)

epoch 500 ,loss 7.860120
epoch 1000 ,loss 3.828538
epoch 1500 ,loss 3.092191
epoch 2000 ,loss 2.957698
epoch 2500 ,loss 2.933134
epoch 3000 ,loss 2.928648
epoch 3500 ,loss 2.927830
epoch 4000 ,loss 2.927679
epoch 4500 ,loss 2.927652
epoch 5000 ,loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [12]:
# Adam is short for Adaptive Moment Estimation; it adapts the step size
# automatically. This run is fed the raw inputs t_u rather than the rescaled
# t_un.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    n_epochs=2000,
    optimizer=optimizer,
    params=params,
    t_u=t_u,
    t_c=t_c,
)


epoch 500 ,loss 7.612900
epoch 1000 ,loss 3.086700
epoch 1500 ,loss 2.928579
epoch 2000 ,loss 2.927644


tensor([  0.5367, -17.3021], requires_grad=True)

In [14]:
# Hold out 20% of the samples (rounded down) as the validation set.
n_samples = t_u.size(0)
n_val = int(n_samples * 0.2)

n_val

2

In [15]:
# torch.randperm(n) returns a random permutation of the integers 0 .. n-1.
# NOTE(review): no seed is set here, so the split differs from run to run.
shuffled_indices = torch.randperm(n_samples)

# Split at an explicit boundary instead of slicing with -n_val: when
# n_val == 0, [:-0] is empty and [-0:] is the whole tensor, which would leave
# the training set empty and put every sample into validation.
train_indices = shuffled_indices[:n_samples - n_val]
val_indices = shuffled_indices[n_samples - n_val:]

shuffled_indices, train_indices, val_indices

(tensor([ 6,  8,  3,  4,  2,  5,  7,  1,  9,  0, 10]),
 tensor([6, 8, 3, 4, 2, 5, 7, 1, 9]),
 tensor([ 0, 10]))

In [16]:
# A tensor can be indexed directly with a tensor of indices; the result holds
# the elements at those positions.
train_t_u, val_t_u = t_u[train_indices], t_u[val_indices]
train_t_c, val_t_c = t_c[train_indices], t_c[val_indices]

# Rescaled inputs (multiplied by 0.1) for both subsets.
train_t_un = train_t_u * 0.1
val_t_un = val_t_u * 0.1

In [17]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split and report the validation loss alongside.

    Every epoch evaluates ``model``/``loss_fn`` on both splits with the
    current ``params``; only the training loss is backpropagated and used
    for the optimizer update. Both losses are printed for the first three
    epochs and then every 500 epochs.

    Returns:
        The same ``params`` tensor that was passed in.
    """
    for step in range(n_epochs):
        epoch = step + 1

        # Forward passes on both splits, before the parameters are updated.
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)
        val_loss = loss_fn(model(val_t_u, *params), val_t_c)

        # Only the training loss drives the update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            message = (f"Epoch{epoch},train loss {train_loss.item():.4f},"
                       f"Validation loss {val_loss.item():.4f}")
            print(message)

    return params

# An f-string (f"...") embeds expressions in a string: put them inside {}.
# .item() converts a one-element tensor into a Python scalar.

In [20]:
# Fresh parameters and SGD optimizer; train on the rescaled training inputs
# and track the loss on the rescaled validation inputs.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_t_u=train_t_un,
    val_t_u=val_t_un,
    train_t_c=train_t_c,
    val_t_c=val_t_c,
)

Epoch1,train loss 74.8975,Validation loss 104.9652
Epoch2,train loss 34.0469,Validation loss 56.6435
Epoch3,train loss 27.6024,Validation loss 47.3689
Epoch500,train loss 7.3489,Validation loss 15.1187
Epoch1000,train loss 3.7854,Validation loss 7.2841
Epoch1500,train loss 3.1280,Validation loss 4.8208
Epoch2000,train loss 3.0067,Validation loss 3.9292
Epoch2500,train loss 2.9844,Validation loss 3.5769
Epoch3000,train loss 2.9802,Validation loss 3.4312


tensor([  5.1394, -16.1405], requires_grad=True)

In [21]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split; evaluate the validation split without autograd.

    Args:
        n_epochs: Number of optimizer updates to run.
        optimizer: Optimizer constructed over ``params``.
        params: Parameter tensor unpacked into ``model`` as ``(w, b)``.
        train_t_u, train_t_c: Inputs and targets used for the updates.
        val_t_u, val_t_c: Inputs and targets used only for reporting.

    Returns:
        The same ``params`` tensor that was passed in (updated in place by
        the optimizer).
    """
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        # Inside torch.no_grad() no autograd graph is recorded, so the
        # validation forward pass cannot contribute to any gradient.
        with torch.no_grad():
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            # Raises AssertionError if gradient tracking leaked into val_loss.
            assert not val_loss.requires_grad

        optimizer.zero_grad()  # clear the gradients left by the previous epoch
        train_loss.backward()
        optimizer.step()

        # Same progress report as the previous training_loop definition.
        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch{epoch},train loss {train_loss.item():.4f},"
                  f"Validation loss {val_loss.item():.4f}")

    return params





In [22]:
def calc_forward(t_u, t_c, is_train, params=None):
    """Run the forward pass and return the loss, with autograd on or off.

    Args:
        t_u: Model inputs.
        t_c: Targets the predictions are compared against.
        is_train: When true, gradient tracking is enabled for the forward
            pass; when false, it is disabled.
        params: Parameter tensor unpacked into ``model``. When omitted, the
            module-level ``params`` is used, which is what the
            three-argument form has always done.

    Returns:
        The loss computed by ``loss_fn``.

    Raises:
        NameError: If ``params`` is omitted and no module-level ``params``
            exists.
    """
    if params is None:
        # Backward-compatible fallback to the notebook-level parameters.
        try:
            params = globals()["params"]
        except KeyError:
            raise NameError("name 'params' is not defined") from None

    with torch.set_grad_enabled(is_train):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
    return loss