In [35]:
import torch
import numpy as np
import random
import torch.optim as optim
def setup_seed(seed):
    """Seed every random number generator used in this notebook.

    The same ``seed`` is applied, in order, to PyTorch's CPU generator,
    PyTorch's CUDA generators, NumPy's global generator and Python's
    ``random`` module.

    Args:
        seed: Integer seed forwarded unchanged to each seeding function.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Fix the random seed so the printed tensors are reproducible.
setup_seed(1)

# Build a learnable 2x2 parameter and assign its gradient by hand (all ones),
# so no backward pass is needed for this demonstration.
weight = torch.randn(2, 2, requires_grad=True)
weight.grad = torch.ones(2, 2)

# Hand the learnable parameter to the optimizer; the learning rate is 1.
optimizer = optim.SGD(params=[weight], lr=1)

# ----------------------------------- step -----------------------------------
print("weight before step:{}".format(weight.data))
# Apply one update; try lr=1 versus lr=0.1 and compare the printed results.
optimizer.step()
print("weight after step:{}".format(weight.data))

weight before step:tensor([[0.6614, 0.2669],
        [0.0617, 0.6213]])
weight after step:tensor([[-0.3386, -0.7331],
        [-0.9383, -0.3787]])


In [36]:
# ----------------------------------- zero_grad -----------------------------------
# Step again: the hand-assigned gradient has not been cleared yet, so the
# weights move by another lr * grad.
print("weight before step:{}".format(weight.data))
optimizer.step()        # try lr=1 versus lr=0.1 and compare the results
print("weight after step:{}".format(weight.data))

# Compare the identity of the tensor held in the optimizer's first param group
# with the identity of `weight` itself.
print("weight in optimizer:{}\nweight in weight:{}\n".format(id(optimizer.param_groups[0]['params'][0]), id(weight)))

print("weight.grad is {}\n".format(weight.grad))
# Pass set_to_none=False explicitly so the gradient tensor is filled with zeros
# instead of being replaced by None. The default of this flag differs between
# PyTorch releases (it is True from 2.0 on), and this demo is meant to print a
# zero tensor.
optimizer.zero_grad(set_to_none=False)
print("after optimizer.zero_grad(), weight.grad is\n{}".format(weight.grad))

weight before step:tensor([[-0.3386, -0.7331],
        [-0.9383, -0.3787]])
weight after step:tensor([[-1.3386, -1.7331],
        [-1.9383, -1.3787]])
weight in optimizer:2435071410240
weight in weight:2435071410240

weight.grad is tensor([[1., 1.],
        [1., 1.]])

after optimizer.zero_grad(), weight.grad is
tensor([[0., 0.],
        [0., 0.]])


In [37]:
# ----------------------------------- add_param_group -----------------------------------
# Show the parameter groups as they are before anything is added.
print("optimizer.param_groups is\n{}".format(optimizer.param_groups))

# A second learnable tensor that gets its own group with its own learning rate.
w2 = torch.randn(3, 3, requires_grad=True)
optimizer.add_param_group(dict(params=w2, lr=0.0001))

# Show the parameter groups again, now including the group for w2.
print("optimizer.param_groups is\n{}".format(optimizer.param_groups))

optimizer.param_groups is
[{'params': [tensor([[-1.3386, -1.7331],
        [-1.9383, -1.3787]], requires_grad=True)], 'lr': 1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None}]
optimizer.param_groups is
[{'params': [tensor([[-1.3386, -1.7331],
        [-1.9383, -1.3787]], requires_grad=True)], 'lr': 1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None}, {'params': [tensor([[-0.4519, -0.1661, -1.5228],
        [ 0.3817, -1.0276, -0.5631],
        [-0.8923, -0.0583, -0.1955]], requires_grad=True)], 'lr': 0.0001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None}]


In [43]:
# ----------------------------------- state_dict -----------------------------------
import os

# Fresh 2x2 parameter with a hand-assigned all-ones gradient, optimized by SGD
# with momentum.
weight = torch.randn((2, 2), requires_grad=True)
weight.grad = torch.ones((2, 2))
optimizer = optim.SGD([weight], lr=0.1, momentum=0.9)
opt_state_dict = optimizer.state_dict()

# Snapshot taken before any update has been applied.
print("state_dict before step:\n", opt_state_dict)

# Apply ten updates with the same gradient, then print the state again.
for i in range(10):
    optimizer.step()

print("state_dict after step:\n", optimizer.state_dict())

# Save the optimizer state to disk.
# The path is a raw string: in a normal literal "\L", "\d", "\p" and "\m" are
# invalid escape sequences, which newer Python versions warn about. The
# resulting string value is unchanged.
# NOTE(review): this is a machine-specific absolute path; the directory must
# already exist for torch.save to succeed.
path = r"D:\Learn\deep-learning-for-image-processing-master\pytorch_object_detection\mask_rcnn"
torch.save(optimizer.state_dict(), os.path.join(path, "optimizer_state_dict.pkl"))

state_dict before step:
 {'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0]}]}
state_dict after step:
 {'state': {0: {'momentum_buffer': tensor([[6.5132, 6.5132],
        [6.5132, 6.5132]])}}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0]}]}


In [44]:
# -----------------------------------load state_dict -----------------------------------

# Read back the optimizer state that was written to `path` earlier.
state_dict = torch.load(os.path.join(path, "optimizer_state_dict.pkl"))
# Build a new optimizer over the same parameter with the same hyperparameters.
optimizer = optim.SGD(params=[weight], lr=0.1, momentum=0.9)

# Print the new optimizer's state, restore the saved state, and print it again.
print("state_dict before load state:\n", optimizer.state_dict())
optimizer.load_state_dict(state_dict)
print("state_dict after load state:\n", optimizer.state_dict())

state_dict before load state:
 {'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0]}]}
state_dict after load state:
 {'state': {0: {'momentum_buffer': tensor([[6.5132, 6.5132],
        [6.5132, 6.5132]])}}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0]}]}


In [None]:
# ------------------------------ gradient descent ------------------------------
# Manual gradient descent on func(x), recording the trajectory for plotting.
# NOTE(review): `func`, `x` and `plt` are not defined in the visible cells;
# confirm they are created earlier in the notebook. `x` is indexed with [0] and
# read with .item(), so it is presumably a one-element 1-D tensor with
# requires_grad=True.
# Record the loss and iteration index so the loss curve can be plotted.
iter_rec, loss_rec, x_rec = list(), list(), list()
 
lr = 0.5   # learning rates to try: 1. / .5 / .2 / .1 / .125
max_iteration = 4  # presumably iteration counts paired with those rates: lr=1. -> 4, lr=.5 -> 4, lr=.2 -> 20 or 200
 
for i in range(max_iteration):
 
    # Forward pass and backward pass to populate x.grad.
    y = func(x)
    y.backward()
 
    print("Iter:{}, X:{:8}, X.grad:{:8}, loss:{:10}".format(
        i, x.detach().numpy()[0], x.grad.detach().numpy()[0], y.item()))
 
    # Store the value of x before it is updated.
    x_rec.append(x.item())
 
    x.data.sub_(lr * x.grad)    # in-place update through .data: x = x - lr * x.grad
    x.grad.zero_()              # clear the gradient in place before the next backward()
 
    iter_rec.append(i)
    loss_rec.append(y.detach().numpy())
 
    # NOTE(review): the plotting code below is indented inside the for loop, so
    # a figure is drawn and shown on every iteration — confirm this is intended
    # rather than a single figure after the loop.
    # Left panel: loss value against iteration index.
    plt.subplot(121).plot(iter_rec, loss_rec, '-ro')
    plt.xlabel("Iteration")
    plt.ylabel("Loss value")
 
    # Right panel: func evaluated on 100 points in [-3, 3], with the visited x
    # values drawn on top. Note that `y` is rebound here to the curve values.
    x_t = torch.linspace(-3, 3, 100)
    y = func(x_t)
    plt.subplot(122).plot(x_t.numpy(), y.numpy(), label="y = 4*x^2")
    plt.grid()
    # The comprehension's `i` is local to the comprehension and does not affect
    # the loop index.
    y_rec = [func(torch.tensor(i)).item() for i in x_rec]
    plt.subplot(122).plot(x_rec, y_rec, '-ro')
    plt.legend()
    plt.show()