In [1]:
import torch
from torch import nn

In [20]:
class lr_test(nn.Module):
    """Toy network that only exists to hand parameters to the optimizer demos."""

    def __init__(self):
        super().__init__()
        # Conv first, linear second: the Sequential indices (0, 1) stay as before.
        conv = nn.Conv2d(64, 10, kernel_size=3)
        head = nn.Linear(10, 3)
        self.model = nn.Sequential(conv, head)

    def forward(self, x):
        """Feed ``x`` through the conv layer followed by the linear layer."""
        out = self.model(x)
        return out

model1, model2 = lr_test(), lr_test()

initial_lr = 1e-2


def _report(title, optimizer):
    """Print the optimizer's defaults, its number of param groups and the first group's keys."""
    print(title)
    print(optimizer.defaults)
    # print(optimizer.state)
    print(len(optimizer.param_groups))
    print(optimizer.param_groups[0].keys())


# Case 1: one optimizer over the parameters of a single model.
# Attributes inspected on an optimizer:
#   defaults: dict
#   state: dict
#   param_groups: List[dict]
optimizer1 = torch.optim.Adam(model1.parameters(), lr=initial_lr)
_report('Optimizer1:', optimizer1)

# Case 2: one optimizer over the parameters of both models, passed as one flat list.
merged_params = list(model2.parameters()) + list(model1.parameters())
optimizer2 = torch.optim.Adam(merged_params, lr=initial_lr)
_report('Optimizer2:', optimizer2)

# Case 3: the two models are optimized as separate param groups.

# Each group is given its own learning rate.
optimizer3 = torch.optim.Adam([
    {'params': model1.parameters(), 'lr': 1e-2},
    {'params': model2.parameters(), 'lr': 1e-3},
])
# Variant: both groups share the same learning rate.
# optimizer3 = torch.optim.Adam([{'params':model1.parameters()}, {'params':model2.parameters()}], lr=initial_lr)
_report('Optimizer3:', optimizer3)

Optimizer1:
{'lr': 0.01, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}
1
dict_keys(['params', 'lr', 'betas', 'eps', 'weight_decay', 'amsgrad'])
Optimizer2:
{'lr': 0.01, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}
1
dict_keys(['params', 'lr', 'betas', 'eps', 'weight_decay', 'amsgrad'])
Optimizer3:
{'lr': 0.01, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}
2
dict_keys(['params', 'lr', 'betas', 'eps', 'weight_decay', 'amsgrad'])


# LambdaLR
将初始学习率乘以 lr_lambda(epoch) 返回的系数；lr_lambda 可以是任意自定义函数，因此也能表示没有固定解析形式的学习率变化规律。

In [34]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import LambdaLR
import torch
from torch import nn

class Lambdalr_test(nn.Module):
    """Toy network that only exists to hand parameters to the LambdaLR demo."""

    def __init__(self):
        super().__init__()
        # Conv first, linear second: the Sequential indices (0, 1) stay as before.
        conv = nn.Conv2d(64, 10, kernel_size=3)
        head = nn.Linear(10, 3)
        self.model = nn.Sequential(conv, head)

    def forward(self, x):
        """Feed ``x`` through the conv layer followed by the linear layer."""
        out = self.model(x)
        return out

# Demo: log how LambdaLR changes the learning rate over a few iterations.
writer = SummaryWriter(log_dir='LR_Logs')

initial_lr = 1e-2
model_Lambda = Lambdalr_test()
# Two optimizers are built from the same model's parameters so that two
# different lr_lambda schedules can be compared side by side.
optimizer_Stable_Lambda = torch.optim.SGD(params=model_Lambda.parameters(), lr=initial_lr)
optimizer_Flexible_Lambda = torch.optim.SGD(params=model_Lambda.parameters(), lr=initial_lr)
# "Flexible": the multiplicative factor follows 1/(epoch+1).
scheduler_Flexible_Lambda = LambdaLR(optimizer=optimizer_Flexible_Lambda, lr_lambda=lambda epoch: 1/(epoch+1))
# "Stable": the factor shrinks by a fixed ratio of 0.9 per scheduler step.
scheduler_Stable_Lambda = LambdaLR(optimizer=optimizer_Stable_Lambda, lr_lambda=lambda epoch: 0.9**epoch)

epoch = 10

print(f'初始化的learning_rate为{initial_lr}')
for i in range(epoch):
    # train
    # (this demo runs no forward/backward pass; only the optimizer and scheduler calls are shown)

    # Optimize
    optimizer_Stable_Lambda.zero_grad()
    optimizer_Stable_Lambda.step()
    optimizer_Flexible_Lambda.zero_grad()
    optimizer_Flexible_Lambda.step()

    # Read the learning rates in effect for this iteration, before the schedulers advance.
    stable_lr = optimizer_Stable_Lambda.param_groups[0]["lr"]
    flexible_lr = optimizer_Flexible_Lambda.param_groups[0]["lr"]
    print(f'Stable情况下：第{i+1}次训练的学习率为{stable_lr}')
    print(f'Flexible情况下：第{i+1}次训练的学习率为{flexible_lr}')
    writer.add_scalar(tag='Lambda_Stable_LR', scalar_value=stable_lr, global_step=i)
    writer.add_scalar(tag='Lambda_Flexible_LR', scalar_value=flexible_lr, global_step=i)

    # Schedulers are stepped after the optimizers.
    scheduler_Flexible_Lambda.step()
    scheduler_Stable_Lambda.step()

# Flush buffered events and release the event file once logging is finished.
writer.close()

初始化的learning_rate为0.01
Stable情况下：第1次训练的学习率为0.01
Flexible情况下：第1次训练的学习率为0.01
Stable情况下：第2次训练的学习率为0.009000000000000001
Flexible情况下：第2次训练的学习率为0.005
Stable情况下：第3次训练的学习率为0.008100000000000001
Flexible情况下：第3次训练的学习率为0.003333333333333333
Stable情况下：第4次训练的学习率为0.007290000000000001
Flexible情况下：第4次训练的学习率为0.0025
Stable情况下：第5次训练的学习率为0.006561
Flexible情况下：第5次训练的学习率为0.002
Stable情况下：第6次训练的学习率为0.005904900000000001
Flexible情况下：第6次训练的学习率为0.0016666666666666666
Stable情况下：第7次训练的学习率为0.00531441
Flexible情况下：第7次训练的学习率为0.0014285714285714286
Stable情况下：第8次训练的学习率为0.004782969000000001
Flexible情况下：第8次训练的学习率为0.00125
Stable情况下：第9次训练的学习率为0.004304672100000001
Flexible情况下：第9次训练的学习率为0.0011111111111111111
Stable情况下：第10次训练的学习率为0.003874204890000001
Flexible情况下：第10次训练的学习率为0.001


![](img_1.png)

# StepLR
每经过 step_size 个 epoch，将学习率乘以 gamma（即按固定步长衰减学习率）。

In [40]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import StepLR
import torch
from torch import nn

class Steplr_test(nn.Module):
    """Toy network that only exists to hand parameters to the StepLR demo."""

    def __init__(self):
        super().__init__()
        # Conv first, linear second: the Sequential indices (0, 1) stay as before.
        conv = nn.Conv2d(64, 10, kernel_size=3)
        head = nn.Linear(10, 3)
        self.model = nn.Sequential(conv, head)

    def forward(self, x):
        """Feed ``x`` through the conv layer followed by the linear layer."""
        out = self.model(x)
        return out

# Demo: log how StepLR changes the learning rate over a few iterations.
writer = SummaryWriter(log_dir='LR_Logs')

initial_lr = 1e-2
model_Step = Steplr_test()
optimizer_Step = torch.optim.SGD(params=model_Step.parameters(), lr=initial_lr)
# The learning rate is scaled by gamma=0.1 once every step_size=5 scheduler steps.
scheduler_Step = StepLR(optimizer=optimizer_Step, step_size=5, gamma=0.1)

epoch = 20

print(f'初始化的learning_rate为{initial_lr}')
for i in range(epoch):
    # train
    # (this demo runs no forward/backward pass; only the optimizer and scheduler calls are shown)

    # Optimize
    optimizer_Step.zero_grad()
    optimizer_Step.step()

    # Read the learning rate in effect for this iteration, before the scheduler advances.
    current_lr = optimizer_Step.param_groups[0]["lr"]
    print(f'Step情况下：第{i+1}次训练的学习率为{current_lr}')
    writer.add_scalar(tag='Step_LR', scalar_value=current_lr, global_step=i)

    # The scheduler is stepped after the optimizer.
    scheduler_Step.step()

# Flush buffered events and release the event file once logging is finished.
writer.close()

初始化的learning_rate为0.01
Step情况下：第1次训练的学习率为0.01
Step情况下：第2次训练的学习率为0.01
Step情况下：第3次训练的学习率为0.01
Step情况下：第4次训练的学习率为0.01
Step情况下：第5次训练的学习率为0.01
Step情况下：第6次训练的学习率为0.001
Step情况下：第7次训练的学习率为0.001
Step情况下：第8次训练的学习率为0.001
Step情况下：第9次训练的学习率为0.001
Step情况下：第10次训练的学习率为0.001
Step情况下：第11次训练的学习率为0.0001
Step情况下：第12次训练的学习率为0.0001
Step情况下：第13次训练的学习率为0.0001
Step情况下：第14次训练的学习率为0.0001
Step情况下：第15次训练的学习率为0.0001
Step情况下：第16次训练的学习率为1e-05
Step情况下：第17次训练的学习率为1e-05
Step情况下：第18次训练的学习率为1e-05
Step情况下：第19次训练的学习率为1e-05
Step情况下：第20次训练的学习率为1e-05


![](img.png)

# MultiStep
当 epoch 到达 milestones 中指定的位置时，将学习率乘以 gamma。

In [41]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import MultiStepLR
import torch
from torch import nn

class MultiSteplr_test(nn.Module):
    """Toy network that only exists to hand parameters to the MultiStepLR demo."""

    def __init__(self):
        super().__init__()
        # Conv first, linear second: the Sequential indices (0, 1) stay as before.
        conv = nn.Conv2d(64, 10, kernel_size=3)
        head = nn.Linear(10, 3)
        self.model = nn.Sequential(conv, head)

    def forward(self, x):
        """Feed ``x`` through the conv layer followed by the linear layer."""
        out = self.model(x)
        return out

# Demo: log how MultiStepLR changes the learning rate over a few iterations.
writer = SummaryWriter(log_dir='LR_Logs')
epoch = 20

initial_lr = 1e-2
model_MultiStep = MultiSteplr_test()
optimizer_MultiStep = torch.optim.SGD(params=model_MultiStep.parameters(), lr=initial_lr)
# The learning rate is scaled by gamma=0.05 when the scheduler reaches steps 2, 7 and 15.
scheduler_MultiStep = MultiStepLR(optimizer=optimizer_MultiStep, milestones=[2,7,15], gamma=0.05)

print(f'初始化的learning_rate为{initial_lr}')
for i in range(epoch):
    # train
    # (this demo runs no forward/backward pass; only the optimizer and scheduler calls are shown)

    # Optimize
    optimizer_MultiStep.zero_grad()
    optimizer_MultiStep.step()

    # Read the learning rate in effect for this iteration, before the scheduler advances.
    current_lr = optimizer_MultiStep.param_groups[0]["lr"]
    print(f'MultiStep情况下：第{i+1}次训练的学习率为{current_lr}')
    writer.add_scalar(tag='MultiStep_LR', scalar_value=current_lr, global_step=i)

    # The scheduler is stepped after the optimizer.
    scheduler_MultiStep.step()

# Flush buffered events and release the event file once logging is finished.
writer.close()

初始化的learning_rate为0.01
MultiStep情况下：第1次训练的学习率为0.01
MultiStep情况下：第2次训练的学习率为0.01
MultiStep情况下：第3次训练的学习率为0.0005
MultiStep情况下：第4次训练的学习率为0.0005
MultiStep情况下：第5次训练的学习率为0.0005
MultiStep情况下：第6次训练的学习率为0.0005
MultiStep情况下：第7次训练的学习率为0.0005
MultiStep情况下：第8次训练的学习率为2.5e-05
MultiStep情况下：第9次训练的学习率为2.5e-05
MultiStep情况下：第10次训练的学习率为2.5e-05
MultiStep情况下：第11次训练的学习率为2.5e-05
MultiStep情况下：第12次训练的学习率为2.5e-05
MultiStep情况下：第13次训练的学习率为2.5e-05
MultiStep情况下：第14次训练的学习率为2.5e-05
MultiStep情况下：第15次训练的学习率为2.5e-05
MultiStep情况下：第16次训练的学习率为1.25e-06
MultiStep情况下：第17次训练的学习率为1.25e-06
MultiStep情况下：第18次训练的学习率为1.25e-06
MultiStep情况下：第19次训练的学习率为1.25e-06
MultiStep情况下：第20次训练的学习率为1.25e-06


![](img_2.png)

# CosineAnnealingLR
T_max (int)：余弦函数半个周期对应的 epoch 数，即学习率从初始值下降到 eta_min 所需的步数。

eta_min (float)：最小的学习率，默认值为0。

In [43]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch
from torch import nn

class CosinesAnnealinglr_test(nn.Module):
    """Toy network that only exists to hand parameters to the CosineAnnealingLR demo."""

    def __init__(self):
        super().__init__()
        # Conv first, linear second: the Sequential indices (0, 1) stay as before.
        conv = nn.Conv2d(64, 10, kernel_size=3)
        head = nn.Linear(10, 3)
        self.model = nn.Sequential(conv, head)

    def forward(self, x):
        """Feed ``x`` through the conv layer followed by the linear layer."""
        out = self.model(x)
        return out

# Demo: log how CosineAnnealingLR changes the learning rate over a few iterations.
writer = SummaryWriter(log_dir='LR_Logs')
epoch = 20

initial_lr = 1e-1
model_Cosine = CosinesAnnealinglr_test()
optimizer_Cosine = torch.optim.SGD(params=model_Cosine.parameters(), lr=initial_lr)
# T_max=4 is the half-period of the cosine curve; eta_min=0 is the lowest learning rate.
scheduler_Cosine = CosineAnnealingLR(optimizer=optimizer_Cosine, T_max=4, eta_min=0)

print(f'初始化的learning_rate为{initial_lr}')
for i in range(epoch):
    # train
    # (this demo runs no forward/backward pass; only the optimizer and scheduler calls are shown)

    # Optimize
    optimizer_Cosine.zero_grad()
    optimizer_Cosine.step()

    # Read the learning rate in effect for this iteration, before the scheduler advances.
    current_lr = optimizer_Cosine.param_groups[0]["lr"]
    print(f'CosineAnnealing情况下：第{i+1}次训练的学习率为{current_lr}')
    writer.add_scalar(tag='CosineAnnealing_LR', scalar_value=current_lr, global_step=i)

    # The scheduler is stepped after the optimizer.
    scheduler_Cosine.step()

# Flush buffered events and release the event file once logging is finished.
writer.close()

初始化的learning_rate为0.1
CosineAnnealing情况下：第1次训练的学习率为0.1
CosineAnnealing情况下：第2次训练的学习率为0.08535533905932738
CosineAnnealing情况下：第3次训练的学习率为0.05
CosineAnnealing情况下：第4次训练的学习率为0.014644660940672627
CosineAnnealing情况下：第5次训练的学习率为0.0
CosineAnnealing情况下：第6次训练的学习率为0.014644660940672622
CosineAnnealing情况下：第7次训练的学习率为0.05000000000000001
CosineAnnealing情况下：第8次训练的学习率为0.0853553390593274
CosineAnnealing情况下：第9次训练的学习率为0.10000000000000003
CosineAnnealing情况下：第10次训练的学习率为0.0853553390593274
CosineAnnealing情况下：第11次训练的学习率为0.05000000000000003
CosineAnnealing情况下：第12次训练的学习率为0.014644660940672672
CosineAnnealing情况下：第13次训练的学习率为0.0
CosineAnnealing情况下：第14次训练的学习率为0.014644660940672622
CosineAnnealing情况下：第15次训练的学习率为0.04999999999999992
CosineAnnealing情况下：第16次训练的学习率为0.08535533905932723
CosineAnnealing情况下：第17次训练的学习率为0.09999999999999988
CosineAnnealing情况下：第18次训练的学习率为0.08535533905932725
CosineAnnealing情况下：第19次训练的学习率为0.04999999999999996
CosineAnnealing情况下：第20次训练的学习率为0.01464466094067266


![](img_3.png)

# ReduceLROnPlateau
mode (str) – One of min, max. In min mode, lr will be reduced when the quantity monitored has stopped decreasing; in max mode it will be reduced when the quantity monitored has stopped increasing. Default: ‘min’.

factor (float) – Factor by which the learning rate will be reduced. new_lr = lr * factor. Default: 0.1.

patience (int) – Number of epochs with no improvement after which learning rate will be reduced. For example, if patience = 2, then we will ignore the first 2 epochs with no improvement, and will only decrease the LR after the 3rd epoch if the loss still hasn’t improved then. Default: 10.

threshold (float) – Threshold for measuring the new optimum, to only focus on significant changes. Default: 1e-4.

threshold_mode (str) – One of rel, abs. In rel mode, dynamic_threshold = best * ( 1 + threshold ) in ‘max’ mode or best * ( 1 - threshold ) in min mode. In abs mode, dynamic_threshold = best + threshold in max mode or best - threshold in min mode. Default: ‘rel’.

cooldown (int) – Number of epochs to wait before resuming normal operation after lr has been reduced. Default: 0.

min_lr (float or list) – A scalar or a list of scalars. A lower bound on the learning rate of all param groups or each group respectively. Default: 0.

eps (float) – Minimal decay applied to lr. If the difference between new and old lr is smaller than eps, the update is ignored. Default: 1e-8.

verbose (bool) – If True, prints a message to stdout for each update. Default: False.

In [52]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
from torch import nn

class ReduceLROnPlateaulr_test(nn.Module):
    """Toy network that only exists to hand parameters to the ReduceLROnPlateau demo."""

    def __init__(self):
        super().__init__()
        # Conv first, linear second: the Sequential indices (0, 1) stay as before.
        conv = nn.Conv2d(64, 10, kernel_size=3)
        head = nn.Linear(10, 3)
        self.model = nn.Sequential(conv, head)

    def forward(self, x):
        """Feed ``x`` through the conv layer followed by the linear layer."""
        out = self.model(x)
        return out

# Demo: log how ReduceLROnPlateau changes the learning rate when the metric never improves.
writer = SummaryWriter(log_dir='LR_Logs')
epoch = 20

initial_lr = 1e-3
model_Reduce = ReduceLROnPlateaulr_test()
optimizer_ReduceLROnPlateau = torch.optim.SGD(params=model_Reduce.parameters(), lr=initial_lr)
# `verbose` is no longer passed: the original call used verbose=False, and the
# argument is deprecated in recent PyTorch releases.
schedular_ReduceLROnPlateau = ReduceLROnPlateau(optimizer=optimizer_ReduceLROnPlateau, mode='min', factor=0.05, patience=2)

print(f'初始化的learning_rate为{initial_lr}')
for i in range(epoch):
    # train
    # (this demo runs no forward/backward pass; only the optimizer and scheduler calls are shown)

    # Optimize
    optimizer_ReduceLROnPlateau.zero_grad()
    optimizer_ReduceLROnPlateau.step()

    # Read the learning rate in effect for this iteration, before the scheduler advances.
    current_lr = optimizer_ReduceLROnPlateau.param_groups[0]["lr"]
    print(f'ReduceLROnPlateau情况下：第{i+1}次训练的学习率为{current_lr}')
    writer.add_scalar(tag='ReduceLROnPlateau_LR', scalar_value=current_lr, global_step=i)

    # The monitored metric is the constant 2 on every call, so in 'min' mode it
    # never decreases after the first step.
    schedular_ReduceLROnPlateau.step(2)

# Flush buffered events and release the event file once logging is finished.
writer.close()

初始化的learning_rate为0.001
ReduceLROnPlateau情况下：第1次训练的学习率为0.001
ReduceLROnPlateau情况下：第2次训练的学习率为0.001
ReduceLROnPlateau情况下：第3次训练的学习率为0.001
ReduceLROnPlateau情况下：第4次训练的学习率为0.001
ReduceLROnPlateau情况下：第5次训练的学习率为5e-05
ReduceLROnPlateau情况下：第6次训练的学习率为5e-05
ReduceLROnPlateau情况下：第7次训练的学习率为5e-05
ReduceLROnPlateau情况下：第8次训练的学习率为2.5e-06
ReduceLROnPlateau情况下：第9次训练的学习率为2.5e-06
ReduceLROnPlateau情况下：第10次训练的学习率为2.5e-06
ReduceLROnPlateau情况下：第11次训练的学习率为1.2500000000000002e-07
ReduceLROnPlateau情况下：第12次训练的学习率为1.2500000000000002e-07
ReduceLROnPlateau情况下：第13次训练的学习率为1.2500000000000002e-07
ReduceLROnPlateau情况下：第14次训练的学习率为6.250000000000001e-09
ReduceLROnPlateau情况下：第15次训练的学习率为6.250000000000001e-09
ReduceLROnPlateau情况下：第16次训练的学习率为6.250000000000001e-09
ReduceLROnPlateau情况下：第17次训练的学习率为6.250000000000001e-09
ReduceLROnPlateau情况下：第18次训练的学习率为6.250000000000001e-09
ReduceLROnPlateau情况下：第19次训练的学习率为6.250000000000001e-09
ReduceLROnPlateau情况下：第20次训练的学习率为6.250000000000001e-09
