In [2]:
import torch.nn as nn
class Net(nn.Module):
    """LeNet-style convolutional network with 10 outputs.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. ``fc1`` takes 16 * 5 * 5 flattened
    features, which is what a single-channel 32x32 input produces
    (32 -> 28 -> 14 -> 10 -> 5 along each spatial axis).
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        # The modules below are registered but not used by forward().
        # They are kept so that print(net) and the set of tensors returned by
        # net.parameters() (BatchNorm2d adds a weight and a bias) stay unchanged.
        self.bn = nn.BatchNorm2d(10)
        self.relu = nn.ReLU()
        self.mse_loss = nn.MSELoss()
        self.bceloss = nn.BCEWithLogitsLoss()

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: input batch; must flatten to 16 * 5 * 5 features per sample
                after the two conv/pool stages (e.g. shape (N, 1, 32, 32)).

        Returns:
            Tensor of shape (N, 10) holding the raw outputs of ``fc3``.
        """
        # `nn.functional` is reachable through the existing `nn` import, so no
        # separate `F` alias is required.
        # Max pooling over a (2, 2) window
        x = nn.functional.max_pool2d(nn.functional.relu(self.conv1(x)), (2, 2))

        # 2 is the same as (2, 2)
        x = nn.functional.max_pool2d(nn.functional.relu(self.conv2(x)), 2)

        # Flatten everything except the batch dimension before the linear layers.
        x = x.view(-1, self.num_flat_features(x))
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)

        return x

    def num_flat_features(self, x):
        """Return the number of features per sample in ``x``.

        This is the product of every dimension of ``x`` except the first
        (batch) dimension.
        """
        num_features = 1
        for dim in x.size()[1:]:
            num_features *= dim
        return num_features

In [3]:
# Build the network and print its registered submodules.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (bn): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (mse_loss): MSELoss()
  (bceloss): BCEWithLogitsLoss()
)


In [4]:
# Print each parameter tensor yielded by net.parameters().
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[[[-0.1068, -0.1129, -0.0164,  0.0594, -0.0371],
          [ 0.1771, -0.1885, -0.0961,  0.0063,  0.0142],
          [-0.1533, -0.1441,  0.1614, -0.1027, -0.0843],
          [ 0.1662,  0.0600, -0.1739, -0.1625, -0.1382],
          [ 0.0792, -0.0768, -0.1160, -0.1926,  0.0978]]],


        [[[ 0.0057, -0.1945,  0.0284, -0.1127,  0.1587],
          [-0.1684, -0.0416,  0.0365,  0.1508, -0.1381],
          [ 0.0147,  0.1285, -0.0203,  0.0510, -0.1244],
          [-0.1640, -0.0317, -0.0528, -0.0876,  0.0027],
          [ 0.1126, -0.1384,  0.0387, -0.1783,  0.1169]]],


        [[[-0.0330,  0.1457,  0.0418, -0.0174,  0.0497],
          [-0.1388,  0.1647, -0.0095, -0.0458,  0.0877],
          [ 0.1623,  0.1727, -0.1223,  0.0992,  0.1340],
          [-0.0903, -0.0993, -0.1706,  0.0514,  0.0376],
          [ 0.0788, -0.1183, -0.0043,  0.0442,  0.1596]]],


        [[[ 0.1299,  0.0656,  0.1928,  0.0872, -0.1651],
          [-0.1216,  0.0123,  0.0464, -0.1242,  0.0437

Parameter containing:
tensor([[ 0.0761,  0.0500, -0.0637, -0.0386,  0.0275,  0.0302,  0.0670, -0.0197,
          0.0816, -0.0208, -0.0926, -0.0608, -0.0472, -0.0099, -0.0304,  0.0852,
          0.1032,  0.1063,  0.0445,  0.0531,  0.1073,  0.0281, -0.1085, -0.0110,
         -0.0071, -0.0135,  0.0495, -0.0523, -0.0860,  0.0816, -0.0521, -0.0958,
         -0.0209, -0.0618,  0.0830,  0.0714, -0.0285, -0.0331, -0.0828,  0.0495,
          0.0119,  0.0565,  0.0867, -0.0055, -0.0737, -0.0651,  0.0697,  0.0674,
          0.0698, -0.0153, -0.0342, -0.0955, -0.1005,  0.1016,  0.0082,  0.0394,
         -0.0250, -0.0097, -0.0695, -0.0471, -0.0799,  0.0409,  0.0537,  0.0229,
         -0.0253, -0.0571, -0.0492, -0.0793,  0.1058,  0.0922, -0.0216,  0.0822,
          0.0840,  0.0843,  0.0585, -0.0547,  0.0596, -0.0353, -0.0681,  0.0766,
         -0.0541,  0.0526, -0.0773, -0.0422],
        [ 0.0075, -0.0412, -0.0910, -0.0965,  0.0953,  0.0209, -0.0828, -0.0133,
         -0.0068,  0.0223,  0.0107, -0.04

In [5]:
# parameters() returns a generator, so printing it shows only the generator
# object rather than the tensors; iterate over it to see the values.
print(net.parameters())

<generator object Module.parameters at 0x7f2f54689678>


In [6]:
import torch
# Create an SGD optimizer over all parameters of the network, then dump its
# state dict to inspect the hyperparameters stored in the single param group.
optimizer = torch.optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.1,
    weight_decay=0.9,
)
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.001, 'momentum': 0.1, 'dampening': 0, 'weight_decay': 0.9, 'nesterov': False, 'params': [139841255819016, 139841255820096, 139841255817720, 139841255819736, 139841255819808, 139841255819952, 139841255819664, 139841255820240, 139841255820744, 139841255820816, 139841255819160, 139841255821248]}]}


`params` 内的每一个数字是对应参数（weight、bias）张量对象的 `id()`；在 CPython 中，这个值就是该对象的内存地址。

In [7]:
# Register a second parameter group holding the weight and bias of a freshly
# created Conv2d layer (this layer is not part of `net`). Only 'params' is
# given, so the remaining options come from the optimizer's defaults.
optimizer.add_param_group(
    {'params': nn.Conv2d(2, 6, 5).parameters()}
)

In [8]:
# Show the optimizer state again; it now lists a second entry in 'param_groups'.
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.001, 'momentum': 0.1, 'dampening': 0, 'weight_decay': 0.9, 'nesterov': False, 'params': [139841255819016, 139841255820096, 139841255817720, 139841255819736, 139841255819808, 139841255819952, 139841255819664, 139841255820240, 139841255820744, 139841255820816, 139841255819160, 139841255821248]}, {'lr': 0.001, 'momentum': 0.1, 'dampening': 0, 'weight_decay': 0.9, 'nesterov': False, 'params': [139841255818080, 139841255820600]}]}


In [9]:
# For every parameter group, show the option keys it carries and, on the
# following line, its current learning rate.
for param_group in optimizer.param_groups:
    print(param_group.keys(), param_group['lr'], sep='\n')

dict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])
0.001
dict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])
0.001


In [10]:
# Override the learning rate of every parameter group in place; each group is
# a dict, so the optimizer picks up the new value on its next step.
for param_group in optimizer.param_groups:
    param_group['lr'] = 1e-5

In [12]:
# Re-inspect every parameter group to confirm the learning-rate change:
# option keys first, then the learning rate on its own line.
for param_group in optimizer.param_groups:
    print(param_group.keys(), param_group['lr'], sep='\n')

dict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])
1e-05
dict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])
1e-05
