## 模型构建

**模型构建两要素**

- 样例
    
        class Net(nn.Module):
            ##初始化构造模型
            def __init__(self, classes):
                super(Net, self).__init__()
                self.conv1 = nn.Conv2d(3, 6, 5)
                ....
            
            ##前向运算
            def forward(self, x):
                out = F.relu(self.conv1(x))
                out = F.max_pool2d(out, 2)
                out = F.relu(self.conv2(out))
                
                .....
                
                return out
            
            ##初始化模型
            def initialize_weights(self):
                for m in self.modules():
                    if isinstance(m, nn.Conv2d):
                        nn.init.xavier_normal_(m.weight.data)
                        if m.bias is not None:
                            m.bias.data.zero_()
                    ...




- 构建子模块：

       def __init__()函数

- 拼接子模块

       def forward()函数

**pytorch的神经网络模块**

torch.nn模块

- nn.Parameter

    张量子类，表示可学习参数

- nn.Module

    所有网络层基类

- nn.functional

    函数具体实现，如卷积，池化，激活函数

- nn.init

    基本初始化方法


**nn.Module**

- 一个module可以包含多个module

- 一个module相当于运算，必须实现forward()函数

- 每个module都有8个有序字典（OrderedDict）来管理它的属性


![](./img/module.png)

In [4]:
'''
模型创建实例
'''
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers.

    The first linear layer takes 16 * 5 * 5 flattened features, which is what
    the conv/pool stack produces for 3-channel 32x32 inputs.

    Args:
        classes: Number of output units of the final linear layer.
    """

    def __init__(self, classes):
        super().__init__()
        # Layers are declared in the order in which forward() applies them.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=classes)

    def forward(self, x):
        """Return the raw output of the last linear layer for the batch ``x``."""
        feat = F.max_pool2d(F.relu(self.conv1(x)), 2)
        feat = F.max_pool2d(F.relu(self.conv2(feat)), 2)
        # Collapse everything except the batch dimension for the linear head.
        flat = feat.view(feat.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module in place.

        Conv2d weights use Xavier-normal init, Linear weights are drawn from
        N(0, 0.1), BatchNorm2d weights are set to 1; biases are zeroed.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.xavier_normal_(layer.weight.data)
                # Conv layers may be built without a bias term.
                if layer.bias is None:
                    continue
                layer.bias.data.zero_()
            elif isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight.data, mean=0, std=0.1)
                layer.bias.data.zero_()

## 模型容器

**Containers**

- nn.Sequential

    按顺序包装多个网络层
    
- nn.ModuleList

    像python的list一样包装多个网络层，可以迭代

- nn.ModuleDict

    像python的字典一样包装网络层，可以通过命名索引

In [2]:
'''
使用nn.Sequential构建模型：各层按顺序执行，自带forward()，无需逐层手写前向传播

未命名时只能通过整数索引获取每个层
'''


class LeNet2(nn.Module):
    """LeNet variant assembled from two ``nn.Sequential`` containers.

    ``features`` holds the conv/ReLU/pool stack and ``classifier`` the linear
    head; layers inside each container are addressed by integer position.

    Args:
        classes: Number of output units of the final linear layer.
    """

    def __init__(self, classes):
        super().__init__()

        conv_stack = [
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        self.features = nn.Sequential(*conv_stack)

        linear_head = [
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, classes),
        ]
        self.classifier = nn.Sequential(*linear_head)

    def forward(self, x):
        """Run the feature stack, flatten per sample, then apply the head."""
        feature_maps = self.features(x)
        # Keep the batch dimension and merge all remaining dimensions.
        flattened = feature_maps.view(feature_maps.size()[0], -1)
        return self.classifier(flattened)

In [7]:
'''
通过有序字典（OrderedDict）构建网络模型，并为每一层命名
'''
import torch.nn as nn
import collections
import torch.nn.functional as F

class LeNetSequentialOrderDict(nn.Module):
    """LeNet variant whose ``nn.Sequential`` layers are registered by name.

    Each container is built from an ``OrderedDict`` of ``(name, layer)``
    pairs, so layers can be reached as attributes (for example
    ``model.features.conv1``) as well as by position.

    Args:
        classes: Number of output units of the final linear layer.
    """

    def __init__(self, classes):
        super(LeNetSequentialOrderDict, self).__init__()

        # Pass explicit (name, layer) pairs instead of a dict literal so the
        # layer order never depends on how a plain dict orders its keys.
        self.features = nn.Sequential(
            collections.OrderedDict([
                ('conv1', nn.Conv2d(3, 6, 5)),
                ('relu1', nn.ReLU(inplace=True)),
                ('pool1', nn.MaxPool2d(kernel_size=2, stride=2)),
                ('conv2', nn.Conv2d(6, 16, 5)),
                ('relu2', nn.ReLU(inplace=True)),
                ('pool2', nn.MaxPool2d(kernel_size=2, stride=2)),
            ]))

        self.classifier = nn.Sequential(
            collections.OrderedDict([
                ('fc1', nn.Linear(16 * 5 * 5, 120)),
                ('relu3', nn.ReLU()),
                ('fc2', nn.Linear(120, 84)),
                ('relu4', nn.ReLU(inplace=True)),
                ('fc3', nn.Linear(84, classes)),
            ]))

    def forward(self, x):
        """Run the feature stack, flatten per sample, then apply the head."""
        x = self.features(x)
        # Keep the batch dimension and merge all remaining dimensions.
        x = x.view(x.size()[0], -1)
        x = self.classifier(x)
        return x

In [8]:
# Build the named-layer LeNet with classes=3.
lenetDict = LeNetSequentialOrderDict(3)

In [9]:
# Echo the model so the notebook displays its layer-by-layer repr.
lenetDict

LeNetSequentialOrderDict(
  (features): Sequential(
    (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (relu1): ReLU(inplace=True)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (relu2): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (fc1): Linear(in_features=400, out_features=120, bias=True)
    (relu3): ReLU()
    (fc2): Linear(in_features=120, out_features=84, bias=True)
    (relu4): ReLU(inplace=True)
    (fc3): Linear(in_features=84, out_features=3, bias=True)
  )
)

In [10]:
# Access a layer through the name it was registered under.
lenetDict.features.conv1

Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))

    nn.ModuleList以迭代方式调用网络层

- append()

    在ModuleList后面添加网络层

- extend()

    拼接两个ModuleList

- insert()

    指定ModuleList的某个位置插入网络层

In [11]:
class ModuleList(nn.Module):
    """Demo module that chains 20 ``nn.Linear(10, 10)`` layers.

    The layers are stored in an ``nn.ModuleList`` under ``self.linears`` and
    applied one after another in ``forward``.
    """

    def __init__(self):
        super(ModuleList, self).__init__()
        # The loop index is not needed; only the number of layers matters.
        self.linears = nn.ModuleList([nn.Linear(10, 10) for _ in range(20)])

    def forward(self, x):
        """Feed ``x`` through every stored linear layer in order."""
        for linear in self.linears:
            x = linear(x)
        return x

In [12]:
# Instantiate the demo module defined above (the local class, not nn.ModuleList).
model=ModuleList()

In [13]:
# Echo the model so the notebook displays its repr.
model

ModuleList(
  (linears): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Linear(in_features=10, out_features=10, bias=True)
    (4): Linear(in_features=10, out_features=10, bias=True)
    (5): Linear(in_features=10, out_features=10, bias=True)
    (6): Linear(in_features=10, out_features=10, bias=True)
    (7): Linear(in_features=10, out_features=10, bias=True)
    (8): Linear(in_features=10, out_features=10, bias=True)
    (9): Linear(in_features=10, out_features=10, bias=True)
    (10): Linear(in_features=10, out_features=10, bias=True)
    (11): Linear(in_features=10, out_features=10, bias=True)
    (12): Linear(in_features=10, out_features=10, bias=True)
    (13): Linear(in_features=10, out_features=10, bias=True)
    (14): Linear(in_features=10, out_features=10, bias=True)
    (15): Linear(in_features=10, out_features=10, bias=Tru

In [2]:
import torch
import torchvision

    nn.ModuleDict可以通过索引方式构建网络层
    
- clear()

    清空nn.ModuleDict

- items()

    返回键值对

- keys()

    返回字典的键
    
- values()

    返回字典的值
    
- pop()

    删除指定的键，并返回该键对应的网络层

In [5]:
class ModuleDict(nn.Module):
    """Demo module that picks its layer and activation by string key.

    ``choices`` maps ``'conv'`` / ``'pool'`` to a layer and ``activations``
    maps ``'relu'`` / ``'prelu'`` to an activation; both are ``nn.ModuleDict``
    containers.
    """

    def __init__(self):
        super().__init__()

        layer_options = {
            'conv': nn.Conv2d(10, 10, 3),
            'pool': nn.MaxPool2d(3)
        }
        self.choices = nn.ModuleDict(layer_options)

        activation_options = {
            'relu': nn.ReLU(),
            'prelu': nn.PReLU()
        }
        self.activations = nn.ModuleDict(activation_options)

    def forward(self, x, choice, act):
        """Apply the layer named ``choice`` and then the activation named ``act``."""
        selected_layer = self.choices[choice]
        selected_activation = self.activations[act]
        return selected_activation(selected_layer(x))


# Instantiate the demo module defined above (the local class, not nn.ModuleDict).
net = ModuleDict()

# Random input of shape (4, 10, 32, 32); the 10 channels match Conv2d(10, 10, 3).
fake_img = torch.randn((4, 10, 32, 32))

# Select the 'conv' layer and the 'relu' activation by key.
output = net(fake_img, 'conv', 'relu')

print(output)

tensor([[[[0.0000, 0.0000, 0.0000,  ..., 1.4552, 0.0000, 0.2201],
          [0.2793, 0.0000, 0.0000,  ..., 0.0000, 0.5210, 0.0000],
          [0.0000, 0.6374, 0.0000,  ..., 0.0000, 0.2804, 0.0000],
          ...,
          [0.0000, 0.2204, 0.6717,  ..., 0.3833, 0.6086, 0.0000],
          [0.0000, 0.1939, 0.0000,  ..., 1.3796, 0.0000, 0.0000],
          [0.0000, 0.7306, 0.0529,  ..., 0.6645, 0.3103, 0.0000]],

         [[0.4940, 0.5611, 0.2969,  ..., 0.0690, 0.2809, 0.7135],
          [0.0000, 0.0000, 0.4298,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0830,  ..., 0.3249, 0.0000, 0.0000],
          ...,
          [0.0195, 0.0602, 0.3718,  ..., 0.0000, 0.5859, 0.1944],
          [0.5237, 0.1533, 0.0727,  ..., 0.0000, 0.5815, 0.0000],
          [0.2429, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0597, 0.5197, 0.0000,  ..., 0.0000, 0.4431, 0.0000],
          [0.0000, 0.3850, 0.0528,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.2043,  ..., 0

In [9]:
'''
采用pytorch内部模型AlexNet
'''
# Instantiate torchvision's AlexNet class directly with its default arguments.
# NOTE(review): presumably this yields randomly initialised weights, unlike
# models.alexnet(pretrained=True) — confirm against the torchvision docs.
AlexNet=torchvision.models.AlexNet()

### 模型属性

In [1]:
# Load torchvision's AlexNet via its factory function, requesting pretrained weights.
# NOTE(review): newer torchvision releases appear to deprecate `pretrained=`
# in favour of `weights=` — confirm against the installed version.
from torchvision import models
alexnet = models.alexnet(pretrained=True)

In [2]:
# The method is referenced, not called, so the notebook shows the bound
# method's repr (which embeds the model repr). Call alexnet.children() to
# actually obtain the sub-modules.
alexnet.children

<bound method Module.children of AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216

In [3]:
# Inspect a sub-module through its attribute name.
alexnet.classifier

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [4]:
# Inspect the `features` Sequential container.
alexnet.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace=True)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [19]:
alexnet.features[0]  # index into the Sequential by position

Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))

In [8]:
# The method is referenced, not called, so the notebook shows the bound
# method's repr. Call alexnet.named_parameters() to iterate (name, parameter)
# pairs, as the loop further below does.
alexnet.named_parameters

<bound method Module.named_parameters of AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_featu

In [21]:
# Print every parameter tensor of the model (values only, no names).
for param in alexnet.parameters():
    print(param)

Parameter containing:
tensor([[[[ 1.1864e-01,  9.4069e-02,  9.5435e-02,  ...,  5.5822e-02,
            2.1575e-02,  4.9963e-02],
          [ 7.4882e-02,  3.8940e-02,  5.2979e-02,  ...,  2.5709e-02,
           -1.1299e-02,  4.1590e-03],
          [ 7.5425e-02,  3.8779e-02,  5.4930e-02,  ...,  4.3596e-02,
            1.0225e-02,  1.3251e-02],
          ...,
          [ 9.3155e-02,  1.0374e-01,  6.7547e-02,  ..., -2.0277e-01,
           -1.2839e-01, -1.1220e-01],
          [ 4.3544e-02,  6.4916e-02,  3.6164e-02,  ..., -2.0248e-01,
           -1.1376e-01, -1.0719e-01],
          [ 4.7369e-02,  6.2543e-02,  2.4758e-02,  ..., -1.1844e-01,
           -9.5567e-02, -8.3890e-02]],

         [[-7.2634e-02, -5.7996e-02, -8.0661e-02,  ..., -6.0304e-04,
           -2.5309e-02,  2.5471e-02],
          [-6.9042e-02, -6.7562e-02, -7.6367e-02,  ..., -3.9616e-03,
           -3.0402e-02,  1.0477e-02],
          [-9.9517e-02, -8.5592e-02, -1.0521e-01,  ..., -2.6587e-02,
           -2.2777e-02,  6.6451e-03]

            1.0063e-01,  9.0307e-02]]]], requires_grad=True)
Parameter containing:
tensor([-0.9705, -2.8070, -0.0371, -0.0795, -0.1159,  0.0252, -0.0752, -1.4181,
         1.6454, -0.0990, -0.0161, -0.1282, -0.0658, -0.0345, -0.0743, -1.2977,
        -0.0505,  0.0121, -0.1013, -1.1887, -0.1380, -0.0492, -0.0789, -0.0405,
        -0.0958, -0.0705, -1.9374, -0.0850, -0.1388, -0.1968, -0.1279, -2.0095,
        -0.0476, -0.0604, -0.0351, -0.3843, -2.7823,  0.6605, -0.1655, -2.1293,
         0.0543, -0.0274, -0.1703, -0.0593, -0.4215, -1.9394, -1.2094,  0.0153,
        -0.1081, -0.0248, -0.1503, -1.8516, -0.0928, -0.0177, -0.0700, -0.0582,
        -0.0630, -0.0721, -1.2678, -0.1176, -0.0441, -0.3259,  0.0507, -0.0146],
       requires_grad=True)
Parameter containing:
tensor([[[[ 3.6245e-03,  1.4335e-03,  3.7217e-02, -2.0926e-02,  1.8121e-03],
          [ 2.4126e-02, -1.2056e-02,  7.1170e-02, -8.5224e-02,  1.3067e-02],
          [ 2.0966e-02, -1.0623e-01,  2.1572e-02, -6.9547e-02,  3.1583e-0

       requires_grad=True)
Parameter containing:
tensor([-1.0928e-01, -1.7263e-01, -5.1698e-02, -6.5495e-02, -3.9439e-02,
         1.7420e-01, -1.8464e-01,  9.5525e-02, -2.5099e-02, -3.0075e-02,
         1.3654e-01, -3.2269e-01, -1.4834e-01,  1.2790e-01,  4.0678e-03,
        -5.7279e-02, -2.1440e-03,  2.0314e-01,  3.3098e-01,  7.3128e-02,
         4.9798e-01, -4.4765e-02, -8.2151e-02,  1.4895e-02, -4.6373e-02,
         5.7510e-02,  1.9020e-02,  7.4533e-04, -1.8182e-01, -1.2202e-04,
        -1.3162e-01, -1.2743e-01,  5.9956e-03, -4.2740e-01,  1.1648e-01,
        -1.6375e-01, -6.0307e-02, -2.3113e-01,  2.4188e-01, -7.8616e-02,
         2.0777e-02, -2.5044e-02, -4.4685e-02,  2.8326e-02, -1.7459e-01,
         7.1566e-02, -5.7368e-02,  1.2070e-01, -5.8133e-02, -1.7347e-01,
        -1.2782e-01,  5.3739e-02,  4.5314e-02,  9.4975e-02,  1.0401e-02,
         1.3877e-01, -1.3664e-01,  2.4293e-01,  1.4512e-01,  4.8102e-01,
        -1.1793e-01, -1.5927e-01,  1.6019e-01,  2.2559e-01,  9.6043e-02,
  

          [-4.1695e-02, -6.6255e-02, -5.7636e-02]]]], requires_grad=True)
Parameter containing:
tensor([ 2.5296e-02,  1.1945e-01, -3.3357e-01,  1.1266e-01, -1.1582e-01,
        -5.9723e-02,  1.1421e-01, -9.5083e-02,  3.1485e-02, -1.1093e-02,
        -4.9384e-02,  9.1009e-02, -5.2086e-02,  3.6598e-02, -1.2018e-01,
        -1.3416e-01, -5.0457e-03, -1.4378e-03, -7.6294e-02, -4.9133e-02,
        -1.2550e-01,  2.3369e-01, -8.1535e-02,  2.4502e-02, -1.1238e-02,
         1.3240e-01,  2.5781e-01, -2.4450e-02,  7.3169e-02,  2.0816e-01,
         5.9903e-02, -1.1075e-02,  7.0557e-02,  5.3707e-03,  1.3942e-02,
        -6.9005e-02,  9.5581e-02,  8.2127e-02, -8.9249e-02,  2.3552e-02,
         2.0885e-02,  2.0952e-01,  5.7000e-02,  1.0061e-01, -1.5167e-01,
         5.1431e-02,  6.0977e-02,  1.6120e-01,  4.6385e-02, -2.9361e-01,
        -1.9220e-01, -3.3040e-02, -2.2844e-01,  5.2254e-02, -4.1985e-02,
        -2.2856e-01, -2.2374e-01, -1.0398e-01, -3.9473e-02,  2.3086e-02,
         8.5222e-02, -3.7522

          [ 0.0003,  0.0701,  0.0068]]]], requires_grad=True)
Parameter containing:
tensor([-6.2908e-02,  1.2599e-01,  2.9910e-01,  1.1226e-01,  2.8529e-01,
         1.2800e-01,  1.8281e-01, -3.0966e-02,  5.4521e-01,  1.5652e-01,
        -1.3417e-01, -1.7108e-01,  2.7399e-01,  2.2774e-01,  2.6715e-01,
         2.6192e-01,  4.5637e-02, -1.4264e+00,  6.0950e-01,  8.2353e-02,
         9.6382e-02,  1.5679e-01, -2.9053e-01,  1.2642e-01,  2.0995e-01,
         7.2459e-02, -2.7236e-04, -2.2086e-01,  1.1173e-01,  4.8032e-01,
         3.5515e-02, -2.7491e-01, -5.0275e-01,  6.9615e-04, -1.2266e-01,
         1.4878e-01, -8.2103e-03,  1.3206e-01,  2.9920e-01, -1.6474e-01,
         3.2465e-01,  5.3489e-01,  2.4496e-01,  5.9936e-01,  3.3782e-02,
         6.6390e-02,  4.8976e-01,  4.2106e-01,  6.3466e-01, -9.4680e-02,
        -7.8501e-02,  2.5697e-01,  7.6671e-02, -2.3927e-01,  1.7104e-01,
         4.1678e-01,  2.6976e-01, -4.6004e-01, -8.4528e-02,  2.7783e-01,
         2.6750e-01,  3.3215e-01,  5.068

          [ 8.8704e-03,  1.1838e-02,  7.4983e-04]]]], requires_grad=True)
Parameter containing:
tensor([ 3.7964e-02,  4.3626e-01,  1.9919e-01,  4.7873e-01,  1.9452e-01,
         4.6094e-02,  8.9556e-02, -9.0875e-02,  1.2068e-01,  1.8953e-01,
         4.5016e-02,  4.8434e-01,  2.4634e-01, -7.8553e-02, -4.1008e-02,
        -4.5605e-01,  6.7455e-01,  4.3669e-02, -3.8591e-01,  2.3154e-01,
         1.2107e-01,  3.2227e-01,  1.1335e+00,  2.1153e-01,  4.7995e-01,
         2.8958e-01,  6.4578e-02,  2.4041e-02,  1.5042e-01,  2.8817e-01,
         6.7181e-02,  2.6940e-01,  1.0063e-02,  3.4250e-01,  1.6370e-01,
         2.0520e-01,  7.5118e-02,  1.1070e-01,  2.6571e-01,  1.2281e-01,
        -4.6485e-02,  5.1063e-01,  1.4461e-01,  4.4510e-01,  3.3639e-01,
         4.0720e-01, -2.1050e-02,  1.1052e+00,  1.2309e-01,  3.1051e-01,
         2.3018e-01,  3.6438e-01,  3.8058e-01,  1.7631e-01,  6.3093e-01,
        -1.3756e-02,  2.6251e-01,  2.0362e-01,  3.4236e-01,  1.9062e-02,
         1.5609e-01,  2.2937

       requires_grad=True)


In [25]:
# Print each parameter with its dotted name (e.g. "features.0.weight"),
# followed by a separator line.
for name,param in alexnet.named_parameters():
    print(name)
    print(param)
    print('-'*20)

features.0.weight
Parameter containing:
tensor([[[[ 1.1864e-01,  9.4069e-02,  9.5435e-02,  ...,  5.5822e-02,
            2.1575e-02,  4.9963e-02],
          [ 7.4882e-02,  3.8940e-02,  5.2979e-02,  ...,  2.5709e-02,
           -1.1299e-02,  4.1590e-03],
          [ 7.5425e-02,  3.8779e-02,  5.4930e-02,  ...,  4.3596e-02,
            1.0225e-02,  1.3251e-02],
          ...,
          [ 9.3155e-02,  1.0374e-01,  6.7547e-02,  ..., -2.0277e-01,
           -1.2839e-01, -1.1220e-01],
          [ 4.3544e-02,  6.4916e-02,  3.6164e-02,  ..., -2.0248e-01,
           -1.1376e-01, -1.0719e-01],
          [ 4.7369e-02,  6.2543e-02,  2.4758e-02,  ..., -1.1844e-01,
           -9.5567e-02, -8.3890e-02]],

         [[-7.2634e-02, -5.7996e-02, -8.0661e-02,  ..., -6.0304e-04,
           -2.5309e-02,  2.5471e-02],
          [-6.9042e-02, -6.7562e-02, -7.6367e-02,  ..., -3.9616e-03,
           -3.0402e-02,  1.0477e-02],
          [-9.9517e-02, -8.5592e-02, -1.0521e-01,  ..., -2.6587e-02,
           -2.2777

            1.0063e-01,  9.0307e-02]]]], requires_grad=True)
--------------------
features.0.bias
Parameter containing:
tensor([-0.9705, -2.8070, -0.0371, -0.0795, -0.1159,  0.0252, -0.0752, -1.4181,
         1.6454, -0.0990, -0.0161, -0.1282, -0.0658, -0.0345, -0.0743, -1.2977,
        -0.0505,  0.0121, -0.1013, -1.1887, -0.1380, -0.0492, -0.0789, -0.0405,
        -0.0958, -0.0705, -1.9374, -0.0850, -0.1388, -0.1968, -0.1279, -2.0095,
        -0.0476, -0.0604, -0.0351, -0.3843, -2.7823,  0.6605, -0.1655, -2.1293,
         0.0543, -0.0274, -0.1703, -0.0593, -0.4215, -1.9394, -1.2094,  0.0153,
        -0.1081, -0.0248, -0.1503, -1.8516, -0.0928, -0.0177, -0.0700, -0.0582,
        -0.0630, -0.0721, -1.2678, -0.1176, -0.0441, -0.3259,  0.0507, -0.0146],
       requires_grad=True)
--------------------
features.3.weight
Parameter containing:
tensor([[[[ 3.6245e-03,  1.4335e-03,  3.7217e-02, -2.0926e-02,  1.8121e-03],
          [ 2.4126e-02, -1.2056e-02,  7.1170e-02, -8.5224e-02,  1.3067e-02

       requires_grad=True)
--------------------
features.3.bias
Parameter containing:
tensor([-1.0928e-01, -1.7263e-01, -5.1698e-02, -6.5495e-02, -3.9439e-02,
         1.7420e-01, -1.8464e-01,  9.5525e-02, -2.5099e-02, -3.0075e-02,
         1.3654e-01, -3.2269e-01, -1.4834e-01,  1.2790e-01,  4.0678e-03,
        -5.7279e-02, -2.1440e-03,  2.0314e-01,  3.3098e-01,  7.3128e-02,
         4.9798e-01, -4.4765e-02, -8.2151e-02,  1.4895e-02, -4.6373e-02,
         5.7510e-02,  1.9020e-02,  7.4533e-04, -1.8182e-01, -1.2202e-04,
        -1.3162e-01, -1.2743e-01,  5.9956e-03, -4.2740e-01,  1.1648e-01,
        -1.6375e-01, -6.0307e-02, -2.3113e-01,  2.4188e-01, -7.8616e-02,
         2.0777e-02, -2.5044e-02, -4.4685e-02,  2.8326e-02, -1.7459e-01,
         7.1566e-02, -5.7368e-02,  1.2070e-01, -5.8133e-02, -1.7347e-01,
        -1.2782e-01,  5.3739e-02,  4.5314e-02,  9.4975e-02,  1.0401e-02,
         1.3877e-01, -1.3664e-01,  2.4293e-01,  1.4512e-01,  4.8102e-01,
        -1.1793e-01, -1.5927e-01,  1.6

          [-4.1695e-02, -6.6255e-02, -5.7636e-02]]]], requires_grad=True)
--------------------
features.6.bias
Parameter containing:
tensor([ 2.5296e-02,  1.1945e-01, -3.3357e-01,  1.1266e-01, -1.1582e-01,
        -5.9723e-02,  1.1421e-01, -9.5083e-02,  3.1485e-02, -1.1093e-02,
        -4.9384e-02,  9.1009e-02, -5.2086e-02,  3.6598e-02, -1.2018e-01,
        -1.3416e-01, -5.0457e-03, -1.4378e-03, -7.6294e-02, -4.9133e-02,
        -1.2550e-01,  2.3369e-01, -8.1535e-02,  2.4502e-02, -1.1238e-02,
         1.3240e-01,  2.5781e-01, -2.4450e-02,  7.3169e-02,  2.0816e-01,
         5.9903e-02, -1.1075e-02,  7.0557e-02,  5.3707e-03,  1.3942e-02,
        -6.9005e-02,  9.5581e-02,  8.2127e-02, -8.9249e-02,  2.3552e-02,
         2.0885e-02,  2.0952e-01,  5.7000e-02,  1.0061e-01, -1.5167e-01,
         5.1431e-02,  6.0977e-02,  1.6120e-01,  4.6385e-02, -2.9361e-01,
        -1.9220e-01, -3.3040e-02, -2.2844e-01,  5.2254e-02, -4.1985e-02,
        -2.2856e-01, -2.2374e-01, -1.0398e-01, -3.9473e-02,  2.3

          [ 0.0003,  0.0701,  0.0068]]]], requires_grad=True)
--------------------
features.8.bias
Parameter containing:
tensor([-6.2908e-02,  1.2599e-01,  2.9910e-01,  1.1226e-01,  2.8529e-01,
         1.2800e-01,  1.8281e-01, -3.0966e-02,  5.4521e-01,  1.5652e-01,
        -1.3417e-01, -1.7108e-01,  2.7399e-01,  2.2774e-01,  2.6715e-01,
         2.6192e-01,  4.5637e-02, -1.4264e+00,  6.0950e-01,  8.2353e-02,
         9.6382e-02,  1.5679e-01, -2.9053e-01,  1.2642e-01,  2.0995e-01,
         7.2459e-02, -2.7236e-04, -2.2086e-01,  1.1173e-01,  4.8032e-01,
         3.5515e-02, -2.7491e-01, -5.0275e-01,  6.9615e-04, -1.2266e-01,
         1.4878e-01, -8.2103e-03,  1.3206e-01,  2.9920e-01, -1.6474e-01,
         3.2465e-01,  5.3489e-01,  2.4496e-01,  5.9936e-01,  3.3782e-02,
         6.6390e-02,  4.8976e-01,  4.2106e-01,  6.3466e-01, -9.4680e-02,
        -7.8501e-02,  2.5697e-01,  7.6671e-02, -2.3927e-01,  1.7104e-01,
         4.1678e-01,  2.6976e-01, -4.6004e-01, -8.4528e-02,  2.7783e-01,
   

          [ 8.8704e-03,  1.1838e-02,  7.4983e-04]]]], requires_grad=True)
--------------------
features.10.bias
Parameter containing:
tensor([ 3.7964e-02,  4.3626e-01,  1.9919e-01,  4.7873e-01,  1.9452e-01,
         4.6094e-02,  8.9556e-02, -9.0875e-02,  1.2068e-01,  1.8953e-01,
         4.5016e-02,  4.8434e-01,  2.4634e-01, -7.8553e-02, -4.1008e-02,
        -4.5605e-01,  6.7455e-01,  4.3669e-02, -3.8591e-01,  2.3154e-01,
         1.2107e-01,  3.2227e-01,  1.1335e+00,  2.1153e-01,  4.7995e-01,
         2.8958e-01,  6.4578e-02,  2.4041e-02,  1.5042e-01,  2.8817e-01,
         6.7181e-02,  2.6940e-01,  1.0063e-02,  3.4250e-01,  1.6370e-01,
         2.0520e-01,  7.5118e-02,  1.1070e-01,  2.6571e-01,  1.2281e-01,
        -4.6485e-02,  5.1063e-01,  1.4461e-01,  4.4510e-01,  3.3639e-01,
         4.0720e-01, -2.1050e-02,  1.1052e+00,  1.2309e-01,  3.1051e-01,
         2.3018e-01,  3.6438e-01,  3.8058e-01,  1.7631e-01,  6.3093e-01,
        -1.3756e-02,  2.6251e-01,  2.0362e-01,  3.4236e-01,  1.

       requires_grad=True)
--------------------


In [17]:
# Print only the dotted name of every module in the model.
# NOTE(review): the blank line at the top of the output is presumably the
# root module's empty name — confirm.
for name, sub_module in alexnet.named_modules():
    print(name)
    #print(sub_module)


features
features.0
features.1
features.2
features.3
features.4
features.5
features.6
features.7
features.8
features.9
features.10
features.11
features.12
avgpool
classifier
classifier.0
classifier.1
classifier.2
classifier.3
classifier.4
classifier.5
classifier.6


In [20]:
# Same iteration as the names-only loop, but printing each module's repr
# instead of its name; the first one printed is the whole AlexNet.
for name, sub_module in alexnet.named_modules():
    #print(name)
    print(sub_module)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [24]:
# Print the repr of every module yielded by modules(); no names are
# available here, and the first one printed is the whole AlexNet.
for sub_module in alexnet.modules():
    print(sub_module)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 