In [97]:
import torch.nn as nn
import torch
import torch.nn.functional as F

In [98]:
def get_num_correct(preds, labels):
    """Count how many predictions in a batch agree with their labels.

    Args:
        preds: Tensor of per-class scores; the predicted class is the argmax
            taken along dim 1.
        labels: Tensor of class indices, compared element-wise with the
            predicted classes.

    Returns:
        int: number of positions where the predicted class equals the label.
    """
    # Use the `preds` argument. The previous body read `pred2`, a name that is
    # not defined in this function, so it either raised NameError or silently
    # scored whatever stale global happened to be left in the kernel.
    return preds.argmax(dim=1).eq(labels).sum().item()

In [3]:
class Network(nn.Module):
    """Small CNN: two conv layers followed by three linear layers.

    A class that inherits from another lists its parent in the parentheses of
    the ``class`` line (here ``nn.Module``).

    The five layer attributes (conv1, conv2, fc1, fc2, out) are themselves
    instances of nn layer classes (attributes of a class can be other
    classes; the conv class lives under nn/modules/conv). Their constructor
    arguments are the network's hyperparameters:

    * kernel_size  - side length of a convolution kernel.
    * in_channels  - number of channels entering a conv layer.
    * out_channels - number of kernels; every kernel produces one feature map.
    * out_features - output width of a fully connected layer.

    Two values are dictated by the data: conv1 has 1 input channel because the
    images are grayscale, and ``out`` produces 10 values because there are ten
    classes. The other sizes are chosen by the designer from experience.
    In general the output count of one layer is the input count of the next;
    additional conv layers tend to raise the channel count while linear layers
    shrink step by step.
    """

    def __init__(self):
        # The parent constructor has to run first so nn.Module can set itself
        # up before any layer is attached.
        super().__init__()

        # Convolutional layers: 1 -> 6 -> 12 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected layers: 192 -> 120 -> 60.
        # 12*4*4 = 12 channels of 4x4 maps, which is what two (5x5 conv,
        # 2x2 pool) stages leave of a 28x28 input: 28 -> 24 -> 12 -> 8 -> 4.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)

        # Output layer: one score per class.
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run the network on ``t`` and return the 10 raw scores per sample.

        ReLU and max pooling hold no weights, so they are used as plain
        functions from ``torch.nn.functional``. A 2x2 max pool with stride 2
        keeps the maximum of every 2x2 region, halving height and width.

        Hidden layers commonly use ReLU; a single-label output layer commonly
        uses softmax, but no softmax is applied here - the raw scores of the
        output layer are returned.
        """
        # (1) input layer: the input is used as-is.

        # (2) first hidden conv layer: conv -> ReLU -> 2x2 max pool
        pooled = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # (3) second hidden conv layer: conv -> ReLU -> 2x2 max pool
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), kernel_size=2, stride=2)

        # (4) first hidden linear layer; rows of 12*4*4 features each
        flat = pooled.reshape(-1, 12 * 4 * 4)
        hidden = F.relu(self.fc1(flat))

        # (5) second hidden linear layer
        hidden = F.relu(self.fc2(hidden))

        # (6) output layer
        return self.out(hidden)

In [6]:
# Build one instance of the network; the cells below inspect its layers and weights.
net = Network()

In [11]:
print(net)
# The layer structure can be printed only because Network inherits from
# nn.Module, which overrides the default string representation that Python
# classes inherit.
# kernel_size=(5, 5) is filled in by the layer from the single value 5 (a
# square kernel); stride is the step size of the convolution.
# bias: shown as bias=True for the Linear layers in the printed summary.

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [12]:
net.conv1
# An individual layer can be displayed through its attribute as well.

Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))

In [15]:
net.conv1.weight
# The weights held inside a layer can be displayed directly.
# The weight is itself an object of a class (it is shown as "Parameter containing: ...").

Parameter containing:
tensor([[[[-0.0754,  0.0473,  0.0803, -0.1206, -0.0088],
          [-0.0668,  0.0425, -0.0932,  0.0182,  0.0688],
          [ 0.1886, -0.0400,  0.0023,  0.1900,  0.0632],
          [-0.0914, -0.1901, -0.0706,  0.1265,  0.0144],
          [-0.1941,  0.0058,  0.0710, -0.0816,  0.0148]]],


        [[[ 0.1660,  0.1029,  0.0390, -0.1283,  0.1485],
          [-0.1035,  0.1331, -0.1843,  0.1988, -0.0716],
          [-0.1431,  0.1293, -0.1194,  0.1289,  0.0581],
          [-0.0420, -0.1234,  0.1143, -0.0330, -0.1725],
          [ 0.0021,  0.1608,  0.0225,  0.0620, -0.0526]]],


        [[[-0.0632,  0.0717,  0.0401, -0.0334, -0.0570],
          [-0.1434, -0.0406,  0.0896,  0.1817, -0.0749],
          [-0.0746, -0.0774,  0.1365,  0.1238, -0.0481],
          [ 0.1566, -0.0367, -0.1992,  0.0045, -0.1563],
          [-0.0638, -0.0391, -0.1716,  0.0414,  0.0067]]],


        [[[ 0.1934,  0.1922, -0.1462,  0.0595, -0.1648],
          [-0.0812, -0.1670, -0.0201,  0.1679,  0.0566

In [18]:
print(net.conv1.weight.shape)
print(net.conv2.weight.shape)
# Dim 0: number of output channels, i.e. the number of kernels.
# Dim 1: number of input channels, i.e. the depth of each kernel.
# Dims 2 and 3: kernel height and width.

torch.Size([6, 1, 5, 5])
torch.Size([12, 6, 5, 5])


In [20]:
net.conv2.weight[0]
# The first kernel of the second conv layer: 5x5 in size, with a depth of 6.

tensor([[[-0.0757, -0.0596, -0.0193, -0.0275, -0.0730],
         [-0.0695,  0.0369, -0.0422, -0.0796, -0.0501],
         [ 0.0684,  0.0569,  0.0044,  0.0751, -0.0075],
         [-0.0092,  0.0732,  0.0386, -0.0229,  0.0292],
         [-0.0766,  0.0090, -0.0520, -0.0125, -0.0233]],

        [[ 0.0401, -0.0507,  0.0477, -0.0739,  0.0229],
         [ 0.0058, -0.0495, -0.0605, -0.0446, -0.0499],
         [-0.0694,  0.0768, -0.0444, -0.0575, -0.0032],
         [ 0.0009, -0.0538, -0.0459, -0.0746,  0.0134],
         [ 0.0402,  0.0562, -0.0701, -0.0223, -0.0683]],

        [[ 0.0620, -0.0141,  0.0734, -0.0476,  0.0335],
         [-0.0087, -0.0422, -0.0726,  0.0450, -0.0629],
         [ 0.0333,  0.0643, -0.0367, -0.0392, -0.0384],
         [ 0.0741, -0.0240, -0.0367, -0.0274,  0.0511],
         [-0.0440,  0.0052,  0.0074, -0.0245,  0.0792]],

        [[-0.0226, -0.0674,  0.0140, -0.0793, -0.0473],
         [-0.0728,  0.0349, -0.0773,  0.0646,  0.0778],
         [-0.0519,  0.0207, -0.0785, -0.03

In [21]:
net.fc1.weight
# A rank-2 tensor, also called a linear map.

Parameter containing:
tensor([[ 0.0627,  0.0458,  0.0071,  ...,  0.0499,  0.0486, -0.0406],
        [-0.0462, -0.0552,  0.0257,  ...,  0.0628, -0.0050,  0.0547],
        [ 0.0110,  0.0389,  0.0717,  ..., -0.0386, -0.0271,  0.0551],
        ...,
        [-0.0567, -0.0160, -0.0680,  ..., -0.0359,  0.0013,  0.0390],
        [ 0.0476, -0.0421, -0.0010,  ..., -0.0696,  0.0563,  0.0648],
        [ 0.0695, -0.0401, -0.0659,  ...,  0.0661, -0.0322, -0.0223]],
       requires_grad=True)

In [22]:
# Shape of fc1's weight matrix, laid out as (out_features, in_features).
net.fc1.weight.shape

torch.Size([120, 192])

In [23]:
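# 120 is the number of output features and 192 the number of input features.
# In the matrix product the layer's weight matrix is the left operand and the
# previous layer's output is the right operand (y = W x for a single sample).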

In [34]:
# Print every parameter tensor of the network next to its name.
for name, param in net.named_parameters():
    print(name,'\t\t\t',param.shape)

# About bias: it acts as the activation threshold of a neuron.

conv1.weight 			 torch.Size([6, 1, 5, 5])
conv1.bias 			 torch.Size([6])
conv2.weight 			 torch.Size([12, 6, 5, 5])
conv2.bias 			 torch.Size([12])
fc1.weight 			 torch.Size([120, 192])
fc1.bias 			 torch.Size([120])
fc2.weight 			 torch.Size([60, 120])
fc2.bias 			 torch.Size([60])
out.weight 			 torch.Size([10, 60])
out.bias 			 torch.Size([10])


In [35]:
# Same walk over the parameters as with named_parameters(), but yielding only
# the tensors, without their names.
for param in net.parameters():
    print(param.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [90]:
# Stand-alone linear layer with 4 inputs, 3 outputs and no bias term.
fc=nn.Linear(in_features=4, out_features=3, bias=False)

In [91]:
fc.weight.shape
# The weight matrix is created with random initial values.

torch.Size([3, 4])

In [92]:
# A hand-written 3x4 weight matrix and a 4-element input vector, both float32.
weight_matrix = torch.tensor([[1,2,3,4],[2,3,4,5],[3,4,5,6]], dtype = torch.float32)
in_features = torch.tensor([1,2,3,4],dtype = torch.float32)

In [96]:
fc.weight = nn.Parameter(weight_matrix)
fc.weight
# The weight can also be set by hand; the tensor has to be wrapped in an
# nn.Parameter instance before it is assigned.

Parameter containing:
tensor([[1., 2., 3., 4.],
        [2., 3., 4., 5.],
        [3., 4., 5., 6.]], requires_grad=True)

In [95]:
fc(in_features)
# The input is passed by calling the layer instance directly; with no bias the
# result is just the matrix-vector product of the weight and the input.
# This relies on __call__(): calling fc(...) invokes its forward(input) method.

tensor([30., 40., 50.], grad_fn=<SqueezeBackward3>)