# PyTorch之nn模块练习

In [22]:
#库
import torch
from torch import nn

In [None]:
# Define the network
class Linear(nn.Module):
    """Fully connected layer computing ``x @ w + b`` with learnable ``w`` and ``b``.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
    """

    def __init__(self, in_features, out_features):
        super().__init__()

        # Learnable weight matrix, initialized from a standard normal distribution.
        self.w = nn.Parameter(torch.randn(in_features, out_features))
        # Learnable bias vector, initialized from a standard normal distribution.
        self.b = nn.Parameter(torch.randn(out_features))

    def forward(self, x):
        """Apply the affine map to ``x``.

        Args:
            x: Tensor whose last dimension has size ``in_features``. It may
                have any number of leading dimensions (including none).

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``out_features``.
        """
        # matmul (unlike mm) is not restricted to 2-D inputs, and the bias
        # broadcasts over the leading dimensions, so no explicit expand is needed.
        return torch.matmul(x, self.w) + self.b

In [24]:
# Forward pass through the network
layer=Linear(5,3)
# 4x5 random input; the second dimension matches the layer's in_features (5).
# NOTE: `input` shadows the Python builtin of the same name.
input=torch.randn(4,5)
output=layer(input)
# Last expression of the cell, so the notebook displays it.
output

tensor([[-1.3185, -1.9280, -0.2926],
        [-0.2435, -0.2010, -2.2742],
        [-1.9942, -4.9260,  3.2610],
        [-1.4837, -2.4830,  2.5660]], grad_fn=<AddBackward0>)

In [25]:
# Inspect the layer's parameters
for name, parameter in layer.named_parameters():
    print(name, parameter) # i.e. w and b

w Parameter containing:
tensor([[-0.2819, -1.6520,  0.4598],
        [ 0.8439,  0.2167, -0.8900],
        [-0.8746, -0.6107,  0.8331],
        [-0.2925, -0.6853, -0.5600],
        [-0.4001,  0.0921, -0.6926]], requires_grad=True)
b Parameter containing:
tensor([-0.5442, -1.1021, -0.2497], requires_grad=True)


In [26]:
# Two-layer network that uses sigmoid as its activation function
class Perceptron(nn.Module):
    """Two stacked ``Linear`` layers with a sigmoid applied between them.

    Args:
        in_features: Input size of the first layer.
        hidden_features: Output size of the first layer / input size of the second.
        out_features: Output size of the second layer.
    """

    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()

        self.layer1 = Linear(in_features, hidden_features)
        self.layer2 = Linear(hidden_features, out_features)

    def forward(self, x):
        """Run ``x`` through layer1, sigmoid, then layer2 and return the result."""
        hidden = torch.sigmoid(self.layer1(x))
        return self.layer2(hidden)
    
# Build a 3 -> 5 -> 1 perceptron and print the name and size of every parameter;
# parameters of nested layers are reported with a "layer1." / "layer2." prefix.
perceptron=Perceptron(3,5,1)
for name,param in perceptron.named_parameters():
    print(name,param.size())

layer1.w torch.Size([3, 5])
layer1.b torch.Size([5])
layer2.w torch.Size([5, 1])
layer2.b torch.Size([1])


In [27]:
# Option 1: start from an empty Sequential and add named layers one at a time
net1 = nn.Sequential()
net1.add_module('Linear1', nn.Linear(4, 3))
net1.add_module('Linear2', nn.Linear(3, 1))
net1.add_module('activation_layer', nn.Sigmoid())

# Option 2: pass the layers positionally; they are then keyed by index (0, 1, 2)
net2 = nn.Sequential(
    nn.Linear(4, 3),
    nn.Linear(3, 1),
    nn.Sigmoid()
    )

# Option 3: pass an OrderedDict to give each layer an explicit name
from collections import OrderedDict
net3 = nn.Sequential(OrderedDict([
    ('Linear1', nn.Linear(4, 3)),
    ('Linear2', nn.Linear(3, 1)),
    ('activation_layer',nn.Sigmoid())
    ]))
# Print all three to compare how the submodules are keyed.
print('net1:', net1)
print('net2:', net2)
print('net3:', net3)

net1: Sequential(
  (Linear1): Linear(in_features=4, out_features=3, bias=True)
  (Linear2): Linear(in_features=3, out_features=1, bias=True)
  (activation_layer): Sigmoid()
)
net2: Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=1, bias=True)
  (2): Sigmoid()
)
net3: Sequential(
  (Linear1): Linear(in_features=4, out_features=3, bias=True)
  (Linear2): Linear(in_features=3, out_features=1, bias=True)
  (activation_layer): Sigmoid()
)


In [28]:
# Submodules can be fetched by name (attribute access) or by index
net1.Linear1, net2[0], net3.Linear1

(Linear(in_features=4, out_features=3, bias=True),
 Linear(in_features=4, out_features=3, bias=True),
 Linear(in_features=4, out_features=3, bias=True))

In [29]:
# Feed the same random 4x4 input through each of the three networks.
input = torch.rand(4,4)
output1 = net1(input)
output2 = net2(input)
output3 = net3(input)
# Chain net1's two linear layers by hand and finish with net3's sigmoid layer;
# the displayed result matches output1.
output4 = net3.activation_layer(net1.Linear2(net1.Linear1(input)))
output1,output2,output3,output4

(tensor([[0.3482],
         [0.3716],
         [0.3339],
         [0.3181]], grad_fn=<SigmoidBackward0>),
 tensor([[0.4510],
         [0.4628],
         [0.4652],
         [0.5064]], grad_fn=<SigmoidBackward0>),
 tensor([[0.4314],
         [0.4745],
         [0.4169],
         [0.4029]], grad_fn=<SigmoidBackward0>),
 tensor([[0.3482],
         [0.3716],
         [0.3339],
         [0.3181]], grad_fn=<SigmoidBackward0>))

In [30]:
class MyModule(nn.Module):
    """Contrast a plain Python list of layers with an ``nn.ModuleList``."""

    def __init__(self):
        super().__init__()
        # Kept in an ordinary list: these layers do not show up among the
        # module's submodules or parameters.
        plain_layers = [nn.Linear(3, 4), nn.Sigmoid()]
        self.list = plain_layers
        # Wrapped in a ModuleList: these layers are registered as submodules.
        registered_layers = [nn.Linear(4, 3), nn.Sigmoid()]
        self.module_list = nn.ModuleList(registered_layers)

    def forward(self):
        """Placeholder forward pass; takes no input and returns None."""
        return None
    
# Instantiate the module and display it; the printed structure lists only `module_list`.
model=MyModule()
model

MyModule(
  (module_list): ModuleList(
    (0): Linear(in_features=4, out_features=3, bias=True)
    (1): Sigmoid()
  )
)

In [31]:
# Print the name and size of every parameter registered on `model`.
for name,param in model.named_parameters():
    print(name,param.size())

module_list.0.weight torch.Size([3, 4])
module_list.0.bias torch.Size([3])


In [32]:
input = torch.randn(3,3)
model = nn.Linear(3,4)
output1 = model(input)
# Calling the functional form with the layer's own weight and bias returns the same result as calling the layer
output2 = nn.functional.linear(input, model.weight, model.bias)
# Element-wise comparison of the two results.
output1 == output2

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [33]:
# Sigmoid applied through the functional API ...
b1 = nn.functional.sigmoid(input)
# ... and through a freshly constructed module instance.
b2 = nn.Sigmoid()(input)
# Element-wise comparison of the two results.
b1 == b2

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [34]:
# inplace=True asks ReLU to write its result into the input tensor itself.
relu=nn.ReLU(inplace=True)
input=torch.randn(2,3)
# Printed before the call, so this shows the values prior to the ReLU.
print(input)
output=relu(input)
print(output)

tensor([[ 0.4894,  0.4143, -0.0041],
        [ 0.1547, -0.4861,  0.7648]])
tensor([[0.4894, 0.4143, 0.0000],
        [0.1547, 0.0000, 0.7648]])


In [35]:
# Mean squared error between a prediction vector and a target vector.
pred = torch.tensor([1.,2.,3.,4.,5.])
target =torch.tensor([1.,1.,1.,1.,1.])
criterion = nn.MSELoss()
# Squared differences are 0, 1, 4, 9, 16; their mean is 6, matching the printed loss.
loss = criterion(pred, target)
print(loss)

tensor(6.)


In [36]:
# cross entropy loss
# batch_size=3: one row of scores per sample, one column per class (two classes)
pred = torch.randn(3,2)
# Class index of each sample; as written, all three samples are labelled class 1
target=torch.tensor([1,1,1])
CELoss = torch.nn.CrossEntropyLoss()

loss2 = CELoss(pred, target)
loss2

tensor(0.3788)

In [37]:
# Initialize parameters with nn.init
from torch.nn import init
linear = nn.Linear(3,4)

# Equivalent to linear.weight.data.normal_(0, std), where std is the standard deviation of the normal distribution
# (xavier_normal_ chooses that std itself; see the torch.nn.init documentation for the formula).
init.xavier_normal_(linear.weight)

Parameter containing:
tensor([[-1.2498, -0.1959, -0.6250],
        [-0.5790, -0.3113, -0.9196],
        [-0.6987,  0.5840, -0.0751],
        [ 0.4066, -0.1735, -0.3879]], requires_grad=True)

In [38]:
# Rebuild net1 so it starts from freshly constructed layers.
net1 = nn.Sequential()
net1.add_module('Linear1', nn.Linear(4, 3))
net1.add_module('Linear2', nn.Linear(3, 1))
net1.add_module('activation_layer', nn.Sigmoid())


# The init functions modify the tensor in place and return it, so the result can be printed directly.
print(init.xavier_uniform_(net1.Linear1.weight))
print(init.constant_(net1.Linear1.bias, 0.1))

Parameter containing:
tensor([[-0.2881, -0.0278, -0.2095,  0.0700],
        [ 0.3854,  0.1827,  0.8569,  0.0608],
        [-0.3087,  0.0730,  0.1381,  0.1736]], requires_grad=True)
Parameter containing:
tensor([0.1000, 0.1000, 0.1000], requires_grad=True)


In [39]:
def weights_init(m):
    """Initialize a module whose class name contains ``'Linear'``.

    Written to be passed to ``Module.apply``. The weight is filled with
    Xavier-uniform values and the bias is set to the constant 0.1. A matching
    module that has no ``weight`` or no ``bias`` tensor (for example a layer
    built with ``bias=False``, or a custom class that only shares the name)
    has that part skipped instead of raising.

    Args:
        m: The module to initialize in place. Modules whose class name does
            not contain ``'Linear'`` are left untouched.
    """
    if 'Linear' not in m.__class__.__name__:
        return

    # getattr with a default covers both "attribute missing" and "attribute is None".
    weight = getattr(m, 'weight', None)
    if weight is not None:
        init.xavier_uniform_(weight)

    bias = getattr(m, 'bias', None)
    if bias is not None:
        init.constant_(bias, 0.1)

net1.apply(weights_init) # apply() recursively visits every submodule of the network (and the network itself) and calls the given function on each one.
            # Here that initializes every layer whose class name contains 'Linear'.

Sequential(
  (Linear1): Linear(in_features=4, out_features=3, bias=True)
  (Linear2): Linear(in_features=3, out_features=1, bias=True)
  (activation_layer): Sigmoid()
)

# 课堂作业

In [40]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):
    """Fully connected classifier with three ReLU hidden layers and a log-softmax output.

    Layer sizes are ``in_features -> 224 -> 128 -> 64 -> num_classes``.

    Args:
        in_features: Number of features per sample after flattening
            (default 784, e.g. a 28x28 input).
        num_classes: Number of output classes (default 10).
    """

    def __init__(self, in_features=784, num_classes=10):
        super().__init__()

        # Input layer -> first hidden layer
        self.fc1 = nn.Linear(in_features, 224)
        # First hidden layer -> second hidden layer
        self.fc2 = nn.Linear(224, 128)
        # Second hidden layer -> third hidden layer
        self.fc3 = nn.Linear(128, 64)
        # Third hidden layer -> output layer
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch.

        Args:
            x: Tensor whose total element count is a multiple of
                ``in_features``; it is flattened to ``(batch, in_features)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding log-probabilities.
        """
        # Flatten every sample to a row of in_features values. reshape (rather
        # than view) also accepts non-contiguous inputs such as transposed tensors.
        x = x.reshape(-1, self.fc1.in_features)

        # Hidden layers, each followed by a ReLU activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # Output layer produces raw class scores ...
        x = self.fc4(x)
        # ... which log_softmax normalizes across the class dimension.
        return F.log_softmax(x, dim=1)

# Create an instance of the network
net = SimpleNet()
# Print the network structure
print(net)

SimpleNet(
  (fc1): Linear(in_features=784, out_features=224, bias=True)
  (fc2): Linear(in_features=224, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)
