# 单层前向神经网络例子

In [3]:
import torch

In [4]:
# Batch size, input dimension, hidden-layer dimension, output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Weights of the two layers; requires_grad=True makes autograd track them.
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for step in range(501):
    # Forward pass: matrix product, clamp negatives to zero, matrix product.
    hidden = torch.clamp(torch.mm(x, w1), min=0)
    y_pred = torch.mm(hidden, w2)

    # Sum of squared errors over every element of the batch.
    loss = torch.sum((y_pred - y) ** 2)
    if step % 50 == 0:
        print(step, loss.item())

    # Backward pass: fills w1.grad and w2.grad.
    loss.backward()

    # Gradient-descent step; no_grad keeps the update out of the graph.
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= weight.grad * learning_rate
            # Reset the gradient once the weight has been updated;
            # otherwise the next backward() would add onto this
            # iteration's gradient.
            weight.grad.zero_()
    

0 28805666.0


50 12367.09375
100 446.92889404296875
150 29.964210510253906
200 2.4932303428649902
250 0.22652092576026917
300 0.02163032628595829
350 0.002363927662372589
400 0.00042014956125058234
450 0.00013166134885977954
500 5.862200850970112e-05


# 使用nn包构建前向网络

In [5]:
import torch.nn as nn


In [6]:
# Batch size, input dimension, hidden-layer dimension, output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

learning_rate = 1e-4
# Squared error summed (not averaged) over all elements.
loss_fn = nn.MSELoss(reduction="sum")

# Random inputs and random regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear -> ReLU -> Linear, assembled from a list of layers.
layers = [nn.Linear(D_in, H), nn.ReLU(), nn.Linear(H, D_out)]
model = nn.Sequential(*layers)

for step in range(501):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    if step % 50 == 0:
        print(step, loss.item())

    # Clear the gradients left from the previous iteration, then
    # backpropagate the current loss.
    model.zero_grad()
    loss.backward()

    # Manual gradient-descent step on every parameter of the model.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(param.grad * learning_rate)


0 720.2109985351562
50 41.37998580932617
100 3.235888957977295
150 0.3981344997882843


200 0.06371669471263885
250 0.011966506950557232
300 0.00247399415820837
350 0.0005454039201140404


400 0.00012585466902237386
450 3.0133418476907536e-05
500 7.42580914447899e-06


# 使用optim去更新权重

In [7]:
# Batch size, input dimension, hidden-layer dimension, output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

learning_rate = 1e-4
# Squared error summed (not averaged) over all elements.
loss_fn = nn.MSELoss(reduction="sum")

# Random inputs and random regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear -> ReLU -> Linear, assembled from a list of layers.
layers = [nn.Linear(D_in, H), nn.ReLU(), nn.Linear(H, D_out)]
model = nn.Sequential(*layers)

# The optimizer owns the update rule for all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for step in range(501):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    if step % 50 == 0:
        print(step, loss.item())

    # Reset gradients, backpropagate, then let the optimizer update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


0 714.3883666992188
50 227.57684326171875
100 61.03059005737305


150 10.047301292419434
200 0.9512631893157959
250 0.059015870094299316


300 0.0026670400984585285
350 9.352606866741553e-05
400 2.5314604954473907e-06


450 4.5081094413035316e-08
500 7.876985175236939e-10


# 自定义模型

In [13]:
# Custom two-layer model.
class TwoLayerNet(nn.Module):
    """Two linear layers with the hidden activations clamped at zero."""

    def __init__(self, D_in, H, D_out):
        """Build the two linear layers.

        Args:
            D_in: Size of each input sample.
            H: Size of the hidden layer.
            D_out: Size of each output sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        """Return the prediction of the network for the batch ``x``."""
        # Negative hidden activations are clamped to zero before the
        # second linear layer is applied.
        return self.linear2(self.linear1(x).clamp(min=0))
        
        
# Batch size, input dimension, hidden-layer dimension, output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

learning_rate = 1e-4
# Squared error summed (not averaged) over all elements.
loss_fn = nn.MSELoss(reduction="sum")

# Randomly generated samples and labels.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Instantiate the model and hand its parameters to plain SGD.
model = TwoLayerNet(D_in, H, D_out)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for step in range(501):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    if step % 50 == 0:
        print(step, loss.item())

    # Reset gradients, backpropagate, then let the optimizer update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 690.1822509765625
50 34.1402702331543
100 2.139177083969116
150 0.23636604845523834
200 0.037243906408548355
250 0.007162435445934534
300 0.0015055050607770681
350 0.00033183288178406656
400 7.519089558627456e-05


450 1.7330039554508403e-05
500 4.043296485178871e-06


In [21]:
import random
class DynamicNet(torch.nn.Module):
    """Network whose number of hidden layers is drawn anew on every forward."""

    def __init__(self, D_in, H, D_out):
        """
        Construct the three nn.Linear instances that the forward pass uses.
        """
        super().__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        For the forward pass we randomly pick 0, 1, 2 or 3 and reuse the
        middle_linear module that many times to compute the hidden layers.
        Because each forward pass builds a dynamic computation graph, normal
        Python control flow such as loops or conditionals can be used when
        defining the forward pass.
        It also shows that reusing the same module several times while
        defining a computation graph is perfectly safe.
        This is a big improvement over Lua Torch, where each module could be
        used only once.
        """
        hidden = torch.clamp(self.input_linear(x), min=0)
        # One random draw per call decides how often the shared middle
        # layer is applied.
        remaining = random.randint(0, 3)
        while remaining > 0:
            hidden = torch.clamp(self.middle_linear(hidden), min=0)
            remaining -= 1
        return self.output_linear(hidden)
    
# Batch size, input dimension, hidden-layer dimension, output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

learning_rate = 1e-4
# Squared error summed (not averaged) over all elements.
loss_fn = nn.MSELoss(reduction="sum")

# Randomly generated samples and labels.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Instantiate the model and train it with SGD plus momentum.
model = DynamicNet(D_in, H, D_out)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.8)

for step in range(501):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    if step % 50 == 0:
        print(step, loss.item())

    # Reset gradients, backpropagate, then let the optimizer update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 607.8623657226562
50 162.10675048828125
100 66.3708267211914
150 2.579711437225342


200 4.121944427490234
250 4.304188251495361


300 16.872217178344727
350 5.7509942054748535
400 1.5938948392868042


450 0.6730867028236389
500 0.4437660276889801
