<a href="https://colab.research.google.com/github/FlashOrange/ML_Learning/blob/master/torch_nn_grad.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch

https://www.bilibili.com/video/av49462224/?p=1

In [0]:
# Problem sizes for the toy regression task.
N = 64        # number of samples (in practice this would be the mini-batch size)
D_in = 1000   # input dimension
H = 100       # hidden-layer width
D_out = 10    # output (label) dimension

In [0]:
# Random training set: N input rows of width D_in and N target rows of width D_out.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

# Network parameters, tracked by autograd so that loss.backward() fills in .grad.
# w1 maps the input to the hidden layer; w2 maps the hidden layer to the output.
w1 = torch.randn((D_in, H), requires_grad=True)
w2 = torch.randn((H, D_out), requires_grad=True)

In [0]:
learning_rate = 1e-6

# 500 iterations of plain gradient descent on w1 and w2.
for it in range(500):
    # Step 1: forward pass. Each operation below is recorded in the autograd graph.
    hidden = x.mm(w1)                  # N x H pre-activations
    hidden_relu = hidden.clamp(min=0)  # ReLU written as a clamp at zero
    y_pred = hidden_relu.mm(w2)        # N x D_out predictions

    # Step 2: sum-of-squared-errors loss.
    residual = y_pred - y
    loss = residual.pow(2).sum()
    print(it, loss.item())

    # Step 3: backward pass (chain rule) populates w1.grad and w2.grad.
    loss.backward()

    # Step 4: gradient step. no_grad keeps the update itself out of the graph,
    # and each gradient is reset because backward() accumulates into .grad.
    with torch.no_grad():
        for weight in (w1, w2):
            weight.sub_(learning_rate * weight.grad)
            weight.grad.zero_()

In [0]:
import torch
import torch.nn as nn

使用 `torch.nn`

In [0]:
# Problem sizes for the toy regression task.
N = 64        # number of samples (in practice this would be the mini-batch size)
D_in = 1000   # input dimension
H = 100       # hidden-layer width
D_out = 10    # output (label) dimension

In [0]:
# Random training set: N input rows of width D_in and N target rows of width D_out.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

# Two-layer network built from torch.nn modules.
# The first Linear layer is created without a bias term (bias=False);
# the second keeps nn.Linear's default bias.
layers = [
    nn.Linear(D_in, H, bias=False),
    nn.ReLU(),
    nn.Linear(H, D_out),
]
model = nn.Sequential(*layers)

# Overwrite both weight matrices in place with samples from a normal distribution
# (this is weight initialisation, not normalisation of the data).
for linear_layer in (model[0], model[2]):
    torch.nn.init.normal_(linear_layer.weight)

learning_rate = 1e-6

# Sum-reduced mean-squared-error loss, i.e. the sum of squared errors.
loss_fn = nn.MSELoss(reduction='sum')

# 500 iterations of plain gradient descent over all model parameters.
for it in range(500):
    # Step 1: forward pass (calling the model runs its forward()).
    y_pred = model(x)

    # Step 2: loss.
    loss = loss_fn(y_pred, y)
    print(it, loss.item())

    # Step 3: backward pass (chain rule) fills .grad on every parameter.
    loss.backward()

    # Step 4: manual gradient step; no_grad keeps the update out of the graph.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

    # Reset gradients so the next backward() does not accumulate onto them.
    model.zero_grad()

In [15]:
# Bare expression: the notebook displays the model's repr (its layer structure).
model

Sequential(
  (0): Linear(in_features=1000, out_features=100, bias=False)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [0]:
# Bare expression: display the weight parameter of the model's first layer (index 0).
model[0].weight

### 更加自动化

In [0]:
import torch
import torch.nn as nn

In [0]:
# Problem sizes for the toy regression task.
N = 64        # number of samples (in practice this would be the mini-batch size)
D_in = 1000   # input dimension
H = 100       # hidden-layer width
D_out = 10    # output (label) dimension

In [0]:
# Random training set: N input rows of width D_in and N target rows of width D_out.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

# Two-layer network built from torch.nn modules.
# The first Linear layer is created without a bias term (bias=False);
# the second keeps nn.Linear's default bias.
layers = [
    nn.Linear(D_in, H, bias=False),
    nn.ReLU(),
    nn.Linear(H, D_out),
]
model = nn.Sequential(*layers)

# The layers keep nn.Linear's default initialisation here: the author's note is
# that normal-initialising the weights gives worse results in this setup.

# Author's note: Adam learning rates are usually chosen between 1e-4 and 1e-3.
learning_rate = 1e-4

# Sum-reduced mean-squared-error loss, i.e. the sum of squared errors.
loss_fn = nn.MSELoss(reduction='sum')

# Adam performs the parameter update; torch.optim.SGD(model.parameters(),
# lr=learning_rate) is the alternative the author left noted here.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 500 optimisation steps.
for it in range(500):
    # Step 1: forward pass (calling the model runs its forward()).
    y_pred = model(x)

    # Step 2: loss.
    loss = loss_fn(y_pred, y)
    print(it, loss.item())

    # Step 3: clear gradients left over from the previous iteration.
    optimizer.zero_grad()

    # Step 4: backward pass.
    loss.backward()

    # Step 5: let the optimizer update the parameters.
    optimizer.step()

##  自定义模型

In [0]:
import torch

class TwoLayerNet(torch.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Subclasses torch.nn.Module. The first linear layer has no bias term;
    the second uses torch.nn.Linear's default bias.
    """

    def __init__(self, D_in, H, D_out):
        """Create the two linear layers.

        Args:
            D_in: input feature dimension.
            H: hidden-layer width.
            D_out: output dimension.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H, bias=False)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Return the network output for input batch ``x``."""
        hidden = self.linear1(x)
        # ReLU written as a clamp at zero.
        activated = hidden.clamp(min=0)
        return self.linear2(activated)

In [0]:
# Problem sizes for the toy regression task.
N = 64        # number of samples (in practice this would be the mini-batch size)
D_in = 1000   # input dimension
H = 100       # hidden-layer width
D_out = 10    # output (label) dimension

# Instantiate the custom two-layer network with those sizes.
model = TwoLayerNet(D_in=D_in, H=H, D_out=D_out)

In [0]:
# The layers keep torch.nn.Linear's default initialisation: the author's note is
# that normal-initialising the weights gives worse results in this setup.

# Author's note: Adam learning rates are usually chosen between 1e-4 and 1e-3.
learning_rate = 1e-4

# Sum-reduced mean-squared-error loss, i.e. the sum of squared errors.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Adam performs the parameter update; torch.optim.SGD(model.parameters(),
# lr=learning_rate) is the alternative the author left noted here.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 500 optimisation steps. x and y are the tensors created in an earlier cell.
for it in range(500):
    # Step 1: forward pass (calling the model runs its forward()).
    y_pred = model(x)

    # Step 2: loss.
    loss = loss_fn(y_pred, y)
    print(it, loss.item())

    # Step 3: clear gradients left over from the previous iteration.
    optimizer.zero_grad()

    # Step 4: backward pass.
    loss.backward()

    # Step 5: let the optimizer update the parameters.
    optimizer.step()