### 单层感知机

In [1]:
import torch 
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(1, 10, requires_grad=True)

o = torch.sigmoid(x@w.t())
print(o.shape)
print(o)

loss = F.mse_loss(torch.ones(1, 1), o)
# loss 是个标量
print(loss.shape)
print(loss)

loss.backward()
# 得到 loss 分别对 w_0、w_1、...、w_9 的 10 个偏导数
print(w.grad)
print(w.grad.shape)

torch.Size([1, 1])
tensor([[0.3836]], grad_fn=<SigmoidBackward0>)
torch.Size([])
tensor(0.3799, grad_fn=<MseLossBackward0>)
tensor([[-0.1743,  0.0209,  0.1090,  0.2765, -0.1002, -0.4500,  0.1041,  0.0736,
         -0.0140, -0.0630]])
torch.Size([1, 10])


### 多层感知机

In [2]:
import torch 
from torch.nn import functional as F

x = torch.randn(1, 10)
# 权重参数矩阵的 10 与输入数据的数量相匹配
# 权重参数矩阵的 2 表示 2 组权重参数，也表示中间层有 2 个结果
w = torch.randn(2, 10, requires_grad=True)
print(x)
print(w)

tensor([[-1.0786,  0.4199, -0.7229, -0.4312,  0.1539,  0.1000, -0.2137,  0.3945,
         -0.5721, -0.4595]])
tensor([[-0.7933, -0.1524,  0.6714, -2.0703, -0.3610, -1.4520, -0.0576, -1.4111,
          1.4460,  0.4906],
        [-0.4150,  1.5163, -1.5411,  1.9912, -0.6449, -0.6590, -0.1667, -0.6525,
         -0.9053,  0.0287]], requires_grad=True)


In [3]:
# [1, 10] @ [10, 2] --> [1, 2]
o = torch.sigmoid(x@w.t())
print(o.shape)
print(o)

torch.Size([1, 2])
tensor([[0.3547, 0.8111]], grad_fn=<SigmoidBackward0>)


In [4]:
# 这里使用 torch.ones(1, 2) 更为合适
# torch.ones(1, 1) 原本是不能与 [1, 2] 进行计算的
# 但是有广播机制，自动将 [1, 1] 广播为 [1, 2]， 从而使得二者可以进行计算
loss = F.mse_loss(torch.ones(1, 1), o)
print(loss)

tensor(0.2261, grad_fn=<MseLossBackward0>)


  loss = F.mse_loss(torch.ones(1, 1), o)


In [5]:
loss.backward()
print(w.grad)
# 权重参数更新 w' = w - learningrate * w.grad
# 所以 w 、w'、 w.grad 的形状是一样的

tensor([[ 0.1593, -0.0620,  0.1068,  0.0637, -0.0227, -0.0148,  0.0316, -0.0583,
          0.0845,  0.0679],
        [ 0.0312, -0.0121,  0.0209,  0.0125, -0.0045, -0.0029,  0.0062, -0.0114,
          0.0166,  0.0133]])
