In [1]:
import torch

In [2]:
# Running example: a simple function of the form y = x*w + b.
x = torch.ones(1)
# Use a float fill value so that w is a floating-point tensor. Recent PyTorch
# infers the dtype of torch.full from fill_value, and only floating-point
# tensors can later be marked with requires_grad_().
w = torch.full(size=[1], fill_value=2.)

In [3]:
# Inspect x; a bare expression on the last line of a cell is shown as its output.
x

tensor([1.])

In [4]:
# Inspect w; a bare expression on the last line of a cell is shown as its output.
w

tensor([2.])

In [5]:
# 导入函数库
import torch.nn.functional as F

### 使用autograd方法进行求导

In [6]:
# Mean-squared-error loss.
# `input` is the prediction (x*w); `target` is the ground-truth value.
mse = F.mse_loss(input=x * w, target=torch.ones(1))
mse

tensor(1.)

In [7]:
# Differentiate the loss with the autograd function.
# The call below is left commented out because it raises an error at this point:
# w has not yet been marked as requiring gradients.
# torch.autograd.grad(mse, w)

In [8]:
# Mark w (in place) as requiring gradients.
w.requires_grad_()

tensor([2.], requires_grad=True)

In [9]:
# The loss has to be recomputed now that w requires gradients,
# so that the new result carries a grad_fn.
mse = F.mse_loss(input=x * w, target=torch.ones(1))
mse

tensor(1., grad_fn=<MseLossBackward>)

In [10]:
# Gradient of the loss with respect to w; the result is a one-element tuple.
torch.autograd.grad(outputs=mse, inputs=w)

(tensor([2.]),)

### 使用backward方法进行求导

In [11]:
# Build the loss once more so the backward() demonstration has its own graph.
mse = F.mse_loss(input=x * w, target=torch.ones(1))
mse

tensor(1., grad_fn=<MseLossBackward>)

In [12]:
# Differentiate with the backward method; the gradient is stored on w.grad.
mse.backward()

In [13]:
# Gradient of mse with respect to w, filled in by mse.backward().
w.grad

tensor([2.])

#### 无论使用哪种方法，都需要先为待求导的参数设置 requires_grad（即指定其需要求导）

### 计算Softmax函数的导数

In [14]:
# Seed the global RNG before the first random draw so that this and every
# later random tensor in the notebook is reproducible on a fresh run.
torch.manual_seed(0)
# Three uniform random values used as the softmax input.
a = torch.rand(3)
a

tensor([0.5214, 0.9971, 0.7792])

In [15]:
# Mark a (in place) as requiring gradients.
a.requires_grad_()

tensor([0.5214, 0.9971, 0.7792], requires_grad=True)

In [16]:
# Softmax over the only dimension of a.
p = F.softmax(input=a, dim=0)
p

tensor([0.2562, 0.4122, 0.3315], grad_fn=<SoftmaxBackward>)

In [17]:
# Differentiate with torch.autograd.grad.
# Partial derivatives of p[0] with respect to a[0], a[1] and a[2].
# retain_graph=True keeps the graph alive so p can be differentiated again.
torch.autograd.grad(outputs=p[0], inputs=a, retain_graph=True)

(tensor([ 0.1906, -0.1056, -0.0849]),)

In [18]:
# Partial derivatives of p[1] with respect to a; the graph is kept for the next call.
torch.autograd.grad(outputs=p[1], inputs=a, retain_graph=True)

(tensor([-0.1056,  0.2423, -0.1367]),)

In [19]:
# Partial derivatives of p[2] with respect to a (last use, so the graph is not retained).
torch.autograd.grad(outputs=p[2], inputs=a)

(tensor([-0.0849, -0.1367,  0.2216]),)

### 单层感知器的导数推导

In [20]:
# Input data: 1 row, 10 columns, drawn from a standard normal distribution.
x = torch.randn((1, 10))
x

tensor([[-1.5173, -0.1510,  1.5163, -1.1457,  0.6900, -0.3498,  0.4459,  0.1377,
          0.9805, -1.5648]])

In [21]:
# Weights of the single output neuron (1 x 10), tracked by autograd.
w = torch.randn((1, 10), requires_grad=True)
w

tensor([[ 1.4973, -0.5020, -0.0952, -1.7759,  0.9548, -0.3387,  1.4977, -0.7681,
          0.8197,  1.0211]], requires_grad=True)

In [22]:
# Matrix-multiply x by the transpose of w, then apply the sigmoid activation.
o = torch.sigmoid(torch.matmul(x, w.t()))
o


tensor([[0.5596]], grad_fn=<SigmoidBackward>)

In [23]:
# Use mean squared error as the loss, assuming the true value is [[1]];
# the gradient is taken from this loss afterwards.
# `input` is the prediction; `target` is the ground-truth value.
loss = F.mse_loss(input=o, target=torch.ones(1, 1))
loss

tensor(0.1940, grad_fn=<MseLossBackward>)

In [24]:
# loss is a scalar (its shape is empty).
loss.shape


torch.Size([])

In [25]:
# Differentiate with backward; the gradient is stored on w.grad.
loss.backward()


In [26]:
# Gradient of the loss with respect to each of the 10 weights, filled in by loss.backward().
w.grad

tensor([[ 0.3294,  0.0328, -0.3291,  0.2487, -0.1498,  0.0759, -0.0968, -0.0299,
         -0.2128,  0.3397]])

### 多层感知器的导数推导

In [27]:
# The first (input) layer has 10 neurons.
x = torch.randn((1, 10))
x

tensor([[ 0.1235, -1.6236,  2.3391, -2.3477,  0.0476,  0.4647,  0.8141,  0.1049,
          1.4921,  0.0067]])

In [29]:
# The second layer has 2 neurons, i.e. 2*10 weights.
# Remember to request gradient tracking for the weights.
w = torch.rand((2, 10), requires_grad=True)
w

tensor([[9.2778e-01, 8.2995e-01, 9.1204e-01, 8.1783e-01, 5.5393e-01, 8.4093e-01,
         1.8543e-01, 1.2240e-01, 5.8497e-01, 7.2318e-01],
        [1.6900e-01, 3.0168e-01, 3.7606e-01, 4.9990e-04, 3.8866e-01, 8.9428e-01,
         2.0580e-01, 1.9254e-01, 1.5388e-01, 8.2669e-01]], requires_grad=True)

In [30]:
# Forward pass: one sigmoid activation per output neuron.
o = torch.sigmoid(torch.matmul(x, w.t()))
o

tensor([[0.6080, 0.7802]], grad_fn=<SigmoidBackward>)

In [31]:
# Mean squared error against a target of ones for both outputs.
loss = F.mse_loss(input=o, target=torch.ones(1, 2))
loss

tensor(0.1010, grad_fn=<MseLossBackward>)

In [32]:
# Back-propagate the loss; the gradient is stored on w.grad.
loss.backward()

In [33]:
# Gradient of the loss with respect to the 2 x 10 weight matrix, filled in by loss.backward().
w.grad

tensor([[-0.0115,  0.1517, -0.2185,  0.2193, -0.0044, -0.0434, -0.0760, -0.0098,
         -0.1394, -0.0006],
        [-0.0047,  0.0612, -0.0882,  0.0885, -0.0018, -0.0175, -0.0307, -0.0040,
         -0.0563, -0.0003]])

### 链式法则导数推导

In [35]:
# Scalar input for the chain-rule example.
x = torch.tensor(1.0)
x

tensor(1.)

In [36]:
# Parameters of two stacked scalar layers: the weights are tracked by autograd,
# while the biases are created without gradient tracking.
w1, w2 = (torch.tensor(2.0, requires_grad=True) for _ in range(2))
b1, b2 = (torch.tensor(1.0) for _ in range(2))

In [38]:
# First layer: y1 = x*w1 + b1
y1 = torch.add(x * w1, b1)
# Second layer: y2 = y1*w2 + b2
y2 = torch.add(y1 * w2, b2)

In [46]:
# torch.autograd.grad returns a one-element tuple here; unpack it directly.
# retain_graph=True keeps the graph alive across the successive calls.
(dy2_dy1,) = torch.autograd.grad(outputs=y2, inputs=y1, retain_graph=True)
(dy1_dw1,) = torch.autograd.grad(outputs=y1, inputs=w1, retain_graph=True)
(dy2_dw1,) = torch.autograd.grad(outputs=y2, inputs=w1, retain_graph=True)

In [47]:
# Chain rule: the product of the two partial derivatives, dy2/dy1 * dy1/dw1.
dy2_dy1 * dy1_dw1

tensor(2.)

In [48]:
# dy2/dw1 as computed directly by autograd, for comparison with the product.
dy2_dw1

tensor(2.)

#### 可以看出，按链式法则相乘得到的结果（dy2/dy1 · dy1/dw1）与 autograd 直接求得的 dy2/dw1 是一样的
