# 计算导数和极限

In [1]:
import torch as t
from torch.functional import Tensor


def f(x: Tensor):
    """Evaluate the quadratic 3*x**2 - 4*x element-wise on ``x``."""
    squared_term = 3 * x ** 2
    linear_term = 4 * x
    return squared_term - linear_term


# Forward-difference estimate of f'(x) at x = 1: print (f(x+h) - f(x)) / h
# for ten step sizes, shrinking h by a factor of ten after each estimate.
x = t.tensor(1., dtype=t.float64)
h = t.tensor(0.1, dtype=t.float64)
for i in range(10):
    delta = (f(x + h) - f(x)) / h
    print(f"{delta:.10f}")
    h = h * 0.1


2.3000000000
2.0300000000
2.0030000000
2.0003000000
2.0000300000
2.0000030001
2.0000002987
1.9999999878
2.0000001655
2.0000001655


# 计算梯度

In [2]:
import torch as t
# Build the vector first and print it before gradient tracking is enabled.
x = t.arange(4.0)
print(x)
# Enable tracking in place; no backward pass has run yet, so .grad is None.
x.requires_grad_(True)
print(x.grad)
# y = 2 * <x, x>; as the last expression of the cell it is displayed.
y = 2 * x.dot(x)
y


tensor([0., 1., 2., 3.])
None


tensor(28., grad_fn=<MulBackward0>)

In [3]:
# Back-propagate from the scalar y; autograd stores dy/dx in x.grad.
y.backward()
# Last expression of the cell, so the gradient is displayed.
x.grad

tensor([ 0.,  4.,  8., 12.])

In [4]:
# y = 2 * dot(x, x), so dy/dx should equal 4 * x element by element.
x.grad.eq(4 * x)

tensor([True, True, True, True])

# 求一个新的函数的梯度

In [5]:
# backward() adds into x.grad, so clear the previous gradient first.
x.grad.zero_()
y = t.sum(x)
y.backward()
# d(sum(x))/dx is 1 for every element.
x.grad

tensor([1., 1., 1., 1.])

In [6]:
print(x)
# Reset the accumulated gradient before differentiating a new function.
x.grad.zero_()
# Only x[0] and x[1] take part, so the other entries keep a zero gradient.
y = x[0] + x[1] * 2
y.backward()
x.grad

tensor([0., 1., 2., 3.], requires_grad=True)


tensor([1., 2., 0., 0.])

# 非标量变量的反向传播
- y不是标量的时候，y关于x的导数是一个矩阵。

In [14]:
x.grad.zero_()
# Element-wise square: at this point y is a vector, not a scalar.
y = x.mul(x)
print(y)
# Reduce to a scalar so backward() needs no explicit gradient argument.
# Alternative without the sum: call backward(t.ones(len(x))) on the vector y.
y = y.sum()
y.backward()
print(y)
x.grad

tensor([0., 1., 4., 9.], grad_fn=<MulBackward0>)
tensor(14., grad_fn=<SumBackward0>)


tensor([0., 2., 4., 6.])

# 分离计算-Detach

In [17]:
x.grad.zero_()
y = x * x
# detach() gives u the same values as y but cuts it out of the graph,
# so autograd treats u as a constant when differentiating z = u * x.
u = y.detach()
z = u * x
z.sum().backward()
# With u constant, d(sum(u * x))/dx is u itself. A bare `x.grad == u` in the
# middle of a cell is evaluated and thrown away, so assert the check instead.
assert (x.grad == u).all()

# y is still attached to x, so its gradient can be taken as well;
# after the reset this yields d(sum(x * x))/dx = 2 * x.
x.grad.zero_()
y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])

# Python 控制流的梯度计算

In [26]:
def f2(a: Tensor):
    """Piecewise-linear function of ``a`` built with Python control flow.

    ``a`` is doubled until the norm of the result reaches 1000; the result is
    returned as is when its sum is positive and scaled by 100 otherwise. Which
    branch runs and how many times the loop iterates depend on the value of
    ``a``, so the computation differs from call to call.

    An input whose norm is zero can never reach the threshold by doubling; the
    loop is skipped for it so the call terminates (returning ``100 * 2 * a``).
    """
    b = a * 2
    # The lower bound guards against a zero norm, for which doubling would
    # otherwise repeat forever. In a chained comparison b.norm() is evaluated
    # only once per check.
    while 0 < b.norm() < 1000:
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

a = t.randn(size=(), requires_grad=True)
d = f2(a)
d.backward()
print(a, d)
# f2 is piecewise linear in a (d = k * a for some constant k), so the gradient
# equals d / a. The quotient goes through floating-point rounding, so compare
# with a tolerance instead of exact equality.
t.isclose(a.grad, d / a)

tensor(2.2043, requires_grad=True) tensor(1128.6014, grad_fn=<MulBackward0>)


tensor(True)

# 关于MSE（均方误差）函数梯度的计算

In [1]:
import torch as t
import torch.nn as nn
a = t.tensor(([1], [2]), dtype=t.float32, requires_grad=True)
b = t.tensor(([3], [4]), dtype=t.float32)
loss_func = nn.MSELoss()
loss = loss_func(a, b)
loss.sum().backward()  # loss is already a scalar, so this equals loss.backward()
# With a = (x1, x2) and b = (3, 4), the loss is the mean squared error:
#   loss = [(x1 - 3)^2 + (x2 - 4)^2] / 2
# so the gradient with respect to a is
#   d loss / d a = [x1 - 3, x2 - 4]
# and at x1 = 1, x2 = 2 it is [-2, -2].
print("a.grad is :{}".format(a.grad))
# Swap the arguments: |b - a|^2 / 2 has the same gradient with respect to a.
a.grad.zero_()
loss = loss_func(b, a)
loss.backward()
print("a.grad is :{}".format(a.grad))
# .mean() of the scalar loss leaves it unchanged, so the gradient is the same again.
a.grad.zero_()
loss = loss_func(b, a)
loss.mean().backward()
print("a.grad is :{}".format(a.grad))


a.grad is :{} tensor([[-2.],
        [-2.]])
a.grad is :{} tensor([[-2.],
        [-2.]])
a.grad is :{} tensor([[-2.],
        [-2.]])
