In [1]:
import torch
# Record the installed PyTorch version so the outputs below can be tied to it.
print(torch.__version__)

1.7.0


In [2]:
# Sample a 4x1 column vector; requires_grad is left at its default here.
x = torch.randn(4,1)

In [3]:
# Show the sampled values.
print(x)

tensor([[-0.3125],
        [-0.8072],
        [-1.1902],
        [-0.8574]])


In [4]:
# A tensor created without requires_grad=True is not tracked by autograd: this prints False.
print(x.requires_grad)

False


### 自动求导机制

In [5]:
# Two 4x1 vectors tracked by autograd, plus a 4x4 matrix that is treated as a constant.
x = torch.randn(4, 1, requires_grad=True)
y = torch.randn(4, 1, requires_grad=True)
W = torch.randn(4, 4)  # no requires_grad: gradients with respect to W are not needed yet

# Display the three tensors in the order x, y, W.
for tensor in (x, y, W):
    print(tensor)

tensor([[-0.3547],
        [-0.1142],
        [-2.5544],
        [ 0.0165]], requires_grad=True)
tensor([[-0.0953],
        [ 0.4071],
        [-0.6318],
        [ 1.0965]], requires_grad=True)
tensor([[ 1.7454, -0.5501,  1.0740, -0.3665],
        [ 0.2229, -0.6237, -0.8042, -1.5603],
        [ 1.1954,  0.5553, -0.1137, -2.4699],
        [-0.6802, -0.2414,  1.3333, -1.3997]])


In [6]:
# (W y)^T x is a 1x1 matrix, so its trace is simply that single entry, i.e. x^T W y.
torch.trace(W.mm(y).t().mm(x))

tensor(7.0989, grad_fn=<TraceBackward>)

In [7]:
# Outer product x (W y)^T: a 4x4 matrix built from the same two factors in swapped order.
x.mm(W.mm(y).t())

tensor([[ 0.5218,  0.5242,  0.8955,  0.8551],
        [ 0.1679,  0.1687,  0.2882,  0.2753],
        [ 3.7571,  3.7750,  6.4482,  6.1578],
        [-0.0242, -0.0244, -0.0416, -0.0397]], grad_fn=<MmBackward>)

In [8]:
# trace(A B) == trace(B A): the trace of the 4x4 outer product equals the 1x1 inner product.
torch.trace(x.mm(W.mm(y).t()))

tensor(7.0989, grad_fn=<TraceBackward>)

## $z = x^T W y$

In [9]:
# z = x^T W y written with the functional API; the result is a 1x1 tensor.
z = torch.mm(torch.mm(torch.t(x), W),y)
print(z)

tensor([[7.0989]], grad_fn=<MmBackward>)


In [10]:
# The same product z = x^T W y written with chained tensor methods.
z = x.t().mm(W).mm(y)
print(z)

tensor([[7.0989]], grad_fn=<MmBackward>)


In [11]:
# No backward pass has run yet, so x.grad is still None.
print(x.grad)

None


In [12]:
# Likewise y.grad is None before backward().
print(y.grad)

None


In [13]:
# z holds a single element, so backward() can be called without an explicit gradient argument.
z.backward()

In [14]:
# x.grad now holds dz/dx, which for z = x^T W y is W y.
print(x.grad)

tensor([[-1.4708],
        [-1.4778],
        [-2.5243],
        [-2.4106]])


In [15]:
# Analytic dz/dx = W y, printed for comparison with x.grad.
print(W.mm(y))

tensor([[-1.4708],
        [-1.4778],
        [-2.5243],
        [-2.4106]], grad_fn=<MmBackward>)


In [16]:
# y.grad holds dz/dy, which for z = x^T W y is W^T x.
print(y.grad)

tensor([[-3.7095],
        [-1.1562],
        [ 0.0233],
        [ 6.5944]])


In [17]:
# Analytic dz/dy = W^T x, printed for comparison with y.grad.
print(W.t().mm(x))

tensor([[-3.7095],
        [-1.1562],
        [ 0.0233],
        [ 6.5944]], grad_fn=<MmBackward>)


In [18]:
# Reset x.grad in place (trailing underscore = in-place op); backward() adds to .grad instead of overwriting it.
x.grad.zero_()

tensor([[0.],
        [0.],
        [0.],
        [0.]])

In [19]:
# Reset y.grad in place as well.
y.grad.zero_()

tensor([[0.],
        [0.],
        [0.],
        [0.]])

In [20]:
# Confirm that the gradient buffer is now all zeros.
print(x.grad)

tensor([[0.],
        [0.],
        [0.],
        [0.]])


### 默认情况下，tensor 的 requires_grad 属性为 False；需要求导时必须显式设置 requires_grad=True

In [21]:
# Fresh 4x1 vector that autograd tracks.
x = torch.randn(4, 1, requires_grad=True)
print(x)

# y = x^T x, the squared Euclidean norm of x, as a 1x1 tensor.
y = x.t().mm(x)
print(y)

# Populate x.grad with dy/dx.
y.backward()

tensor([[-0.9573],
        [ 0.5544],
        [-0.8530],
        [ 1.2618]], requires_grad=True)
tensor([[3.5437]], grad_fn=<MmBackward>)


In [22]:
# Gradient of y = x^T x with respect to x, as computed by autograd.
print(x.grad)

tensor([[-1.9146],
        [ 1.1089],
        [-1.7061],
        [ 2.5237]])


In [23]:
# For y = x^T x the gradient is dy/dx = 2x, so the last two printed tensors should match.
print(y)
print(x.grad)
print(2*x)

tensor([[3.5437]], grad_fn=<MmBackward>)
tensor([[-1.9146],
        [ 1.1089],
        [-1.7061],
        [ 2.5237]])
tensor([[-1.9146],
        [ 1.1089],
        [-1.7061],
        [ 2.5237]], grad_fn=<MulBackward0>)


# 给定数据

In [24]:
import torch

# Fix the RNG seed so the tensors below (and every result derived from them) are reproducible.
torch.manual_seed(0)

# Inputs X (10x4), weights W (4x4) and targets Y (10x4); all three are tracked by autograd.
# The creation order x, W, y determines which random draws each tensor receives.
x = torch.randn(10, 4, requires_grad=True)
W = torch.randn(4, 4, requires_grad=True)
y = torch.randn(10, 4, requires_grad=True)

# Display in the order x, y, W.
for tensor in (x, y, W):
    print(tensor)

tensor([[-1.1258, -1.1524, -0.2506, -0.4339],
        [ 0.8487,  0.6920, -0.3160, -2.1152],
        [ 0.3223, -1.2633,  0.3500,  0.3081],
        [ 0.1198,  1.2377,  1.1168, -0.2473],
        [-1.3527, -1.6959,  0.5667,  0.7935],
        [ 0.5988, -1.5551, -0.3414,  1.8530],
        [-0.2159, -0.7425,  0.5627,  0.2596],
        [-0.1740, -0.6787,  0.9383,  0.4889],
        [ 1.2032,  0.0845, -1.2001, -0.0048],
        [-0.5181, -0.3067, -1.5810,  1.7066]], requires_grad=True)
tensor([[ 1.5091,  2.0820,  1.7067,  2.3804],
        [-1.1256, -0.3170, -1.0925, -0.0852],
        [ 0.3276, -0.7607, -1.5991,  0.0185],
        [-0.7504,  0.1854,  0.6211,  0.6382],
        [-0.0033, -0.5344,  1.1687,  0.3945],
        [ 1.9415,  0.7915, -0.0203, -0.4372],
        [-0.2188, -2.4351, -0.0729, -0.0340],
        [ 0.9625,  0.3492, -0.9215, -0.0562],
        [-0.6227, -0.4637,  1.9218, -0.4025],
        [ 0.1239,  1.1648,  0.9234,  1.3873]], requires_grad=True)
tensor([[ 0.2055, -0.4503, -0.5731, -0

# 目标函数 $f = \|\max(XW, 0) - Y\|_F^2$

# $f = \|\hat{Y} - Y\|_F^2$; $\hat{Y} = \max(Z, 0)$; $Z = XW$

In [25]:
# Equivalent one-line form of the objective, kept for reference only; the following cells
# build it step by step so that the intermediate tensors can retain their gradients:
# f = (torch.clamp(x.mm(W), 0) - y).pow(2).sum()

In [26]:
# Linear step Z = X W.
z = x.mm(W)
# z is an intermediate (non-leaf) tensor; retain_grad() asks autograd to keep its .grad after backward().
z.retain_grad()
print(z)

tensor([[ 0.0649, -1.2330, -0.1154,  0.8553],
        [ 4.1687,  1.0353, -1.0558, -3.5272],
        [-1.6094, -2.0869, -0.7125,  0.8028],
        [-0.3500,  2.1129,  0.3719, -1.6785],
        [-3.2240, -2.0529,  0.2291,  2.5247],
        [-3.1207, -3.0911, -0.2830,  3.2112],
        [-1.6198, -0.9920, -0.1762,  0.6243],
        [-2.4140, -0.8861, -0.0917,  0.6813],
        [ 1.8953, -0.6369, -0.5659, -0.1305],
        [-0.7464, -0.8685,  1.0108,  3.5132]], grad_fn=<MmBackward>)


In [27]:
# Clamp from below at 0: element-wise max(z, 0).
y_hat = torch.clamp(z, 0)
# Keep the gradient of this intermediate tensor as well.
y_hat.retain_grad()
print(y_hat)

tensor([[0.0649, 0.0000, 0.0000, 0.8553],
        [4.1687, 1.0353, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.8028],
        [0.0000, 2.1129, 0.3719, 0.0000],
        [0.0000, 0.0000, 0.2291, 2.5247],
        [0.0000, 0.0000, 0.0000, 3.2112],
        [0.0000, 0.0000, 0.0000, 0.6243],
        [0.0000, 0.0000, 0.0000, 0.6813],
        [1.8953, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.0108, 3.5132]], grad_fn=<ClampBackward>)


In [28]:
# Scalar loss: sum of squared element-wise differences, i.e. the squared Frobenius norm of (y_hat - y).
f = (y_hat - y).pow(2).sum()
print(f)

tensor(99.9048, grad_fn=<SumBackward0>)


In [29]:
# Disabled on purpose: only useful when re-running the backward pass, to clear a previously
# accumulated W.grad. Before the first backward() W.grad is None, so zero_() would fail.
# W.grad.zero_()
# print(W.grad)

In [30]:
# Backpropagate from the scalar loss f to populate the .grad fields inspected below.
f.backward()

## 直接求导

In [38]:
# Gradients produced by autograd. z and y_hat are intermediate tensors; their .grad is
# available here because retain_grad() was called on them before backward().
print(z.grad)
print(y_hat.grad)
print(W.grad)
print(x.grad)
print(y.grad)

tensor([[-2.8885, -0.0000, -0.0000, -3.0501],
        [10.5886,  2.7045,  0.0000,  0.0000],
        [-0.0000,  0.0000,  0.0000,  1.5687],
        [ 0.0000,  3.8551, -0.4984, -0.0000],
        [ 0.0000,  0.0000, -1.8791,  4.2604],
        [-0.0000, -0.0000,  0.0000,  7.2968],
        [ 0.0000,  0.0000,  0.0000,  1.3166],
        [-0.0000, -0.0000,  0.0000,  1.4750],
        [ 5.0359,  0.0000, -0.0000,  0.0000],
        [-0.0000, -0.0000,  0.1749,  4.2519]])
tensor([[-2.8885e+00, -4.1639e+00, -3.4134e+00, -3.0501e+00],
        [ 1.0589e+01,  2.7045e+00,  2.1849e+00,  1.7039e-01],
        [-6.5523e-01,  1.5214e+00,  3.1982e+00,  1.5687e+00],
        [ 1.5009e+00,  3.8551e+00, -4.9843e-01, -1.2764e+00],
        [ 6.6077e-03,  1.0689e+00, -1.8791e+00,  4.2604e+00],
        [-3.8829e+00, -1.5830e+00,  4.0504e-02,  7.2968e+00],
        [ 4.3767e-01,  4.8701e+00,  1.4583e-01,  1.3166e+00],
        [-1.9250e+00, -6.9834e-01,  1.8429e+00,  1.4750e+00],
        [ 5.0359e+00,  9.2744e-01, -3.8436e

## 公式推导求导

In [33]:
# Hand-derived gradient of f = sum((y_hat - y)^2) with respect to y_hat.
y_hat_grad = 2 * (y_hat - y)
print(y_hat_grad)

tensor([[-2.8885e+00, -4.1639e+00, -3.4134e+00, -3.0501e+00],
        [ 1.0589e+01,  2.7045e+00,  2.1849e+00,  1.7039e-01],
        [-6.5523e-01,  1.5214e+00,  3.1982e+00,  1.5687e+00],
        [ 1.5009e+00,  3.8551e+00, -4.9843e-01, -1.2764e+00],
        [ 6.6077e-03,  1.0689e+00, -1.8791e+00,  4.2604e+00],
        [-3.8829e+00, -1.5830e+00,  4.0504e-02,  7.2968e+00],
        [ 4.3767e-01,  4.8701e+00,  1.4583e-01,  1.3166e+00],
        [-1.9250e+00, -6.9834e-01,  1.8429e+00,  1.4750e+00],
        [ 5.0359e+00,  9.2744e-01, -3.8436e+00,  8.0509e-01],
        [-2.4780e-01, -2.3296e+00,  1.7491e-01,  4.2519e+00]],
       grad_fn=<MulBackward0>)


In [34]:
# Backward step through y_hat = clamp(z, 0): the upstream gradient passes through where
# z > 0 and is zeroed elsewhere.
# masked_fill returns a new tensor, so y_hat_grad keeps holding dL/dy_hat and this cell
# can be re-run safely; the original in-place assignment overwrote y_hat_grad and then
# aliased it as z_grad.
z_grad = y_hat_grad.masked_fill(z <= 0, 0)
print(z_grad)

tensor([[-2.8885,  0.0000,  0.0000, -3.0501],
        [10.5886,  2.7045,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  1.5687],
        [ 0.0000,  3.8551, -0.4984,  0.0000],
        [ 0.0000,  0.0000, -1.8791,  4.2604],
        [ 0.0000,  0.0000,  0.0000,  7.2968],
        [ 0.0000,  0.0000,  0.0000,  1.3166],
        [ 0.0000,  0.0000,  0.0000,  1.4750],
        [ 5.0359,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.1749,  4.2519]], grad_fn=<IndexPutBackward>)


In [45]:
# Hand-derived gradient for Z = X W: dL/dW = X^T (dL/dZ).
W_grad = x.t().mm(z_grad)
print(W_grad)
# Compare with autograd element-wise using a floating-point tolerance; exact `==` can
# report False for results that differ only by rounding.
print(torch.isclose(W.grad, W_grad))

tensor([[ 18.2980,   2.7573,   2.3914,  -0.1974],
        [ 11.0817,   6.6428,   2.5163, -20.3225],
        [ -8.6662,   3.4506,  -1.8979,  -3.3608],
        [-21.1681,  -6.6739,  -1.0693,  27.0278]], grad_fn=<MmBackward>)
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


In [46]:
# Hand-derived gradient for Z = X W: dL/dX = (dL/dZ) W^T.
x_grad = z_grad.mm(W.t())
print(x_grad)
# Compare with autograd element-wise using a floating-point tolerance; exact `==` can
# report False for results that differ only by rounding.
print(torch.isclose(x.grad, x_grad))

tensor([[  1.1002,   0.0860,   5.3377,   0.2788],
        [  0.9583,  10.4633, -13.5234, -16.3639],
        [ -0.8712,  -0.9272,  -0.7764,   2.0790],
        [ -1.4504,   5.6914,   0.7613,  -0.9693],
        [ -1.2892,  -3.4714,  -1.9788,   4.8091],
        [ -4.0523,  -4.3127,  -3.6114,   9.6703],
        [ -0.7312,  -0.7782,  -0.6516,   1.7449],
        [ -0.8191,  -0.8718,  -0.7300,   1.9547],
        [  1.0350,   2.9930,  -6.6743,  -7.5333],
        [ -2.4616,  -2.4243,  -2.1164,   5.7128]], grad_fn=<MmBackward>)
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


In [47]:
# Hand-derived gradient of f = sum((y_hat - y)^2) with respect to y: the negative of dL/dy_hat.
y_grad = -2 * (y_hat - y)
print(y_grad)
# Compare with autograd element-wise using a floating-point tolerance; exact `==` can
# report False for results that differ only by rounding.
print(torch.isclose(y.grad, y_grad))

tensor([[ 2.8885e+00,  4.1639e+00,  3.4134e+00,  3.0501e+00],
        [-1.0589e+01, -2.7045e+00, -2.1849e+00, -1.7039e-01],
        [ 6.5523e-01, -1.5214e+00, -3.1982e+00, -1.5687e+00],
        [-1.5009e+00, -3.8551e+00,  4.9843e-01,  1.2764e+00],
        [-6.6077e-03, -1.0689e+00,  1.8791e+00, -4.2604e+00],
        [ 3.8829e+00,  1.5830e+00, -4.0504e-02, -7.2968e+00],
        [-4.3767e-01, -4.8701e+00, -1.4583e-01, -1.3166e+00],
        [ 1.9250e+00,  6.9834e-01, -1.8429e+00, -1.4750e+00],
        [-5.0359e+00, -9.2744e-01,  3.8436e+00, -8.0509e-01],
        [ 2.4780e-01,  2.3296e+00, -1.7491e-01, -4.2519e+00]],
       grad_fn=<MulBackward0>)
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


### PyTorch Tensor 与 Numpy 转换

In [39]:
import numpy as np
import torch

# Draw a 3x4 NumPy array of random samples. No seed is set, so the values differ on every run.
a = np.random.randn(3,4)
print(a)
print(type(a))

[[ 1.82343957  0.36097133  0.1333064   0.92750436]
 [-0.37621722 -0.16657024  2.01840463 -2.18181294]
 [ 0.99416876  0.34965001  0.54580467  0.66424866]]
<class 'numpy.ndarray'>


In [40]:
# Wrap the NumPy array as a tensor. The array's float64 dtype carries over (see the printed
# dtype). torch.from_numpy is documented to share memory with the source array, not copy it.
a_tensor = torch.from_numpy(a)
print(a_tensor)
print(type(a_tensor))

tensor([[ 1.8234,  0.3610,  0.1333,  0.9275],
        [-0.3762, -0.1666,  2.0184, -2.1818],
        [ 0.9942,  0.3497,  0.5458,  0.6642]], dtype=torch.float64)
<class 'torch.Tensor'>


In [41]:
# Convert the tensor back into a NumPy array.
b = a_tensor.numpy()
print(b)
print(type(b))

[[ 1.82343957  0.36097133  0.1333064   0.92750436]
 [-0.37621722 -0.16657024  2.01840463 -2.18181294]
 [ 0.99416876  0.34965001  0.54580467  0.66424866]]
<class 'numpy.ndarray'>


In [42]:
# A tensor that does not require gradients can be converted with .numpy() directly.
d_tensor = torch.randn(3, 4, requires_grad=False)
d_numpy = d_tensor.numpy()
print(d_numpy)
print(type(d_numpy))

[[-0.4583958  -0.33598807 -1.5699861   1.2315004 ]
 [ 1.3946317   1.1711024   0.43351194 -1.7342502 ]
 [-1.3360486   0.88709605  0.76795745  0.057113  ]]
<class 'numpy.ndarray'>


In [43]:
# Calling .numpy() on a tensor that requires grad raises RuntimeError. The error is the point
# of this cell, so it is caught and printed instead of aborting a "Run All" execution.
d_tensor = torch.randn(3, 4, requires_grad=True)
try:
    d_numpy = d_tensor.numpy()
    print(d_numpy)
    print(type(d_numpy))
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [44]:
# Convert a tensor that requires grad by detaching it from the autograd graph first.
# .detach() is the recommended replacement for the legacy .data attribute: both return a
# tensor that does not require grad, but in-place changes made through .data are not seen
# by autograd's correctness checks.
d_tensor = torch.randn(3, 4, requires_grad=True)
d_numpy = d_tensor.detach().numpy()
print(d_numpy)
print(d_tensor.detach())
print(type(d_numpy))

[[-0.6461524  -0.15909262 -1.7786636   0.84765124]
 [ 0.24594283 -0.13116787 -0.17851807 -0.5958931 ]
 [ 0.27386975  0.5679263  -0.67310244 -1.2095324 ]]
tensor([[-0.6462, -0.1591, -1.7787,  0.8477],
        [ 0.2459, -0.1312, -0.1785, -0.5959],
        [ 0.2739,  0.5679, -0.6731, -1.2095]])
<class 'numpy.ndarray'>
