In [1]:
import torch
print(torch.__version__)

ModuleNotFoundError: No module named 'torch'

In [2]:
x = torch.randn(4,1)

In [3]:
print(x)

tensor([[ 1.1229],
        [ 0.1073],
        [-1.6773],
        [-0.5879]])


In [4]:
print(x.requires_grad)

False


### 自动求导机制

In [41]:
x = torch.randn(4,1, requires_grad=True)
y = torch.randn(4,1, requires_grad=True)
W = torch.randn(4,4)
print(x)
print(y)
print(W)

tensor([[-0.4584],
        [-0.3360],
        [-1.5700],
        [ 1.2315]], requires_grad=True)
tensor([[ 1.3946],
        [ 1.1711],
        [ 0.4335],
        [-1.7343]], requires_grad=True)
tensor([[-0.2596,  0.1183,  0.2440,  1.1646],
        [ 0.2886,  0.3866, -0.2011, -0.1179],
        [ 0.1922, -0.7722, -1.9003,  0.1307],
        [-0.7043,  0.3147,  0.1574,  0.3854]])


In [42]:
torch.trace(W.mm(y).t().mm(x))

tensor(1.8064, grad_fn=<TraceBackward0>)

In [43]:
x.mm(W.mm(y).t())

tensor([[ 0.9798, -0.4458,  0.7732,  0.5564],
        [ 0.7182, -0.3268,  0.5667,  0.4078],
        [ 3.3558, -1.5269,  2.6481,  1.9056],
        [-2.6323,  1.1977, -2.0771, -1.4947]], grad_fn=<MmBackward0>)

In [None]:
torch.trace(x.mm(W.mm(y).t()))

## $z = x^T W y $

In [6]:
z = torch.mm(torch.mm(torch.t(x), W),y)
print(z)

tensor([[-0.3186]], grad_fn=<MmBackward>)


In [7]:
z = x.t().mm(W).mm(y)
print(z)

tensor([[-0.3186]], grad_fn=<MmBackward>)


In [8]:
print(x.grad)

None


In [9]:
print(y.grad)

None


In [10]:
z.backward()

In [11]:
print(x.grad)

tensor([[-0.8995],
        [-1.0417],
        [-0.3020],
        [ 0.4710]])


In [12]:
print(W.mm(y))

tensor([[-0.8995],
        [-1.0417],
        [-0.3020],
        [ 0.4710]], grad_fn=<MmBackward>)


In [13]:
print(y.grad)

tensor([[ 1.3102],
        [-0.3058],
        [ 1.7528],
        [ 0.0732]])


In [14]:
print(W.t().mm(x))

tensor([[ 1.3102],
        [-0.3058],
        [ 1.7528],
        [ 0.0732]], grad_fn=<MmBackward>)


In [15]:
x.grad.zero_()

tensor([[0.],
        [0.],
        [0.],
        [0.]])

In [16]:
y.grad.zero_()

tensor([[0.],
        [0.],
        [0.],
        [0.]])

In [17]:
print(x.grad)

tensor([[0.],
        [0.],
        [0.],
        [0.]])


### 默认情况下，定义的tensor属性requires_grad为false

In [19]:
x = torch.randn(4,1, requires_grad=True)
print(x)
y = torch.mm(torch.t(x),x)
print(y)
y.backward()

tensor([[ 1.2129],
        [-0.6779],
        [ 1.8371],
        [-0.1568]], requires_grad=True)
tensor([[5.3301]], grad_fn=<MmBackward>)


In [20]:
print(x.grad)

tensor([[ 2.4259],
        [-1.3558],
        [ 3.6741],
        [-0.3135]])


In [21]:
print(y)

print(x.grad)

print(2*x)

tensor([[5.3301]], grad_fn=<MmBackward>)
tensor([[ 2.4259],
        [-1.3558],
        [ 3.6741],
        [-0.3135]])
tensor([[ 2.4259],
        [-1.3558],
        [ 3.6741],
        [-0.3135]], grad_fn=<MulBackward0>)


# 给定数据

In [23]:
import torch
torch.manual_seed(0)

x = torch.randn(10,4)
W = torch.randn(4,4)
y = torch.randn(10,4, requires_grad=True)


print(x)
print(y)
print(W)

tensor([[-1.1258, -1.1524, -0.2506, -0.4339],
        [ 0.8487,  0.6920, -0.3160, -2.1152],
        [ 0.3223, -1.2633,  0.3500,  0.3081],
        [ 0.1198,  1.2377,  1.1168, -0.2473],
        [-1.3527, -1.6959,  0.5667,  0.7935],
        [ 0.5988, -1.5551, -0.3414,  1.8530],
        [-0.2159, -0.7425,  0.5627,  0.2596],
        [-0.1740, -0.6787,  0.9383,  0.4889],
        [ 1.2032,  0.0845, -1.2001, -0.0048],
        [-0.5181, -0.3067, -1.5810,  1.7066]])
tensor([[ 1.5091,  2.0820,  1.7067,  2.3804],
        [-1.1256, -0.3170, -1.0925, -0.0852],
        [ 0.3276, -0.7607, -1.5991,  0.0185],
        [-0.7504,  0.1854,  0.6211,  0.6382],
        [-0.0033, -0.5344,  1.1687,  0.3945],
        [ 1.9415,  0.7915, -0.0203, -0.4372],
        [-0.2188, -2.4351, -0.0729, -0.0340],
        [ 0.9625,  0.3492, -0.9215, -0.0562],
        [-0.6227, -0.4637,  1.9218, -0.4025],
        [ 0.1239,  1.1648,  0.9234,  1.3873]], requires_grad=True)
tensor([[ 0.2055, -0.4503, -0.5731, -0.5554],
        [ 0.

### 目标函数 $f = ||max(XW,0)-Y||^2_F $

### $f = ||\hat{Y}-Y||^2_F $; $\hat{Y} = max(Z,0)$; $Z = XW$

In [None]:
# f = (torch.clamp(x.mm(W), 0) - y).pow(2).sum()    #clamp 将范围统一到（min,max）之间

In [24]:
z = x.mm(W)

In [25]:
y_hat = torch.clamp(z, 0)

In [26]:
f = (y_hat - y).pow(2).sum()

In [27]:
print(f)

tensor(99.9048, grad_fn=<SumBackward0>)


In [28]:
print(f)

tensor(99.9048, grad_fn=<SumBackward0>)


In [29]:
# W.grad.zero_()
print(W.grad)


None


In [30]:
f.backward()

## 直接求导

In [31]:
print(W.grad)
print(y.grad)

None
tensor([[ 2.8885e+00,  4.1639e+00,  3.4134e+00,  3.0501e+00],
        [-1.0589e+01, -2.7045e+00, -2.1849e+00, -1.7039e-01],
        [ 6.5523e-01, -1.5214e+00, -3.1982e+00, -1.5687e+00],
        [-1.5009e+00, -3.8551e+00,  4.9843e-01,  1.2764e+00],
        [-6.6077e-03, -1.0689e+00,  1.8791e+00, -4.2604e+00],
        [ 3.8829e+00,  1.5830e+00, -4.0504e-02, -7.2968e+00],
        [-4.3767e-01, -4.8701e+00, -1.4583e-01, -1.3166e+00],
        [ 1.9250e+00,  6.9834e-01, -1.8429e+00, -1.4750e+00],
        [-5.0359e+00, -9.2744e-01,  3.8436e+00, -8.0509e-01],
        [ 2.4780e-01,  2.3296e+00, -1.7491e-01, -4.2519e+00]])


## 公式推导求导

In [32]:
y_hat_grad = -2 * (y_hat - y)
y_grad_gt = y.grad
# print(y_hat_grad)
print(torch.equal(y_hat_grad, y_grad_gt))
# print(y_hat_grad)

True


In [33]:
z_grad = 

# print(z_grad)

In [34]:
W_grad = 

tensor([[ 18.2980,   2.7573,   2.3914,  -0.1974],
        [ 11.0817,   6.6428,   2.5163, -20.3225],
        [ -8.6662,   3.4506,  -1.8979,  -3.3608],
        [-21.1681,  -6.6739,  -1.0693,  27.0278]], grad_fn=<MmBackward>)


### PyTorch Tensor 与 Numpy 转换

In [None]:
import numpy as np
import torch

a = np.random.randn(3,4)
print(a)
print(type(a))

In [None]:
a_tensor = torch.from_numpy(a)
print(a_tensor)
print(type(a_tensor))

In [None]:
b = a_tensor.numpy()
print(b)
print(type(b))

In [None]:
d_tensor = torch.randn(3, 4, requires_grad=False)
d_numpy = d_tensor.numpy()
print(d_numpy)
print(type(d_numpy))

In [None]:
d_tensor = torch.randn(3, 4, requires_grad=True)
d_numpy = d_tensor.numpy()
print(d_numpy)
print(type(d_numpy))

In [None]:
d_tensor = torch.randn(3, 4, requires_grad=True)
d_numpy = d_tensor.data.numpy()
print(d_numpy)
print(d_tensor.data)
print(type(d_numpy))