# 预备知识

In [23]:
import torch

In [4]:
# One-dimensional tensor holding the twelve integers 0 through 11.
x = torch.arange(0, 12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [5]:
# Shape of x: its length along each axis (a single axis of 12 elements here).
x.shape

torch.Size([12])

In [9]:
x.reshape(3, -1) # -1 lets reshape work out that dimension from the element count

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [8]:
x.numel() # numel = num_elements: the total number of elements in the tensor

12

In [10]:
# Build a tensor directly from a nested Python list; each inner list becomes one row.
torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

In [11]:
# Two 3x4 float32 matrices, concatenated first along rows (dim=0, giving 6x4)
# and then along columns (dim=1, giving 3x8).
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
Y = torch.tensor([
    [2.0, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])
torch.cat([X, Y], dim=0), torch.cat([X, Y], dim=1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [12]:
# Assign through a slice so the result is written into the existing Z rather
# than rebinding Z to a new tensor, avoiding extra memory overhead; NumPy works
# on the same principle.
# X += Y also avoids the extra memory overhead.
# The two printed ids are identical, showing Z is the same object before and after.
Z = torch.zeros_like(Y) 
print('id(Z):', id(Z)) 
Z[:] = X + Y 
print('id(Z):', id(Z))

id(Z): 140517752776160
id(Z): 140517752776160


In [18]:
import numpy as np

# Same identity check with NumPy: Z is rebuilt as an array of zeros shaped like Y.
Z = np.zeros_like(Y) 
print('id(Z):', id(Z)) 
# Slice-assignment variant, left disabled: Z[:] = X + Y
# Augmented assignment; the two printed ids match, so Z is still the same object.
# NOTE(review): Z is all zeros here, so Z += Z leaves its values at zero.
Z += Z
print('id(Z):', id(Z))

id(Z): 140517752752816
id(Z): 140517752752816


In [22]:
# Adding two zero-dimensional arrays: compare the type of the sum with the
# type of one of the operands.
three, four = np.array(3), np.array(4)
a = three + four
type(a), type(three)

(numpy.int64, numpy.ndarray)

In [27]:
# NOTE(review): a full slice of an ndarray is presumably a view, not a copy — confirm the intended contrast.
np.arange(3)[:] # tensors have a clone() method that copies the memory directly

array([0, 1, 2])

In [30]:
# Look up the matrix-vector (mv) and matrix-matrix (mm) product functions without calling them.
torch.mv, torch.mm

(<function torch._VariableFunctionsClass.mv>,
 <function torch._VariableFunctionsClass.mm>)

In [41]:
# Automatic differentiation
# requires_grad=True marks x so that gradients can later be computed with respect to it.
x = torch.arange(4.0, requires_grad=True)
x

tensor([0., 1., 2., 3.], requires_grad=True)

In [42]:
y = 2 * torch.dot(x, x) # builds the computation graph and attaches it to y
y

tensor(28., grad_fn=<MulBackward0>)

In [43]:
y.backward() # backpropagate to compute the gradient of y with respect to x automatically
x.grad

tensor([ 0.,  4.,  8., 12.])

In [44]:
4 * x # check the result: the gradient of 2 * dot(x, x) is 4x, which should match x.grad

tensor([ 0.,  4.,  8., 12.], grad_fn=<MulBackward0>)

In [45]:
# Clear the stored gradient, then compute the gradient of a different function.
x.grad.zero_()
y = x.sum()
y.backward()
x.grad

tensor([1., 1., 1., 1.])

In [46]:
# Calling backward on a non-scalar requires a gradient argument, which specifies
# the gradient of the differentiated function with respect to self.
# Here we only want the sum of the partial derivatives, so passing a gradient of ones is appropriate.
x.grad.zero_() 
y = x * x 
# Equivalent to y.backward(torch.ones(len(x)))
y.sum().backward() 
x.grad

tensor([0., 2., 4., 6.])

In [None]:
# A variable can be detached so that later computations ignore its gradient (it is treated as a constant).

In [47]:
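# NOTE(review): this cell has no code, yet a tensor output is recorded below it; restore the expression that produced it or clear the stale output.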

tensor(0.2314, requires_grad=True)

In [48]:
# Notebook tip: the ? suffix
list? # show the help for an object

Init signature: list(iterable=(), /)
Docstring:     
Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list.
The argument must be an iterable if specified.
Type:           type
Subclasses:     _HashedSeq, StackSummary, ParamSpec, _ConcatenateGenericAlias, _Threads, ConvertingList, DeferredConfigList, _ymd, SList, _ImmutableLineList, ...

In [50]:
list?? # show the source code (for the built-in list, the recorded output is the same summary that ? gives)

Init signature: list(iterable=(), /)
Docstring:     
Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list.
The argument must be an iterable if specified.
Type:           type
Subclasses:     _HashedSeq, StackSummary, ParamSpec, _ConcatenateGenericAlias, _Threads, ConvertingList, DeferredConfigList, _ymd, SList, _ImmutableLineList, ...