In [141]:
import numpy as np
import torch

# Show the installed PyTorch version; the output recorded below this cell is 1.3.1.
print(torch.__version__)

1.3.1


注意点 saving memory
- `torch.tensor()` always copies data. If you already have a Tensor and only want to change its `requires_grad` flag, use `requires_grad_()` or `detach()` to avoid a copy. If you have a NumPy array and want to avoid a copy, use `torch.as_tensor()`.

---

```
# https://pytorch.org/docs/stable/tensors.html?highlight=view#
cuda0 = torch.device('cuda:0')
torch.ones([2, 4], dtype=torch.float64, device=cuda0)
```

### arange, shape, size

#### numpy

In [142]:
# NumPy: build the integers 0..11, then show the array, its shape tuple and its element count.
x = np.arange(0, 12)
(x, x.shape, x.size)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), (12,), 12)

In [150]:
# Reshape into 3 rows; -1 asks for the column count to be inferred from the element count.
x_reshape = x.reshape((3, -1))
(x_reshape, x_reshape.shape, x_reshape.size)

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]), (3, 4), 12)

#### pytorch

In [151]:
# PyTorch: Tensor.size() is a method that returns the shape (a torch.Size),
# unlike the NumPy `.size` attribute, which is the element count.
x = torch.arange(0, 12)
(x, x.shape, x.size())

(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]),
 torch.Size([12]),
 torch.Size([12]))

In [153]:
# Reshape to 3 rows (columns inferred); nelement() gives the total number of elements.
x_reshape = x.reshape((3, -1))
(x_reshape, x_reshape.shape, x_reshape.size(), x.nelement())

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]), torch.Size([3, 4]), torch.Size([3, 4]), 12)

### 初始化: empty, zeros, ones

#### numpy

In [154]:
# Allocate by shape: empty() shows arbitrary leftover values, zeros()/ones() are filled.
# Related constructors: np.empty_like, np.zeros_like, np.ones_like.
(
    np.empty(shape=(4, 6)),
    np.zeros(shape=(2, 3)),
    np.ones(shape=(2, 3)),
)

(array([[8.39911598e-323, 0.00000000e+000, 0.00000000e+000,
         4.82337433e+228, 6.14415221e-144, 1.16097020e-028],
        [2.57706778e-057, 9.71810387e-067, 7.71446427e-043,
         3.97062373e+246, 1.16318408e-028, 1.51981376e-051],
        [1.22613517e-046, 5.99059159e-066, 1.57401788e+161,
         5.04621361e+180, 8.37170571e-144, 5.01163195e+217],
        [5.49419094e-143, 1.50008929e+248, 3.75598257e+199,
         8.01238364e+165, 3.26756682e-032, 2.47197850e-056]]),
 array([[0., 0., 0.],
        [0., 0., 0.]]),
 array([[1., 1., 1.],
        [1., 1., 1.]]))

In [155]:
# Draw a 2x3 sample from a normal distribution with loc (mean) 0 and scale (std) 1.
np.random.normal(loc=0, scale=1, size=(2, 3))

array([[-0.74800845, -2.66295144,  0.14097163],
       [-0.21949403, -0.35966057, -0.7769174 ]])

#### pytorch

In [156]:
# Shape-based constructors, mirroring the NumPy cell; the *_like variants apply here as well.
(
    torch.empty(2, 3),
    torch.zeros(2, 3),
    torch.ones(2, 3),
)

(tensor([[ 0.0000e+00, -4.6566e-10,  0.0000e+00],
         [-4.6566e-10,  1.4013e-44, -0.0000e+00]]), tensor([[0., 0., 0.],
         [0., 0., 0.]]), tensor([[1., 1., 1.],
         [1., 1., 1.]]))

In [157]:
# Standard normal samples (mean=0, variance=1) arranged as a 2x3 tensor.
torch.randn((2, 3))

tensor([[ 0.1203,  0.0653,  1.1552],
        [ 0.0687, -1.2439, -1.6453]])

In [158]:
# Normal samples with mean 2 and standard deviation 3, shaped (1, 4).
torch.normal(mean=2, std=3, size=(1, 4))

tensor([[1.1110, 0.0872, 0.2613, 5.9212]])

#### pytorch <-> numpy

In [159]:
# Tensor -> ndarray; the recorded output shows the float32 dtype carrying over.
torch.tensor([1.0, 2.0]).numpy()

array([1., 2.], dtype=float32)

In [160]:
# ndarray -> Tensor; the recorded output shows the array's float64 dtype being kept.
# Alternative spelling: torch.from_numpy(np.array([1., 2.]))
torch.tensor(np.array([1.0, 2.0]))

tensor([1., 2.], dtype=torch.float64)

### Operation: basic, broadcasting, indexing, slicing

#### numpy

In [161]:
# Element-wise arithmetic on two equal-length integer vectors.
x = np.array([1, 2, 4, 8])
y = np.array([2] * 4)
(
    x + y,
    x - y,
    x * y,
    x / y,   # division produces floats here (see output)
    x ** y,  # ** is element-wise exponentiation
)

(array([ 3,  4,  6, 10]),
 array([-1,  0,  2,  6]),
 array([ 2,  4,  8, 16]),
 array([0.5, 1. , 2. , 4. ]),
 array([ 1,  4, 16, 64]))

In [162]:
# Broadcasting: a (1, 4) row added to a (4, 1) column yields a (4, 4) result.
x = x.reshape((1, 4))
y = y.reshape((4, 1))
x + y

array([[ 3,  4,  6, 10],
       [ 3,  4,  6, 10],
       [ 3,  4,  6, 10],
       [ 3,  4,  6, 10]])

In [163]:
# Indexing and slicing on a 3x4 matrix; each view is printed followed by a blank line.
x = np.arange(12).reshape(3, 4)
for view in (
    x,          # the whole matrix
    x[-1],      # last row
    x[1:3],     # rows 1 and 2
    x[:, 1:3],  # columns 1 and 2 of every row
):
    print(view, end='\n\n')

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[ 8  9 10 11]

[[ 4  5  6  7]
 [ 8  9 10 11]]

[[ 1  2]
 [ 5  6]
 [ 9 10]]



#### pytorch

In [164]:
# The same element-wise arithmetic with integer tensors.
x = torch.tensor([1, 2, 4, 8])
y = torch.tensor([2] * 4)
(
    x + y,
    x - y,
    x * y,
    # NOTE(review): the recorded output (torch 1.3.1) shows integer results for `/`,
    # unlike the float result in the NumPy cell; confirm the behaviour on other releases.
    x / y,
    x ** y,
)

(tensor([ 3,  4,  6, 10]),
 tensor([-1,  0,  2,  6]),
 tensor([ 2,  4,  8, 16]),
 tensor([0, 1, 2, 4]),
 tensor([ 1,  4, 16, 64]))

In [165]:
# Three spellings that square every element.
(x ** 2, x * x, x.pow(2))

(tensor([ 1,  4, 16, 64]), tensor([ 1,  4, 16, 64]), tensor([ 1,  4, 16, 64]))

In [166]:
# Broadcasting: a (4, 1) column added to a (1, 4) row yields a (4, 4) result.
x = x.reshape((4, 1))
y = y.reshape((1, 4))
x + y

tensor([[ 3,  3,  3,  3],
        [ 4,  4,  4,  4],
        [ 6,  6,  6,  6],
        [10, 10, 10, 10]])

In [167]:
# Indexing and slicing on a 3x4 tensor; each view is printed followed by a blank line.
x = torch.arange(12).reshape(3, 4)
for view in (
    x,          # the whole tensor
    x[-1],      # last row
    x[1:3],     # rows 1 and 2
    x[:, 1:3],  # columns 1 and 2 of every row
):
    print(view, end='\n\n')

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

tensor([ 8,  9, 10, 11])

tensor([[ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

tensor([[ 1,  2],
        [ 5,  6],
        [ 9, 10]])



### Operation: Hadamard product, reduction (sum, mean, cumsum)

#### numpy

In [177]:
# Hadamard product: matching entries are multiplied (see the squared values in the output).
A = np.arange(12).reshape(3, 4)
print(A, end='\n\n')
print(np.multiply(A, A))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[[  0   1   4   9]
 [ 16  25  36  49]
 [ 64  81 100 121]]


In [178]:
# Reductions on A: grand total, per-column sums (axis=0), per-row sums (axis=1),
# and per-row sums with the reduced axis kept as size 1 (keepdims=True).
print(A.sum(), end='\n\n')
print(A.sum(axis=0), end='\n\n')
print(A.sum(axis=1), end='\n\n')
print(A.sum(axis=1, keepdims=True), end='\n\n')
# Summing over axis 0 twice collapses both dimensions, so this matches A.sum().
# It must use this section's matrix `A`; no lowercase `a` exists yet at this point
# when the notebook is run top to bottom.
print(A.sum(axis=0).sum(axis=0), end='\n\n')

66

[12 15 18 21]

[ 6 22 38]

[[ 6]
 [22]
 [38]]

66



In [180]:
# Each mean is printed next to the matching sum divided by the number of entries reduced over.
print(A.mean(), A.sum() / A.size)
print(A.mean(axis=0), A.sum(axis=0) / len(A))
print(A.mean(axis=1), A.sum(axis=1) / A.shape[-1])

5.5 5.5
[4. 5. 6. 7.] [4. 5. 6. 7.]
[1.5 5.5 9.5] [1.5 5.5 9.5]


In [181]:
# Running totals accumulated along axis 0 (down each column).
np.cumsum(A, axis=0)

array([[ 0,  1,  2,  3],
       [ 4,  6,  8, 10],
       [12, 15, 18, 21]])

#### pytorch

In [182]:
# float32 tensor 0..11 laid out as 3x4, followed by its element-wise (Hadamard) square.
A = torch.arange(12, dtype=torch.float32).reshape(3, 4)
print(A, end='\n\n')
print(torch.mul(A, A))

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.]])


In [183]:
# The same reductions in PyTorch; the keywords are `dim` / `keepdim` instead of `axis` / `keepdims`.
for reduced in (
    A.sum(),                     # grand total
    A.sum(dim=0),                # per-column sums
    A.sum(dim=1),                # per-row sums
    A.sum(dim=1, keepdim=True),  # per-row sums, reduced dim kept as size 1
    A.sum(dim=[0, 1]),           # reduce both dims at once
):
    print(reduced, end='\n\n')

tensor(66.)

tensor([12., 15., 18., 21.])

tensor([ 6., 22., 38.])

tensor([[ 6.],
        [22.],
        [38.]])

tensor(66.)



In [184]:
# Mean next to sum-divided-by-count: over all elements, then per column.
print(A.mean(), A.sum() / A.numel())
print(A.mean(dim=0), A.sum(dim=0) / A.size(0))

tensor(5.5000) tensor(5.5000)
tensor([4., 5., 6., 7.]) tensor([4., 5., 6., 7.])


### Operation: Linear Algebra: matrix\*vector, matrix\*matrix, norms

#### numpy

In [185]:
# Vector norms of (3, 4) and the default matrix norm of a 4x9 matrix of ones.
x = np.array([3.0, 4.0])
A = np.ones(shape=(4, 9))
print('l1', np.linalg.norm(x, 1), np.sum(np.abs(x)))  # L1 equals the sum of absolute values
print('l2', np.linalg.norm(x))                        # no `ord` given for a vector
print('Frobenius norm', np.linalg.norm(A))            # no `ord` given for a matrix

l1 7.0 7.0
l2 5.0
Frobenius norm 6.0


#### pytorch

In [186]:
# The same norms computed with torch.norm.
x = torch.tensor([3.0, 4.0])
A = torch.ones(4, 9)
print('l1', torch.norm(x, p=1), x.abs().sum())  # L1 equals the sum of absolute values
print('l2', torch.norm(x))                      # no `p` given for a vector
print('Frobenius norm', torch.norm(A))          # no `p` given for a matrix

l1 tensor(7.) tensor(7.)
l2 tensor(5.)
Frobenius norm tensor(6.)


#### numpy

In [187]:
# Matrix-vector and matrix-matrix products with np.dot.
A = np.arange(12).reshape(3, 4)
B = np.arange(12, 24).reshape(4, 3)
x = np.arange(4)

print(np.dot(A, x), end='\n\n')  # (3, 4) . (4,)   -> (3,)
print(np.dot(A, B), end='\n\n')  # (3, 4) . (4, 3) -> (3, 3)

[14 38 62]

[[114 120 126]
 [378 400 422]
 [642 680 718]]



#### pytorch

In [188]:
# Matrix-vector (mv) and matrix-matrix (mm) products in PyTorch.
A = torch.arange(12).reshape(3, 4)
B = torch.arange(12, 24).reshape(4, 3)
x = torch.arange(4)

print(A.mv(x), end='\n\n')  # (3, 4) x (4,)   -> (3,)
print(A.mm(B), end='\n\n')  # (3, 4) x (4, 3) -> (3, 3)

tensor([14, 38, 62])

tensor([[114, 120, 126],
        [378, 400, 422],
        [642, 680, 718]])



### saving memory

`+=`, `[:]` 操作都不会创建新的对象, 而会复用内存

In [189]:
# Demonstrates which update forms reuse the existing ndarray object, by printing id(a)
# after each step.
# Source: https://d2l.ai/chapter_preliminaries/ndarray.html#saving-memory

a = np.random.normal(size=(2,3))
print(id(a))  # baseline identity
a += 1
print(id(a))  # augmented assignment: same id in the recorded output
a[:] = a + 1
print(id(a))  # slice assignment writes into the existing array: same id
a = a + 1
print(id(a))  # plain assignment rebinds `a` to the new result: id changes

4871180736
4871180736
4871180736
4871181616


In [190]:
# Same identity check with a torch tensor: print id(a) after each update form.
a = torch.randn(size=(2,3))
print(id(a))  # baseline identity
a += 1
print(id(a))  # augmented assignment: same id in the recorded output
a[:] = a + 1
print(id(a))  # slice assignment writes into the existing tensor: same id
a = a + 1
print(id(a))  # plain assignment rebinds `a` to the new result: id changes

4841101712
4841101712
4841101712
4836010312
