# PyTorch基础

In [1]:
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

In [3]:
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
from IPython.core.debugger import set_trace

# Tensors
- pytorch
- tf
- numpy
- ...  
所有数值计算的基础。

## numpy和pytorch中的Tensors的用法

In [4]:
import numpy as np
from numpy.linalg import inv
from numpy.linalg import multi_dot as mdot

In [5]:
# numpy
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [6]:
# torch
torch.eye(3)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [7]:
# numpy
X = np.random.random((5, 3))
X

array([[0.1324223 , 0.67456006, 0.83705422],
       [0.87641656, 0.64720623, 0.21189409],
       [0.70997199, 0.87370658, 0.42398806],
       [0.81573078, 0.57613066, 0.02361098],
       [0.27251299, 0.52769473, 0.08677278]])

In [8]:
# pytorch
Y = torch.rand((5, 3))
Y

tensor([[0.3334, 0.8665, 0.8952],
        [0.5611, 0.6842, 0.6112],
        [0.1427, 0.7762, 0.9140],
        [0.4560, 0.7681, 0.2065],
        [0.6011, 0.5460, 0.9433]])

In [9]:
X.shape

(5, 3)

In [10]:
Y.shape, Y.size()

(torch.Size([5, 3]), torch.Size([5, 3]))

In [11]:
# numpy
np.matmul(X.T, X)
#X.T @ X

array([[2.02938192, 1.89062743, 0.64047871],
       [1.89062743, 2.24765863, 1.13161624],
       [0.64047871, 1.13161624, 0.93341175]])

In [12]:
# torch
torch.matmul(Y.t(), Y)
#Y.t() @ Y

tensor([[1.0156, 1.4621, 1.4331],
        [1.4621, 2.7096, 2.5771],
        [1.4331, 2.5771, 2.9429]])

In [13]:
# numpy
inv(X.T @ X)

array([[ 5.79826534, -7.37666742,  4.96446628],
       [-7.37666742, 10.5266196 , -7.70024083],
       [ 4.96446628, -7.70024083,  7.00021468]])

In [14]:
# torch
torch.inverse(Y.t() @ Y)

tensor([[ 4.4840, -2.0511, -0.3874],
        [-2.0511,  3.1465, -1.7566],
        [-0.3874, -1.7566,  2.0667]])

## 更多PyTorch Tensors

常用运算方法作为Tensors Class的method  
`.add()`

In [15]:
A = torch.eye(3)
A.add(1)

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

In [16]:
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

inplace运算方法，`.add_()`

In [17]:
A.add_(1)
A

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

## Indexing和broadcasting
和numpy原理相同

In [18]:
A = torch.eye(3)
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [19]:
A[0, 0]

tensor(1.)

In [20]:
A[0]

tensor([1., 0., 0.])

In [21]:
A[0:2]

tensor([[1., 0., 0.],
        [0., 1., 0.]])

In [22]:
A[:, 1:3]

tensor([[0., 0.],
        [1., 0.],
        [0., 1.]])

In [23]:
A + 1

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

In [24]:
A + torch.tensor([1,0,0])

tensor([[2., 0., 0.],
        [1., 1., 0.],
        [1., 0., 1.]])

## pytorch和numpy Tensors之间转换

In [25]:
A = torch.eye(3)
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [26]:
# torch --> numpy
B = A.numpy()
B

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

注意: torch变量和numpy变量可以共享内存

In [27]:
#修改变量A，B也跟着变了
A.add_(.5)
A

tensor([[1.5000, 0.5000, 0.5000],
        [0.5000, 1.5000, 0.5000],
        [0.5000, 0.5000, 1.5000]])

In [28]:
B

array([[1.5, 0.5, 0.5],
       [0.5, 1.5, 0.5],
       [0.5, 0.5, 1.5]], dtype=float32)

In [29]:
# numpy --> torch
torch.from_numpy(np.eye(3))

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], dtype=torch.float64)

## 现在使用GPU

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

假如你的机器上装有GPU，可以看到: 
`device(type='cuda', index=0)`

将数据移到GPU上 `.to(device)`.

In [5]:
data = torch.eye(3)
data.to(device)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], device='cuda:0')

注意：此时，所有计算发生在GPU上

In [6]:
res = data + data
res

tensor([[2., 0., 0.],
        [0., 2., 0.],
        [0., 0., 2.]])

In [7]:
res.device

device(type='cpu')

# 自动求导 `.backward()`

你只需要`torch.ones(3, requires_grad=True)`

参考:
- https://pytorch.org/docs/stable/autograd.html
- https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

求$y = x ^ 2$的导数

In [8]:
x = torch.tensor(2.)
x

tensor(2.)

In [9]:
#先定义变量x是可导的
x = torch.tensor(2., requires_grad=True)
x

tensor(2., requires_grad=True)

In [10]:
print(x.requires_grad)

True


In [11]:
print(x.grad)

None


In [12]:
y = x ** 2

print("导数 x:", x.grad)

导数 x: None


In [13]:
##多次运行会发生什么？
y = x ** 2
y.backward()

print("导数 x:", x.grad)

导数 x: tensor(4.)


In [14]:
x = torch.tensor(2., requires_grad=False)
x.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [15]:
# 推断时，关掉自动求导机制

params = torch.tensor(2., requires_grad=True)
#。。。更新参数

with torch.no_grad():
    y = x * x
    print(x.grad_fn)

None
