In [1]:
import numpy as np
import torch

## Tensor初始化

### 直接初始化

In [2]:
# Nested Python list: two rows of three integers each.
data = [[4,5,6],[7,8,9]]

# Build a tensor directly from the nested list.
x_data = torch.tensor(data)

### NumPy数组初始化

In [3]:
# Convert the same nested list into a NumPy array first.
np_array = np.array(data)

# Wrap the NumPy array as a tensor.
x_np = torch.from_numpy(np_array)

### 从另一个Tensor初始化

In [4]:
x_ones = torch.ones_like(x_data)

In [5]:
x_ones

tensor([[1, 1, 1],
        [1, 1, 1]])

In [6]:
x_rand = torch.rand_like(x_data, dtype=torch.float)

In [7]:
x_rand

tensor([[0.5633, 0.4191, 0.5487],
        [0.3460, 0.0721, 0.8198]])

### 常量

In [8]:
shape = (2, 3)

In [9]:
torch.ones(shape)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [10]:
torch.zeros(shape)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

### 随机值

In [11]:
torch.rand(shape)

tensor([[0.6829, 0.4193, 0.5559],
        [0.9714, 0.6573, 0.6876]])

In [12]:
torch.randn(shape)

tensor([[-0.7272, -1.0632, -0.5874],
        [-1.2113,  0.1305,  1.4179]])

## Tensor属性

In [15]:
tensor = torch.rand(3, 4)

### Shape

In [16]:
tensor.shape

torch.Size([3, 4])

### 元素数据类型

In [17]:
tensor.dtype

torch.float32

### 存储设备

In [18]:
tensor.device

device(type='cpu')

## Tensor操作

In [19]:
# Move the tensor to the GPU, but only when CUDA is reported as available;
# otherwise the tensor is left where it is.
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

In [21]:
tensor = torch.ones(4, 4)

In [22]:
tensor[:, 1] = 0

In [23]:
tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

### 拼接

In [24]:
t1 = torch.cat([tensor, tensor, tensor], dim=1)

In [25]:
t1

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])

In [26]:
t1.shape

torch.Size([4, 12])

### 乘法

In [37]:
tensor * tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [38]:
tensor.mul(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [39]:
tensor.matmul(tensor)

tensor([[3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.]])

In [40]:
tensor @ tensor

tensor([[3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.]])

In [41]:
tensor @ tensor.T

tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])

### 原址操作

In [42]:
tensor.add_(5)

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])

In [43]:
tensor

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])

In [46]:
tensor.t()

tensor([[6., 6., 6., 6.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [6., 6., 6., 6.]])

以`_`结尾的函数（如上面的`add_`）通常会对Tensor进行原址修改；不带下划线的函数（如`t()`）则不会改变原Tensor的内容。

## 与NumPy的联系

在CPU上的Tensor和NumPy数组可以共享内存，改变一个的数据，另一个也自然会变。

### Tensor->NumPy数组

In [48]:
t = torch.ones(5)

In [49]:
t

tensor([1., 1., 1., 1., 1.])

In [50]:
n = t.numpy()

In [51]:
n

array([1., 1., 1., 1., 1.], dtype=float32)

In [52]:
t.add_(1)

tensor([2., 2., 2., 2., 2.])

In [53]:
t

tensor([2., 2., 2., 2., 2.])

In [54]:
n

array([2., 2., 2., 2., 2.], dtype=float32)

### NumPy数组->Tensor

In [55]:
n = np.ones(5)

In [57]:
t = torch.from_numpy(n)

In [58]:
t

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

In [59]:
np.add(n, 1, out=n)

array([2., 2., 2., 2., 2.])

In [60]:
n

array([2., 2., 2., 2., 2.])

In [61]:
t

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

## torch.autograd

In [62]:
import torchvision

In [63]:
model = torchvision.models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\96233/.cache\torch\checkpoints\resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [64]:
data = torch.rand(1, 3, 64, 64)

In [65]:
labels = torch.rand(1, 1000)

In [66]:
prediction = model(data)

In [68]:
prediction.shape

torch.Size([1, 1000])

In [69]:
loss = (prediction - labels).sum()

In [70]:
loss

tensor(-493.9296, grad_fn=<SumBackward0>)

In [71]:
loss.backward()

每个参数的梯度会存储在其`grad`属性中。

调用优化器的`step()`函数来执行一步梯度下降。优化器根据存储在各参数`grad`属性中的梯度来调整每个参数。

In [77]:
# An optimizer has to exist before step() can be called. Build SGD over all of
# the model's parameters; a distinct name is used so it does not clash with the
# `optim` module that is imported further down in this notebook.
sgd = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

# Apply one update using the gradients that loss.backward() stored in each
# parameter's .grad attribute.
sgd.step()

## Autograd中的微分

In [87]:
# Two tensors created with requires_grad=True, so autograd tracks the
# operations applied to them and can compute gradients with respect to each.
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

创建`a`、`b`两个Tensor时传入的`requires_grad=True`参数表明Autograd会追踪所有在二者上的操作，以计算梯度。

如果不需要计算梯度，可以将其设置为`False`。

In [88]:
Q = 3*a**3 - b**2

In [89]:
external_grad = torch.tensor([1., 1.])

In [90]:
Q.backward(gradient=external_grad)

In [91]:
a.grad

tensor([36., 81.])

In [92]:
b.grad

tensor([-12.,  -8.])

In [93]:
a.grad == 9 * a ** 2

tensor([True, True])

In [94]:
b.grad == -2 * b

tensor([True, True])

有时候我们想对已训练好的模型进行微调（fine-tuning）：先加载预训练模型，并冻结其全部参数，使它们不再参与梯度计算。

In [96]:
# Load ResNet-18 with pretrained weights.
model = torchvision.models.resnet18(pretrained=True)

# Freeze the whole network: turn off gradient tracking on every parameter.
for param in model.parameters():
    param.requires_grad_(False)

将分类类别数改为10（用一个新的全连接层替换`model.fc`）：

In [98]:
from torch import nn

In [99]:
model.fc = nn.Linear(512, 10)

In [101]:
from torch import optim

In [102]:
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)

In [105]:
# Print requires_grad for each parameter of the replacement fc layer, to check
# that the new layer is still trainable after the rest of the model was frozen.
for param in model.fc.parameters():
    print(param.requires_grad)

True
True


torch.no_grad: https://pytorch.org/docs/stable/generated/torch.no_grad.html

In [109]:
x = torch.tensor([1.], requires_grad=True)

In [110]:
y = x + 1

In [111]:
y.requires_grad

True

In [112]:
# Same computation as `y = x + 1`, but inside torch.no_grad(): the result is
# not tracked by autograd even though x itself has requires_grad=True.
with torch.no_grad():
    z = x + 1
    print(z.requires_grad)

False


In [114]:
@torch.no_grad()
def doubler(x):
    """Return ``x * 2``, computed with gradient tracking disabled.

    Because the whole function body runs under ``torch.no_grad()``, a tensor
    result does not require grad even when ``x`` does.
    """
    doubled = x * 2
    return doubled

In [115]:
z = doubler(x)
z.requires_grad

False

## 神经网络

![](./Images/1/1.png)

### 定义网络结构

In [116]:
import torch.nn.functional as F

In [117]:
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    ``fc1`` takes 16 * 6 * 6 = 576 flattened features, which is what a
    1x32x32 input yields after the two 3x3 convolutions and 2x2 poolings
    (32 -> 30 -> 15 -> 13 -> 6). The final layer produces 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 -> 6 -> 16 channels, 3x3 square kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Affine layers (y = Wx + b): 576 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the ``fc3`` outputs."""
        # conv -> ReLU -> 2x2 max-pool, twice (a single int means a square window).
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=(2, 2))
        # Flatten everything except the batch dimension.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for dim in x.shape[1:]:
            count *= dim
        return count

In [118]:
net = Net()
net

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [119]:
# Collect the learnable parameters, then show how many there are and the
# shape of the first one (conv1's weight).
params = [p for p in net.parameters()]
print(len(params), params[0].shape, sep="\n")

10
torch.Size([6, 1, 3, 3])


In [124]:
x = torch.randn(1, 1, 32, 32)

In [126]:
out = net(x)

In [127]:
net.zero_grad()

In [128]:
out.backward(torch.randn(1, 10))

### 损失函数

In [132]:
# Mean-squared-error loss between the network output and a dummy target.
criterion = nn.MSELoss()
output = net(x)
# Random stand-in target, reshaped to a single row so it matches the output's shape.
target = torch.randn(10).view(1, -1)

loss = criterion(output, target)
print(loss)

tensor(0.7090, grad_fn=<MseLossBackward>)


### 反向传播

In [133]:
# Reset the gradient buffers of all parameters so this backward pass does not
# accumulate onto gradients left over from earlier ones.
net.zero_grad()

print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

# Backpropagate the loss; this populates .grad on the network's parameters.
loss.backward()

print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0154, -0.0044,  0.0254, -0.0260, -0.0045,  0.0145])


### 更新权重

In [134]:
learning_rate = 0.01
# Manual SGD step: weight <- weight - learning_rate * gradient.
# The in-place update runs under torch.no_grad() rather than through the legacy
# `.data` attribute, so autograd does not record it but still sees the change.
with torch.no_grad():
    for f in net.parameters():
        # Parameters that received no gradient have .grad set to None; skip them.
        if f.grad is not None:
            f.sub_(f.grad * learning_rate)

In [136]:
# Create an SGD optimizer over all of net's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One training iteration (this sequence would sit inside the training loop):
optimizer.zero_grad()   # clear the gradient buffers before the new backward pass
output = net(x)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # update the parameters from the gradients just computed

## TRAINING A CLASSIFIER

Training a Classifier: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html