In [8]:
# A `from __future__` import must come before every other statement in a
# module, otherwise Python raises a SyntaxError when this cell is exported to
# (or executed as) a plain script.
from __future__ import print_function

import torch

# 基础

1、张量、张量对应的操作

2、张量和numpy相互转换

In [26]:
# Allocate a 5x3 tensor; torch.empty does not initialise the values.
x = torch.empty((5, 3))

# Autograd 自动微分

## requires_grad 跟踪某个张量的所有操作

In [37]:
# 2x2 tensor of ones; requires_grad=True asks autograd to record every
# operation applied to it.
x = torch.ones((2, 2), requires_grad=True)

In [40]:
# x requires grad, so the addition is recorded by autograd and the result
# carries a grad_fn (shown in the cell output).
y = x + 2
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [41]:
y.grad_fn

<AddBackward0 at 0x11a428b50>

In [42]:
# Element-wise z = 3 * y**2, written as two multiplications.
z = y * y * 3
# Reduce to a scalar so backward() can later be called without arguments.
out = z.mean()

In [45]:
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward1>)


In [53]:
# NOTE(review): `out` was computed from tensors that require grad and already
# has a grad_fn, so this in-place flag change appears to be a no-op; the cell
# could likely be dropped.
out.requires_grad_(True)

tensor(27., grad_fn=<MeanBackward1>)

## Grad

In [55]:
# `out` is a scalar, so backward() needs no explicit gradient argument;
# the call fills x.grad with d(out)/dx.
out.backward()

In [56]:
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

梯度的解释：

1、一维 => 一阶导数

2、多维 => 雅可比矩阵

In [57]:
# Random 3-element vector (standard normal) tracked by autograd.
x = torch.randn((3,), requires_grad=True)

In [59]:
y = x * 2

In [64]:
# Keep doubling y until its norm reaches 1000. The norm check runs on a
# detached view so it is not recorded by autograd; detach() is the supported
# replacement for the legacy `.data` attribute. The doubling itself stays in
# the graph so gradients still flow back to x.
while y.detach().norm() < 1000:
    y = y * 2

此时 y 为矢量（非标量），因此调用 backward 时必须传入一个与 y 形状相同的梯度向量

In [69]:
# Weight vector handed to backward() as the upstream gradient for y.
v = torch.tensor((0.1, 1.0, 0.0001), dtype=torch.float32)

In [71]:
# y is not a scalar, so backward() must be given a tensor with y's shape;
# x.grad then receives the vector-Jacobian product of v with dy/dx.
y.backward(v)

In [72]:
x.grad

tensor([5.1200e+01, 5.1200e+02, 5.1200e-02])

# 神经网络

1、定义包含【可学习参数】（权重）的神经网络

2、迭代输入数据集

3、网络处理输入

4、计算损失

5、将Grad传播回网络参数

6、通过简单的更新规则，更新权重

In [76]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [1]:
# Import only the name this notebook uses instead of `*`, so the import cannot
# silently overwrite notebook variables and the origin of Net stays explicit.
# torch / nn / F are imported directly in the preceding import cell.
from simple_cnn import Net

In [2]:
net = Net()

In [3]:
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
# Materialise the network's learnable parameter tensors into a list.
params = [*net.parameters()]
print(len(params))       # how many parameter tensors the network has
print(params[0].shape)   # shape of the first parameter tensor

10
torch.Size([6, 1, 5, 5])


In [11]:
# Random sample fed to the network; conv1 takes 1 input channel, so the
# layout is presumably (batch, channels, height, width) -- TODO confirm
# against Net.forward.
# NOTE(review): the name shadows the builtin `input`; it is kept because
# other cells refer to it.
input = torch.randn((1, 1, 32, 32))

In [13]:
out = net(input)

In [14]:
print(out)

tensor([[ 0.1106,  0.1117,  0.0090, -0.0404,  0.0966, -0.0638, -0.0526,  0.0233,
          0.0194,  0.0564]], grad_fn=<AddmmBackward>)


In [15]:
# Clear any previously accumulated gradients, then backpropagate a random
# upstream gradient whose shape (1, 10) matches `out`.
net.zero_grad()
out.backward(gradient=torch.randn((1, 10)))

## 计算损失

In [16]:
# Run the network on the sample input.
output = net(input)
# Random dummy target with 10 elements, one per network output value.
target = torch.randn(10)

In [18]:
target = target.view(1, -1)

In [20]:
# Mean-squared-error loss, averaged over all elements (the default reduction).
criterion = nn.MSELoss(reduction="mean")

In [21]:
loss = criterion(output, target)

In [34]:
print(loss.grad_fn)

<MseLossBackward object at 0x11d461f50>


In [35]:
print(loss.grad_fn.next_functions[0][0])

<AddmmBackward object at 0x109c8f710>


## 后向传播，获取梯度

In [36]:
# Reset the gradients of all parameters before backpropagating the loss.
# NOTE(review): the zeros printed by the next cell reflect an older PyTorch;
# recent releases default to zero_grad(set_to_none=True), which leaves .grad
# as None instead -- confirm against the installed version.
net.zero_grad()

In [37]:
print(net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])


In [38]:
loss.backward()

In [40]:
print(net.conv1.bias.grad)

tensor([-0.0092,  0.0011, -0.0011,  0.0051, -0.0040, -0.0018])


## 更新权重

w = w - learning_rate * gradient

In [45]:
learning_rate = 0.01
# Plain SGD step, w <- w - learning_rate * grad, applied to every parameter.
# The in-place update runs under no_grad() so autograd does not record it;
# this replaces the legacy `.data` access, which bypasses autograd's checks.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:
            # No gradient available for this parameter (e.g. gradients were
            # reset to None); leave it unchanged.
            continue
        f.sub_(f.grad * learning_rate)

In [53]:
import torch.optim as optim

# Create an SGD optimizer over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

for i in range(100):
    # Training loop: 100 update steps on the same (input, target) pair.
    optimizer.zero_grad()  # clear gradients left over from the previous step
    output = net(input)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()  # apply the update rule using the fresh gradients

# 训练分类器