In [1]:
from __future__ import print_function
import torch
import torchvision

## 张量

In [5]:
# 未初始化
x = torch.empty(5, 3)
print(x)

# 随机初始化
x = torch.rand(5, 3)
print(x)

# 0初始化
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

# 给定数据创建张量
x = torch.tensor([[1,1],[2,2]])
print(x)

# 从已有的张量创建张量
x = x.new_ones(5, 3, dtype=torch.double)
print(x)
x = torch.randn_like(x, dtype=torch.float)
print(x)

print(x.size())

tensor([[1.1210e-44, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])
tensor([[0.6136, 0.1158, 0.6799],
        [0.9316, 0.6019, 0.2764],
        [0.6095, 0.8856, 0.6233],
        [0.9068, 0.7389, 0.4268],
        [0.8680, 0.0348, 0.0028]])
tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
tensor([[1, 1],
        [2, 2]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[-1.1777,  0.8893, -0.4335],
        [-1.3371, -1.4099,  0.7527],
        [-0.8562,  1.7667,  1.0009],
        [ 1.6526,  1.0280,  0.4585],
        [-0.5309, -0.9224, -0.7697]])
torch.Size([5, 3])


## 基本运算

In [7]:
y = torch.rand(5, 3)
print(x+y)
print(torch.add(x, y))

result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

# 后缀_的运算，基本上都表示对调用方直接进行更改
y.add_(x)
print(y)

tensor([[-0.2641,  0.9867, -0.2788],
        [-0.4667, -0.6177,  1.0333],
        [-0.2591,  2.0931,  1.5914],
        [ 2.1619,  1.1920,  0.9952],
        [ 0.0812, -0.2362, -0.3309]])
tensor([[-0.2641,  0.9867, -0.2788],
        [-0.4667, -0.6177,  1.0333],
        [-0.2591,  2.0931,  1.5914],
        [ 2.1619,  1.1920,  0.9952],
        [ 0.0812, -0.2362, -0.3309]])
tensor([[-0.2641,  0.9867, -0.2788],
        [-0.4667, -0.6177,  1.0333],
        [-0.2591,  2.0931,  1.5914],
        [ 2.1619,  1.1920,  0.9952],
        [ 0.0812, -0.2362, -0.3309]])
tensor([[-0.2641,  0.9867, -0.2788],
        [-0.4667, -0.6177,  1.0333],
        [-0.2591,  2.0931,  1.5914],
        [ 2.1619,  1.1920,  0.9952],
        [ 0.0812, -0.2362, -0.3309]])


In [9]:
# 和np类似，表示x索引为1的一列
print(x[:, 1])

# view用于改变形状，-1表示推导出的默认值
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())

tensor([ 0.8893, -1.4099,  1.7667,  1.0280, -0.9224])
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [10]:
x = torch.randn(1)
val = x.item()
print(val)

1.4854084253311157


## Numpy 转换

In [13]:
a = torch.ones(5)
print(a)

b = a.numpy()
print(b)

a.add_(1)
print(a)
print(b)

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [14]:
import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [15]:
# 当有CUDA时
if torch.cuda.is_available():
    device = torch.device("cuda")
    # 直接在gpu建立张量y
    y = torch.ones_like(x, device=device)
    # 或者调用to()方法，改变存储位置
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))

## autograd

In [11]:
x = torch.ones(3, 3, requires_grad=True)
y = torch.rand(3, 3, requires_grad=True)
z = torch.sum(x+y)
print(z)

tensor(14.5018, grad_fn=<SumBackward0>)


In [12]:
z.backward()

print(x.grad)
print(y.grad)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [13]:
x = torch.ones(3, 3, requires_grad=True)
y = torch.rand(3, 3, requires_grad=True)

z = x**2 + y**3
print(z)

tensor([[1.0004, 1.4347, 1.2560],
        [1.2186, 1.4952, 1.9323],
        [1.9170, 1.1717, 1.7803]], grad_fn=<AddBackward0>)


In [15]:
print(x)
print(y)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)
tensor([[0.0747, 0.7575, 0.6350],
        [0.6024, 0.7911, 0.9769],
        [0.9715, 0.5558, 0.9206]], requires_grad=True)


In [14]:
# backword传入参数？
z.backward(torch.ones_like(x))
print(x.grad)
print(y.grad)

tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]])
tensor([[0.0167, 1.7216, 1.2096],
        [1.0886, 1.8777, 2.8630],
        [2.8316, 0.9267, 2.5427]])


In [16]:
dir(z)

['T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_priority__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__idiv__',
 '__ilshift__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rfloordiv__',
 '__rmul__',
 '__rpow__',
 '__rshift__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__se

In [17]:
print(x.is_leaf)   # 手动创建的创建变量
print(z.is_leaf)   # 计算得到的结果变量

True
False


In [19]:
dir(z.grad_fn)

['__call__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_register_hook_dict',
 'metadata',
 'name',
 'next_functions',
 'register_hook',
 'requires_grad']

In [20]:
z.grad_fn.next_functions

((<PowBackward0 at 0x1192b4a20>, 0), (<PowBackward0 at 0x1192b4ba8>, 0))

In [21]:
xg = z.grad_fn.next_functions[0][0]
dir(xg)

['__call__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_register_hook_dict',
 'metadata',
 'name',
 'next_functions',
 'register_hook',
 'requires_grad']

In [22]:
xg.next_functions

((<AccumulateGrad at 0x119541278>, 0),)

In [26]:
x_leaf = xg.next_functions[0][0]
print(x_leaf.variable)
print('id of x_leaf: '+str(id(x_leaf.variable)))
print('id of x: '+str(id(x)))

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)
id of x_leaf: 4700117680
id of x: 4700117680


In [27]:
from torch.autograd.function import Function

class multiConst(Function):
    @staticmethod
    def forward(ctx, tensor, constant):
        ctx.constant = constant
        return tensor*constant
    @staticmethod
    def backward(ctx, grad_output):
        return grad_output, None

In [28]:
a = torch.ones(3, 3, requires_grad=True)
b = multiConst.apply(a, 5)
print(b)

b.backward(torch.ones_like(a))
print(a.grad)

tensor([[5., 5., 5.],
        [5., 5., 5.],
        [5., 5., 5.]], grad_fn=<multiConstBackward>)
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [32]:
x = torch.ones(3, 3)
print(x.mean())
print(torch.mean(x, 1, True))
print(torch.mean(x, 1, False))

tensor(1.)
tensor([[1.],
        [1.],
        [1.]])
tensor([1., 1., 1.])


In [33]:
x = torch.ones(2, 2, requires_grad=True)
print(x)

y = x+2
print(y)
print(y.grad_fn)

z = y * y * 3
out = z.mean()
print(z, out)

print(x.grad_fn)
print(y.grad_fn)
print(z.grad_fn)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x119435630>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
None
<AddBackward0 object at 0x119435630>
<MulBackward0 object at 0x1198ee128>


In [34]:
out.backward()

print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [42]:
x = torch.randn(3, requires_grad=True)
y = x*2
print(y)
# L2 norm of y
print(y.data.norm())
while y.data.norm() < 1000:
    y = y*2

print(y.data.norm())
print(y)

tensor([-0.0400, -0.5336, -0.7244], grad_fn=<MulBackward0>)
tensor(0.9006)
tensor(1844.3717)
tensor([  -81.8700, -1092.7855, -1483.5175], grad_fn=<MulBackward0>)


In [43]:
# v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(torch.ones_like(x))

print(x.grad)

tensor([4096., 4096., 4096.])


In [39]:
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

y = x.detach()
print(y.requires_grad)

True
True
False
False


## Neural Network

In [44]:
import torch.nn as nn
import torch.nn.functional as F

In [45]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        # Conv2d: input_channels, filters(output_channels), kernel_size
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Linear: input_dims, output_dims
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # 除了batch以外的维度
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
    def forward(self, x):
        # CONV1 -> RELU -> MAX_POOLING(2x2)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # FLATTEN
        x = x.view(-1, self.num_flat_features(x))
        # FC
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [46]:
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [51]:
params = list(net.parameters())
print(len(params))
print(params[0].size())   # conv1's weights 
print(params[1].size())   # conv1's bias

10
torch.Size([6, 1, 3, 3])
torch.Size([6])


In [52]:
inp = torch.randn(1, 1, 32, 32)
out = net(inp)
print(out)

tensor([[ 0.0674, -0.0449,  0.0888, -0.0770, -0.0797, -0.0442, -0.1092,  0.0879,
         -0.0070,  0.0452]], grad_fn=<AddmmBackward>)


In [53]:
# clear grad buffers
net.zero_grad()

out.backward(torch.randn(1, 10))

计算损失函数

In [54]:
input = torch.randn(1,1,32,32)
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
metric = nn.MSELoss()

loss = metric(output, target)
print(loss)

tensor(0.7974, grad_fn=<MseLossBackward>)


In [59]:
print(loss.grad_fn)
print(loss.grad_fn.next_functions)
print(loss.grad_fn.next_functions[0][0].next_functions[1][0])

<MseLossBackward object at 0x128556e10>
((<AddmmBackward object at 0x12841b588>, 0),)
<ReluBackward0 object at 0x128469a20>


BP参数更新

In [60]:
net.zero_grad()

print('conv1.bias.bp before bp:')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.bp after bp:')
print(net.conv1.bias.grad)

conv1.bias.bp before bp:
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.bp after bp:
tensor([-0.0058, -0.0046,  0.0042,  0.0011, -0.0093, -0.0064])


In [61]:
print(net.conv1.bias)

learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

print(net.conv1.bias)

Parameter containing:
tensor([ 0.2666, -0.0945, -0.0147,  0.0309,  0.0408,  0.1198],
       requires_grad=True)
Parameter containing:
tensor([ 0.2667, -0.0944, -0.0147,  0.0309,  0.0409,  0.1199],
       requires_grad=True)


使用优化器

In [62]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.01)

optimizer.zero_grad()

output = net(input)
loss = metric(output, target)
loss.backward()

optimizer.step()