# torch-tensor

In [1]:
import torch
import numpy as np

In [2]:
x = torch.Tensor(5,4)

In [3]:
x

tensor([[6.6741e+22, 4.6304e+27, 7.2151e+22, 1.7836e+31],
        [1.3563e-19, 1.3563e-19, 6.1083e-04, 4.6162e+24],
        [6.9840e+31, 1.2694e+36, 7.2443e+22, 1.1022e+24],
        [7.2734e+22, 4.7429e+30, 1.0555e+18, 4.7294e+22],
        [1.7749e+28, 7.2065e+31, 6.8524e+16, 4.8409e+25]])

In [4]:
y = torch.rand(5,4)

In [5]:
y

tensor([[0.7647, 0.6399, 0.0046, 0.0609],
        [0.1106, 0.7692, 0.1075, 0.9520],
        [0.5310, 0.5174, 0.8417, 0.6709],
        [0.4656, 0.8683, 0.4163, 0.7799],
        [0.3758, 0.9058, 0.2126, 0.3041]])

In [6]:
z = torch.rand(5,4)

# 两种加法 add与add_，带_会改变原来的变量

In [7]:
y.add(z)

tensor([[1.2949, 1.1085, 0.9185, 0.4804],
        [1.1030, 1.4496, 0.9173, 1.8425],
        [0.9404, 1.3971, 1.2985, 0.8877],
        [1.0070, 1.8347, 1.1618, 1.1811],
        [1.2959, 1.4801, 0.6838, 1.2320]])

In [8]:
y

tensor([[0.7647, 0.6399, 0.0046, 0.0609],
        [0.1106, 0.7692, 0.1075, 0.9520],
        [0.5310, 0.5174, 0.8417, 0.6709],
        [0.4656, 0.8683, 0.4163, 0.7799],
        [0.3758, 0.9058, 0.2126, 0.3041]])

In [9]:
y.add_(z)

tensor([[1.2949, 1.1085, 0.9185, 0.4804],
        [1.1030, 1.4496, 0.9173, 1.8425],
        [0.9404, 1.3971, 1.2985, 0.8877],
        [1.0070, 1.8347, 1.1618, 1.1811],
        [1.2959, 1.4801, 0.6838, 1.2320]])

In [10]:
y

tensor([[1.2949, 1.1085, 0.9185, 0.4804],
        [1.1030, 1.4496, 0.9173, 1.8425],
        [0.9404, 1.3971, 1.2985, 0.8877],
        [1.0070, 1.8347, 1.1618, 1.1811],
        [1.2959, 1.4801, 0.6838, 1.2320]])

# numpy与torch之间的互相转换

In [11]:
a = np.ones(5)
a

array([1., 1., 1., 1., 1.])

In [12]:
b = torch.from_numpy(a)
b

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

In [13]:
c = b.numpy()
c

array([1., 1., 1., 1., 1.])

## Tensor与numpy共享内存，一个改变，另一个也会改变

In [14]:
b.add_(1)
b

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

In [15]:
a

array([2., 2., 2., 2., 2.])

## Tensor可以通过 .cuda 转为GPU的Tensor

In [16]:
if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    x+y
    print("gpu is valiable!")

# 自动微分

autograd模块实现了自动微分功能，Variable是其中的核心，Variable封装了Tensor，其主要包含三个属性：

data：保存Variable所包含的Tensor

grad：保存data的梯度，是一个累加量，Variable类型

grad_fn：指向一个function对象，用来反向传播梯度

In [17]:
from torch.autograd import Variable

In [18]:
x = Variable(torch.ones(2,2), requires_grad = True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [19]:
y = x.sum()
y

tensor(4., grad_fn=<SumBackward0>)

In [20]:
y.grad_fn

<SumBackward0 at 0x7fca443d5940>

## 进行反向传播

In [21]:
y.backward()

由于是sum，梯度为1

In [22]:
x.grad

tensor([[1., 1.],
        [1., 1.]])

### 反向传播的梯度是累加的

In [23]:
y.backward()

In [24]:
x.grad

tensor([[2., 2.],
        [2., 2.]])

In [25]:
y.backward()

In [26]:
x.grad

tensor([[3., 3.],
        [3., 3.]])

In [27]:
x.grad.data.zero_() #将梯度置0

tensor([[0., 0.],
        [0., 0.]])

In [28]:
y.backward()

In [29]:
x.grad

tensor([[1., 1.],
        [1., 1.]])

### Variable与Tensor之间的关系

In [30]:
x = torch.rand(2,3)

In [31]:
y = Variable(x, requires_grad = True)

In [32]:
x == y.data

tensor([[True, True, True],
        [True, True, True]])

# 神经网络

torch.nn是为神经网络设计的模块化接口，建立在Autograd之上。nn.Module是nn最重要的类，可看作网络的封装，包含网络各层定义和forward方法，调用forward方(input)方法，可以实现前向传播。

## 定义网络

定义网络时，需要继承nn.Module，并实现它的forward方法，把网络中的可学习参数放到构造函数__init__中。若某一层（如Relu）不具有可学习参数，则放不放都可以。

In [33]:
import torch.nn as nn
import torch.nn.functional as F

In [34]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5) #分别代表in_channel,out_channel,kernel_size
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120) #full connection layer
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x): #注意forward有输入，在nn.Module的子类中定义了forward类后，backward函数就会利用Autograd自动实现
        #print("init x size:", x.size())
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        #print("after 1 conv:", x.size())
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        #print("after 2 convs:",x.size())
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
print(net) #输出的为__init__中的内容
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


网络的可学习参数通过nn.parameters()返回，nn.named_parameters()可同时返回可学习参数及名称

In [35]:
net.parameters()

<generator object Module.parameters at 0x7fca443c8150>

In [36]:
for i in net.parameters():
    print(i) #默认被初始化

Parameter containing:
tensor([[[[-0.0220,  0.0009, -0.1558, -0.1124, -0.0535],
          [-0.0823, -0.0141, -0.0129, -0.0793, -0.0659],
          [ 0.0085,  0.1019, -0.0991,  0.0689,  0.0124],
          [-0.1334,  0.0619,  0.1996,  0.1182, -0.0153],
          [ 0.0085, -0.1369, -0.1661,  0.0244,  0.0414]]],


        [[[-0.1188,  0.1376, -0.1233, -0.0572,  0.0400],
          [-0.1948,  0.1635,  0.0753, -0.0559, -0.0668],
          [ 0.0632, -0.1845, -0.1019, -0.0094,  0.0455],
          [ 0.1072,  0.0713, -0.0613, -0.0196,  0.1300],
          [-0.0907, -0.0052,  0.0843, -0.1475, -0.0078]]],


        [[[ 0.0845,  0.0212, -0.1848, -0.1424, -0.1535],
          [ 0.0519, -0.0954, -0.1276,  0.1531,  0.0762],
          [ 0.0376, -0.1507,  0.1321,  0.0859,  0.0204],
          [ 0.0223, -0.0475, -0.0053, -0.0432, -0.0968],
          [-0.1269, -0.1678, -0.1642, -0.0275,  0.0557]]],


        [[[ 0.0027, -0.1525, -0.1283, -0.1185, -0.1477],
          [-0.1468,  0.0683, -0.0982,  0.1251,  0.1538

In [37]:
for name, parameter in net.named_parameters():
    print(name,":",parameter.size())

conv1.weight : torch.Size([6, 1, 5, 5])
conv1.bias : torch.Size([6])
conv2.weight : torch.Size([16, 6, 5, 5])
conv2.bias : torch.Size([16])
fc1.weight : torch.Size([120, 400])
fc1.bias : torch.Size([120])
fc2.weight : torch.Size([84, 120])
fc2.bias : torch.Size([84])
fc3.weight : torch.Size([10, 84])
fc3.bias : torch.Size([10])


forward 函数的输入和输出都是Variabel，只有Variable才具有自动求导功能，所以在输入时，需要把Tensor封装为Variable

In [38]:
inputs = Variable(torch.randn(1,1,32,32))
out = net(inputs)
out.size()
out

tensor([[ 0.0413,  0.0177,  0.0695, -0.0254,  0.1251,  0.0619,  0.1362, -0.1597,
          0.0007,  0.0918]], grad_fn=<AddmmBackward>)

进行反向传播，并将所有梯度清零

在进行backward时，若out为标量时，不需要指定grad_tensors，而若out为tensor时，则需要制定，且grad_tensors的shape必须与out的相同，并将其设置为1，计算时为：grad_tensord*grad

In [39]:
net.zero_grad()
out.backward(torch.ones(1,10))

注：torch.nn只支持mini_bitches，每次输入必须为一个batch，若想只输入一个样本，则需要使用inputs.unsqueeze(0)，将batch_size设置为1。例如nn.Conv2d的输入必须为4维，且必须为batch_size*c*h*w

## 损失函数

nn实现了神经网络中绝大数的损失函数，例如，nn.MSELoss用来计算均方差误差，nn.CrossEntropyLoss用来计算交叉熵损失

torch.range(0, 10) ----> tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
torch.arange(0, 10) ---->tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
out = net(inputs)
target = Variable(torch.arange(0, 10.0))
criterion = nn.MSELoss()
loss = criterion(out, target)
print(loss)

tensor(28.2236, grad_fn=<MseLossBackward>)


  return F.mse_loss(input, target, reduction=self.reduction)


In [41]:
# 运行backward，并观察参数梯度的变化情况
net.zero_grad() #学习前将梯度清零
print("反向传播前的梯度：", net.conv1.bias.grad)
loss.backward()
print("反向传播后的梯度：", net.conv1.bias.grad)

反向传播前的梯度： tensor([0., 0., 0., 0., 0., 0.])
反向传播后的梯度： tensor([-0.0138,  0.1222,  0.0078,  0.0218,  0.0409,  0.0432])


## 优化器
反向传播后会得到所有参数的梯度，还需要使用优化方法来更新网络参数。torch.optim中集成了绝大多数的优化方法，RMSProp,Adam和SGD等。

In [42]:
import torch.optim as optim

#指定优化器，并指定要学习的参数和学习率
optimizer = optim.SGD(net.parameters(), lr = 0.01)

#训练过程中需要先将梯度清零
optimizer.zero_grad() #与net.zero_grad()相同效果

output = net(inputs)
loss = criterion(output, target)

loss.backward()

#更新参数
optimizer.step()

## 图像加载和与处理

pytorch中提供了常用的数据加载和处理功能。如Imagenet,CIFA10,MNIST等，以及常用的数据转换操作。