In [115]:
from __future__ import print_function
import torch

In [116]:
x = torch.Tensor(5,3)
x

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 1.0700e-34]])

In [117]:
x.size()

torch.Size([5, 3])

In [118]:
x = torch.rand(5,3)

In [119]:
x

tensor([[0.9310, 0.1852, 0.5039],
        [0.5701, 0.4723, 0.3937],
        [0.5122, 0.4529, 0.4210],
        [0.7478, 0.5877, 0.5297],
        [0.0749, 0.5573, 0.0579]])

In [120]:
y = torch.rand(5,3)
y

tensor([[0.4129, 0.0074, 0.5132],
        [0.8724, 0.5890, 0.4506],
        [0.2324, 0.6200, 0.0672],
        [0.8254, 0.6396, 0.6155],
        [0.5530, 0.4882, 0.4282]])

In [121]:
x + y

tensor([[1.3439, 0.1926, 1.0172],
        [1.4425, 1.0613, 0.8443],
        [0.7447, 1.0729, 0.4881],
        [1.5732, 1.2273, 1.1452],
        [0.6279, 1.0454, 0.4862]])

In [122]:
torch.add(x,y)

tensor([[1.3439, 0.1926, 1.0172],
        [1.4425, 1.0613, 0.8443],
        [0.7447, 1.0729, 0.4881],
        [1.5732, 1.2273, 1.1452],
        [0.6279, 1.0454, 0.4862]])

In [123]:
result = torch.Tensor(5,3)
torch.add(x,y,out=result)

tensor([[1.3439, 0.1926, 1.0172],
        [1.4425, 1.0613, 0.8443],
        [0.7447, 1.0729, 0.4881],
        [1.5732, 1.2273, 1.1452],
        [0.6279, 1.0454, 0.4862]])

## 任何改变tensor内容的操作都会在方法名后加一个下划线"_"

In [124]:
y.add_(x)

tensor([[1.3439, 0.1926, 1.0172],
        [1.4425, 1.0613, 0.8443],
        [0.7447, 1.0729, 0.4881],
        [1.5732, 1.2273, 1.1452],
        [0.6279, 1.0454, 0.4862]])

In [125]:
y

tensor([[1.3439, 0.1926, 1.0172],
        [1.4425, 1.0613, 0.8443],
        [0.7447, 1.0729, 0.4881],
        [1.5732, 1.2273, 1.1452],
        [0.6279, 1.0454, 0.4862]])

In [126]:
x[:,1]

tensor([0.1852, 0.4723, 0.4529, 0.5877, 0.5573])

In [127]:
x

tensor([[0.9310, 0.1852, 0.5039],
        [0.5701, 0.4723, 0.3937],
        [0.5122, 0.4529, 0.4210],
        [0.7478, 0.5877, 0.5297],
        [0.0749, 0.5573, 0.0579]])

## Torch的tensor 和 numpy 的array 共享他们存储空间，修改一个会导致另外一个也被修改

In [128]:
a = torch.ones(5)
b = a.numpy()

In [129]:
a

tensor([1., 1., 1., 1., 1.])

In [130]:
b

array([1., 1., 1., 1., 1.], dtype=float32)

In [131]:
a.add_(1)
a

tensor([2., 2., 2., 2., 2.])

In [132]:
b

array([2., 2., 2., 2., 2.], dtype=float32)

In [133]:
## 将numpy的Array转换成torch的tensor
import  numpy as np
a = np.ones(5)

In [134]:
b = torch.from_numpy(a)

In [135]:
np.add(a,1,out=a)

array([2., 2., 2., 2., 2.])

In [136]:
b

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

## 求导运算 requrires_grad = True 用来跟踪该变量相关的计算操作

In [137]:
x = torch.ones(2,2,requires_grad = True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [138]:
y = x +2
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [139]:
## 每个tensor 都带有属性.grad_fn，该属性引用创建了这个变量的Fucntion
y.grad_fn

<AddBackward0 at 0x7fee007c6590>

In [140]:
z = y*y*3
out = z.mean()
z,out

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward0>))

In [141]:
## 来计算梯度  梯度计算需要调用方法.backward(),该变量是一个标量，即仅有一个元素，
## 那么不需要要传递任何参数给的方法 .backward(),当包含多个元素时候，那么就必须指定一个
## gradient 参数 来匹配尺寸的大小的tensor
out.backward()

In [142]:
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [143]:
# 一般来说 torch.autograd 就是用于计算雅克比向量乘积的工具
x = torch.randn(3,requires_grad=True)
y = x*2
while y.data.norm() < 1000:
    y = y*2
y

tensor([-830.6531, -753.8757,  242.3695], grad_fn=<MulBackward0>)

In [144]:
v = torch.tensor([0.1,1.0,0.000001],dtype=torch.float)
y.backward(v)
x.grad

tensor([5.1200e+01, 5.1200e+02, 5.1200e-04])

In [145]:
## 加入with torch.no_grad()就可以停止追踪变量历史进行自动梯度计算
with torch.no_grad():
    print((x**2).requires_grad)

False


## 神经网络

### 一个简单神经网络
.用神经对输入进行处理

.计算代价值

.将梯度传播回神经网络的参数中

.更新网络中的权重 weight = weight+ learning_rate*gradient

In [146]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,6,5) # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc1 = nn.Linear(16*5*5,120) # an affine operation: y = Wx + b
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
    def forward(self,x):
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2)) # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv2(x)),2) # If the size is a square you can only specify a single number
        x = x.view(-1,self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    def num_flat_features(self,x):
        size = x.size()[1:] ## all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
net = Net()
net

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [147]:
## 可以在forward 函数中使用tensor 中操作
params = list(net.parameters())
params

[Parameter containing:
 tensor([[[[ 1.8910e-01,  1.4928e-01,  6.2433e-02,  7.6362e-03, -1.3160e-01],
           [-1.6989e-01,  1.0392e-01,  9.8101e-03, -9.0725e-02, -3.8924e-02],
           [-1.1899e-01, -1.8024e-01, -1.6835e-01,  8.7790e-02, -1.2563e-01],
           [-1.6018e-01,  1.7934e-01, -6.3487e-02, -2.5271e-02,  1.0180e-04],
           [-1.5666e-02,  1.3138e-01,  1.4238e-01,  1.4650e-01, -1.5941e-01]]],
 
 
         [[[-1.8333e-01,  5.8387e-02,  7.4210e-02,  3.1307e-04,  3.2712e-02],
           [ 8.9550e-02, -1.5790e-01,  1.9824e-01, -1.6551e-02,  8.3199e-02],
           [-1.7080e-01, -1.5473e-01,  8.1232e-02,  9.8257e-02, -1.1718e-01],
           [ 1.3240e-01,  1.9457e-01, -1.0972e-01,  1.9739e-01,  3.7865e-02],
           [-3.3993e-02, -8.4689e-02,  1.7730e-01, -2.9686e-04, -3.8559e-02]]],
 
 
         [[[-9.9288e-02,  6.9548e-02, -1.5976e-01,  1.4247e-01, -1.7339e-01],
           [-1.3801e-01, -1.3348e-01,  3.1307e-02,  7.5065e-02,  1.8500e-01],
           [-1.3560e-01, -7.3

In [148]:
len(params)

10

In [149]:
params[0].size() #conv1's weight

torch.Size([6, 1, 5, 5])

In [150]:
input = Variable(torch.randn(1,1,32,32))
out = net.forward(input)
out

tensor([[-0.1009, -0.1753,  0.0722,  0.0634,  0.0077, -0.0767, -0.0371, -0.1386,
         -0.0131, -0.0438]], grad_fn=<AddmmBackward>)

In [151]:
net.zero_grad() ## 对所有的参数梯度缓冲去进行归零
out.backward(torch.randn(1,10)) # 使用随机的梯度进行反向传播

## torch.nn 只接受小批量数据
整个torch.nn包只接受那种小批量样本的数据，而非单个样本，例如nn.Conv2d
能够结构一个四维的TensornSamples * nChannels * Height * Width
如果你拿到是当个样本那么使用input.unsqueeze(0)来加一个维度就可以

In [152]:
## nn.MSELoss计算输入和目标之间的均方差
output = net.forward(input)
target = Variable(torch.arange(1,11))
criterion =  nn.MSELoss()
loss = criterion(output,target)
loss

tensor(38.9734, grad_fn=<MseLossBackward>)

In [153]:
target

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [154]:
## 现在当调用loss.backward() 之后来看看conv1's在进行反馈之后的偏置梯度如何
net.zero_grad()
net.conv1.bias.grad

tensor([0., 0., 0., 0., 0., 0.])

## 更新网络的权重
## 最简单方法是随机梯度下降法(SGD)
weight = weight - learning_rate* gradient

In [155]:
learning_rate  = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [156]:
## 当我们想用不同种类的更新权重方法 可以考虑torch.optim 这个功能
import torch.optim as optim
optimizer = optim.SGD(net.parameters(),lr=0.001)
# in your training looop
optimizer.zero_grad() ## zero the gradient buffers
output = net.forward(input)
loss = criterion(output,target)
loss.backward()
optimizer.step()

RuntimeError: expected dtype Float but got dtype Long (validate_dtype at ../aten/src/ATen/native/TensorIterator.cpp:143)
frame #0: c10::Error::Error(c10::SourceLocation, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) + 135 (0x1282fef47 in libc10.dylib)
frame #1: at::TensorIterator::compute_types() + 4335 (0x11b39730f in libtorch_cpu.dylib)
frame #2: at::TensorIterator::build() + 690 (0x11b39b6c2 in libtorch_cpu.dylib)
frame #3: at::native::mse_loss_backward_out(at::Tensor&, at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 520 (0x11b1d1f08 in libtorch_cpu.dylib)
frame #4: at::CPUType::mse_loss_backward_out_grad_input(at::Tensor&, at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 247 (0x11b64ad07 in libtorch_cpu.dylib)
frame #5: at::native::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 193 (0x11b1d1c01 in libtorch_cpu.dylib)
frame #6: at::CPUType::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 234 (0x11b64b05a in libtorch_cpu.dylib)
frame #7: c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&, at::Tensor const&, long long), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, at::Tensor const&, long long> >, at::Tensor (at::Tensor const&, at::Tensor const&, at::Tensor const&, long long)>::call(c10::OperatorKernel*, at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 27 (0x11b68557b in libtorch_cpu.dylib)
frame #8: torch::autograd::VariableType::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 2608 (0x11d460810 in libtorch_cpu.dylib)
frame #9: c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&, at::Tensor const&, long long), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, at::Tensor const&, long long> >, at::Tensor (at::Tensor const&, at::Tensor const&, at::Tensor const&, long long)>::call(c10::OperatorKernel*, at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) + 27 (0x11b68557b in libtorch_cpu.dylib)
frame #10: at::Tensor c10::OperatorHandle::callUnboxed<at::Tensor, at::Tensor const&, at::Tensor const&, at::Tensor const&, long long>(at::Tensor const&, at::Tensor const&, at::Tensor const&, long long) const + 294 (0x11c5826c6 in libtorch_cpu.dylib)
frame #11: torch::autograd::generated::MseLossBackward::apply(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor> >&&) + 311 (0x11d1b4c77 in libtorch_cpu.dylib)
frame #12: torch::autograd::Node::operator()(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor> >&&) + 658 (0x11d97c742 in libtorch_cpu.dylib)
frame #13: torch::autograd::Engine::evaluate_function(std::__1::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 1408 (0x11d972f10 in libtorch_cpu.dylib)
frame #14: torch::autograd::Engine::thread_main(std::__1::shared_ptr<torch::autograd::GraphTask> const&, bool) + 497 (0x11d972121 in libtorch_cpu.dylib)
frame #15: torch::autograd::Engine::thread_init(int) + 152 (0x11d971eb8 in libtorch_cpu.dylib)
frame #16: torch::autograd::python::PythonEngine::thread_init(int) + 52 (0x11a4a6dc4 in libtorch_python.dylib)
frame #17: void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, void (torch::autograd::Engine::*)(int), torch::autograd::Engine*, int> >(void*) + 66 (0x11d9814a2 in libtorch_cpu.dylib)
frame #18: _pthread_start + 148 (0x7fff72612109 in libsystem_pthread.dylib)
frame #19: thread_start + 15 (0x7fff7260db8b in libsystem_pthread.dylib)
