- [PyTorch Tutorial for Deep Learning Researchers](https://github.com/yunjey/pytorch-tutorial/)

# pytorch

## 安装 
[移步官网选择相应链接](http://pytorch.org/)

## 基本数据结构-张量

In [3]:
from __future__ import print_function
import torch ## Tensor package

### 定义张量

In [13]:
x = torch.Tensor(5,3) # no initialization: the tensor holds whatever values were already in memory
x


-4.6141e+13  4.5786e-41 -4.6141e+13
 4.5786e-41  0.0000e+00  0.0000e+00
 8.4078e-45  0.0000e+00  0.0000e+00
 1.2612e-44  0.0000e+00  0.0000e+00
 1.6816e-44  0.0000e+00  0.0000e+00
[torch.FloatTensor of size 5x3]

In [14]:
x = torch.rand(5,3)  # uniform distribution on the interval [0, 1) by default
x


 0.4396  0.8173  0.4852
 0.5678  0.6112  0.4855
 0.0864  0.8335  0.1343
 0.2960  0.9020  0.2687
 0.3942  0.3616  0.4453
[torch.FloatTensor of size 5x3]

In [15]:
x.size()

torch.Size([5, 3])

In [16]:
y = torch.rand(5,3)

### 同形矩阵相加

In [17]:
x + y   # out-of-place: the sum is returned as a new tensor


 1.0180  1.5455  0.5706
 0.9235  1.5471  1.1553
 0.5546  1.1777  0.7905
 0.3564  1.4118  0.8956
 0.4205  0.9716  1.2850
[torch.FloatTensor of size 5x3]

In [18]:
torch.add(x,y)  # out-of-place: the sum is returned as a new tensor


 1.0180  1.5455  0.5706
 0.9235  1.5471  1.1553
 0.5546  1.1777  0.7905
 0.3564  1.4118  0.8956
 0.4205  0.9716  1.2850
[torch.FloatTensor of size 5x3]

In [19]:
y.add_(x)   # in-place: methods with a trailing underscore modify the tensor they are called on
y


 1.0180  1.5455  0.5706
 0.9235  1.5471  1.1553
 0.5546  1.1777  0.7905
 0.3564  1.4118  0.8956
 0.4205  0.9716  1.2850
[torch.FloatTensor of size 5x3]

### 输出

In [25]:
# Pre-allocate the destination tensor; torch.add writes the sum into it via `out=`.
# Starting from `result = None` would leave `result` as None, because there is
# no tensor for the sum to be written into.
result = torch.Tensor(5,3)
torch.add(x,y,out=result)
result

### Numpy-ndarray互转【！存储空间共享】

In [31]:
a = torch.ones(5)
a


 1
 1
 1
 1
 1
[torch.FloatTensor of size 5]

In [32]:
b = a.numpy()
b

array([ 1.,  1.,  1.,  1.,  1.], dtype=float32)

In [34]:
a.add_(1)   # in-place tensor op; b shares storage with a, so the change shows up in b as well
print(a,b)


 2
 2
 2
 2
 2
[torch.FloatTensor of size 5]
 [ 2.  2.  2.  2.  2.]


#### 回转

In [35]:
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a,1,out=a)       # passing out=a makes the addition in-place, so b sees the new values too
print(a,b)

[ 2.  2.  2.  2.  2.] 
 2
 2
 2
 2
 2
[torch.DoubleTensor of size 5]



### GPU与CPU运算互转

## 运算【自动求导包】【运行时定义框架】
- autograd.Variable 核心类 包装Tensor以支持运算以及梯度计算(.backward)
- ![](https://pic4.zhimg.com/v2-08e0530dfd6879ff2bee56cfc5cc5073_b.png)
- .data ->(raw Tensor)
- .grad 梯度
- .creator（较新版本中改名为 .grad_fn）  --> 创建该 Variable 的 Function
- .backward() 【标量无需指定grad_output参数】

In [37]:
from torch.autograd import Variable

In [38]:
x = Variable(torch.ones(2,2),requires_grad = True)
x

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

In [43]:
y = x + 2
y

Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]

In [46]:
z = y * y * 3    # element-wise: z = 3 * y^2
out = z.mean()   # scalar: the mean over all elements of z

In [47]:
out.backward()

In [48]:
x.grad   # d(out)/dx

Variable containing:
 4.5000  4.5000
 4.5000  4.5000
[torch.FloatTensor of size 2x2]

In [52]:
x = torch.randn(3)
x = Variable(x, requires_grad = True)
y = x * 2
# Keep doubling y until its norm reaches at least 1000.
while y.data.norm() < 1000:
    y = y * 2
# y has three elements (it is not a scalar), so backward() is given a tensor
# with one entry per element of y.
gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
y.backward(gradients)
x.grad

Variable containing:
  51.2000
 512.0000
   0.0512
[torch.FloatTensor of size 3]

## 神经网络

In [63]:
import torch.nn as nn
import torch.nn.functional as F

In [64]:
class Net(nn.Module):
    """Small convolutional network: two conv/max-pool stages, then three linear layers.

    The first linear layer expects 16*5*5 flattened features per sample; a
    1x32x32 input, for example, reaches that size after the two stages.
    The network produces 10 output values per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutions with 5x5 square kernels: 1 input channel -> 6 -> 16 channels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine layers (y = Wx + b): 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network and return the 10 outputs per sample."""
        # Stage 1: conv -> ReLU -> max pooling over a (2, 2) window.
        pooled = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # Stage 2: same pattern; a single number is enough for a square window.
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), 2)
        # Flatten everything except the batch dimension.
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for extent in x.size()[1:]:
            count *= extent
        return count

In [65]:
net = Net()
net

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)

# Deep Learning for Natural Language Processing with Pytorch

In [4]:
import torch 
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7f439c516fa8>

## 1. Introduction to Torch's tensor library

In [19]:
x = torch.randn(3,4,5)  # normal distribution
# torch.randn((3,4,5))
x


(0 ,.,.) = 
  0.7371  1.2528  0.8503 -0.4165 -0.7499
  1.0632  0.0073 -1.4252 -0.0781 -0.5138
  1.1375 -1.0246 -1.0300 -1.0129  0.0055
 -0.9347 -0.9882  1.3801 -0.1173  0.9317

(1 ,.,.) = 
  1.3267 -1.0173 -1.8575  0.9015  0.1495
 -0.0336 -0.6076 -1.0048 -0.2826 -0.2711
  1.3210  1.1608  0.3457 -0.1136 -0.8910
  0.2900 -2.1017 -1.1279 -0.8191  0.5334

(2 ,.,.) = 
  0.1381  1.6910  1.4114 -0.9804 -0.7578
 -0.3782  1.7211  0.0310 -0.4270 -0.3868
 -0.6089  1.1652 -0.1326 -0.0228  1.1848
 -1.0322 -0.7039  0.8813  1.4276 -0.9245
[torch.FloatTensor of size 3x4x5]

### concatenation

In [22]:
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 =torch.cat([x_1, y_1])  # concatenates along dim 0 by default: rows are stacked (2x5 and 3x5 -> 5x5)
x_1,y_1,z_1

(
 -1.1152 -0.6667  1.0214 -0.1975 -0.8882
 -0.3583  1.8186  0.2141  0.2588 -0.7857
 [torch.FloatTensor of size 2x5], 
  0.1697  1.5807  0.4838 -1.2901 -0.8039
  0.6835  1.0926 -1.2625 -0.0191 -1.0565
 -0.9923 -0.3660  0.6203  0.7167  0.5366
 [torch.FloatTensor of size 3x5], 
 -1.1152 -0.6667  1.0214 -0.1975 -0.8882
 -0.3583  1.8186  0.2141  0.2588 -0.7857
  0.1697  1.5807  0.4838 -1.2901 -0.8039
  0.6835  1.0926 -1.2625 -0.0191 -1.0565
 -0.9923 -0.3660  0.6203  0.7167  0.5366
 [torch.FloatTensor of size 5x5])

In [23]:
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat([x_2, y_2], 1)  # concatenate along dim 1: columns are joined (2x3 and 2x5 -> 2x8)
x_2,y_2,z_2

(
  0.2121  1.7014  1.6250
  2.1412  0.6155  1.3621
 [torch.FloatTensor of size 2x3], 
  1.0912  0.2104  1.3681 -0.0624 -0.2635
 -1.3562 -0.5342  0.5256  0.0416  0.7511
 [torch.FloatTensor of size 2x5], 
  0.2121  1.7014  1.6250  1.0912  0.2104  1.3681 -0.0624 -0.2635
  2.1412  0.6155  1.3621 -1.3562 -0.5342  0.5256  0.0416  0.7511
 [torch.FloatTensor of size 2x8])

### Reshaping Tensors

In [28]:
x = torch.randn(2,3,4)
# print() calls (not Python 2 print statements) so the cell also runs on Python 3.
print(x)
print(x.view(2,12))  # reshape to 2 rows of 12 columns
print(x.view(2,-1))  # -1 lets view() infer that dimension (12 here)


(0 ,.,.) = 
 -1.1253  0.9556 -0.3170  2.1666
 -0.7013 -0.6323 -0.2351  0.1094
 -0.0641 -1.4385 -1.7741 -0.6436

(1 ,.,.) = 
  0.5324 -0.6021  0.0531 -0.1751
 -0.1346 -1.0441 -0.3360  1.6257
 -0.8132  0.8363 -0.4685 -1.0825
[torch.FloatTensor of size 2x3x4]



Columns 0 to 9 
-1.1253  0.9556 -0.3170  2.1666 -0.7013 -0.6323 -0.2351  0.1094 -0.0641 -1.4385
 0.5324 -0.6021  0.0531 -0.1751 -0.1346 -1.0441 -0.3360  1.6257 -0.8132  0.8363

Columns 10 to 11 
-1.7741 -0.6436
-0.4685 -1.0825
[torch.FloatTensor of size 2x12]



Columns 0 to 9 
-1.1253  0.9556 -0.3170  2.1666 -0.7013 -0.6323 -0.2351  0.1094 -0.0641 -1.4385
 0.5324 -0.6021  0.0531 -0.1751 -0.1346 -1.0441 -0.3360  1.6257 -0.8132  0.8363

Columns 10 to 11 
-1.7741 -0.6436
-0.4685 -1.0825
[torch.FloatTensor of size 2x12]



## 2. Computation Graphs and Automatic Differentiation

In [39]:
x = autograd.Variable(torch.Tensor([1.,2.,3.]),requires_grad=True)
x.data

In [44]:
y = autograd.Variable(torch.Tensor([4.,5.,6.]),requires_grad=True)
y.data


 4
 5
 6
[torch.FloatTensor of size 3]

In [48]:
z = x + y

In [52]:
z.grad_fn

<torch.autograd.function.AddBackward at 0x7f435552cde0>

In [57]:
s = z.sum()  # reduce z to a single value
print(s)     # print() call so the cell also runs on Python 3
s.grad_fn

Variable containing:
 21
[torch.FloatTensor of size 1]



<torch.autograd.function.SumBackward at 0x7f43550a3148>

In [58]:
s.backward()
# Derivatives of the scalar s with respect to each vector.
# One value per print() call so the output is the same on Python 2 and 3.
# z is an intermediate result; in the recorded run z.grad prints as None.
print(z.grad)
print(y.grad)
print(x.grad)

None 
Variable containing:
 2
 2
 2
[torch.FloatTensor of size 3]
 
Variable containing:
 2
 2
 2
[torch.FloatTensor of size 3]



## 3. Deep Learning Building Blocks: Affine maps, non-linearities and objectives

### Affine Maps [Wx+b]

In [59]:
lin = nn.Linear(5,3)  # affine map from 5 input features to 3 output features
data = autograd.Variable(torch.randn(2,5))  # input of size 2x5; lin(data) has size 2x3
lin(data)

Variable containing:
 0.2415 -1.6309 -0.2372
 0.5908 -0.8664 -0.5928
[torch.FloatTensor of size 2x3]

### Non-Linearities[tanh,sigmoid,relu]

In [60]:
data = autograd.Variable(torch.randn(3,5))
# print() calls so the cell also runs on Python 3.
print(data)
print(F.relu(data))  # negative entries are replaced by 0, the rest are unchanged

Variable containing:
 0.3061  0.7042 -1.8685 -0.2919  0.6717
 1.4367 -1.1453 -1.1061 -0.5888  0.2625
 0.5964  0.3147 -1.2956 -0.2395 -0.2033
[torch.FloatTensor of size 3x5]

Variable containing:
 0.3061  0.7042  0.0000  0.0000  0.6717
 1.4367  0.0000  0.0000  0.0000  0.2625
 0.5964  0.3147  0.0000  0.0000  0.0000
[torch.FloatTensor of size 3x5]



### Softmax and Probabilities

In [61]:
data = autograd.Variable( torch.randn(5) )
# print() calls so the cell also runs on Python 3; the softmax dimension is
# passed explicitly instead of relying on the implicit default.
print(data)
print(F.softmax(data, dim=0))        # softmax over the only dimension
print(F.softmax(data, dim=0).sum())  # the softmax outputs sum to 1
print(F.log_softmax(data, dim=0))    # logarithm of the softmax outputs

Variable containing:
 0.1687
 0.0594
-0.2800
 0.1535
-0.4353
[torch.FloatTensor of size 5]

Variable containing:
 0.2459
 0.2205
 0.1570
 0.2422
 0.1344
[torch.FloatTensor of size 5]

Variable containing:
 1
[torch.FloatTensor of size 1]

Variable containing:
-1.4028
-1.5120
-1.8515
-1.4180
-2.0068
[torch.FloatTensor of size 5]



## 5. Logistic Regression Bag-of-Words classifier

In [None]:
# Toy data sets: each item is a (list of tokens, language label) pair.
data = [ ("me gusta comer en la cafeteria".split(), "SPANISH"),
         ("Give it to me".split(), "ENGLISH"),
         ("No creo que sea una buena idea".split(), "SPANISH"),
         ("No it is not a good idea to get lost at sea".split(), "ENGLISH") ]

test_data = [ ("Yo creo que si".split(), "SPANISH"),
              ("it is lost on me".split(), "ENGLISH")]

# Numericalize: give every distinct word (training and test) an integer index,
# assigned in first-seen order, so token lists can be turned into index lists.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)  # print() call so the cell also runs on Python 3

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2


class BoWClassifier(nn.Module):
    """Logistic-regression classifier over bag-of-words count vectors.

    A single linear layer maps a vocabulary-sized count vector to one score
    per label; log-softmax turns the scores into log-probabilities.
    """

    def __init__(self, num_labels, vocab_size):
        """Create the linear layer.

        Args:
            num_labels: number of output classes.
            vocab_size: length of the bag-of-words input vector.
        """
        super(BoWClassifier, self).__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for each row of ``bow_vec``.

        Args:
            bow_vec: 2-D input with one bag-of-words vector per row
                (``make_bow_vector`` in this file produces a 1 x vocab_size row).
        """
        # Normalize across the label dimension explicitly; relying on the
        # implicit dimension is deprecated.
        return F.log_softmax(self.linear(bow_vec), dim=1)
    
def make_bow_vector(sentence, word_to_ix):
    """Build a 1 x len(word_to_ix) tensor of word counts for ``sentence``.

    Args:
        sentence: iterable of words; every word must be a key of ``word_to_ix``.
        word_to_ix: mapping from word to its column index.

    Returns:
        A tensor with a single row whose entry at a word's index is the number
        of times that word occurs in ``sentence``.
    """
    counts = torch.zeros(len(word_to_ix))
    for token in sentence:
        index = word_to_ix[token]
        counts[index] += 1
    # Add a leading dimension of size 1.
    return counts.view(1, -1)

def make_target(label, label_to_ix):
    """Return a one-element LongTensor holding the index of ``label``.

    Args:
        label: a key of ``label_to_ix``.
        label_to_ix: mapping from label to integer index.
    """
    index = label_to_ix[label]
    return torch.LongTensor([index])

model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# Show the model's parameters (those of its single linear layer).
# print() calls throughout so the cell also runs on Python 3.
for param in model.parameters():
    print(param)

# Log-probabilities for the first training sentence.
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_ix)
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)

label_to_ix = { "SPANISH": 0, "ENGLISH": 1 }


# Run on test data before we train, just to see a before-and-after
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)
print(next(model.parameters())[:,word_to_ix["creo"]]) # Print the matrix column corresponding to "creo"