In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import numpy as np
from collections import OrderedDict

torch.manual_seed(1)

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Reference 

* http://pytorch.org/docs/master/
* https://github.com/rguthrie3/DeepLearningForNLPInPytorch
* https://github.com/yunjey/pytorch-tutorial
* https://github.com/hunkim/PyTorchZeroToAll

# 1. What is PyTorch?

텐서와 옵티마이저, 뉴럴넷 등 GPU 연산에 최적화된 모듈을 이용하여 빠르게 <strong>딥러닝 모델을 구현할 수 있는 프레임워크</strong> <br>
Facebook이 밀고 있던 lua 기반의 torch를 python 버전으로 포팅함

# 2. What is the difference between PyTorch and TensorFlow?

* https://medium.com/towards-data-science/pytorch-vs-tensorflow-spotting-the-difference-25c75777377b
* http://cs231n.stanford.edu/slides/2017/cs231n_2017_lecture8.pdf
* https://devblogs.nvidia.com/parallelforall/recursive-neural-networks-pytorch/?utm_campaign=Revue%20newsletter&utm_medium=Newsletter&utm_source=revue

# 3. Why Pytorch? 

* Pythonic 
* Easy debugging
* Intuitive => Easy to understand

# 1> Pytorch Basic 

* Tensor
* Variable
* Module
* Parameter

파이토치의 가장 기본 단위는 Tensor이다. 디폴트 Tensor는 FloatTensor이고, LongTensor, ByteTensor 등 여러 종류가 있다.

# 1. Create Tensor

### 파이썬 리스트로부터 생성 

In [9]:
# Build tensors directly from (nested) Python lists.
V_data = [1., 2., 3.] # vector
V = torch.Tensor(V_data) 
print(V)

M_data = [[1., 2., 3.], [4., 5., 6]] # matrix
M = torch.Tensor(M_data)
print(M)

T_data = [[[1.,2.], [3.,4.]], # 3-D tensor
          [[5.,6.], [7.,8.]]]
T = torch.Tensor(T_data)
print(T)


 1
 2
 3
[torch.FloatTensor of size 3]


 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]



In [13]:
T.tolist() # the reverse direction: PyTorch tensor -> nested Python list

[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]

### numpy ndarray 객체로부터 생성

In [5]:
# Build tensors from NumPy ndarrays instead of Python lists.
V_data = np.array([1.,2.,3.])# vector
V = torch.Tensor(V_data) # torch.Tensor() accepts a NumPy ndarray directly, just as it accepts a list
print(V)

M_data = np.array([[1., 2., 3.], [4., 5., 6]]) # matrix
M = torch.Tensor(M_data)
print(M)

T_data = np.array([[[1.,2.], [3.,4.]], # 3-D tensor
          [[5.,6.], [7.,8.]]])
T = torch.Tensor(T_data)
print(T)


 1
 2
 3
[torch.FloatTensor of size 3]


 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]



In [8]:
T.numpy() # the reverse direction: PyTorch tensor -> NumPy ndarray

array([[[ 1.,  2.],
        [ 3.,  4.]],

       [[ 5.,  6.],
        [ 7.,  8.]]], dtype=float32)

### 기타 다른 생성 방법들 

In [21]:
# Other constructors; each result overwrites x after being printed.
x = torch.zeros(2,3)
print(x)

x = torch.ones(2,3)
print(x)

x = torch.rand(3,4)
print(x)

x = torch.randn(3, 4) # sampled from the standard normal distribution
print(x) # FloatTensor is the default type

x = torch.randperm(5) # permutation of integers from 0 to n - 1
print(x) # LongTensor


 0  0  0
 0  0  0
[torch.FloatTensor of size 2x3]


 1  1  1
 1  1  1
[torch.FloatTensor of size 2x3]


 0.1769  0.8206  0.6776  0.6237
 0.4009  0.3440  0.5848  0.8869
 0.5716  0.2636  0.9967  0.5107
[torch.FloatTensor of size 3x4]


 0.6964  1.1296  0.2214 -0.0558
 1.2057  1.9486 -0.0766 -0.8562
-0.7870 -0.8161  0.5470 -1.1707
[torch.FloatTensor of size 3x4]


 3
 4
 1
 0
 2
[torch.LongTensor of size 5]



# 2. Indexing, Slicing, Joining, Mutating Ops

http://pytorch.org/docs/0.3.0/torch.html?#indexing-slicing-joining-mutating-ops

In [22]:
x = torch.randn(3, 4, 5) # random 3x4x5 tensor used by the indexing cells that follow

In [23]:
x[0] # intuitive (Pythonic) indexing is supported


-0.1127  1.5980 -0.8445 -1.0489  0.9387
 0.5378  1.5372 -0.6943  0.2174 -0.2995
-0.3749  1.8673  0.9042  0.1181  1.8941
-0.4229  0.7431  0.0756  1.1366 -1.9280
[torch.FloatTensor of size 4x5]

In [24]:
x[0][0] # chained indexing: first row of the first 4x5 slice


-0.1127
 1.5980
-0.8445
-1.0489
 0.9387
[torch.FloatTensor of size 5]

In [25]:
x[0][0][0] # indexing all three axes yields a single element

-0.11267814040184021

### torch.index_select

In [5]:
x = torch.randn(3, 4)
print(x)

indices = torch.LongTensor([0, 2])
print(indices) # the indices to select (LongTensor)

print(torch.index_select(x, 0, indices)) # pick the given indices along dim 0 (rows)

print(torch.index_select(x, 1, indices)) # pick the given indices along dim 1 (columns)


 0.3255 -0.4791  1.3790  2.5286
 0.4107 -0.9880 -0.9081  0.5423
 0.1103 -2.2590  0.6067 -0.1383
[torch.FloatTensor of size 3x4]


 0
 2
[torch.LongTensor of size 2]


 0.3255 -0.4791  1.3790  2.5286
 0.1103 -2.2590  0.6067 -0.1383
[torch.FloatTensor of size 2x4]


 0.3255  1.3790
 0.4107 -0.9081
 0.1103  0.6067
[torch.FloatTensor of size 3x2]



### torch.masked_select

In [6]:
x = torch.randn(3, 4)
print(x)

mask = x.ge(0.5) # mask the entries of x that are greater than or equal to 0.5 (ByteTensor)
print(mask) 

print(torch.masked_select(x, mask)) # pick out the masked values


 0.8310 -0.2477 -0.8029  0.2366
 0.2857  0.6898 -0.6331  0.8795
-0.6842  0.4533  0.2912 -0.8317
[torch.FloatTensor of size 3x4]


 1  0  0  0
 0  1  0  1
 0  0  0  0
[torch.ByteTensor of size 3x4]


 0.8310
 0.6898
 0.8795
[torch.FloatTensor of size 3]



### torch.cat => concat : 두 텐서를 붙인다(Concatenation)

In [10]:
# row-wise concat
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 =torch.cat([x_1, y_1]) # 디폴트는 첫번째 차원 기준으로 콘캣 (0)
print(z_1)

# column-wise concat
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat([x_2, y_2], 1) # 두번째 인자로 콘캣할 차원축을 선택할 수 있다.
print(z_2)


-2.5667 -1.4303  0.5009  0.5438 -0.4057
 1.1341 -1.1115  0.3501 -0.7703 -0.1473
 0.6272  1.0935  0.0939  1.2381 -1.3459
 0.5119 -0.6933 -0.1668 -0.9999 -1.6476
 0.8098  0.0554  1.1340 -0.5326  0.6592
[torch.FloatTensor of size 5x5]


-1.5964 -0.3769 -3.1020 -0.0020 -1.0952  0.6016  0.6984 -0.8005
-0.0995 -0.7213  1.2708  1.5381  1.4673  1.5951 -1.5279  1.0156
[torch.FloatTensor of size 2x8]



### torch.view => reshape

In [11]:
x = torch.randn(2, 3, 4)
print(x)
print(x.view(2, 12)) # reshape to 2x12
print(x.view(2, -1)) # with -1, that size is inferred: total number of elements divided by the explicitly given sizes


(0 ,.,.) = 
 -0.2020 -1.2865  0.8231 -0.6101
 -1.2960 -0.9434  0.6684  1.1628
 -0.3229  1.8782 -0.5666  0.4016

(1 ,.,.) = 
 -0.1153  0.3170  0.5629  0.8662
 -0.3528  0.3482  1.1371 -0.3339
 -1.4724  0.7296 -0.1312 -0.6368
[torch.FloatTensor of size 2x3x4]



Columns 0 to 9 
-0.2020 -1.2865  0.8231 -0.6101 -1.2960 -0.9434  0.6684  1.1628 -0.3229  1.8782
-0.1153  0.3170  0.5629  0.8662 -0.3528  0.3482  1.1371 -0.3339 -1.4724  0.7296

Columns 10 to 11 
-0.5666  0.4016
-0.1312 -0.6368
[torch.FloatTensor of size 2x12]



Columns 0 to 9 
-0.2020 -1.2865  0.8231 -0.6101 -1.2960 -0.9434  0.6684  1.1628 -0.3229  1.8782
-0.1153  0.3170  0.5629  0.8662 -0.3528  0.3482  1.1371 -0.3339 -1.4724  0.7296

Columns 10 to 11 
-0.5666  0.4016
-0.1312 -0.6368
[torch.FloatTensor of size 2x12]



### torch.squeeze

In [10]:
x = torch.zeros(2,1,2,1)
print(x)
print(x.squeeze()) # removes every dimension whose size is 1
print(x.squeeze(1)) # the dimension to squeeze can be given explicitly


(0 ,0 ,.,.) = 
  0
  0

(1 ,0 ,.,.) = 
  0
  0
[torch.FloatTensor of size 2x1x2x1]


 0  0
 0  0
[torch.FloatTensor of size 2x2]


(0 ,.,.) = 
  0
  0

(1 ,.,.) = 
  0
  0
[torch.FloatTensor of size 2x2x1]



### torch.unsqueeze  

In [15]:
x = torch.randn(5)
print(x)

print(x.unsqueeze(0)) # the opposite of squeeze: inserts a dimension of size 1
print(x.unsqueeze(1))


-0.7703
-0.1473
 0.6272
 1.0935
 0.0939
[torch.FloatTensor of size 5]


-0.7703 -0.1473  0.6272  1.0935  0.0939
[torch.FloatTensor of size 1x5]


-0.7703
-0.1473
 0.6272
 1.0935
 0.0939
[torch.FloatTensor of size 5x1]



# 3. Math operation 

http://pytorch.org/docs/0.3.0/torch.html?#math-operations

### add 

In [63]:
# Element-wise addition: torch.add and the + operator give the same result.
x = torch.Tensor([1., 2., 3.])
y = torch.Tensor([4., 5., 6.])
z = torch.add(x, y)  # same as x + y
print(z)
print(x + y)


 5
 7
 9
[torch.FloatTensor of size 3]


 5
 7
 9
[torch.FloatTensor of size 3]



### sum 

In [64]:
x = torch.Tensor([ 1., 2., 3. ])
print(x.sum()) # sum of all elements

6.0


### dot product 

In [61]:
# Dot product of two vectors: function form and method form agree.
x = torch.Tensor([1., 2., 3.])
y = torch.Tensor([4., 5., 6.])
z = torch.dot(x, y)  # same as x.dot(y)
print(z)
print(x.dot(y))

32.0
32.0


### mul 

In [62]:
# Element-wise multiplication.
x = torch.Tensor([1., 2., 3.])
y = torch.Tensor([4., 5., 6.])
z = torch.mul(x, y)  # same as x.mul(y)
print(z)


  4
 10
 18
[torch.FloatTensor of size 3]



### mm: matrix multiplication

In [66]:
# Matrix product of a 2x2 and a 2x3 matrix gives a 2x3 result.
x = torch.randn(2, 2)
y = torch.randn(2, 3)
z = torch.mm(x, y)  # same as x.mm(y)
print(z)


 0.0504  1.2295 -0.1465
 0.1589  0.8862 -0.3472
[torch.FloatTensor of size 2x3]



### max

In [78]:
x = torch.Tensor([[1.,2.],[3.,4.]])
print(x.max()) # maximum over all elements
print(x.max(1)) # along dim 1: a (max values, indices) pair

4.0
(
 2
 4
[torch.FloatTensor of size 2]
, 
 1
 1
[torch.LongTensor of size 2]
)


# 4. Computation Graphs and Automatic Differentiation 

딥러닝 프레임워크의 가장 큰 장점인 자동 미분 기능을 사용하기 위해서는 Variable, Parameter 클래스로 래핑해야 함

In [12]:
# Variable은 텐서를 래핑한다 (autograd 기능에 의한 미분값의 필요여부 결정할 수 있음)
x = Variable( torch.Tensor([1., 2., 3]), requires_grad=True )

# Variable로 감싸준 상태에서 .data로 원래 텐서에 접근할 수 있다 
print(x.data)

# 역시 Variable로 래핑한 다른 텐서들과 연산할 수 있다
y = Variable( torch.Tensor([4., 5., 6]), requires_grad=True )
z = x + y
print(z.data)

# 하지만 연산의 결과인 z는 뭔가 추가적인 정보를 알고 있다!
print(z.grad_fn)


 1
 2
 3
[torch.FloatTensor of size 3]


 5
 7
 9
[torch.FloatTensor of size 3]

<AddBackward1 object at 0x7fc244c68198>


In [13]:
s = z.sum() # reduce z to a single value
print(s)
print(s.grad_fn) # s also records the operation that produced it

Variable containing:
 21
[torch.FloatTensor of size 1]

<SumBackward0 object at 0x7fc244c6d3c8>


In [14]:
s.backward() # to backpropagate from this value, call .backward() on it
print(x.grad) # the gradient with respect to x has now been computed!

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



$$ s = \overbrace{x_0 + y_0}^\text{$z_0$} + \overbrace{x_1 + y_1}^\text{$z_1$} + \overbrace{x_2 + y_2}^\text{$z_2$} $$

$$ \frac{\partial s}{\partial x_0} = 1 $$

### Variable vs Parameter 

- Variable : 학습 및 모델 추론에 필요한 변수
- Parameter : 학습을 통해 찾아야 할 값

In [15]:
x = torch.Tensor([1.,2.,3.])
vx = Variable(x) # plain Variable wrapper, no requires_grad argument given

In [19]:
vx.requires_grad # a Variable created without requires_grad does not track gradients (False)

False

In [22]:
px = nn.Parameter(x) # wrap the same tensor as a Parameter instead

In [25]:
print(px.data) # the wrapped tensor is available through .data


 1
 2
 3
[torch.FloatTensor of size 3]



In [24]:
px.requires_grad # a Parameter tracks gradients without being asked to (True)

True

# 5. torch.nn 

### nn.Linear (Affine Maps)

$$ f(x) = Ax + b $$

In [2]:
lin = nn.Linear(5, 3) # maps from R^5 to R^3, parameters A, b
data = Variable( torch.randn(2, 5) ) # two input rows with 5 features each
print(lin(data)) # (2x3)

Variable containing:
 0.1755 -0.3268 -0.5069
-0.6602  0.2260  0.1089
[torch.FloatTensor of size 2x3]



### 비선형 함수들 (Non-Linearities)

In [21]:
# Push one random 2x2 input through several element-wise activations.
data = Variable(torch.randn(2, 2))
print(data)
for activation in (F.relu, F.tanh, F.sigmoid):
    print(activation(data))

Variable containing:
-1.0246 -1.0300
-1.0129  0.0055
[torch.FloatTensor of size 2x2]

Variable containing:
1.00000e-03 *
  0.0000  0.0000
  0.0000  5.5350
[torch.FloatTensor of size 2x2]

Variable containing:
-0.7717 -0.7739
-0.7670  0.0055
[torch.FloatTensor of size 2x2]

Variable containing:
 0.2641  0.2631
 0.2664  0.5014
[torch.FloatTensor of size 2x2]



### Softmax

In [22]:
# Softmax is also in torch.functional
data = Variable( torch.randn(5) )
print(data)
print(F.softmax(data))
print(F.softmax(data).sum()) # Sums to 1 because it is a distribution!
print(F.log_softmax(data))

Variable containing:
-0.9347
-0.9882
 1.3801
-0.1173
 0.9317
[torch.FloatTensor of size 5]

Variable containing:
 0.0481
 0.0456
 0.4867
 0.1089
 0.3108
[torch.FloatTensor of size 5]

Variable containing:
 1
[torch.FloatTensor of size 1]

Variable containing:
-3.0350
-3.0885
-0.7201
-2.2176
-1.1686
[torch.FloatTensor of size 5]



# 6. Containers 

### nn.Module 

파이토치에서는 torch.nn.Module을 상속받는 클래스로 모델을 정의하고, `__init__`에서 부모 클래스를 초기화한다. <br>
미리 정의된 메서드 forward에 Variable을 인자로 넘기면 forward 계산을 수행하면서, 이후 `.backward()`로 Parameter의 gradient를 구할 수 있도록 연산 그래프를 함께 기록한다.

In [8]:
class simpleNN(nn.Module):
    """Minimal network: a 2 -> 2 linear layer followed by a sigmoid."""

    def __init__(self):
        # Call nn.Module's constructor before assigning any sub-modules.
        super(simpleNN, self).__init__()
        self.linear = nn.Linear(2,2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """Apply the linear layer to ``inputs`` and pass the result through the sigmoid."""
        affine = self.linear(inputs)
        # Functional alternative to the module call: F.sigmoid(affine)
        return self.sigmoid(affine)

In [9]:
snn = simpleNN() # instantiate the module defined above

In [10]:
snn # the repr lists the sub-modules assigned in __init__

simpleNN(
  (linear): Linear(in_features=2, out_features=2)
  (sigmoid): Sigmoid()
)

### 모듈 파라미터 혹은 서브 모듈에 접근 

In [11]:
# Iterate over the module's parameters (here the linear layer's weight and bias).
for param in snn.parameters():
    print(param)

Parameter containing:
 0.3026 -0.3286
 0.6938 -0.2992
[torch.FloatTensor of size 2x2]

Parameter containing:
 0.5303
 0.0084
[torch.FloatTensor of size 2]



In [12]:
# Same parameters, but each one is paired with its dotted name.
for param in snn.named_parameters():
    print(param) # (name, Parameter) tuple

('linear.weight', Parameter containing:
 0.3026 -0.3286
 0.6938 -0.2992
[torch.FloatTensor of size 2x2]
)
('linear.bias', Parameter containing:
 0.5303
 0.0084
[torch.FloatTensor of size 2]
)


In [26]:
# Print only the parameters whose name contains "weight".
for name, tensor in snn.named_parameters():
    if "weight" not in name:
        continue
    print(tensor)

Parameter containing:
 0.3026 -0.3286
 0.6938 -0.2992
[torch.FloatTensor of size 2x2]



In [83]:
# Iterate over the direct sub-modules.
for child in snn.children():
    print(child) # module

Linear(in_features=2, out_features=2)
Sigmoid()


### forward 

In [85]:
inputs = Variable(torch.randn(1,2)) # one random sample with 2 features
inputs

Variable containing:
 2.2820 -1.2080
[torch.FloatTensor of size 1x2]

In [86]:
outputs = snn(inputs) # calling the module on the input produces the forward result
print(outputs)

Variable containing:
 0.1198  0.5264
[torch.FloatTensor of size 1x2]



### nn.Sequential() 

In [87]:
# The same linear -> sigmoid stack, built without writing a class.
model = nn.Sequential(nn.Linear(2, 2), nn.Sigmoid())

In [88]:
type(model) # the container's class

torch.nn.modules.container.Sequential

In [89]:
model # the repr lists the layers by positional index

Sequential(
  (0): Linear(in_features=2, out_features=2)
  (1): Sigmoid()
)

In [90]:
outputs = model(inputs) # the Sequential container is called just like a module
print(outputs)

Variable containing:
 0.7876  0.7099
[torch.FloatTensor of size 1x2]



In [52]:
# Give each layer a name by passing an OrderedDict instead of positional modules.
named_layers = [
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU()),
]
model = nn.Sequential(OrderedDict(named_layers))

## Loss function 

http://pytorch.org/docs/0.3.0/nn.html#loss-functions

In [48]:
loss_function = nn.MSELoss() # MSE loss criterion

## Optimizer

In [49]:
optimizer = optim.SGD(snn.parameters(),lr=0.01) # SGD over snn's parameters with learning rate 0.01

In [51]:
optimizer.param_groups # shows the hyperparameters and the parameters the optimizer holds

[{'dampening': 0,
  'lr': 0.01,
  'momentum': 0,
  'nesterov': False,
  'params': [Parameter containing:
   -0.6167  0.5950
   -0.1340  0.1427
   [torch.FloatTensor of size 2x2], Parameter containing:
    0.0262
   -0.4506
   [torch.FloatTensor of size 2]],
  'weight_decay': 0}]