In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision
import numpy as np

# Seed PyTorch's global random number generator so the random tensors below repeat across runs.
torch.manual_seed(seed=1)

<torch._C.Generator at 0x7fc29c071250>

* http://pytorch.org/docs/master/
* https://github.com/rguthrie3/DeepLearningForNLPInPytorch

# 1. What is PyTorch?

텐서와 옵티마이저, 뉴럴넷 등 GPU 연산에 최적화된 모듈을 이용하여 빠르게 <strong>딥러닝 모델을 구현할 수 있는 프레임워크</strong> <br>
Facebook이 밀고 있던 lua 기반의 torch를 python 버전으로 포팅함

# 2. What is the difference between PyTorch and TensorFlow?

* https://medium.com/towards-data-science/pytorch-vs-tensorflow-spotting-the-difference-25c75777377b
* http://cs231n.stanford.edu/slides/2017/cs231n_2017_lecture8.pdf
* https://devblogs.nvidia.com/parallelforall/recursive-neural-networks-pytorch/?utm_campaign=Revue%20newsletter&utm_medium=Newsletter&utm_source=revue

# 3. Why PyTorch?

* Pythonic 
* Easy debugging
* Intuitive => Easy to understand

# 1> PyTorch Basics

* Tensor
* Variable

파이토치의 가장 기본 단위는 Tensor이다. 디폴트 Tensor는 FloatTensor이고, LongTensor, ByteTensor 등 여러 종류가 있다.

## 1. 텐서 만들기 

In [3]:
# Nested Python lists map directly onto tensor dimensions.
V_data = [1., 2., 3.]            # 1-D: vector
M_data = [[1., 2., 3.],
          [4., 5., 6]]           # 2-D: matrix
T_data = [[[1., 2.], [3., 4.]],
          [[5., 6.], [7., 8.]]]  # 3-D tensor

# torch.Tensor wraps a (nested) Python list as-is.
V = torch.Tensor(V_data)
M = torch.Tensor(M_data)
T = torch.Tensor(T_data)

# Show the vector, the matrix and the 3-D tensor in turn.
print(V)
print(M)
print(T)


 1
 2
 3
[torch.FloatTensor of size 3]


 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]



numpy 객체로부터 로딩

In [4]:
# Same three shapes as before, but the source data are NumPy arrays.
V_data = np.array([1., 2., 3.])            # 1-D: vector
M_data = np.array([[1., 2., 3.],
                   [4., 5., 6]])           # 2-D: matrix
T_data = np.array([[[1., 2.], [3., 4.]],
                   [[5., 6.], [7., 8.]]])  # 3-D tensor

# torch.Tensor wraps a NumPy array the same way it wraps a Python list.
V = torch.Tensor(V_data)
M = torch.Tensor(M_data)
T = torch.Tensor(T_data)

# Show the vector, the matrix and the 3-D tensor in turn.
print(V)
print(M)
print(T)


 1
 2
 3
[torch.FloatTensor of size 3]


 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]



In [5]:
# Initialise a 3x4x5 tensor with random values.
x = torch.randn(3, 4, 5)
# The printed type is FloatTensor, the default tensor type.
print(x)


(0 ,.,.) = 
  0.6614  0.2669  0.0617  0.6213 -0.4519
 -0.1661 -1.5228  0.3817 -1.0276 -0.5631
 -0.8923 -0.0583 -0.1955 -0.9656  0.4224
  0.2673 -0.4212 -0.5107 -1.5727 -0.1232

(1 ,.,.) = 
  3.5870 -1.8313  1.5987 -1.2770  0.3255
 -0.4791  1.3790  2.5286  0.4107 -0.9880
 -0.9081  0.5423  0.1103 -2.2590  0.6067
 -0.1383  0.8310 -0.2477 -0.8029  0.2366

(2 ,.,.) = 
  0.2857  0.6898 -0.6331  0.8795 -0.6842
  0.4533  0.2912 -0.8317 -0.5525  0.6355
 -0.3968 -0.6571 -1.6428  0.9803 -0.0421
 -0.8206  0.3133 -1.1352  0.3773 -0.2824
[torch.FloatTensor of size 3x4x5]



In [6]:
x[0] # indexing is intuitive too: this selects the first 4x5 slice along dimension 0


 0.6614  0.2669  0.0617  0.6213 -0.4519
-0.1661 -1.5228  0.3817 -1.0276 -0.5631
-0.8923 -0.0583 -0.1955 -0.9656  0.4224
 0.2673 -0.4212 -0.5107 -1.5727 -0.1232
[torch.FloatTensor of size 4x5]

In [7]:
# First row of the first slice, written as one multi-index instead of chained lookups.
x[0, 0]


 0.6614
 0.2669
 0.0617
 0.6213
-0.4519
[torch.FloatTensor of size 5]

In [8]:
# A single element, addressed with one index per dimension.
x[0, 0, 0]

0.6613521575927734

## 2. 텐서 연산 

In [9]:
# Adding two equal-length vectors with `+` works element by element.
x, y = torch.Tensor([1., 2., 3.]), torch.Tensor([4., 5., 6.])
z = x + y
print(z)


 5
 7
 9
[torch.FloatTensor of size 3]



### 앞으로 자주 쓰게 될 연산 <strong>cat</strong> => concat : 두 텐서를 연결한다(Concatenation)

In [10]:
# Row-wise concatenation: dimension 0 is also what torch.cat uses when none is given.
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat((x_1, y_1), 0)  # (2x5) + (3x5) -> 5x5
print(z_1)

# Column-wise concatenation: the second argument selects the dimension to join along.
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat((x_2, y_2), 1)  # (2x3) + (2x5) -> 2x8
print(z_2)


-2.5667 -1.4303  0.5009  0.5438 -0.4057
 1.1341 -1.1115  0.3501 -0.7703 -0.1473
 0.6272  1.0935  0.0939  1.2381 -1.3459
 0.5119 -0.6933 -0.1668 -0.9999 -1.6476
 0.8098  0.0554  1.1340 -0.5326  0.6592
[torch.FloatTensor of size 5x5]


-1.5964 -0.3769 -3.1020 -0.0020 -1.0952  0.6016  0.6984 -0.8005
-0.0995 -0.7213  1.2708  1.5381  1.4673  1.5951 -1.5279  1.0156
[torch.FloatTensor of size 2x8]



### 앞으로 자주 쓰게 될 연산 <strong>view</strong> => reshape

In [11]:
x = torch.randn(2, 3, 4)
print(x)
# Merge the last two dimensions: 2 rows of 3 * 4 = 12 columns.
print(x.view(2, 3 * 4))
# A size of -1 is inferred from the remaining elements, so this prints the same 2x12 tensor.
print(x.view(2, -1))


(0 ,.,.) = 
 -0.2020 -1.2865  0.8231 -0.6101
 -1.2960 -0.9434  0.6684  1.1628
 -0.3229  1.8782 -0.5666  0.4016

(1 ,.,.) = 
 -0.1153  0.3170  0.5629  0.8662
 -0.3528  0.3482  1.1371 -0.3339
 -1.4724  0.7296 -0.1312 -0.6368
[torch.FloatTensor of size 2x3x4]



Columns 0 to 9 
-0.2020 -1.2865  0.8231 -0.6101 -1.2960 -0.9434  0.6684  1.1628 -0.3229  1.8782
-0.1153  0.3170  0.5629  0.8662 -0.3528  0.3482  1.1371 -0.3339 -1.4724  0.7296

Columns 10 to 11 
-0.5666  0.4016
-0.1312 -0.6368
[torch.FloatTensor of size 2x12]



Columns 0 to 9 
-0.2020 -1.2865  0.8231 -0.6101 -1.2960 -0.9434  0.6684  1.1628 -0.3229  1.8782
-0.1153  0.3170  0.5629  0.8662 -0.3528  0.3482  1.1371 -0.3339 -1.4724  0.7296

Columns 10 to 11 
-0.5666  0.4016
-0.1312 -0.6368
[torch.FloatTensor of size 2x12]



## 2. Computation Graphs and Automatic Differentiation 

딥러닝 프레임워크의 가장 큰 장점인 자동 미분 기능을 사용하려면 텐서를 Variable이라는 클래스로 래핑해야 함. (<strong>Variable</strong>)

In [12]:
# Wrap tensors in Variable objects; requires_grad=True asks autograd to track them.
x = Variable(torch.Tensor([1., 2., 3]), requires_grad=True)
y = Variable(torch.Tensor([4., 5., 6]), requires_grad=True)

# Variables support the same operations as plain tensors.
z = x + y

# The wrapped tensor is reachable through the .data attribute.
print(x.data)
print(z.data)

# In addition, z records the operation that produced it.
print(z.grad_fn)


 1
 2
 3
[torch.FloatTensor of size 3]


 5
 7
 9
[torch.FloatTensor of size 3]

<AddBackward1 object at 0x7fc244c68198>


In [13]:
# Sum all the entries of z into a single value
s = z.sum()
print(s)
print(s.grad_fn) # the graph node that produced s

Variable containing:
 21
[torch.FloatTensor of size 1]

<SumBackward0 object at 0x7fc244c6d3c8>


In [14]:
s.backward() # calling .backward() on any variable will run backprop, starting from it.
print(x.grad) # gradient of s with respect to x, one entry per element of x

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



$$ s = \overbrace{x_0 + y_0}^\text{$z_0$} + \overbrace{x_1 + y_1}^\text{$z_1$} + \overbrace{x_2 + y_2}^\text{$z_2$} $$

$$ \frac{\partial s}{\partial x_0} = 1 $$

## 3. torch Module 

### 주요 모듈 : nn.Linear (Affine Maps)

$$ f(x) = Ax + b $$

In [46]:
# nn.Linear(5, 3) is the affine map f(x) = Ax + b from R^5 to R^3, with parameters A and b.
lin = nn.Linear(5, 3)
# Named `lin_input` rather than `data`, so the `torch.utils.data as data` module
# alias imported at the top of the notebook is not overwritten by a Variable.
lin_input = Variable(torch.randn(2, 5))  # two rows of five features each
print(lin(lin_input))  # result is 2x3

Variable containing:
 0.0114 -0.5978 -1.0424
-0.0682 -0.4090  0.1108
[torch.FloatTensor of size 2x3]



### 주요 모듈 : 비선형 함수들 (Non-Linearities)

In [21]:
# Apply each non-linearity to the same random 2x2 input and print the results.
# The input is named `nl_input` rather than `data`, so the `torch.utils.data as data`
# module alias imported at the top of the notebook is not overwritten by a Variable.
nl_input = Variable(torch.randn(2, 2))
print(nl_input)
print(F.relu(nl_input))     # negative entries become 0
print(F.tanh(nl_input))
print(F.sigmoid(nl_input))

Variable containing:
-1.0246 -1.0300
-1.0129  0.0055
[torch.FloatTensor of size 2x2]

Variable containing:
1.00000e-03 *
  0.0000  0.0000
  0.0000  5.5350
[torch.FloatTensor of size 2x2]

Variable containing:
-0.7717 -0.7739
-0.7670  0.0055
[torch.FloatTensor of size 2x2]

Variable containing:
 0.2641  0.2631
 0.2664  0.5014
[torch.FloatTensor of size 2x2]



### 주요 모듈 : Softmax

In [22]:
# Softmax is also available in torch.nn.functional (imported as F).
# The input is named `logits` rather than `data`, so the `torch.utils.data as data`
# module alias imported at the top of the notebook is not overwritten by a Variable.
logits = Variable(torch.randn(5))
print(logits)
# Pass dim explicitly instead of relying on the implicit dimension choice;
# the input is 1-D, so dimension 0 is the one being normalised.
probs = F.softmax(logits, dim=0)
print(probs)
print(probs.sum()) # Sums to 1 because it is a distribution!
print(F.log_softmax(logits, dim=0))

Variable containing:
-0.9347
-0.9882
 1.3801
-0.1173
 0.9317
[torch.FloatTensor of size 5]

Variable containing:
 0.0481
 0.0456
 0.4867
 0.1089
 0.3108
[torch.FloatTensor of size 5]

Variable containing:
 1
[torch.FloatTensor of size 1]

Variable containing:
-3.0350
-3.0885
-0.7201
-2.2176
-1.1686
[torch.FloatTensor of size 5]



파이토치는 모델을 클래스처럼 다룰 수 있는데, torch.nn.Module을 상속받아서 부모 클래스를 초기화하면 된다. <br>
선정의 된 함수 forward에 Variable을 인자 값으로 보내면 forward 계산을 하면서 Parameter와의 backward 연결을 알아서 한다.

In [38]:
class simpleNN(nn.Module):
    """Minimal network: a 2-in / 2-out linear layer followed by a sigmoid."""

    def __init__(self):
        # Call the parent (nn.Module) constructor before assigning sub-modules.
        super(simpleNN, self).__init__()
        self.linear = nn.Linear(2, 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """Return sigmoid(linear(inputs))."""
        affine_out = self.linear(inputs)
        return self.sigmoid(affine_out)

In [39]:
# Instantiate the simpleNN model defined in the previous cell
snn = simpleNN()

In [40]:
# Displaying the module lists its sub-modules (linear, sigmoid)
snn

simpleNN(
  (linear): Linear(in_features=2, out_features=2)
  (sigmoid): Sigmoid()
)

In [41]:
# named_parameters() yields (name, Parameter) pairs; print each pair
for param in snn.named_parameters():
    print(param)

('linear.weight', Parameter containing:
-0.6167  0.5950
-0.1340  0.1427
[torch.FloatTensor of size 2x2]
)
('linear.bias', Parameter containing:
 0.0262
-0.4506
[torch.FloatTensor of size 2]
)


In [42]:
# A random 1x2 input, wrapped in a Variable so it can be fed to the model
inputs = Variable(torch.randn(1,2))
inputs

Variable containing:
 0.3892 -0.0115
[torch.FloatTensor of size 1x2]

In [45]:
# Calling the model on the inputs runs its forward computation
outputs = snn(inputs)
print(outputs)

Variable containing:
 0.4451  0.3765
[torch.FloatTensor of size 1x2]



## Loss function 

In [48]:
# Mean squared error (MSE) loss
loss_function = nn.MSELoss()

## Optimizer

In [49]:
# SGD optimizer over snn's parameters with a learning rate of 0.01
optimizer = optim.SGD(snn.parameters(),lr=0.01)

In [51]:
# Inspect the optimizer's settings together with the parameters it will update
optimizer.param_groups

[{'dampening': 0,
  'lr': 0.01,
  'momentum': 0,
  'nesterov': False,
  'params': [Parameter containing:
   -0.6167  0.5950
   -0.1340  0.1427
   [torch.FloatTensor of size 2x2], Parameter containing:
    0.0262
   -0.4506
   [torch.FloatTensor of size 2]],
  'weight_decay': 0}]