In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from konlpy.tag import Kkma
tagger= Kkma()

## 1. RNNCell 

$$h' = \tanh(W_{ih} x + b_{ih} + W_{hh} h + b_{hh})$$

In [2]:
input_size = 10 # input dimension (word embedding) D
hidden_size = 30 # hidden dimension H
batch_size = 3 # number of sequences per batch B
length = 4 # number of timesteps per sequence T

In [3]:
rnncell = nn.RNNCell(input_size=input_size,hidden_size=hidden_size,bias=True,nonlinearity='tanh')

In [4]:
rnncell

RNNCell(10, 30)

In [5]:
# Unroll the cell by hand: feed one timestep at a time and carry the hidden state forward.
input = Variable(torch.randn(length, batch_size, input_size)) # T, B, D    (word vectors looked up from the embedding matrix)
hidden = Variable(torch.zeros(batch_size, hidden_size)) # initial hidden state: B, H
output = []
for step_input in input: # iterating over dim 0 yields one B, D slice per timestep
    hidden = rnncell(step_input, hidden)
    output += [hidden] # keep every intermediate hidden state

In [6]:
hidden.size() # Batch size, Hidden_size

torch.Size([3, 30])

In [7]:
len(output)

4

## 2. RNN

In [2]:
input_size = 10 # input dimension D
hidden_size = 30 # hidden dimension H
batch_size = 3 # B
length = 4 # number of timesteps T
output_size = 5 # width of the projection applied to the RNN output further down

rnn = nn.RNN(input_size, hidden_size,batch_first=True) # other keyword arguments: num_layers=1, bias=True, nonlinearity='tanh', dropout=0, bidirectional=False

# batch_first=True changes the layout of the input/output to B,T,*; the hidden state keeps the layout below.
input = Variable(torch.randn(batch_size,length,input_size)) # B,T,D  <= batch_first
hidden = Variable(torch.zeros(1,batch_size,hidden_size)) # 1,B,H    (num_layers * num_directions, batch, hidden_size)

# output holds the hidden state of every timestep, hidden only the final one.
output, hidden = rnn(input,hidden)

In [3]:
output.size() # B,T,H

torch.Size([3, 4, 30])

In [73]:
hidden.size() # 1,B,H

torch.Size([1, 3, 30])

In [4]:
linear = nn.Linear(hidden_size,output_size)
# Normalise over the last (class) axis so every timestep gets its own
# probability distribution; dim=1 would normalise across the T timesteps.
# The result goes into a new name so `output` keeps the RNN output and the
# cell can be re-run without a shape error.
probs = F.softmax(linear(output),dim=-1) # B,T,output_size
probs.size()

torch.Size([3, 4, 5])

## 3. Bidirectional RNN 

In [14]:
rnn = nn.RNN(input_size, hidden_size,num_layers=1,bias=True,nonlinearity='tanh', batch_first=True, dropout=0, bidirectional=True)

In [15]:
input = Variable(torch.randn(batch_size,length,input_size)) # B,T,D
hidden = Variable(torch.zeros(2,batch_size,hidden_size)) # 2,B,H    (num_layers * num_directions, batch, hidden_size)

In [16]:
output, hidden = rnn(input,hidden)

In [17]:
output.size() # concat of forward,backward

torch.Size([3, 4, 60])

In [18]:
hidden.size() # forward, backward

torch.Size([2, 3, 30])

## 4. Multi-layer RNN 

In [19]:
rnn = nn.RNN(input_size, hidden_size, num_layers=3,bias=True, nonlinearity='tanh', batch_first=True, dropout=0, bidirectional=True)

In [20]:
input = Variable(torch.randn(batch_size,length,input_size)) # B,T,D
hidden = Variable(torch.zeros(3*2,batch_size,hidden_size)) # 6,B,H    (num_layers * num_directions, batch, hidden_size)

In [21]:
output, hidden = rnn(input,hidden)

In [22]:
output.size()

torch.Size([3, 4, 60])

In [23]:
hidden.size() # (forward, backward)*num_layers

torch.Size([6, 3, 30])

## 5. GRU 

In [14]:
rnn = nn.GRU(input_size,hidden_size,batch_first=True) #,num_layers=1,bias=True,batch_first=True,bidirectional=True)

In [15]:
input = Variable(torch.randn(batch_size,length,input_size)) # B,T,D
hidden = Variable(torch.zeros(1,batch_size,hidden_size)) # 1,B,H    (num_layers * num_directions, batch, hidden_size)

In [16]:
output, hidden = rnn(input,hidden)

In [17]:
print(output.size())
print(hidden.size())

torch.Size([3, 4, 30])
torch.Size([1, 3, 30])


## 6. LSTM

In [6]:
input_size = 10 # input dimension D
hidden_size = 30 # hidden dimension H
output_size = 10 # width of the projection applied to the LSTM output
batch_size = 3 # B
length = 4 # number of timesteps T
num_layers = 3 # only relevant to the stacked variant; the LSTM constructed below leaves num_layers at its default

In [8]:
rnn = nn.LSTM(input_size,hidden_size,batch_first=True) #,num_layers=num_layers,bias=True,batch_first=True,bidirectional=True)

In [10]:
input = Variable(torch.randn(batch_size,length,input_size)) # B,T,D
hidden = Variable(torch.zeros(1,batch_size,hidden_size)) # (num_layers * num_directions, batch, hidden_size)
cell = Variable(torch.zeros(1,batch_size,hidden_size)) # (num_layers * num_directions, batch, hidden_size)

In [4]:
# Build the initial states for whatever `rnn` currently is. Their leading
# dimension must equal num_layers * num_directions of that module; a hard-coded
# num_layers*2 does not match a 1-layer unidirectional LSTM and makes the
# forward call fail with a size mismatch.
num_directions = 2 if rnn.bidirectional else 1
state_shape = (rnn.num_layers*num_directions, batch_size, hidden_size)
input = Variable(torch.randn(batch_size,length,input_size)) # B,T,D
hidden = Variable(torch.zeros(*state_shape)) # (num_layers * num_directions, batch, hidden_size)
cell = Variable(torch.zeros(*state_shape)) # (num_layers * num_directions, batch, hidden_size)

In [11]:
output, (hidden,cell) = rnn(input,(hidden,cell))

In [13]:
print(output.size())
print(hidden.size())
print(cell.size())

torch.Size([3, 4, 30])
torch.Size([1, 3, 30])
torch.Size([1, 3, 30])


In [11]:
# Size the projection from the actual width of the LSTM output (H for a
# unidirectional LSTM, 2H for a bidirectional one) instead of hard-coding
# hidden_size*2, which does not fit a unidirectional output.
linear = nn.Linear(output.size(-1),output_size)
# Softmax over the last (class) axis: one distribution per timestep.
# A new name keeps `output` intact so the cell can be re-run.
probs = F.softmax(linear(output),dim=-1) # B,T,output_size
probs.size()

torch.Size([3, 4, 10])

## TODO 각 timestep마다 그 다음에 올 단어를 예측하는 Language model을 만드시오 

* 다음 Corpus(sentences)를 tokenized하고 Vocab을 만드시오
* Embedding matrix(vector size는 10)
* hidden state의 size가 20인 Bidirectional GRU(num_layers=1)

In [39]:
from konlpy.tag import Kkma
tagger = Kkma()

In [40]:
sentences=["나는 오늘 삼계탕을 먹었다","그런데도 배가 아직 고프다","이제 영화보러 가야겠다 요즘 뭐가 재밌지","날씨가 진짜 좋다"]

In [41]:
# Split every sentence into morphemes.
tokenized = [tagger.morphs(s) for s in sentences]
# Sort the unique tokens: the iteration order of a set of strings can change
# from one interpreter run to the next, which would give every run a different
# word -> index mapping. Sorting keeps the mapping reproducible.
vocab = sorted({token for tokens in tokenized for token in tokens})
word2index = {v:i for i,v in enumerate(vocab)}

In [45]:
len(word2index)

29

In [44]:
V = len(word2index) # vocabulary size
D = 10 # embedding (word vector) size
H = 20 # hidden state size

In [17]:
class LM(nn.Module):
    """Language model that scores the vocabulary at every timestep.

    Pipeline: Embedding -> 1-layer bidirectional GRU -> Linear.

    Args:
        V: vocabulary size (rows of the embedding matrix, width of the output layer).
        D: embedding (word vector) dimension.
        H: GRU hidden size per direction.
    """
    def __init__(self,V,D,H):
        super(LM,self).__init__()
        self.hidden_size = H

        self.embed = nn.Embedding(V,D) # VxD
        self.gru = nn.GRU(D,H,1,batch_first=True,bidirectional=True)
        self.linear = nn.Linear(H*2,V) # both directions are concatenated -> 2H inputs

    def init_hidden(self,batch_size):
        """Return an all-zero initial hidden state of shape 2,B,H.

        The tensor is created from one of the model's own parameters, so it
        has the same type and device as the model (plain torch.zeros would
        always be a CPU float tensor and break after .double() or .cuda()).
        """
        reference = next(self.parameters()).data
        hidden = Variable(reference.new(2,batch_size,self.hidden_size).zero_())
        return hidden

    def forward(self,inputs):
        """
        inputs : B,T # LongTensor of token indices

        Returns unnormalised scores of shape B,T,V.
        """
        embed = self.embed(inputs) # B,T,D
        hidden = self.init_hidden(inputs.size(0)) # 2,B,H
        output, hidden = self.gru(embed,hidden)
        # output : B,T,2H
        # hidden : 2,B,H

        output = self.linear(output) # B,T,V
        return output

In [18]:
# Reuse the dimensions declared above: V = len(word2index), D = 10, H = 20.
model = LM(V,D,H)

## 7. Standard form 

In [161]:
class RNN(nn.Module):
    """Embedding -> (optionally stacked / bidirectional) LSTM -> Linear.

    Args:
        input_size: vocabulary size (rows of the embedding matrix).
        embed_size: embedding dimension.
        hidden_size: LSTM hidden size per direction.
        output_size: width of the final linear layer.
        num_layers: number of stacked LSTM layers.
        bidirec: build a bidirectional LSTM when True.
        many_to_one: when False (default) the linear layer is applied to
            every timestep (B,T,output_size); when True it is applied to the
            final hidden state of the last layer only (B,output_size).
    """
    def __init__(self,input_size,embed_size,hidden_size,output_size,num_layers=1,bidirec=False,many_to_one=False):
        super(RNN,self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_directions = 2 if bidirec else 1
        self.many_to_one = many_to_one

        self.embed = nn.Embedding(input_size,embed_size)
        self.lstm = nn.LSTM(embed_size,hidden_size,num_layers,batch_first=True,bidirectional=bidirec)
        self.linear = nn.Linear(hidden_size*self.num_directions,output_size)

    def init_hidden(self,batch_size):
        """Return zero-filled (hidden, cell), each (num_layers * num_directions, batch_size, hidden_size).

        The tensors are created from one of the model's own parameters so
        they share its type and device (plain torch.zeros would always be a
        CPU float tensor and break after .double() or .cuda()).
        """
        reference = next(self.parameters()).data
        shape = (self.num_layers*self.num_directions,batch_size,self.hidden_size)
        hidden = Variable(reference.new(*shape).zero_())
        cell = Variable(reference.new(*shape).zero_())
        return hidden, cell

    def forward(self,inputs):
        """
        inputs : B,T
        """
        embed = self.embed(inputs) # word vector indexing
        hidden, cell = self.init_hidden(inputs.size(0)) # initial hidden,cell

        output, (hidden,cell) = self.lstm(embed,(hidden,cell))

        if self.many_to_one:
            # Many-to-One: the last num_directions entries of hidden are the
            # final states of the top layer, one per direction.
            last = hidden[-self.num_directions:] # (num_directions,B,H)
            last = torch.cat([h for h in last],1) # B,H*num_directions
            return self.linear(last)

        # Many-to-Many: project every timestep.
        return self.linear(output) # B,T,H*num_directions -> B,T,output_size

In [162]:
VOCAB=1000 # input_size
EMBED = 50 # embedding_size
HIDDEN = 100 # hidden_size

In [163]:
rnn = RNN(VOCAB,EMBED,HIDDEN,VOCAB,bidirec=True)

In [164]:
# A random permutation of 32*10 = 320 indices reshaped to 32 x 10, so every value stays below VOCAB (1000).
test_input = Variable(torch.randperm(32*10)).view(32,10)

In [165]:
test_input # a batch of 32 sentences, each 10 tokens long

Variable containing:
    7   231   163   119   173   222    66    83    25   176
   28    64   156    87   268   153    62    86   177   224
  317   241   306    78     2   149    38   110   309   300
  155   273   207   205   150    88    16     5   191   108
   54   127   154    10   303    71   129   264   187    61
  230    46   151   122    65   135   284    98   260   157
  240    60    30   107   184   232   142   138    26   130
  213   302   277    14   275   310   140   238   243    96
  283    99   218   115   144    59   199   196   145   248
  313   291   166   295   114    39   186   276    57   132
  111   270    27   152   280   247   200    42   192   252
  272   269   147   168   106   126   162    81   208   282
  312    44    37   182    13   288   209   263    31   258
  318   113   297    95   188   120   244   274   116   158
  164   229   305   220    91   143    84    12    67   228
  100    47   316   216   249   204    68    69   206    53
    8    48    29  

In [166]:
output = rnn(test_input)

In [169]:
output.size()

torch.Size([32, 10, 1000])

## TODO : Sentence Classifier

* data/train.txt를 torchtext로 load하시오(배치사이즈 5)
* Bidirectional LSTM을 선언(num_layers=2)
* 마지막 히든 스테이트를 이용하여 Binary Classifier를 만드시오(Many-to-One)
* train 시키시오

In [20]:
from torchtext.data import Field,Iterator,TabularDataset

In [22]:
# TEXT: sentences are tokenised with tagger.morphs (the Kkma tagger); batch_first=True so batches are laid out batch-first.
TEXT = Field(tokenize=tagger.morphs,use_vocab=True, batch_first=True)
# LABEL: one categorical value per example (not a sequence); unk_token=None so no unknown-token entry is used.
LABEL = Field(sequential=False,use_vocab=True,unk_token=None)

# The TSV columns are mapped, in order, to the TEXT and LABEL fields.
train_data = TabularDataset(path="data/train.txt",
                                          format="tsv",
                                          fields=[('TEXT',TEXT),('LABEL',LABEL)])

In [24]:
print(train_data.examples[0].TEXT)
print(train_data.examples[0].LABEL)

['배고프', '다', '밥', '주', '어']
FOOD


In [25]:
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

In [26]:
train_loader =  Iterator(train_data, batch_size=5, device=-1, # device -1: CPU, device 0: an available GPU
    sort_key=lambda x: len(x.TEXT),sort_within_batch=True,repeat=False) # sort by the length of x.TEXT

In [32]:
class RNN(nn.Module):
    """Many-to-one classifier: Embedding -> LSTM -> Linear on the final hidden state.

    Args:
        input_size: vocabulary size (rows of the embedding matrix).
        embed_size: embedding dimension.
        hidden_size: LSTM hidden size per direction.
        output_size: number of output scores per input sequence.
        num_layers: number of stacked LSTM layers.
        bidirec: build a bidirectional LSTM when True.
    """
    def __init__(self,input_size,embed_size,hidden_size,output_size,num_layers=1,bidirec=False):
        super(RNN,self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_directions = 2 if bidirec else 1

        self.embed = nn.Embedding(input_size,embed_size)
        self.lstm = nn.LSTM(embed_size,hidden_size,num_layers,batch_first=True,bidirectional=bidirec)
        self.linear = nn.Linear(hidden_size*self.num_directions,output_size)

    def init_hidden(self,batch_size):
        """Return zero-filled (hidden, cell), each (num_layers * num_directions, batch_size, hidden_size).

        The tensors are created from one of the model's own parameters so
        they share its type and device (plain torch.zeros would always be a
        CPU float tensor and break after .double() or .cuda()).
        """
        reference = next(self.parameters()).data
        shape = (self.num_layers*self.num_directions,batch_size,self.hidden_size)
        hidden = Variable(reference.new(*shape).zero_())
        cell = Variable(reference.new(*shape).zero_())
        return hidden, cell

    def forward(self,inputs):
        """
        inputs : B,T

        Returns scores of shape B,output_size.
        """
        embed = self.embed(inputs) # word vector indexing
        hidden, cell = self.init_hidden(inputs.size(0)) # initial hidden,cell

        output, (hidden,cell) = self.lstm(embed,(hidden,cell))

        # Many-to-One: the last num_directions entries of hidden are the final
        # states of the top layer, one per direction.
        hidden = hidden[-self.num_directions:] # (num_directions,B,H)
        hidden = torch.cat([h for h in hidden],1) # B,H*num_directions
        output = self.linear(hidden) # last hidden

        return output

In [79]:
# The exercise asks for a 2-layer bidirectional LSTM; leaving num_layers at its
# default of 1 would build a single layer instead.
# Positional arguments: vocabulary size, embedding size 30, hidden size 50, number of labels.
model = RNN(len(TEXT.vocab),30,50,len(LABEL.vocab),num_layers=2,bidirec=True)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)

In [90]:
# A single pass over train_loader (the iterator was built with repeat=False):
# forward pass, loss, backward pass and one optimizer step per mini-batch.
for batch in train_loader:
    model.zero_grad() # clear the gradients accumulated by the previous step
    preds = model(batch.TEXT) # model scores for the batch
    loss = loss_function(preds,batch.LABEL)
    print(loss.data[0]) # loss value of this batch
    loss.backward()
    optimizer.step()

0.16062602400779724
0.07795029878616333
0.10190250724554062
0.16149498522281647
0.23214185237884521
0.10463999211788177
0.14870978891849518
0.0772019475698471
