
신경망 기계번역
================

Category


-  Encoder
-  Packed Padded Sequence
-  Generator
-  Attention
-  BeamSearch


### 1. Encoder
층을 통과하면 결과적으로 문장 임베딩 벡터를 생성 

양방향 LSTM으로 만들려면 `nn.LSTM`에 `bidirectional=True`를 지정한다.


In [10]:
from torch import *
import torch.nn as nn
class Encoder(nn.Module):
    """Bidirectional LSTM encoder over already-embedded source tokens.

    Each direction uses ``int(hidden_size / 2)`` units, so the concatenated
    per-step output has ``hidden_size`` features when ``hidden_size`` is even.

    Args:
        word_vec_dim: feature size of the input embeddings.
        hidden_size: total output feature size (both directions combined).
        n_layers: number of stacked LSTM layers.
        dropout_p: dropout probability applied between LSTM layers.
    """

    def __init__(self, word_vec_dim, hidden_size, n_layers=4, dropout_p=.2):
        super(Encoder, self).__init__()

        self.rnn = nn.LSTM(
            word_vec_dim,
            int(hidden_size / 2),
            num_layers=n_layers,
            dropout=dropout_p,
            bidirectional=True,
            batch_first=True,
        )

    def forward(self, emb):
        """Encode a batch of embedded sequences.

        Args:
            emb: either a padded tensor of shape (batch, length, word_vec_dim),
                or a tuple ``(padded_tensor, lengths)``. With the tuple form the
                batch is packed before the LSTM so padding steps are skipped;
                ``lengths`` must be in descending order because packing uses
                PyTorch's default ``enforce_sorted=True``.

        Returns:
            ``(y, h)``: ``y`` is the padded per-step output of shape
            (batch, length, 2 * int(hidden_size / 2)); ``h`` is the LSTM's
            ``(hidden, cell)`` tuple, each of shape
            (n_layers * 2, batch, int(hidden_size / 2)).
        """
        is_packed = isinstance(emb, tuple)

        if is_packed:
            x, lengths = emb
            # Accept both tensors and plain Python sequences of lengths.
            lengths = lengths.tolist() if hasattr(lengths, "tolist") else list(lengths)
            x = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
        else:
            x = emb

        y, h = self.rnn(x)

        if is_packed:
            # Restore the padded (batch, length, features) layout.
            y, _ = nn.utils.rnn.pad_packed_sequence(y, batch_first=True)

        return y, h


### 2.Packed Padded Sequence

기존의 샘플별 미니배치를 time-step별로 표현해준다. 

In [15]:
import torch
from torch.nn.utils.rnn import *
# Two variable-length sequences (lengths 3 and 2).
a = [torch.tensor([1, 2, 3]), torch.tensor([3, 4])]

# Pad every sequence to the longest one, batch dimension first.
b = torch.nn.utils.rnn.pad_sequence(a, batch_first=True)
# b is:
# tensor([[1, 2, 3],
#         [3, 4, 0]])

# Pack the padded batch: the data is laid out time-step by time-step and
# batch_sizes records how many sequences are still active at each step.
packed = torch.nn.utils.rnn.pack_padded_sequence(b, batch_first=True, lengths=[3, 2])
# packed is:
# PackedSequence(data=tensor([1, 3, 2, 4, 3]), batch_sizes=tensor([2, 2, 1]), sorted_indices=None, unsorted_indices=None)

### 3. Generator

In [17]:
import torch.nn as nn
class Generator(nn.Module):
    """Map hidden states to log-probabilities over the output vocabulary.

    Args:
        hidden_size: feature size of the incoming hidden states.
        output_size: number of output classes (vocabulary size).
    """

    def __init__(self, hidden_size, output_size):
        super(Generator, self).__init__()

        self.output = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """Return log-softmax scores over the last dimension of ``output(x)``."""
        return self.softmax(self.output(x))

    def _get_loss(self, y_hat, y, crit=None):
        """Compute the loss between predicted log-probabilities and targets.

        ``y_hat`` is flattened to (N, num_classes) and ``y`` to (N,) before
        being handed to the criterion.

        Args:
            y_hat: log-probabilities as produced by ``forward``.
            y: target class indices with the same leading shape as ``y_hat``.
            crit: criterion to call; when omitted, ``self.crit`` is used.
                This class does not create ``self.crit`` itself, so it must
                have been assigned by the caller beforehand.

        Raises:
            AttributeError: if ``crit`` is omitted and no ``self.crit`` exists.
        """
        if crit is None:
            crit = getattr(self, "crit", None)
            if crit is None:
                raise AttributeError(
                    "Generator has no 'crit'; pass crit=... or assign "
                    "generator.crit before calling _get_loss"
                )

        flat_y_hat = y_hat.contiguous().view(-1, y_hat.size(-1))
        flat_y = y.contiguous().view(-1)

        return crit(flat_y_hat, flat_y)

### 4. Attention

파이썬 딕셔너리의 key-value 조회 방식을 활용한다. 
전체적인 스코어를 총합하는 과정이 필요하다

In [5]:
# how_similar 는 코사인 유사도를 반환
# Query 는 벡터 임베딩 되었다고 가정


def key_value_func(query):
    """Soft dictionary lookup: return a similarity-weighted sum of all values.

    Every key of the module-level ``dic`` is scored against ``query`` with
    ``how_similar`` (described in the accompanying note as returning a cosine
    similarity), the scores are normalized with ``softmax``, and the values
    are accumulated using those normalized scores as weights.
    """
    # One similarity score per key, in dictionary iteration order.
    scores = [how_similar(key, query) for key in dic.keys()]

    # Normalize the scores so they act as mixing weights.
    probs = softmax(scores)

    # Weighted sum of the values, paired with weights in the same order.
    answer = 0
    for prob, value in zip(probs, dic.values()):
        answer += prob * value

    return answer
# encoder에서 time-step별 출력을 키와 벨류로 삼고, 
# 현재 time-step의 디코더 출력을 쿼리로 삼아 어텐션을 계산한다. 
# 가중합


### Attention Class

In [7]:
import torch.nn as nn

class Attention(nn.Module):
    """Dot-product attention with a learned linear transform on the query.

    Args:
        hidden_size: feature size shared by encoder outputs and decoder output.
    """

    def __init__(self, hidden_size):
        super(Attention, self).__init__()

        self.linear = nn.Linear(hidden_size, hidden_size, bias=False)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, h_src, h_t_tgt, mask=None):
        """Compute the context vector for one decoder time-step.

        Args:
            h_src: encoder outputs, shape (batch, src_length, hidden_size).
            h_t_tgt: current decoder output, shape (batch, 1, hidden_size).
            mask: optional (batch, src_length) mask whose non-zero / True
                entries mark source positions to ignore (e.g. padding).

        Returns:
            Context vector of shape (batch, 1, hidden_size).
        """
        # (batch, 1, hidden) -> (batch, hidden) -> (batch, hidden, 1)
        query = self.linear(h_t_tgt.squeeze(1)).unsqueeze(-1)

        # Similarity of the query with every source position: (batch, src_length)
        weight = torch.bmm(h_src, query).squeeze(-1)

        if mask is not None:
            # -inf scores become exactly zero weight after the softmax.
            # Cast to bool because recent PyTorch rejects byte masks here.
            weight = weight.masked_fill(mask.bool(), -float('inf'))
        weight = self.softmax(weight)

        # Weighted sum of the encoder outputs: (batch, 1, hidden)
        context_vector = torch.bmm(weight.unsqueeze(1), h_src)

        return context_vector
    

### Generate_mask

문장마다 길이가 다르므로 가장 길이가 긴 것을 기준으로 필요없는 부분에 대하여 1의 값을 채워준다. 
 이 경우, 어텐션 가중치를 0으로 만들어 더해도 상관없는 값으로 변환

In [10]:
def generate_mask(self, x, length):
    """Build a padding mask for a batch of variable-length sequences.

    Args:
        self: unused; kept so the function can also serve as a method.
        x: any tensor of the batch; only its device is used.
        length: iterable (list or 1-D tensor) holding each sequence's length.

    Returns:
        Bool tensor of shape (len(length), max(length)) that is False on valid
        positions and True on padding positions, i.e. positions at or beyond
        the sequence's own length.

    Raises:
        ValueError: if ``length`` is empty.
    """
    lengths = [int(l) for l in length]
    max_length = max(lengths)

    # Position index for every column: (1, max_length)
    positions = torch.arange(max_length, device=x.device).unsqueeze(0)
    # Each row's own length: (batch, 1)
    limits = torch.tensor(lengths, device=x.device).unsqueeze(1)

    # A position is padding once it reaches the row's length.
    mask = positions >= limits

    return mask

### Seq2Seq Class

앞에서 정의한 인코더, 디코더, 생성자와 어텐션 클래스를 활용한 전체 seq2seq 클래스

In [11]:
class Seq2Seq(nn.Module):
    """Sequence-to-sequence model: encoder, decoder, attention and generator.

    Args:
        input_size: source vocabulary size.
        word_vec_dim: embedding dimension for both source and target tokens.
        hidden_size: hidden feature size used by encoder output, decoder,
            attention and generator.
        output_size: target vocabulary size.
        n_layers: number of stacked layers passed to encoder and decoder.
        dropout_p: dropout probability passed to encoder and decoder.
    """

    def __init__(self, input_size, word_vec_dim, hidden_size, output_size, n_layers=4, dropout_p=.2):
        # Initialise nn.Module before assigning any attribute.
        super(Seq2Seq, self).__init__()

        self.input_size = input_size
        self.word_vec_dim = word_vec_dim
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p

        self.emb_src = nn.Embedding(input_size, word_vec_dim)
        self.emb_dec = nn.Embedding(output_size, word_vec_dim)

        self.encoder = Encoder(word_vec_dim,
                               hidden_size,
                               n_layers=n_layers,
                               dropout_p=dropout_p)
        self.decoder = Decoder(word_vec_dim,
                               hidden_size,
                               n_layers=n_layers,
                               dropout_p=dropout_p)
        self.attn = Attention(hidden_size)

        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.tanh = nn.Tanh()
        self.generator = Generator(hidden_size, output_size)

    def generate_mask(self, x, length):
        """Return the padding mask for ``x``.

        Delegates to the module-level ``generate_mask`` function so that
        ``self.generate_mask(...)`` in ``forward`` resolves.
        """
        return generate_mask(self, x, length)

    @staticmethod
    def _merge_directions(state):
        """Concatenate forward/backward states of each layer along features.

        Turns (n_layers * 2, batch, size) into (n_layers, batch, size * 2) by
        pairing rows ``2i`` and ``2i + 1``, without a Python loop.
        """
        batch_size = state.size(1)
        merged = state.transpose(0, 1).contiguous().view(batch_size, -1, state.size(-1) * 2)
        return merged.transpose(0, 1).contiguous()

    def merge_encoder_hiddens(self, encoder_hiddens):
        """Merge a bidirectional encoder's final ``(hidden, cell)`` states.

        Args:
            encoder_hiddens: tuple ``(hiddens, cells)``, each of shape
                (n_layers * 2, batch, size).

        Returns:
            Tuple ``(new_hiddens, new_cells)``, each of shape
            (n_layers, batch, size * 2).
        """
        hiddens, cells = encoder_hiddens

        return (self._merge_directions(hiddens), self._merge_directions(cells))

    def forward(self, src, tgt):
        """Run the encoder once and the decoder over every target time-step.

        Args:
            src: source token indices, or a tuple ``(indices, lengths)``; with
                the tuple form a padding mask is built for the attention.
            tgt: target token indices, or a tuple whose first item is them.

        Returns:
            Generator output for every target step, concatenated along dim 1.
        """
        mask = None
        x_length = None
        if isinstance(src, tuple):
            x, x_length = src
            mask = self.generate_mask(x, x_length)
        else:
            x = src

        # Unwrap the target regardless of how the source was passed.
        if isinstance(tgt, tuple):
            tgt = tgt[0]

        emb_src = self.emb_src(x)
        # Only hand the encoder a (tensor, lengths) tuple when lengths exist.
        encoder_input = emb_src if x_length is None else (emb_src, x_length)
        h_src, h_0_tgt = self.encoder(encoder_input)

        # Both hidden and cell state need merging before they go to the decoder.
        decoder_hidden = self.merge_encoder_hiddens(h_0_tgt)

        emb_tgt = self.emb_dec(tgt)
        h_tilde = []
        h_t_tilde = None

        # Run the decoder up to the last target time-step.
        # NOTE(review): Decoder is not defined in this section; the call below
        # assumes the signature (emb_t, h_t_1_tilde, hidden) -> (output, hidden).
        for t in range(tgt.size(1)):
            emb_t = emb_tgt[:, t, :].unsqueeze(1)
            decoder_output, decoder_hidden = self.decoder(emb_t, h_t_tilde, decoder_hidden)
            context_vector = self.attn(h_src, decoder_output, mask)
            h_t_tilde = self.tanh(self.concat(torch.cat([decoder_output,
                                                         context_vector],
                                                        dim=-1)))
            h_tilde += [h_t_tilde]

        h_tilde = torch.cat(h_tilde, dim=1)
        y_hat = self.generator(h_tilde)

        return y_hat
        

### Teacher forcing : 학습 시 다음 time-step의 입력으로 이전 단계의 예측값 대신 정답(ground truth) 토큰을 넣는다.