In [3]:
import tensorflow as tf

In [4]:
from keras import Model
from keras.layers import Embedding, LSTM, Dropout, Input, Dense, dot, concatenate
from keras.optimizers import Adam

In [5]:
max_length = 300 #for padding
NUM_WORDS = 3000 #본문 기사 내 가장 많이 사용된 3000단어?(or 전체 단어수로 할지 미정)
VECTOR_SIZE_NOT_DETERMINED_YET = 100
MAX_SUMMARIZATION_LENGTH_NOT_DETERMINED_YET = 500

## Seq2Seq

In [129]:
class Seq2Seq():
    def __init__(self):
        self.max_length = 300
        self.VECTOR_SIZE_NOT_DETERMINED_YET = 100
        
#         self.sos =  sos #give some number
#         self.eos =  eos #give some number
        
        self.optimizer = Adam()
        
        encoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        decoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        
        self.seq2seq = self.build_seq2seq()

        self.seq2seq.compile(loss='categorical_crossentropy', optimizer=self.optimizer)
        print(self.seq2seq.summary())
        
        
    def build_seq2seq(self):
        def lstm(inputs, hs, seq=False, initial=None):
            output,h,c = LSTM(hs, return_state=True, return_sequences=seq)(inputs, initial_state=initial) #return only last h, c
            return output, h, c
        
        def fc(n_h_layers, inputs, hn):
            for _ in range(n_h_layers):
                d = Dense(hn, activation='tanh')(inputs)
            if n_h_layers==0: 
                d = inputs
            output = Dense(hn, activation='softmax')(d)
            #모든 3000개 단어에 대한 확률값 (해당 위치에서의)
            return output
        
        #(encoder input) already embedded from koBERT(vector size 만큼)
        encoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        _, h, c = lstm(encoder_inputs, 256)  #Discard encoder outputs
        init_states = [h,c]
        
        decoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        decoder_outputs, _, _ = lstm(decoder_inputs, 256, seq=True, initial=init_states) #Discard encoder outputs
        
        outputs_softmaxed = fc(1, decoder_outputs, NUM_WORDS) #ㅁ//NUM_WORDS가 맞는지?

        mod = Model([encoder_inputs, decoder_inputs], outputs_softmaxed) 
        return mod

In [130]:
model = Seq2Seq()

Model: "model_137"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_214 (InputLayer)          (None, 300, 100)     0                                            
__________________________________________________________________________________________________
input_215 (InputLayer)          (None, 300, 100)     0                                            
__________________________________________________________________________________________________
lstm_104 (LSTM)                 [(None, 256), (None, 365568      input_214[0][0]                  
__________________________________________________________________________________________________
lstm_105 (LSTM)                 [(None, 300, 256), ( 365568      input_215[0][0]                  
                                                                 lstm_104[0][1]           

In [None]:
model.seq2seq.fit()

## Seq2Seq with Attention

In [14]:
class Seq2Seq_Attention():
    def __init__(self):
        self.max_length = 300
        self.VECTOR_SIZE_NOT_DETERMINED_YET = 100
        
#         self.sos =  sos #give some number
#         self.eos =  eos #give some number
        
        self.optimizer = Adam()
        
        encoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        decoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        
        self.att = self.build_att()

        self.att.compile(loss='categorical_crossentropy', optimizer=self.optimizer)
        print(self.att.summary())
        
        
    def build_att(self):
        def lstm(inputs, hs, seq=True, initial=None):
            output,h,c = LSTM(hs, return_state=True, return_sequences=seq)(inputs, initial_state=initial) 
            #return_seq=False & return_state=True: return only last h, c
            #return_seq=Ture & return_state=True: return all h, c
            return output, h, c
        
        def fc(n_h_layers, inputs, hn):
            for _ in range(n_h_layers):
                d = Dense(hn, activation='tanh')(inputs)
            if n_h_layers==0: 
                d = inputs
            output = Dense(hn, activation='softmax')(d)
            #모든 3000개 단어에 대한 확률값 (해당 위치에서의)
            return output
        
        #(encoder input) already embedded from koBERT(vector size 만큼)
        encoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        encoder_outputs, h, c = lstm(encoder_inputs, 256)  #Discard encoder outputs
        print(encoder_outputs)
        init_states = [h,c]
        
        decoder_inputs = Input(shape=(self.max_length,self.VECTOR_SIZE_NOT_DETERMINED_YET))
        decoder_outputs, _, _ = lstm(decoder_inputs, 256, initial=init_states) #Discard encoder outputs
        
        value = Dense(5000, activation='tanh')(encoder_outputs)
        query = Dense(5000, activation='tanh')(decoder_outputs)
        print(value, query) #300x5000 두개
        
        attention = dot([query, value],axes=[2,2])
        print(attention) #300x300
        
        attention_softmaxed = fc(0, attention, max_length)
        print(attention_softmaxed) #300x300
        print(encoder_outputs)
        
        weighted = dot([attention_softmaxed, encoder_outputs], axes=[2,1]) #give weights to encoder outputs(=각 토큰)
        print(attention_softmaxed)
        print(weighted)
        
        decoder_for_final = concatenate([weighted, decoder_outputs]) #weighted token(3000x256) + decoder output(3000x256)
        #or add? 둘중에 성능좋은거 고르기
        
        decoder_final = fc(1, decoder_for_final, NUM_WORDS)
        print(decoder_final)
        
        mod = Model([encoder_inputs, decoder_inputs], decoder_final) 
        return mod

In [15]:
model = Seq2Seq_Attention()

Tensor("lstm_7/transpose_1:0", shape=(None, 300, 256), dtype=float32)
Tensor("dense_10/Tanh:0", shape=(None, 300, 5000), dtype=float32) Tensor("dense_11/Tanh:0", shape=(None, 300, 5000), dtype=float32)
Tensor("dot_7/MatMul:0", shape=(None, 300, 300), dtype=float32)
Tensor("dense_12/truediv:0", shape=(None, 300, 300), dtype=float32)
Tensor("lstm_7/transpose_1:0", shape=(None, 300, 256), dtype=float32)
Tensor("dense_12/truediv:0", shape=(None, 300, 300), dtype=float32)
Tensor("dot_8/MatMul:0", shape=(None, 300, 256), dtype=float32)
Tensor("dense_14/truediv:0", shape=(None, 300, 3000), dtype=float32)
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           (None, 300, 100)     0                                            
_________________________________________________________________________________

In [None]:
model.att.fit()

# 모델구현 끝!

### 해야할 것
- 전처리된 데이터(embedded vector) 집어넣고 모델학습
- hyperparameter tuning

- test 데이터로 돌려보기