In [2]:
import tensorflow as tf

In [20]:
from keras import Model
from keras.layers import Embedding, LSTM, Dropout, Input, Dense
from keras.optimizers import Adam

In [101]:
# Shared hyperparameters for the seq2seq summarization experiments in this notebook.
max_length = 300 # sequence length used for padding
NUM_WORDS = 3000 # the 3000 most frequent words in the article bodies? (or the full vocabulary size — not decided yet)
VECTOR_SIZE_NOT_DETERMINED_YET = 100 # placeholder; as the name says, the vector size is not settled
MAX_SUMMARIZATION_LENGTH_NOT_DETERMINED_YET = 500 # placeholder; as the name says, the max summary length is not settled

In [129]:
class Seq2Seq():
    """Functional-API encoder/decoder LSTM model, built and compiled on construction.

    Attributes:
        max_length: number of timesteps of both the encoder and decoder inputs.
        VECTOR_SIZE_NOT_DETERMINED_YET: feature size of each timestep (placeholder value).
        optimizer: the Adam optimizer the model is compiled with.
        seq2seq: the compiled Keras ``Model`` taking ``[encoder_inputs, decoder_inputs]``.
    """

    def __init__(self):
        self.max_length = 300
        self.VECTOR_SIZE_NOT_DETERMINED_YET = 100

        # TODO: sos/eos token ids are not wired into this class yet.

        self.optimizer = Adam()

        # The Input tensors are created inside build_seq2seq(); creating extra
        # ones here would only leave unused layers behind.
        self.seq2seq = self.build_seq2seq()

        self.seq2seq.compile(loss='categorical_crossentropy', optimizer=self.optimizer)
        # summary() prints by itself and returns None, so it must not be wrapped in print().
        self.seq2seq.summary()

    def build_seq2seq(self):
        """Assemble the encoder LSTM, decoder LSTM and the dense softmax head.

        Returns:
            A Keras ``Model`` mapping ``[encoder_inputs, decoder_inputs]`` to a
            softmax of width ``NUM_WORDS`` for every decoder timestep.
        """
        def lstm(inputs, hs, seq=False, initial=None):
            """Apply a new LSTM of ``hs`` units; returns ``(output, h, c)``.

            ``output`` is the full sequence when ``seq`` is True, otherwise only
            the last step. ``initial`` is forwarded as the LSTM's initial state.
            """
            output, h, c = LSTM(hs, return_state=True, return_sequences=seq)(inputs, initial_state=initial)
            return output, h, c

        def fc(n_h_layers, inputs, hn):
            """Stack ``n_h_layers`` tanh Dense layers of width ``hn``, then a softmax Dense of width ``hn``."""
            d = inputs
            for _ in range(n_h_layers):
                # Chain on the previous layer's output so every hidden layer is actually used.
                d = Dense(hn, activation='tanh')(d)
            # Probability over all hn words at each position.
            return Dense(hn, activation='softmax')(d)

        # Encoder input is expected to be already embedded (original note: from koBERT),
        # one vector of size VECTOR_SIZE_NOT_DETERMINED_YET per timestep.
        encoder_inputs = Input(shape=(self.max_length, self.VECTOR_SIZE_NOT_DETERMINED_YET))
        _, h, c = lstm(encoder_inputs, 256)  # discard encoder outputs, keep only the final states
        init_states = [h, c]

        decoder_inputs = Input(shape=(self.max_length, self.VECTOR_SIZE_NOT_DETERMINED_YET))
        # Keep the per-timestep decoder outputs; the decoder's final states are discarded.
        decoder_outputs, _, _ = lstm(decoder_inputs, 256, seq=True, initial=init_states)

        # TODO: confirm that NUM_WORDS is the right output width.
        outputs_softmaxed = fc(1, decoder_outputs, NUM_WORDS)

        mod = Model([encoder_inputs, decoder_inputs], outputs_softmaxed)
        return mod

In [130]:
# Instantiate the wrapper; its constructor builds and compiles the Keras model and prints its summary.
model = Seq2Seq()

Model: "model_137"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_214 (InputLayer)          (None, 300, 100)     0                                            
__________________________________________________________________________________________________
input_215 (InputLayer)          (None, 300, 100)     0                                            
__________________________________________________________________________________________________
lstm_104 (LSTM)                 [(None, 256), (None, 365568      input_214[0][0]                  
__________________________________________________________________________________________________
lstm_105 (LSTM)                 [(None, 300, 256), ( 365568      input_215[0][0]                  
                                                                 lstm_104[0][1]           

In [None]:
# NOTE(review): fit() is called without any training data here — presumably a placeholder; supply inputs/targets before running this cell.
model.seq2seq.fit()

In [102]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, Bidirectional, RepeatVector, Dropout
# Encoder half of the standalone (non-class) model; each sample is 300 timesteps of 100 features.
encoder_inputs = Input(shape=(300,100))
print(encoder_inputs)
encoder = LSTM(256, return_state=True) # only the last h and c are returned
encoder_outputs, state_h, state_c = encoder(encoder_inputs) # return_sequences is left at False, so the output is the last hidden state: (h_last, h_last, c_last)
encoder_states = [state_h, state_c]

Tensor("input_168:0", shape=(None, 300, 100), dtype=float32)


In [103]:
# Decoder input. Original note: "French (the preceding word)" — presumably carried over from a translation example where the decoder is fed the previous target token.
decoder_inputs = Input(shape=(300,100))
print(decoder_inputs)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,initial_state=encoder_states) # the decoder's initial h, c are the encoder's; this is what links encoder and decoder into one graph
# Per-timestep hidden states, e.g. h1..h17 (h2 is predicted after seeing only the first word, h3 after the first two, ...)
decoder_outputs = Dense(2000,activation='tanh')(decoder_outputs)
decoder_dense = Dense(3000, activation='softmax')(decoder_outputs) # multiple inputs, multiple outputs
# NOTE(review): the original comment said "+1 because of UNK", but the size above is a literal 3000 with no +1 — confirm the intended vocabulary size.

model2 = Model([encoder_inputs, decoder_inputs], decoder_dense)
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

Tensor("input_169:0", shape=(None, 300, 100), dtype=float32)


In [None]:
class Encoder(Model):
    """Encoder that reduces an input sequence to the final states of a 512-unit LSTM."""

    def __init__(self):
        super(Encoder, self).__init__()
        # return_state=True makes the layer return (output, h, c).
        self.lstm = LSTM(512, return_state=True)

    def call(self, x, training=False, mask=None):
        """Run the LSTM over ``x`` and return its final ``(h, c)`` states.

        Implemented as ``call`` (not ``__call__``) so that Keras's own
        ``Model.__call__`` wrapper stays in charge of building and tracking.

        Args:
            x: input sequence; assumed to be (batch, timesteps, features) — TODO confirm.
            training: accepted for the Keras call signature; not used here.
            mask: accepted for the Keras call signature; not used here.

        Returns:
            Tuple ``(h, c)`` of the LSTM's final hidden and cell states.
        """
        _, h, c = self.lstm(x)  # discard the encoder output itself
        return h, c

In [None]:
class Decoder(Model):
    """Decoder: a 512-unit LSTM followed by a tanh Dense and a softmax Dense, both of width NUM_WORDS."""

    def __init__(self):
        super(Decoder, self).__init__()
        # return_sequences=True yields the hidden state of every timestep (h1..hn);
        # return_state=True additionally returns the final h and c.
        self.lstm = LSTM(512, return_sequences=True, return_state=True)

        self.dense1 = Dense(NUM_WORDS, activation='tanh')
        # Output layer: probability over all NUM_WORDS words at each position.
        self.dense2 = Dense(NUM_WORDS, activation='softmax')

    def call(self, inputs, training=False, mask=None):
        """Decode a sequence starting from the given LSTM states.

        Implemented as ``call`` (not ``__call__``) so that Keras's own
        ``Model.__call__`` wrapper stays in charge of building and tracking.

        Args:
            inputs: a 3-item sequence ``(x, h, c)``: the decoder input ``x`` and
                the initial hidden/cell states ``h``, ``c`` for the LSTM.
                Original note: ``x`` is already embedded (from koBERT).
            training: accepted for the Keras call signature; not used here.
            mask: accepted for the Keras call signature; not used here.

        Returns:
            The softmax output of ``dense2`` for every timestep of ``x``.
        """
        x, h, c = inputs

        # The LSTM's final states are discarded; only the per-timestep outputs are used.
        outputs, _, _ = self.lstm(x, initial_state=[h, c])

        hidden = self.dense1(outputs)
        return self.dense2(hidden)

In [12]:
class seq2seq(Model):
    """Encoder/decoder wrapper with a teacher-forced training path and a greedy inference path."""

    def __init__(self, sos, eos, max_output_length=64):
        """
        Args:
            sos: start-of-sequence token id fed to the decoder first at inference.
            eos: end-of-sequence token id; inference stops once it is produced.
            max_output_length: maximum number of decoding steps at inference
                (defaults to 64, the previously hard-coded value).
        """
        super(seq2seq, self).__init__()
        self.enc = Encoder()
        self.dec = Decoder()
        self.sos = sos
        self.eos = eos
        self.max_output_length = max_output_length

    def call(self, inputs, training=False):
        """Forward pass.

        Implemented as ``call`` (not ``__call__``) so that Keras's own
        ``Model.__call__`` wrapper stays in charge of building and tracking.

        Args:
            inputs: when ``training`` is true, a pair ``(x, inp)`` of encoder
                inputs and decoder inputs; otherwise just the encoder input ``x``.
            training: selects the teacher-forced path (True) or greedy decoding (False).

        Returns:
            Training: the decoder's softmax output.
            Inference: an int32 tensor of shape ``(1, max_output_length)`` with the
            generated token ids.
        """
        if training:
            x, inp = tuple(inputs)  # x: encoder inputs, inp: decoder inputs

            h, c = self.enc(x)
            return self.dec((inp, h, c))

        # Inference: no decoder inputs are given, so each output is fed back as the next input.
        x = inputs
        h, c = self.enc(x)

        # Start the decoder with the sos token; the input has length one.
        # NOTE(review): inp is an integer token id of shape (1, 1), while Decoder's
        # input is described as an already-embedded vector — an id -> vector lookup
        # looks to be missing here; confirm.
        inp = tf.convert_to_tensor(self.sos)
        inp = tf.reshape(inp, (1, 1))

        # Result buffer: at most max_output_length generated tokens (the summary).
        seq = tf.TensorArray(tf.int32, self.max_output_length)

        for step in tf.range(self.max_output_length):  # tf.range rather than range for Keras models
            # NOTE(review): the decoder restarts from the encoder's (h, c) on every
            # step and only sees the last token, because Decoder does not return
            # its updated states — confirm whether states should be carried forward.
            y = self.dec([inp, h, c])  # a length-one input gives a single output step

            # Greedy choice becomes the next decoder input.
            inp = tf.cast(tf.argmax(y, axis=-1), dtype=tf.int32)
            inp = tf.reshape(inp, (1, 1))

            seq = seq.write(step, inp)

            if inp == self.eos:
                break

        # stack() gathers everything written to the TensorArray into one tensor.
        return tf.reshape(seq.stack(), (1, self.max_output_length))
            
            