# seq2seq 모델

![](https://aiffelstaticprd.blob.core.windows.net/media/images/GN-4-L-7.max-800x600.jpg)

## Encoder 구현

![](https://aiffelstaticprd.blob.core.windows.net/media/images/GN-4-L-6.max-800x600.jpg)

In [105]:
import tensorflow as tf

In [106]:
class Encoder(tf.keras.Model):
    """Seq2seq encoder: an embedding lookup followed by a single LSTM layer.

    ``call`` prints the shape of every intermediate tensor so the data flow
    can be followed in the notebook output.
    """

    def __init__(self, vocab_size, embedding_dim, enc_units):
        """Create the embedding and LSTM layers.

        Args:
            vocab_size: First argument passed to the ``Embedding`` layer.
            embedding_dim: Second argument passed to the ``Embedding`` layer.
            enc_units: Number of units of the LSTM layer.
        """
        super().__init__()
        # Example source sentence: "나는 밥을 먹었어" ("I ate a meal").
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # Built with the Keras defaults (no return_sequences / return_state).
        self.lstm = tf.keras.layers.LSTM(enc_units)

    def call(self, x):
        """Embed ``x``, run the LSTM over it and return the LSTM output.

        Args:
            x: Input batch; the sample run in this notebook uses shape (1, 3),
                e.g. the three tokens of "춤 추는 소시지" ("dancing sausage").

        Returns:
            The tensor produced by ``self.lstm``; shape (1, 512) in the
            sample run of this notebook.
        """
        print("입력 shape :", x.shape)

        embedded = self.embedding(x)
        print("Embedding Layer를 거친 shape :", embedded.shape)

        encoded = self.lstm(embedded)
        print("LSTM shape의 output shape :", encoded.shape)

        return encoded

In [107]:
# Hyperparameters used by the shape-check demo below.
vocab_size = 30000       # passed to the model as vocab_size
emb_size = 256           # passed to the model as embedding_dim
lstm_size = 512          # passed to the model as the LSTM unit count
batch_size = 1           # first dimension of the sample input
sample_seq_len = 3       # second dimension of the sample input

# One print call, one setting per output line.
print(
    "Vocab Size : {0}".format(vocab_size),
    "Embedding Size : {0}".format(emb_size),
    "LSTM Size : {0}".format(lstm_size),
    "Batch_size : {0}".format(batch_size),
    "Sample Sequence Length : {0}".format(sample_seq_len),
    sep="\n",
)

Vocab Size : 30000
Embedding Size : 256
LSTM Size : 512
Batch_size : 1
Sample Sequence Length : 3


In [108]:
# Build the encoder and push a dummy batch through it to inspect the shapes.
encoder = Encoder(
    vocab_size=vocab_size,
    embedding_dim=emb_size,
    enc_units=lstm_size,
)

# All-zero stand-in for a tokenized sentence such as "춤 추는 소시지" ("dancing sausage").
sample_input = tf.zeros((batch_size, sample_seq_len))

sample_output = encoder(sample_input)

입력 shape : (1, 3)
Embedding Layer를 거친 shape : (1, 3, 256)
LSTM shape의 output shape : (1, 512)


## Decoder 구현

![](https://aiffelstaticprd.blob.core.windows.net/media/images/GN-4-L-7.max-800x600.jpg)

In [109]:
class Decoder(tf.keras.Model):
    """Seq2seq decoder that conditions every time step on a context vector.

    The context vector is copied along the time axis, concatenated to the
    embedded decoder input, passed through an LSTM that returns the full
    sequence, projected to ``vocab_size`` with a dense layer and finally
    normalized with a softmax over the last axis. ``call`` prints the shape
    of each intermediate tensor for inspection in the notebook.
    """

    def __init__(self, vocab_size, embedding_dim, dec_units):
        """Create the embedding, LSTM, dense and softmax layers.

        Args:
            vocab_size: First argument of the ``Embedding`` layer and output
                width of the dense layer.
            embedding_dim: Second argument of the ``Embedding`` layer.
            dec_units: Number of units of the LSTM layer.
        """
        super(Decoder, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.lstm = tf.keras.layers.LSTM(dec_units, return_sequences=True)
        self.fc = tf.keras.layers.Dense(vocab_size)
        self.softmax = tf.keras.layers.Softmax(axis = -1)

    def call(self, x, context_v):
        """Run the decoder on ``x`` conditioned on ``context_v``.

        Args:
            x: Decoder input batch; shape (1, 3) in the sample run of this
                notebook.
            context_v: Context vector from the encoder; shape (1, 512) in the
                sample run of this notebook.

        Returns:
            Softmax output of the dense projection; the pre-softmax tensor
            has shape (1, 3, 30000) in the sample run of this notebook.
        """
        print("입력 shape : ", x.shape)

        x = self.embedding(x)
        print("Embedding layer를 거친 shape", x.shape)

        print("차원 추가 전 shape", context_v.shape)
        print("차원 추가 전 x.shape[1]", x.shape[1])

        # Prefer the static sequence length so eager runs behave exactly as
        # before. When the time dimension is unknown at trace time the static
        # value is None, which tf.repeat cannot use, so fall back to the
        # dynamic shape in that case.
        seq_len = x.shape[1]
        if seq_len is None:
            seq_len = tf.shape(x)[1]

        # Insert a time axis and copy the context vector once per time step.
        context_v = tf.repeat(tf.expand_dims(context_v, axis=1), repeats=seq_len, axis=1)
        print("차원 추가 후 shape", context_v.shape)

        # Concatenate along the feature axis.
        x = tf.concat([x, context_v], axis = -1)
        print('Context Vector가 더해진 shape : ', x.shape)

        x = self.lstm(x)
        print("LSTM layer의 output layer : ", x.shape)

        output = self.fc(x)
        print("Decoder의 최종 ouput layer : ", output.shape)

        output = self.softmax(output)

        return output

In [110]:
# Hyperparameters for the decoder shape check (same values as the encoder demo).
vocab_size, emb_size, lstm_size = 30000, 256, 512
batch_size, sample_seq_len = 1, 3

# Emit one line per setting in a single print call.
print(
    "Vocab Size : {0}".format(vocab_size),
    "Embedding Size : {0}".format(emb_size),
    "LSTM Size : {0}".format(lstm_size),
    "Batch_size : {0}".format(batch_size),
    "Sample Sequence Length : {0}".format(sample_seq_len),
    sep="\n",
)

Vocab Size : 30000
Embedding Size : 256
LSTM Size : 512
Batch_size : 1
Sample Sequence Length : 3


In [111]:
# Build the decoder and run it on a dummy batch to inspect the shapes.
decoder = Decoder(vocab_size, emb_size, lstm_size)
# All-zero stand-in for a tokenized sentence such as "춤 추는 소시지" ("dancing sausage").
sample_input = tf.zeros((batch_size, sample_seq_len))
# `sample_output` holds the encoder result from the encoder demo cell and is
# used here as the context vector. The decoder result gets its own name so the
# context vector is not overwritten and this cell can be re-run safely.
decoder_output = decoder(sample_input, sample_output)

입력 shape :  (1, 3)
Embedding layer를 거친 shape (1, 3, 256)
차원 추가 전 shape (1, 512)
차원 추가 전 x.shape[1] 3
차원 추가 후 shape (1, 3, 512)
Context Vector가 더해진 shape :  (1, 3, 768)
LSTM layer의 output layer :  (1, 3, 512)
Decoder의 최종 ouput layer :  (1, 3, 30000)


In [95]:
class tmp:
    """Scratch class whose constructor only prints a greeting."""

    def __init__(self):
        # No state is stored; creating an instance just writes 'hi' to stdout.
        print("hi")

In [96]:
# Instantiate the scratch class; its constructor prints 'hi' as a side effect.
qwdwqd = tmp()

hi


In [112]:
# Per-element repeats along axis 0: the first row is emitted twice and the
# second row three times, giving a (5, 2) result.
tf.repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0)

<tf.Tensor: shape=(5, 2), dtype=int32, numpy=
array([[1, 2],
       [1, 2],
       [3, 4],
       [3, 4],
       [3, 4]], dtype=int32)>

In [113]:
# Per-element repeats along axis 1: the first column is emitted twice and the
# second column three times, giving a (2, 5) result.
tf.repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1)

<tf.Tensor: shape=(2, 5), dtype=int32, numpy=
array([[1, 1, 2, 2, 2],
       [3, 3, 4, 4, 4]], dtype=int32)>