In [0]:
# https://neurowhai.tistory.com/292

In [0]:
from tensorflow.python.client import device_lib
import tensorflow as tf
# Show every compute device TensorFlow reports; the bare trailing expression
# is rendered as the cell output.
device_lib.list_local_devices()


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3876883829019089860, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 5933003659533636849
 physical_device_desc: "device: XLA_CPU device", name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 12620358854082576940
 physical_device_desc: "device: XLA_GPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 15956161332
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 11978132078730237417
 physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"]

In [1]:
import tensorflow as tf
# Ask TensorFlow to grow its GPU allocation on demand.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True

# A ConfigProto does nothing until a session is created from it, so build that
# session and install it as the one the (TF 1.x) Keras backend will use.
from keras import backend as K
K.set_session(tf.compat.v1.Session(config=config))

In [2]:
import tensorflow as tf
# Display the GPU device name reported by TensorFlow.
tf.test.gpu_device_name()

'/device:GPU:0'

In [3]:
from keras import layers, models
from __future__ import print_function
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Bidirectional
import numpy as np
from keras import datasets
from keras import backend as K
from keras.utils import plot_model
import matplotlib
from matplotlib import ticker
import matplotlib.pyplot as plt


batch_size = 32  # Batch size for training.
epochs = 100  # Number of epochs to train for.
latent_dim = 256  # Latent dimensionality of the encoding space.
num_samples = 10000  # Maximum number of samples to train on.
# Path to the data txt file on disk (one "<source>\t<target>" pair per line).
data_path = '/content/dataset.txt'

# Vectorize the data.
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()

with open(data_path, 'r', encoding='utf-8') as f:
    # Drop empty lines instead of assuming the file ends with exactly one
    # newline; otherwise the last sample is lost when that newline is missing.
    lines = [line for line in f.read().split('\n') if line]

for line in lines[:num_samples]:
    input_text, target_text = line.split('\t')
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    # set.update adds every character of the string; duplicates are ignored.
    input_characters.update(input_text)
    target_characters.update(target_text)
# Preprocessing: freeze both character sets into sorted lists so that every
# character gets a stable position, then record vocabulary and length limits.
input_characters = sorted(input_characters)
target_characters = sorted(target_characters)
num_encoder_tokens, num_decoder_tokens = len(input_characters), len(target_characters)
max_encoder_seq_length = max(map(len, input_texts))
max_decoder_seq_length = max(map(len, target_texts))

for label, value in (
        ('Number of samples:', len(input_texts)),
        ('Number of unique input tokens:', num_encoder_tokens),
        ('Number of unique output tokens:', num_decoder_tokens),
        ('Max sequence length for inputs:', max_encoder_seq_length),
        ('Max sequence length for outputs:', max_decoder_seq_length)):
    print(label, value)


# Character -> integer id lookup tables.
input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}

# Zero-filled 3-D arrays (sample, time step, token id) that hold the
# training data.
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')
decoder_shape = (len(input_texts), max_decoder_seq_length, num_decoder_tokens)
decoder_input_data = np.zeros(decoder_shape, dtype='float32')
decoder_target_data = np.zeros(decoder_shape, dtype='float32')

# One-hot encode every sentence character by character.
for i, input_text in enumerate(input_texts):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.

for i, target_text in enumerate(target_texts):
    for t, char in enumerate(target_text):
        token_id = target_token_index[char]
        decoder_input_data[i, t, token_id] = 1.
        # The target is the decoder input shifted one step to the left, so the
        # start character at t == 0 never appears in it.
        if t > 0:
            decoder_target_data[i, t - 1, token_id] = 1.

# Integer id -> character lookup tables.
reverse_input_char_index = {i: char for char, i in input_token_index.items()}
reverse_target_char_index = {i: char for char, i in target_token_index.items()}

def RepeatVectorLayer(rep, axis):
  """Build a Lambda layer that inserts a new axis and tiles the input along it.

  Args:
    rep: number of copies to make along the new axis.
    axis: position at which the new axis is inserted (batch axis is 0).

  Returns:
    A `layers.Lambda` whose output shape is the input shape with `rep`
    inserted at index `axis`.
  """
  # Forward pass: add a length-1 axis, then repeat it `rep` times.
  tile = lambda x: K.repeat_elements(K.expand_dims(x, axis), rep, axis)
  # Static shape: keep the batch entry and splice `rep` in at `axis`.
  tiled_shape = lambda x: tuple((x[0],) + x[1:axis] + (rep,) + x[axis:])
  return layers.Lambda(tile, output_shape=tiled_shape)


# Build the encoder: a GRU over the one-hot source characters that returns
# both its per-step outputs and its final hidden state.
encoder_inputs = layers.Input(shape=(max_encoder_seq_length, num_encoder_tokens))
encoder = layers.GRU(latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h = encoder(encoder_inputs)

# Build the decoder: a GRU over the one-hot target characters, started from
# the encoder's final state. Its own final state is not needed for training.
decoder_inputs = layers.Input(shape=(max_decoder_seq_length, num_decoder_tokens))
decoder = layers.GRU(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _ = decoder(decoder_inputs, initial_state=state_h)

# Build the attention block.

# Tile the decoder outputs across a new axis 2 (one copy per encoder step):
# (batch, dec_steps, enc_steps, latent_dim).
repeat_d_layer = RepeatVectorLayer(max_encoder_seq_length, 2)
repeat_d = repeat_d_layer(decoder_outputs)

# Tile the encoder outputs across a new axis 1 (one copy per decoder step):
# (batch, dec_steps, enc_steps, latent_dim).
repeat_e_layer = RepeatVectorLayer(max_decoder_seq_length, 1)
repeat_e = repeat_e_layer(encoder_outputs)

# Pair every decoder step with every encoder step along the feature axis.
concat_for_score_layer = layers.Concatenate(axis=-1)
concat_for_score = concat_for_score_layer([repeat_d, repeat_e])

# Two-layer scorer (tanh hidden layer, single linear output) applied to each
# (decoder step, encoder step) pair. The bare Dense layers are kept in their
# own variables so the inference graph can reuse the same weights.
dense1_t_score_layer = layers.Dense(latent_dim // 2, activation='tanh')
dense1_score_layer = layers.TimeDistributed(dense1_t_score_layer)
dense1_score = dense1_score_layer(concat_for_score)
dense2_t_score_layer = layers.Dense(1)
dense2_score_layer = layers.TimeDistributed(dense2_t_score_layer)
dense2_score = dense2_score_layer(dense1_score)
# Drop the trailing size-1 score axis: (batch, dec_steps, enc_steps).
dense2_score = layers.Reshape((max_decoder_seq_length, max_encoder_seq_length))(dense2_score)

# Normalise the scores over the encoder steps (last axis).
softmax_score_layer = layers.Softmax(axis=-1)
softmax_score = softmax_score_layer(dense2_score)

# Tile the weights across a new axis 2 of size latent_dim:
# (batch, dec_steps, latent_dim, enc_steps).
repeat_score_layer = RepeatVectorLayer(latent_dim, 2)
repeat_score = repeat_score_layer(softmax_score)

# Bring the encoder outputs to the same layout: swap to
# (batch, latent_dim, enc_steps), then tile over the decoder steps.
# Note: this rebinds `repeat_e_layer` / `repeat_e` from above.
permute_e = layers.Permute((2, 1))(encoder_outputs)
repeat_e_layer = RepeatVectorLayer(max_decoder_seq_length, 1)
repeat_e = repeat_e_layer(permute_e)

# Weight every encoder output by its attention weight ...
attended_mat_layer = layers.Multiply()
attended_mat = attended_mat_layer([repeat_score, repeat_e])

# ... and sum over the encoder steps (last axis) to get one context vector
# per decoder step: (batch, dec_steps, latent_dim).
context_layer = layers.Lambda(lambda x: K.sum(x, axis=-1),
                             lambda x: tuple(x[:-1]))
context = context_layer(attended_mat)

# Combine the context vector with the raw decoder output.
concat_context_layer = layers.Concatenate(axis=-1)
concat_context = concat_context_layer([context, decoder_outputs])

# Project the combination back to latent_dim with a tanh layer.
attention_dense_output_layer = layers.Dense(latent_dim, activation='tanh')
attention_output_layer = layers.TimeDistributed(attention_dense_output_layer)
attention_output = attention_output_layer(concat_context)

# Final per-step distribution over the target vocabulary.
# Note: `decoder_outputs` is rebound here from the GRU output to the softmax
# output.
decoder_dense = layers.Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(attention_output)


# Build the training model: (encoder input, decoder input) -> next-character
# probabilities.
model = models.Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Configure the optimiser, loss and reported metric before training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Write the model to disk whenever the validation loss reaches a new minimum.
che = 'keras_model1.model'
point = ModelCheckpoint(
    filepath=che,
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
# NOTE(review): patience is 100 while `epochs` is also 100, so this callback
# looks unable to stop the run early -- confirm that is intended.
early_stopping = EarlyStopping(patience=100, monitor='val_loss')

# Run training, holding out the last 20% of the samples for validation.
history = model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    callbacks=[point, early_stopping],
    verbose=1,
)
# Save model
# model.save('s2s.h5')
# 


# Next: inference mode (sampling).
# Here's the drill:
# 1) encode input and retrieve initial decoder state
# 2) run one step of decoder with this initial state
# and a "start of sequence" token as target.
# Output will be the next target token
# 3) Repeat with the current target token and current states

# Define sampling models
# The encoder model exposes both the per-step encoder outputs (needed by the
# attention block) and the final state (the decoder's initial state).
encoder_model = models.Model(encoder_inputs, [encoder_outputs, state_h])
# Placeholder through which the stored encoder outputs are fed to the decoder.
encoder_outputs_input = layers.Input(shape=(max_encoder_seq_length, latent_dim))

# The sampling decoder consumes one character per call and takes its GRU
# state as an explicit input. It reuses the trained `decoder` GRU.
# Note: `decoder_inputs` and `decoder_outputs` are rebound here and no longer
# refer to the training-graph tensors.
decoder_inputs = layers.Input(shape=(1, num_decoder_tokens))
decoder_state_input_h = layers.Input(shape=(latent_dim,))
decoder_outputs, decoder_h = decoder(decoder_inputs, initial_state=decoder_state_input_h)

# Same attention wiring as the training graph, with a decoder length of 1.
repeat_d_layer = RepeatVectorLayer(max_encoder_seq_length, 2)
repeat_d = repeat_d_layer(decoder_outputs)

repeat_e_layer = RepeatVectorLayer(1, axis=1)
repeat_e = repeat_e_layer(encoder_outputs_input)

concat_for_score_layer = layers.Concatenate(axis=-1)
concat_for_score = concat_for_score_layer([repeat_d, repeat_e])

# New TimeDistributed wrappers around the Dense layers created for the
# training graph, so the scorer weights are shared.
dense1_score_layer = layers.TimeDistributed(dense1_t_score_layer)
dense1_score = dense1_score_layer(concat_for_score)

dense2_score_layer = layers.TimeDistributed(dense2_t_score_layer)
dense2_score = dense2_score_layer(dense1_score)
# Drop the trailing size-1 score axis: (batch, 1, enc_steps).
dense2_score = layers.Reshape((1, max_encoder_seq_length))(dense2_score)

# Attention weights over the encoder steps.
softmax_score_layer = layers.Softmax(axis=-1)
softmax_score = softmax_score_layer(dense2_score)

repeat_score_layer = RepeatVectorLayer(latent_dim, 2)
repeat_score = repeat_score_layer(softmax_score)

permute_e = layers.Permute((2, 1))(encoder_outputs_input)
repeat_e_layer = RepeatVectorLayer(1, axis=1)
repeat_e = repeat_e_layer(permute_e)

attended_mat_layer = layers.Multiply()
attended_mat = attended_mat_layer([repeat_score, repeat_e])

# Sum the weighted encoder outputs over the encoder steps (last axis).
context_layer = layers.Lambda(lambda x: K.sum(x, axis=-1),
                             lambda x: tuple(x[:-1]))
context = context_layer(attended_mat)

concat_context_layer = layers.Concatenate(axis=-1)
concat_context = concat_context_layer([context, decoder_outputs])

# Reuses the Dense layer created for the training graph.
attention_output_layer = layers.TimeDistributed(attention_dense_output_layer)
attention_output = attention_output_layer(concat_context)

# Reuses the output softmax layer created for the training graph.
decoder_att_outputs = decoder_dense(attention_output)

# Inputs: previous character, previous GRU state, stored encoder outputs.
# Outputs: raw GRU output, new GRU state, next-character probabilities.
decoder_model = models.Model([decoder_inputs, decoder_state_input_h, encoder_outputs_input],
                            [decoder_outputs, decoder_h, decoder_att_outputs])


def decode_sequence(input_seq):
  """Greedily decode a single encoded source sentence into target text.

  Args:
    input_seq: one-hot encoder input holding exactly one sentence; the
      decoder input built here always has a batch size of 1.

  Returns:
    The generated characters as a string. Generation ends once the '\n' end
    character has been produced (it is included in the result) or once the
    text is longer than `max_decoder_seq_length`.
  """
  def one_hot(token_index):
    # Single-step decoder input with only `token_index` switched on.
    step = np.zeros((1, 1, num_decoder_tokens))
    step[0, 0, token_index] = 1.
    return step

  # Encode the source sentence once; the encoder outputs are fed to every
  # decoder step, while the state is replaced after each step.
  enc_outputs, states_value = encoder_model.predict(input_seq)

  # The first decoder input is the start character '\t'.
  target_seq = one_hot(target_token_index['\t'])

  decoded_sentence = ''
  while True:
    # Previous character + previous state -> next character + next state.
    _, states_value, output_tokens = decoder_model.predict(
        [target_seq, states_value, enc_outputs])

    # Pick the most probable token and map it back to its character.
    sampled_token_index = np.argmax(output_tokens[0, -1, :])
    sampled_char = reverse_target_char_index[sampled_token_index]
    decoded_sentence += sampled_char

    # Stop on the end character or when the length limit is exceeded.
    if sampled_char == '\n' or len(decoded_sentence) > max_decoder_seq_length:
      return decoded_sentence

    # The sampled character becomes the next decoder input.
    target_seq = one_hot(sampled_token_index)

# Decode the first 30 training sentences as a quick sanity check. Iterating
# over a slice of `input_texts` keeps the loop in range when fewer than 30
# samples were loaded.
for seq_index, source_text in enumerate(input_texts[:30]):
  # Slice instead of indexing to keep the leading batch axis of size 1.
  input_seq = encoder_input_data[seq_index: seq_index + 1]
  decoded_sentence = decode_sequence(input_seq)
  print('"{}" -> "{}"'.format(source_text, decoded_sentence.strip()))

Using TensorFlow backend.


Number of samples: 1203
Number of unique input tokens: 787
Number of unique output tokens: 749
Max sequence length for inputs: 165
Max sequence length for outputs: 183





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 962 samples, validate on 241 samples
Epoch 1/100






Epoch 00001: val_loss improved from inf to 0.55828, saving model to keras_model1.model
Epoch 2/100

Epoch 00002: val_loss improved from 0.55828 to 0.54485, saving model to keras_model1.model
Epoch 3/100

Epoch 00003: val_loss improved from 0.54485 to 0.52167, saving model to keras_model1.model
Epoch 4/100

Epoch 00004: val_loss improved from 0.52167 to 0.49898, saving model to keras_model1.model
Epoch 5/100

Epoch 00005: val_loss improved from 0.49898 to 0.47575, saving model to keras_model1.model
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.47575
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.47575
Epoch 8/100

Epoch 00008: val_loss 

In [5]:
# Decode up to the first 1000 training sentences. Iterating over a slice of
# `input_texts` keeps the loop in range when fewer samples were loaded.
for seq_index, source_text in enumerate(input_texts[:1000]):
  # Slice instead of indexing to keep the leading batch axis of size 1.
  input_seq = encoder_input_data[seq_index: seq_index + 1]
  decoded_sentence = decode_sequence(input_seq)
  print('"{}" -> "{}"'.format(source_text, decoded_sentence.strip()))

"하르방 " -> "할아버지"
"할망 " -> "할머니"
"아방 " -> "아버지"
"어멍 " -> "어머니"
"비바리 " -> "처녀"
"괸당 " -> "친척"
"걸바시 " -> "거지"
"넹바리 " -> "시집간색시"
"다슴아돌 " -> "의붓아들"
"말젯놈 " -> "세번째자식"
"소나이 " -> "사나이"
"성님 " -> "형님"
"작산 거 " -> "어른이된 사람"
"좀녀 " -> "해녀"
"촐람생이 " -> "경솔한사람"
"홀아방 " -> "홀아비"
"가달 " -> "다리"
"꼴랑지 " -> "꼬리"
"구뚱배기 " -> "귀쪽뺨"
"꽝 " -> "뼈"
"굴레 " -> "입"
"대망생이 " -> "머리"
"등땡이 " -> "등어리"
"또꼬망 " -> "똥구멍"
"모감지 " -> "멱살"
"베 봉탱이 " -> "배 불뚝이"
"베아지 볼라불라" -> "배 밟아버린다"
"상판이 " -> "얼굴"
"야게기 " -> "목"
"야굴탁 " -> "턱"
"임댕이 " -> "이마"
"정겡이 " -> "종아리"
"저껭이 " -> "겨드랑이"
"조금태기 " -> "간지롭게"
"좀짐팽이 " -> "종아리"
"허운데기 " -> "얼굴"
"허벅다리 " -> "넓적다리"
"놋 " -> "얼굴"
"간수메 " -> "통조림"
"개역 " -> "미숫가루"
"것 " -> "동물먹이"
"괴기 " -> "고기"
"바당괴기 " -> "바닷고기"
"돗괴기 " -> "돼지"
"쇠괴기 " -> "쇠고기"
"도괴기 " -> "돼지고기"
"곤떡 " -> "쌀로만든하얀떡"
"곤밥 " -> "흰쌀밥"
"놈삐 " -> "무우"
"대사니김치 " -> "마늘장아찌"
"마농 " -> "마늘"
"마농 " -> "마늘"
"조배기 " -> "메밀"
"촐래 " -> "반찬"
"촘지금 " -> "참기름"
"짐치 " -> "김치"
"촙쏠 " -> "찹쌀"
"조팝 " -> "조밥"
"갈옷 " -> "감물들인옷"
"갈 적삼 " -> "감물들인 적삼"
"갈 중이 " -> "감물들인 고의"
"강알터진 바지 " -> "개구멍 바

In [0]:
  # Loss curves
  plt.plot(history.history['loss'], 'y', label='train loss')
  plt.plot(history.history['val_loss'], 'r', label='val loss')
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.legend(loc='upper left')
  plt.show()

  # Accuracy curves. Keras releases before 2.3 record the metric as 'acc',
  # later ones as 'accuracy', so use whichever key this run produced.
  acc_key = 'acc' if 'acc' in history.history else 'accuracy'
  plt.plot(history.history[acc_key], 'y', label='train acc')
  plt.plot(history.history['val_' + acc_key], 'r', label='val acc')
  plt.xlabel('epoch')
  plt.ylabel('accuracy')
  plt.legend(loc='upper left')
  plt.show()

In [0]:
# Debug aid: print the symbolic tensor obtained by indexing `decoder_inputs`
# at 1. This shows tensor metadata, not data.
print(decoder_inputs[1])

Tensor("strided_slice_2:0", shape=(?, 734), dtype=float32)


In [0]:
# Debug aid: run the encoder on the second sample only (the slice keeps the
# batch axis) and display the raw predict() result.
encoder_model.predict(encoder_input_data[1:2])

[array([[ 0.0000000e+00, -0.0000000e+00, -0.0000000e+00,  0.0000000e+00,
          0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  1.6329135e-01,
          6.4644217e-04, -0.0000000e+00, -9.2373818e-02,  2.9097532e-28,
         -0.0000000e+00, -8.5620570e-01,  0.0000000e+00,  0.0000000e+00,
         -9.6190804e-01,  0.0000000e+00,  1.4762883e-31,  0.0000000e+00,
         -2.5769413e-02, -1.5891892e-01,  0.0000000e+00, -0.0000000e+00,
          0.0000000e+00,  0.0000000e+00, -4.8811927e-01, -5.2259541e-01,
          2.4740072e-01,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         -4.8581851e-03, -1.8517128e-01, -0.0000000e+00, -4.0916356e-01,
          0.0000000e+00, -9.0105736e-01, -0.0000000e+00,  5.5774748e-03,
         -2.9330635e-01, -9.8345417e-01,  0.0000000e+00,  0.0000000e+00,
         -0.0000000e+00,  0.0000000e+00, -0.0000000e+00,  3.7912405e-01,
         -8.5560732e-02, -0.0000000e+00,  0.0000000e+00, -0.0000000e+00,
         -2.9002559e-01, -0.0000000e+00,  0.0000000

In [0]:
# Display the metric history attached to `model` as a dict of lists.
model.history.history

{'acc': [0.90837691092862,
  0.9354104222362614,
  0.9352390464916024,
  0.9354153230626095,
  0.9354104250135388,
  0.9354104203135309,
  0.9354104260817223,
  0.9354985636194975,
  0.9354740763650573,
  0.9357874575054347,
  0.9370115768952182,
  0.9379859761097953,
  0.9379174260682958,
  0.9392345844631127,
  0.9392052049277931,
  0.9407378007861449,
  0.9414135181348383,
  0.9425886760475815,
  0.94316646681037,
  0.9442583754071198,
  0.9448704377724706,
  0.945830150530757,
  0.9463393848429444,
  0.9472452385023931,
  0.9479160537429181,
  0.9490373484977257,
  0.9495367941890566,
  0.9505209937745098,
  0.9513484948852157,
  0.9521123431489459,
  0.952788058574909,
  0.9535029479252395,
  0.9545067294524134,
  0.9551873378001661,
  0.9555986517219133,
  0.9564947028741188,
  0.9572438636133748,
  0.9581693034872787,
  0.9589380525773571,
  0.9600250680386806,
  0.9607056838637185,
  0.9617731156742274,
  0.9628699238155051,
  0.9635407493105926,
  0.9643927425893831,
  0.96561

In [0]:
for seq_index, source_text in enumerate(input_texts[:2]):
    # Take one sequence (part of the training set)
    # for trying out decoding. decode_sequence builds its decoder input with
    # a batch size of 1, so pass exactly one row, not a multi-row slice.
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', source_text)
    print('Decoded sentence:', decoded_sentence)

-
Input sentence: 하르방 
Decoded sentence:  감물들인인옷

-
Input sentence: 할망 
Decoded sentence:  감물들인 옷



In [0]:
# Display characters 1 and 2 of the most recently decoded sentence.
decoded_sentence[1:3]

'감물'

In [0]:

# Define sampling models
# NOTE(review): `encoder_states` and `decoder_lstm` are not defined anywhere
# in the visible notebook, so this cell fails with NameError on a fresh
# kernel. It also rebinds `encoder_model` and `decoder_model`, replacing the
# attention-based sampling models built earlier. Presumably a leftover from
# an LSTM version of the model -- confirm and remove or restore it.
encoder_model = Model(encoder_inputs, encoder_states)

# The decoder takes both LSTM states (h and c) as explicit inputs and returns
# the updated pair together with the token probabilities.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

# Reverse-lookup token index to decode sequences back to
# something readable.
reverse_input_char_index = dict(
    (i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict(
    (i, char) for char, i in target_token_index.items())


def decode_sequence(input_seq):
    """Greedily decode one source sentence with the two-state (h, c) models.

    Expects `encoder_model.predict` to return a list of states and
    `decoder_model.predict` to return `(output_tokens, h, c)`. Redefining
    this name replaces any earlier `decode_sequence` in the notebook.

    Args:
        input_seq: one-hot encoder input holding a single sentence; the
            decoder input built here always has a batch size of 1.

    Returns:
        The generated characters as a string. Generation ends once '\n' has
        been produced (it is included) or once the text is longer than
        `max_decoder_seq_length`.
    """
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Decoding starts from the start character.
    next_index = target_token_index['\t']
    decoded_sentence = ''
    while True:
        # Length-1 target sequence holding the previous token, one-hot.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, next_index] = 1.

        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)
        # Carry the new states into the next step.
        states_value = [h, c]

        # Sample the most probable token.
        next_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[next_index]
        decoded_sentence += sampled_char

        # Exit condition: stop character found or max length exceeded.
        if (sampled_char == '\n' or
                len(decoded_sentence) > max_decoder_seq_length):
            break

    return decoded_sentence


for seq_index, source_text in enumerate(input_texts[:100]):
    # Take one sequence (part of the training set)
    # for trying out decoding. decode_sequence builds its decoder input with
    # a batch size of 1, so pass exactly one row, not a multi-row slice.
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', source_text)
    print('Decoded sentence:', decoded_sentence)

-
Input sentence: 하르방 
Decoded sentence:  감물들인인옷

-
Input sentence: 할망 
Decoded sentence:  감물들인 옷

-
Input sentence: 아방 
Decoded sentence:  감물들인 옷

-
Input sentence: 어멍 
Decoded sentence:  감물들인인옷

-
Input sentence: 비바리 
Decoded sentence:  돼지

-
Input sentence: 괸당 
Decoded sentence:  바지

-
Input sentence: 걸바시 
Decoded sentence:  바지

-
Input sentence: 넹바리 
Decoded sentence:  감물들인 옷

-
Input sentence: 다슴아돌 
Decoded sentence:  감물들인 옷

-
Input sentence: 말젯놈 
Decoded sentence:  바지

-
Input sentence: 소나이 
Decoded sentence:  바지

-
Input sentence: 성님 
Decoded sentence:  여자

-
Input sentence: 작산 거 
Decoded sentence:  감물들인 옷

-
Input sentence: 좀녀 
Decoded sentence:  여자

-
Input sentence: 촐람생이 
Decoded sentence:  바지

-
Input sentence: 홀아방 
Decoded sentence:  바지

-
Input sentence: 가달 
Decoded sentence:  바지

-
Input sentence: 꼴랑지 
Decoded sentence:  감물들인 옷

-
Input sentence: 구뚱배기 
Decoded sentence:  잠잠하다

-
Input sentence: 꽝 
Decoded sentence:  돼지

-
Input sentence: 굴레 
Decoded sentence:  바지

-
Inpu