In [0]:
# Mount Google Drive at /content/drive; the vocabulary file and the model
# checkpoints used below are read from paths under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

try:
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

import numpy as np
import os
import time

In [0]:
# Load the word-level vocabulary, one token per line.
# The context manager guarantees the file handle is closed once the lines
# have been read (the handle was previously left open).
with open('/content/drive/My Drive/lyric/vocabs_word_token.txt', 'r', encoding='utf-8') as vocab_file:
    lines = vocab_file.readlines()

vocab = []
for line in lines:
    # A line consisting only of '\n' is kept verbatim, so it ends up in the
    # vocabulary as a '\n' entry (presumably the line-break token — confirm
    # against the script that wrote the file). Every other line simply has
    # its trailing newline removed.
    if line != '\n':
        line = line.replace('\n', '')
    vocab.append(line)
print('vocab_len: ' + str(len(vocab)))

In [0]:
# Lookup tables in both directions: token -> integer id and integer id -> token.
# If a token appears more than once in `vocab`, its last position is the one
# stored in word2idx.
word2idx = dict(zip(vocab, range(len(vocab))))
idx2word = np.asarray(vocab)

In [0]:
# Sizes handed to build_model() below.
# NOTE(review): weights are later restored from a checkpoint, so these values
# presumably have to match the ones used when that checkpoint was trained —
# confirm before changing them.
vocab_size = len(vocab)  # number of entries read from the vocabulary file
embedding_dim = 256  # output width of the Embedding layer
rnn_units = 512  # number of units in the GRU layer

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the Embedding -> GRU -> Dense sequence model.

  Args:
    vocab_size: Number of vocabulary entries; used as the Embedding input
      size and as the number of units in the final Dense layer.
    embedding_dim: Output dimension of the Embedding layer.
    rnn_units: Number of units in the GRU layer.
    batch_size: Fixed batch dimension of the input shape. The sequence
      dimension is left as None (variable length).

  Returns:
    A tf.keras.Sequential model. The GRU is stateful and returns the full
    sequence; the Dense layer is created without an activation.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.GRU(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])

In [0]:
# Directory on the mounted Drive that holds the model checkpoints.
checkpoint_dir = '/content/drive/My Drive/lyric/model_output_word'
# Checkpoint file name pattern; "{epoch}" is left as a literal placeholder
# in this string and is passed to the callback as-is.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback configured to store weights only under the pattern above.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix)

In [0]:
# Build the network with a batch size of 1 for text generation.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# tf.train.latest_checkpoint returns None when no checkpoint can be found in
# the directory. Fail here with an explicit message instead of handing None
# to load_weights, which would produce a much less obvious error.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      'No checkpoint found in ' + repr(checkpoint_dir))
model.load_weights(latest_checkpoint)

model.build(tf.TensorShape([1, None]))

In [0]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
  """Sample a sequence of words from `model`, seeded with `start_string`.

  The prompt is split on single spaces and each word found in the
  module-level `word2idx` table is converted to its id; unknown words are
  skipped. If no prompt word is known, id 0 is used as the seed. After the
  first step only the most recently sampled id is fed back into the model,
  so this relies on the model keeping its recurrent state between calls
  (build_model creates the GRU with stateful=True).

  Args:
    model: Callable Keras model that also provides `reset_states()`.
    start_string: Prompt text; words are separated by single spaces.
    num_generate: Number of words to sample. Defaults to 1000.
    temperature: Divisor applied to the model output before sampling.
      Values below 1 make sampling more conservative, values above 1 more
      random. Must be positive. Defaults to 1.0.

  Returns:
    `start_string` followed by the generated words, all separated by single
    spaces. With `num_generate=0` the prompt is returned unchanged.

  Raises:
    ValueError: If `temperature` is not positive.
  """
  if temperature <= 0:
    raise ValueError('temperature must be positive, got ' + repr(temperature))

  # Encode the prompt, silently dropping out-of-vocabulary words.
  input_eval = [word2idx[word]
                for word in start_string.split(' ')
                if word in word2idx]
  if not input_eval:
    # Nothing in the prompt is known: fall back to vocabulary id 0.
    input_eval = [0]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  # Start from a clean recurrent state so earlier calls do not leak in.
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # Drop the batch dimension.
    predictions = tf.squeeze(predictions, 0)

    predictions = predictions / temperature
    # Sample one id per time step and keep the one for the last step.
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    # Feed only the newly sampled id back in as the next input.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2word[predicted_id])

  # Join the prompt and the generated words with the same separator so the
  # last prompt word is not glued to the first generated word.
  return ' '.join([start_string] + text_generated)

In [0]:
# Generate text from the restored model, seeded with a Korean phrase, and print it.
print(generate_text(model, start_string=u"하늘 아래 서서 고개를 치켜 들고"))