Based on this notebook: [Shakespeare text generation with an RNN (Colab)](https://colab.research.google.com/github/trekhleb/machine-learning-experiments/blob/master/experiments/text_generation_shakespeare_rnn/text_generation_shakespeare_rnn.ipynb#scrollTo=VPE98xa8PA-u)

TensorFlow reference: [Text generation with an RNN tutorial](https://www.tensorflow.org/text/tutorials/text_generation)

In [31]:
import tensorflow as tf

import numpy as np
import os
import time

In [32]:
# Fetch the Shakespeare corpus; the returned value is used below as the path
# of the local file to open.
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [33]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8. The context
# manager closes the file handle deterministically instead of leaving an open
# descriptor behind for the garbage collector.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))

In [34]:
# Lookup layer mapping character strings to integer ids, built from the
# corpus vocabulary with no mask token reserved.
ids_from_chars = tf.keras.layers.StringLookup(
    mask_token=None,
    vocabulary=list(vocab),
)

In [35]:
# Inverse lookup (ids -> characters). It reuses the vocabulary reported by
# ids_from_chars so both layers agree on the id assignment.
chars_from_ids = tf.keras.layers.StringLookup(
    mask_token=None,
    invert=True,
    vocabulary=ids_from_chars.get_vocabulary(),
)

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [37]:
def text_from_ids(ids):
  """Convert ids back to characters and join them along the last axis."""
  chars = chars_from_ids(ids)
  return tf.strings.reduce_join(chars, axis=-1)

In [38]:
# Split the whole corpus into UTF-8 characters and map each one to its id.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
# Bare expression: shows the tensor as the notebook cell output.
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [40]:
# Number of characters in each model input.
seq_length = 100

# Stream of individual character ids.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

# Chunks of seq_length + 1 ids: one extra id so that input and target, each
# seq_length long, can be offset by one position. A short final chunk is dropped.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

In [41]:
def split_input_target(sequence):
    """Return (input, target) for a sequence.

    The input is the sequence without its last element; the target is the
    sequence without its first element, i.e. the input shifted by one.
    """
    return sequence[:-1], sequence[1:]

In [42]:
# Turn every (seq_length + 1)-id chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [43]:
# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64
# Size of the buffer that shuffle() draws elements from.
BUFFER_SIZE = 10000

# Shuffle, group into fixed-size batches (dropping a short final batch), and
# prefetch with an automatically tuned buffer.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [44]:
# Number of entries in the lookup layer's vocabulary; used as both the
# embedding input size and the number of output logits.
vocab_size = len(ids_from_chars.get_vocabulary())
# Width of the character embedding vectors.
embedding_dim = 256
# Number of units in the GRU layer.
rnn_units = 1024

In [45]:
class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense logits."""

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Build the three layers.

    Args:
      vocab_size: number of distinct ids; sizes the embedding table and the
        output layer.
      embedding_dim: width of each embedding vector.
      rnn_units: number of GRU units.
    """
    # `super()` already binds the instance. The previous `super().__init__(self)`
    # forwarded the model itself as an extra positional argument to
    # tf.keras.Model.__init__.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences: emit an output for every time step.
    # return_state: also return the final state so generation can carry it on.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    # No activation: the layer outputs raw logits over the vocabulary.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of id sequences.

    Args:
      inputs: integer ids fed to the embedding layer.
      states: optional GRU state to start from; when None the GRU's initial
        state is used.
      return_state: when True, also return the GRU state after the last step.
      training: forwarded to every layer.

    Returns:
      The logits, or `(logits, states)` when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [46]:
# Instantiate the network with the hyperparameters defined above.
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
)

In [47]:
# from_logits=True: MyModel's final Dense layer is created without an
# activation, so the model outputs raw logits rather than probabilities.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# Configure training with the Adam optimizer and the loss above.
model.compile(optimizer='adam', loss=loss)

In [49]:
# Number of passes over `dataset`.
EPOCHS = 20
# Train the model and keep fit()'s return value for later inspection.
history = model.fit(dataset, epochs=EPOCHS)

Epoch 1/20


Epoch 2/20


Epoch 3/20


Epoch 4/20


Epoch 5/20


Epoch 6/20


Epoch 7/20


Epoch 8/20


Epoch 9/20


Epoch 10/20


Epoch 11/20


Epoch 12/20


Epoch 13/20


Epoch 14/20


Epoch 15/20


Epoch 16/20


Epoch 17/20


Epoch 18/20


Epoch 19/20


Epoch 20/20



In [50]:
class OneStep(tf.keras.Model):
  """Wraps a trained model to sample one next character per call."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and precompute the logit mask.

    Args:
      model: model called with `inputs`, `states` and `return_state=True`.
      chars_from_ids: lookup layer mapping ids to characters.
      ids_from_chars: lookup layer mapping characters to ids.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Dense mask over the vocabulary: -inf at the '[UNK]' id, 0 elsewhere.
    # Adding it to the logits prevents '[UNK]' from ever being sampled.
    blocked_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocabulary = ids_from_chars.get_vocabulary()
    blocked_mask = tf.SparseTensor(
        indices=blocked_ids,
        values=[-float('inf')]*len(blocked_ids),
        dense_shape=[len(vocabulary)])
    self.prediction_mask = tf.sparse.to_dense(blocked_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: string tensor holding the text generated so far for this step.
      states: recurrent state from the previous call, or None to start fresh.

    Returns:
      `(predicted_chars, states)`: the sampled characters and the updated
      state to pass into the next call.
    """
    # Strings -> characters -> ids, converted from ragged to a dense tensor.
    char_ids = self.ids_from_chars(
        tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    logits, states = self.model(
        inputs=char_ids, states=states, return_state=True)

    # Keep only the last time step, apply the temperature, then mask '[UNK]'.
    last_logits = logits[:, -1, :]/self.temperature + self.prediction_mask

    # Draw one id per row from the resulting distribution.
    sampled_ids = tf.squeeze(
        tf.random.categorical(last_logits, num_samples=1), axis=-1)

    return self.chars_from_ids(sampled_ids), states

In [51]:
# Wrap the trained model for sampling; temperature keeps OneStep's default of 1.0.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [55]:
# Generate 1000 characters from the seed 'ROMEO:' and time the whole run.
start = time.time()
# No recurrent state yet: the first call starts from the model's initial state.
states = None
next_char = tf.constant(['ROMEO:'])
# Collected pieces of output, starting with the seed itself.
result = [next_char]

for n in range(1000):
  # Feed the previous output and state back in; keep the new state for the next step.
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters element-wise into one string tensor.
result = tf.strings.join(result)
end = time.time()
# Print the single generated string followed by an 80-character separator line.
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:

I give many gently biof should chance to fight,

And thieves mock'd them will rise and by thy foe.

The manner is the noble demiged

Than caruless fair within me? I'll pray thee of ourselvos,

You must want on; for 'What look on our own dead?



JULIET:

You are one, and marry shows still

Having my dream bolong to peck.



ROMEO:

I talk of day unto thy birth. Now is the

man, I have often heard of you

But so it is nothing: but reason made gentle person?



FRIAR LAURENCE:

Plantagenet, or vanisbed,

Death, with this seven shadow of your great audy,

It seldom be her love.



LEONTES:

How art thou affects me!

He passade the keysion of his patrimony

Destruct with painty hence to have my knighthood.



ISABELLA:

What sayest thou?



First Gentleman:

I shall be shrived! None that will rise, it trieves, it good

to she. no more am in us: good sir,

While I recruent in death make boose of

colours, sir, and Romeo here she knew steel'd.

Here were not well.



HORTENSIO:

Yea,