In [30]:
import tensorflow as tf
import numpy as np
import os
import time

# Fetch the Shakespeare corpus and keep the path of the local copy.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)



In [31]:
# Read the corpus as bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until the garbage collector reclaims it.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode('utf-8')

# Quick look at the size of the corpus and at its first 100 characters.
print('Length of text: {} characters'.format(len(text)))
print(text[:100])


Length of text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [32]:
# Distinct characters of the corpus in sorted order; a character's position
# in this list is its integer id.
vocab = sorted(set(text))

# Lookup tables in both directions: id -> character and character -> id.
idx2char = np.array(vocab)
char2idx = dict(zip(vocab, range(len(vocab))))

# Encode the whole corpus as an array of character ids via char2idx.
text_as_int = np.array([char2idx[ch] for ch in text])


In [33]:

# Every training chunk holds seq_length + 1 characters, so the number of
# chunks per pass over the corpus is len(text) // (seq_length + 1).
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# Dataset that yields the corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode the first five ids back into characters.
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])


F
i
r
s
t


In [34]:
# Cut the character stream into chunks of seq_length + 1 ids; a final chunk
# that is too short is discarded (drop_remainder=True).
sequences = char_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

# Show the first five chunks as escaped strings so newlines stay visible.
for item in sequences.take(5):
    print('{!r}'.format(''.join(idx2char[item.numpy()])))



'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [35]:
def split_input_target(chunk):
    """
    Split one chunk into an (input, target) pair offset by one position.

    :param chunk: sliceable sequence of elements
    :return: tuple ``(chunk[:-1], chunk[1:])`` - the input is everything but
        the last element, the target is everything but the first, so
        ``target[i]`` is the element that follows ``input[i]``
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair via split_input_target.
dataset = sequences.map(split_input_target)


In [36]:

# Decode the first (input, target) pair to confirm that the target is the
# input advanced by one character.
for input_exp, target_exp in dataset.take(1):
    print('Input data', '{!r}'.format(''.join(idx2char[input_exp.numpy()])))
    print("Target data", '{!r}'.format(''.join(idx2char[target_exp.numpy()])))


Input data 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [37]:

BATCH_SIZE = 64
# Size of the shuffle buffer. tf.data is designed to work with possibly
# infinite sequences, so it does not shuffle the whole dataset in memory;
# it fills a buffer of this many elements and draws from it at random.
BUFFER_SIZE = 10000

# Build the batched pipeline from `sequences` instead of re-assigning
# `dataset` from itself. That keeps this cell idempotent: re-running it no
# longer batches data that has already been batched.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(1)
)

dataset


<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [38]:

# Model hyper-parameters
#   Embedding - input layer; maps each character id to a vector of
#               embedding_dim values
#   GRU       - a special type of RNN with units=rnn_units
#   Dense     - output layer with vocab_size outputs

# Length of each character embedding vector.
embedding_dim = 256

# Number of units in the GRU layer.
rnn_units = 1024

# One output per distinct character in the corpus.
vocab_size = len(vocab)


In [39]:

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """
    Assemble the character model: Embedding -> GRU -> Dense.

    :param vocab_size: number of embedding rows and of Dense outputs
    :param embedding_dim: length of each embedding vector
    :param rnn_units: number of GRU units
    :param batch_size: batch dimension fixed into the input shape
        ``(batch_size, None)``
    :return: the uncompiled ``tf.keras.Sequential`` model
    """
    embedding_layer = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=(batch_size, None),
    )
    # stateful=True keeps the recurrent state between successive calls;
    # return_sequences=True emits an output for every timestep.
    recurrent_layer = tf.keras.layers.GRU(
        units=rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    output_layer = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding_layer, recurrent_layer, output_layer])


# Training model: the batch dimension is fixed to BATCH_SIZE.
model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)



In [40]:

"""
    For each character the model looks up the
    embedding, runs the GRU one timestep with
    the embedding as input, and applies the dense
    layer to generate logits predicting the log-likelihood of the next character:
"""
# Run a single batch through the model and print the shape of its output.
# The loop variables stay bound after the loop: later cells read
# `target_example_batch` and `example_batch_predictions`.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")



(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [41]:

# Sample one character id per timestep from the logits of the first example
# in the batch, then drop the trailing sample axis and convert to NumPy.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()
sampled_indices


array([56, 39, 60,  9, 50,  3, 53, 15, 62, 11, 35, 45,  1, 34,  2, 49, 16,
       16,  3, 14, 23, 10, 37, 30, 49, 24,  8, 54, 29, 31, 30,  5, 18, 23,
       23, 21, 48, 13, 54, 33, 55, 32, 11, 15, 61, 37, 34, 37, 48,  8, 44,
       22, 47, 26,  4, 28, 62, 33, 22, 23, 63, 57, 23, 19, 53, 25,  4, 31,
       63, 49, 37, 40, 39, 60, 38, 30, 46,  1, 11, 10, 25, 34,  9, 39, 10,
       33, 33, 63, 57, 25, 51, 59, 56, 23, 45, 64, 62, 45, 30, 61])

In [42]:

def loss(labels, logits):
    """
    Sparse categorical cross-entropy computed directly on logits.

    :param labels: integer class ids
    :param logits: raw model outputs (``from_logits=True`` is passed)
    :return: result of ``tf.keras.losses.sparse_categorical_crossentropy``
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )


# Loss of the example batch, averaged over all of its values.
example_batch_loss = loss(target_example_batch, example_batch_predictions)
example_batch_loss.numpy().mean()



4.175749

In [43]:

model.compile(optimizer='adam', loss=loss)

# Directory that receives the training checkpoints.
checkpoint_dir = './training_checkpoint'

# File name pattern of the checkpoints; Keras fills in {epoch} when saving.
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')
# Backward-compatible alias for the original (misspelled) variable name.
checkpoint_predix = checkpoint_prefix

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    # An integer save_freq is NOT a number of epochs (current TensorFlow
    # releases count batches), so `save_freq=5` rewrote the same
    # 'ckpt_{epoch}' file every few steps. 'epoch' saves exactly once at the
    # end of each epoch. To save every N epochs instead, pass
    # N * (examples_per_epoch // BATCH_SIZE).
    save_freq='epoch',
)



In [44]:
# Number of passes over the training dataset.
EPOCH = 50

# Train the model; checkpoint_callback is invoked by Keras during training.
history = model.fit(
    dataset,
    epochs=EPOCH,
    callbacks=[checkpoint_callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
 23/172 [===>..........................] - ETA: 26s - loss: 1.6003

KeyboardInterrupt: ignored

In [45]:
# Delete checkpoint dir
# !rm -r "/content/training_checkpoint"

In [None]:
# Zip to download file
# !zip -r /content/file.zip /content/training_checkpoint



In [None]:
# Show the most recent checkpoint that TensorFlow finds in checkpoint_dir.
tf.train.latest_checkpoint(checkpoint_dir)

In [None]:
# Look the checkpoint up once and fail with a clear message when there is
# none: tf.train.latest_checkpoint returns None in that case, and passing
# None on to load_weights produces a far less helpful error.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}; train the model before loading weights.'
        .format(checkpoint_dir)
    )

# Rebuild the network with a batch size of 1 for text generation, then
# restore the trained weights into it.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))

In [None]:
# Print the Keras summary of the current `model`.
model.summary()

In [None]:

def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """
    Generate text one character at a time, seeded with ``start_string``.

    The start string is encoded with ``char2idx`` and fed to the model as a
    batch of one. A character id is sampled from the categorical distribution
    given by the logits of the last timestep, and that character becomes the
    next input. The model's states are reset once before generation starts
    and are not reset between steps.

    :param model: callable model taking a ``(1, n)`` batch of character ids
        and returning logits with a leading batch dimension of 1; it must
        provide ``reset_states()`` (presumably the stateful model returned by
        ``build_model(..., batch_size=1)``)
    :param start_string: non-empty seed text; every character must be a key
        of ``char2idx``
    :param num_generate: number of characters to generate (default 1000)
    :param temperature: positive number the logits are divided by before
        sampling; low values give more predictable text, high values give
        more surprising text (default 1.0)
    :return: ``start_string`` followed by the generated characters
    :raises ValueError: if ``start_string`` is empty or ``temperature`` is
        not positive
    :raises KeyError: if ``start_string`` contains a character that is not
        in ``char2idx``
    """
    # Validate up front: an empty seed gives the model nothing to predict
    # from, and a non-positive temperature makes the scaled logits unusable.
    if not start_string:
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive, got {!r}'.format(temperature))

    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)  # add the batch dimension -> 2-D tensor

    text_generated = []

    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)

        # remove batch dimension
        predictions = tf.squeeze(predictions, axis=0)

        # Scale the logits, then sample; [-1, 0] picks the sample drawn for
        # the last timestep.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # The sampled character is the only input of the next step.
        input_eval = tf.expand_dims([predicted_id], axis=0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# Generate text from the prompt "ROMEO: " and print it.
print(generate_text(model, start_string="ROMEO: "))



