In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
# Fetch the Shakespeare corpus; keras.utils.get_file hands back a local path
# that is opened below.
shakespeare_url = 'https://homl.info/shakespeare'
filepath = keras.utils.get_file('shakespeare.txt', shakespeare_url)

# Decode explicitly as UTF-8 so the loaded text does not depend on the
# platform's default locale encoding.
with open(filepath, encoding='utf-8') as f:
    shakespeare_text = f.read()

In [3]:
# Character-level tokenizer fitted on the raw corpus string.
# NOTE(review): the corpus is passed as a bare string rather than a list, so
# presumably every character is counted as one "document" -- confirm against
# the Tokenizer docs.
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(shakespeare_text)

# dataset_size drives the train split; max_id is the vocabulary size used as
# the one-hot depth and the width of the output layer.
dataset_size = tokenizer.document_count
max_id = len(tokenizer.word_index)

In [4]:
# Encode the whole corpus as a single id sequence and shift the ids down by
# one (next_char adds the 1 back before decoding).
encoded = np.array(tokenizer.texts_to_sequences([shakespeare_text])[0]) - 1

# The first 90% of the ids form the training stream.
train_size = int(0.9 * dataset_size)
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [5]:
n_steps = 100
# One extra element per window: a later cell splits each window into the
# first n_steps ids (inputs) and the last n_steps ids (targets).
window_length = n_steps + 1

# Slide a window over the id stream one position at a time, then turn each
# nested window dataset into a single tensor of window_length ids.
dataset = (
    dataset
    .window(window_length, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(window_length))
)

In [6]:
batch_size = 32

# Shuffle the windows, batch them, split every window into (inputs, targets)
# offset by one position, and one-hot encode the inputs only; the targets stay
# as integer ids.
dataset = (
    dataset
    .shuffle(10000)
    .batch(batch_size)
    .map(lambda batch: (batch[:, :-1], batch[:, 1:]))
    .map(lambda inputs, targets: (tf.one_hot(inputs, depth=max_id), targets))
    .prefetch(1)
)

In [7]:
# Three stacked GRU layers, each returning the full sequence, followed by a
# per-time-step softmax over the max_id character classes.
model = keras.models.Sequential()
model.add(keras.layers.GRU(units=256,
                           return_sequences=True,
                           dropout=0.3,
                           input_shape=[None, max_id]))
model.add(keras.layers.GRU(units=256, return_sequences=True, dropout=0.3))
model.add(keras.layers.GRU(units=256, return_sequences=True, dropout=0.3))
model.add(keras.layers.TimeDistributed(
    keras.layers.Dense(units=max_id, activation='softmax')))

# Targets are integer ids (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

history = model.fit(dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
def preprocess(texts):
    """One-hot encode a list of strings for the model.

    Args:
        texts: List of strings. They are tokenized with the notebook's
            ``tokenizer`` and stacked into one array, so they presumably need
            to tokenize to equal lengths -- confirm before passing several
            texts of different sizes.

    Returns:
        The result of ``tf.one_hot`` on the tokenizer ids shifted down by
        one, with depth ``max_id``.
    """
    token_ids = tokenizer.texts_to_sequences(texts)
    # Same "- 1" shift that was applied to the training data.
    shifted_ids = np.array(token_ids) - 1
    return tf.one_hot(shifted_ids, max_id)

def next_char(text, temperature=1):
    """Sample the character that follows ``text`` from the model's prediction.

    Args:
        text: Non-empty prompt string.
        temperature: Strictly positive number. The log-probabilities are
            divided by it before sampling, so values below 1 sharpen the
            distribution and values above 1 flatten it.

    Returns:
        The sampled character, decoded through
        ``tokenizer.sequences_to_texts``.

    Raises:
        ValueError: If ``temperature`` is not strictly positive or ``text``
            is empty.
    """
    # Dividing by zero (or a negative number) would silently yield inf/NaN or
    # inverted logits instead of an error, so reject it up front. Written as
    # "not ... > 0" so that NaN is rejected too.
    if not temperature > 0:
        raise ValueError(
            'temperature must be strictly positive, got %r' % (temperature,))
    # An empty prompt gives the model no time step to predict from.
    if not text:
        raise ValueError('text must contain at least one character')

    x_new = preprocess([text])
    # First (only) sequence, last time step; the "-1:" slice keeps a leading
    # axis of size 1 so the logits stay 2-D for tf.random.categorical.
    y_proba = model(x_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    # "+ 1" undoes the "- 1" shift applied in preprocess.
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

def complete_text(text, n_chars=50, temperature=1):
    """Extend ``text`` by ``n_chars`` sampled characters.

    Each new character is drawn by ``next_char`` from everything generated so
    far, using the same ``temperature`` on every step.

    Args:
        text: Prompt to extend.
        n_chars: Number of characters to append.
        temperature: Sampling temperature passed through to ``next_char``.

    Returns:
        The prompt followed by the generated characters.
    """
    generated = text
    for _step in range(n_chars):
        generated = generated + next_char(generated, temperature)
    return generated

In [9]:
# Low temperature: the log-probabilities are divided by 0.2, which sharpens
# the distribution toward the most likely characters.
complete_text(text='t', temperature=0.2)

'ther will a coodman with a bitter word?\n\nkatharina:'

In [10]:
# Temperature 1: sample from the model's predicted distribution unchanged.
complete_text(text='t', temperature=1)

'takcould you fancy mine aar sits shis will\ncan do t'

In [11]:
# High temperature: dividing the log-probabilities by 2 flattens the
# distribution, so unlikely characters are sampled more often.
complete_text(text='t', temperature=2)

'th, but till nob give\n isvunshesetrab, pom wed bean'

In [12]:
# Persist the trained model to the working directory.
# NOTE(review): the .h5 suffix presumably selects the HDF5 format -- confirm
# for the installed Keras version.
model.save(filepath='shakespeare_char_rnn.h5')