In [1]:
import tensorflow as tf
from tensorflow import keras

# Downloading and loading dataset

In [10]:
# Download the Shakespeare corpus; get_file returns the local path of the file.
shakespeare_url = "https://homl.info/shakespeare"
filepath = keras.utils.get_file(
    fname="shakespear.txt",
    origin=shakespeare_url,
)

In [11]:
# Read the whole corpus into memory as a single string.
with open(filepath) as corpus_file:
    shakespear_text = corpus_file.read()

In [12]:
# Show only a short preview: the full corpus is over a million characters and
# would bloat the notebook output.
shakespear_text[:200]

''

# Tokenizing dataset

In [4]:
# Character-level tokenizer: each distinct character is mapped to an integer id.
tokenizer = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(texts=[shakespear_text])

In [7]:
import json

# Save the tokenizer's character -> id mapping (word_index) to disk as JSON.
with open('word_dict.json', 'w') as out_file:
    json.dump(tokenizer.word_index, out_file)

In [9]:
# Save the inverse mapping (index_word) as JSON.
# Note: JSON object keys are always strings, so non-string keys are written as strings.
with open('index_word.json', 'w') as out_file:
    json.dump(tokenizer.index_word, out_file)

In [5]:
# Encode a sample word to inspect the character ids assigned by the tokenizer.
tokenizer.texts_to_sequences(["First"])

[[]]

In [7]:
# max_id: number of distinct characters; dataset_size: total number of characters
# (the sum of every character's occurrence count).
max_id = len(tokenizer.word_index)
dataset_size = sum(tokenizer.word_counts.values())
print(max_id, dataset_size)

39 1115394


In [8]:
import numpy as np

In [9]:
# Encode the full corpus and shift the ids down by one; the rest of the notebook
# adds 1 again before decoding, so `encoded` holds zero-based class ids.
encoded = np.array(tokenizer.texts_to_sequences([shakespear_text])[0]) - 1

In [10]:
# Display the encoded corpus (zero-based character ids).
encoded

array([19,  5,  8, ..., 20, 26, 10])

# Creating training set

In [11]:
# Train on the first 10% of the characters only.
train_size = dataset_size // 10
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [12]:
# Number of characters kept for training.
print(train_size)

111539


In [13]:
# Decode a list of tokenizer ids back to text; the decoded characters come back
# separated by spaces.
tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])

['f i r s t']

# Windowing training dataset (truncated backpropagation through time)

In [14]:
n_steps = 100
# One extra step per window: the target is the input shifted one character ahead.
window_length = n_steps + 1
dataset = dataset.window(size=window_length, shift=1, drop_remainder=True)

## Converting nested dataset to dataset of tensors and batching them using flat_map()

In [15]:
# Each window is itself a dataset; batching it by its full length turns it into
# a single tensor, and flat_map flattens the nested dataset.
dataset = dataset.flat_map(lambda window_ds: window_ds.batch(window_length))

In [16]:
batch_size = 32
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(batch_size)
# Inputs are every step but the last; targets are the same windows shifted by one.
dataset = dataset.map(lambda batch: (batch[:, :-1], batch[:, 1:]))

In [17]:
# One-hot encode the input ids; the targets stay as integer class ids.
dataset = dataset.map(
    lambda inputs, targets: (tf.one_hot(inputs, depth=max_id), targets)
)

In [18]:
# Keep one batch prepared ahead of the training loop.
dataset = dataset.prefetch(buffer_size=1)

# Creating Model

In [19]:
# Two stacked GRU layers followed by a per-time-step softmax over the max_id characters.
model = keras.models.Sequential([
    keras.layers.GRU(
        128,
        return_sequences=True,
        input_shape=[None, max_id],
        dropout=0.2,
        recurrent_dropout=0.2,
    ),
    keras.layers.GRU(
        128,
        return_sequences=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    ),
    keras.layers.TimeDistributed(
        keras.layers.Dense(max_id, activation='softmax')
    ),
])

In [20]:
# Targets are integer class ids, hence the sparse categorical cross-entropy loss.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
history = model.fit(dataset, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Using model

In [21]:
def preprocess(text):
    """One-hot encode texts for the model.

    Args:
        text: a list of strings (it is passed straight to
            ``tokenizer.texts_to_sequences``).

    Returns:
        A tensor produced by ``tf.one_hot`` with depth ``max_id``, built from
        the tokenizer ids shifted down by one.

    NOTE(review): ``np.array`` over sequences of different lengths does not give
    a rectangular array -- presumably callers pass one text, or texts of equal
    length; confirm before batching several prompts.
    """
    token_ids = tokenizer.texts_to_sequences(text)
    zero_based_ids = np.array(token_ids) - 1
    return tf.one_hot(zero_based_ids, max_id)

In [37]:
# One-hot encode a sample prompt (a batch containing a single text).
X_new = preprocess(["How are yo"])

In [38]:
# Run the model on the encoded prompt and inspect the shape of its output.
Y_pred_array = model.predict(X_new)
print(Y_pred_array.shape)

(1, 10, 39)


#### In the one-hot encoding scheme, characters are represented by the indices 0–38; add 1 to a predicted index to get back the tokenizer's character id

In [73]:
def predict_char(text):
    """Return the most likely character to follow ``text``.

    The prompt is one-hot encoded with ``preprocess``, the model is run on it,
    and the arg-max class of every time step is decoded back to text (after
    adding 1 to undo the shift applied in ``preprocess``). Only the last
    character of the decoded string is returned.
    """
    probabilities = model.predict(preprocess([text]))
    best_ids = np.argmax(probabilities, axis=2)
    decoded = tokenizer.sequences_to_texts(best_ids + 1)
    return decoded[0][-1]

In [74]:
# Predict the character that follows the prompt (arg-max choice).
predict_char("How are yo")

'u'

In [75]:
# Same check on another truncated word.
predict_char("Somethin")

'g'

In [77]:
# Truncated words: print the model's predicted next character for each.
tests = ["Dow", "Ol", "Trainin", "Difficul", "Har", "Dignit"]
for prompt in tests:
    print(predict_char(prompt))

n
d
g
a
d
y


### Generating texts from generated characters

In [79]:
# Extend the prompt one character at a time, always taking the arg-max prediction.
next_window = 10
initial_text = "somethin"
for _ in range(next_window):
    # Deliberately not named `next_char`: a function with that name is defined
    # further down, and re-running this cell would otherwise overwrite it with
    # a string.
    predicted_char = predict_char(initial_text)
    initial_text += predicted_char
print(initial_text)

something, whose p


In [80]:
def predict_further(input_text, window=10):
    """Extend ``input_text`` by ``window`` characters.

    Each step appends the character returned by ``predict_char`` for the text
    generated so far, and the extended text is fed back in for the next step.

    Returns:
        The original text followed by the ``window`` generated characters.
    """
    generated = input_text
    for _ in range(window):
        generated += predict_char(generated)
    return generated
predict_further("somethin")

'something, whose p'

In [81]:
# Extend another prompt by the default 10 characters.
predict_further("the ultim")

'the ultime to the p'

In [84]:
# Generate a longer continuation (40 characters).
predict_further("somethin", 40)

'something, whose parts and present\nto the people'

In [85]:
# Extend a short prompt by the default 10 characters.
predict_further("futu")

'futus:\ni have '

### Randomly picking next_char with tf.random.categorical()

In [88]:
def next_char(text, temperature=1):
    """Sample the next character after ``text`` from the model's predictions.

    The probabilities predicted for the last time step are converted to
    log-probabilities and divided by ``temperature`` before one class is drawn
    with ``tf.random.categorical``. The sampled class is shifted up by one and
    decoded with the tokenizer.

    Args:
        text: prompt string.
        temperature: divisor applied to the log-probabilities; must be non-zero.

    Returns:
        The decoded sampled character.
    """
    probabilities = model.predict(preprocess([text]))
    # Slice with -1: (not -1) to keep the 2-D shape tf.random.categorical expects.
    last_step_probs = probabilities[0, -1:, :]
    scaled_logits = tf.math.log(last_step_probs) / temperature
    sampled_id = tf.random.categorical(scaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(sampled_id.numpy())[0]

In [94]:
# Sample one next character with the default temperature.
next_char("giv")

'e'

In [95]:
def complete_text(text, n_chars=50, temperature=1):
    """Append ``n_chars`` sampled characters to ``text``.

    Each new character is drawn by ``next_char`` from the text generated so
    far, using the given ``temperature``.

    Returns:
        The prompt followed by the sampled characters.
    """
    completed = text
    for _ in range(n_chars):
        completed += next_char(completed, temperature)
    return completed

In [96]:
# Sample a completion with temperature 0.2.
print(complete_text("t", temperature=0.2))

the man i am good master?

coriolanus:
i would he h


In [97]:
# Same prompt with the default temperature of 1.
print(complete_text("t"))

throke thee
since that may do that a friendn hath f


In [99]:
# Temperature 0.1, generating 51 characters.
print(complete_text("w", n_chars=51,temperature=0.1))

with thy hand: i have deserved thee here,
thou hast 


In [100]:
# Temperature 2 for comparison with the lower-temperature samples.
print(complete_text("w", temperature=2))

welt
beenefy! hilr! why hasham.

secood;
cike? no,?


In [102]:
# Complete a whole-word prompt with the default settings.
print(complete_text("wife"))

wife is past of them. now, that is the
truer i am marc


In [103]:
# Another whole-word prompt with the default settings.
print(complete_text("husband"))

husband mine with man:
worthy name. sir.

sicinius:
there


# Stateful RNN

## Simpler way to batch: create batches that each contain a single window

In [104]:
# Consecutive, non-overlapping windows (shift=n_steps) in their original order,
# one window per batch.
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset = dataset.window(window_length, shift=n_steps, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_length))
dataset = dataset.batch(1)
# Inputs are every step but the last; targets are the same windows shifted by one.
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))
dataset = dataset.map(lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))
# prefetch() returns a new dataset, so the result must be assigned back;
# calling it without assignment leaves `dataset` un-prefetched.
dataset = dataset.prefetch(1)
dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, None, 39), dtype=tf.float32, name=None), TensorSpec(shape=(None, None), dtype=tf.int32, name=None))>

## Batching for a stateful RNN the proper way

In [109]:
batch_size = 32
# Cut the training ids into batch_size consecutive chunks.
encoded_parts = np.array_split(encoded[:train_size], batch_size)
print(len(encoded_parts))

32


In [110]:
# Build one windowed dataset per chunk of the corpus. Stacking the i-th window
# of every chunk gives batches in which row k always continues row k of the
# previous batch.
datasets = []
for encoded_part in encoded_parts:
    # Use the current chunk here, not the whole training slice; otherwise
    # every row of each batch would be the same sequence.
    part_dataset = tf.data.Dataset.from_tensor_slices(encoded_part)
    part_dataset = part_dataset.window(window_length, shift=n_steps, drop_remainder=True)
    part_dataset = part_dataset.flat_map(lambda window: window.batch(window_length))
    datasets.append(part_dataset)

dataset = tf.data.Dataset.zip(tuple(datasets)).map(lambda *windows: tf.stack(windows))
# Inputs are every step but the last; targets are the same windows shifted by one.
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))
dataset = dataset.map(lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))
# prefetch() returns a new dataset, so the result must be assigned back.
dataset = dataset.prefetch(1)
dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(32, None, 39), dtype=tf.float32, name=None), TensorSpec(shape=(32, None), dtype=tf.int32, name=None))>

# Creating model


In [111]:
# Stateful variant of the model: the batch size is fixed through batch_input_shape.
model = keras.models.Sequential([
    keras.layers.GRU(
        128,
        return_sequences=True,
        stateful=True,
        batch_input_shape=[batch_size, None, max_id],
        dropout=0.2,
        recurrent_dropout=0.2,
    ),
    # NOTE(review): unlike the first layer, this one sets no recurrent_dropout --
    # confirm whether that is intentional.
    keras.layers.GRU(
        128,
        return_sequences=True,
        stateful=True,
        dropout=0.2,
    ),
    keras.layers.TimeDistributed(
        keras.layers.Dense(max_id, activation='softmax')
    ),
])

### Callback to reset states

In [112]:
class ResetStatesCallback(keras.callbacks.Callback):
    """Callback that resets the model's recurrent states when an epoch begins.

    It is passed to ``model.fit`` for the stateful model so that each epoch
    starts from cleared states.
    """

    def on_epoch_begin(self, epoch, logs=None):
        """Reset the model states before the epoch starts.

        The hook must be spelled ``on_epoch_begin`` for Keras to invoke it, and
        the reset is a method on the attached model (``self.model``).
        """
        self.model.reset_states()

In [None]:
# Train the stateful model, passing the state-reset callback to fit().
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
history = model.fit(
    dataset,
    epochs=50,
    callbacks=[ResetStatesCallback()],
)