# Generating Shakespearean Text Using a Character RNN

### Creating the Training Dataset

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
if tf.config.list_physical_devices("GPU"):
  print("GPU connected successfully!")
else:
  print("No GPU detected")

GPU connected successfully!


In [3]:

shakespeare_url = "https://homl.info/shakespeare"
filepath =  tf.keras.utils.get_file("shakespeare.txt",shakespeare_url)

with open(filepath) as f:
    shakespeare_text = f.read()

In [4]:
print(shakespeare_text[:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [5]:
"".join(sorted(set(shakespeare_text.lower())))

"\n !$&',-.3:;?abcdefghijklmnopqrstuvwxyz"

In [6]:
text_vector_layer=tf.keras.layers.TextVectorization(split="character",
                                                   standardize="lower")
text_vector_layer.adapt([shakespeare_text])
encoded = text_vector_layer([shakespeare_text])[0]

In [7]:
print(text_vector_layer.get_vocabulary())

['', '[UNK]', np.str_(' '), np.str_('e'), np.str_('t'), np.str_('o'), np.str_('a'), np.str_('i'), np.str_('h'), np.str_('s'), np.str_('r'), np.str_('n'), np.str_('\n'), np.str_('l'), np.str_('d'), np.str_('u'), np.str_('m'), np.str_('y'), np.str_('w'), np.str_(','), np.str_('c'), np.str_('f'), np.str_('g'), np.str_('b'), np.str_('p'), np.str_(':'), np.str_('k'), np.str_('v'), np.str_('.'), np.str_("'"), np.str_(';'), np.str_('?'), np.str_('!'), np.str_('-'), np.str_('j'), np.str_('q'), np.str_('x'), np.str_('z'), np.str_('3'), np.str_('&'), np.str_('$')]


In [8]:
len(text_vector_layer.get_vocabulary())

41

In [9]:
len(encoded)

1115394

In [10]:
encoded

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([21,  7, 10, ..., 22, 28, 12])>

In [11]:
encoded-=2

In [12]:
encoded

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19,  5,  8, ..., 20, 26, 10])>

In [13]:
n_tokens = text_vector_layer.vocabulary_size()-2

In [14]:
n_tokens

39

In [15]:
class TimeSeriesDatasetBuilder:
    def __init__(self,series,window_size=56,target_columns=None,horizon=1,seq_to_seq=False,
                 batch_size=32,buffer_size=10_000,shuffle=True,seed=42):
        self.series=np.array(series)
        self.window_size=window_size
        self.target_columns=target_columns
        self.horizon=horizon
        self.batch_size=batch_size
        self.shuffle=shuffle
        self.seed=seed
        self.seq_to_seq=seq_to_seq
        self.buffer_size=buffer_size


    def _create_X_y(self):
        X,y=[],[]

        for i in range(len(self.series) - self.window_size - self.horizon + 1):
            window=self.series[i:i+self.window_size]

            if self.seq_to_seq:
              target_seq=[]
              for j in range(self.window_size):
                future=self.series[i + j + 1:i + j + 1 + self.horizon]
                if self.target_columns is not None:
                  future=future[:,self.target_columns]
                target_seq.append(future)
              # target_seq=np.array(target_seq)

              y.append(target_seq)
            else:
              future=self.series[i+self.window_size:i+self.window_size+self.horizon]
              if self.target_columns is not None:
                future=future[:,self.target_columns]
              if self.horizon==1:
                future=future[0]
              y.append(future)

            X.append(window)

        return np.array(X),np.array(y)

    def get_tf_dataset(self):
        X,y=self._create_X_y()
        ds=tf.data.Dataset.from_tensor_slices((X,y))
        if self.shuffle:
            ds=ds.shuffle(buffer_size=self.buffer_size,seed=self.seed)
        ds=ds.batch(self.batch_size)
        ds=ds.prefetch(tf.data.AUTOTUNE)
        return ds


In [16]:
window_size = 100
builder_train=TimeSeriesDatasetBuilder(
   series=encoded[:1_000_000],
   window_size=window_size,
   batch_size=32,
   buffer_size=100_000,
   seq_to_seq=True
)
builder_valid=TimeSeriesDatasetBuilder(
   series=encoded[1_000_000:1_060_000],
   window_size=window_size,
   batch_size=32,
   buffer_size=10_000,
   seq_to_seq=True
)
builder_test=TimeSeriesDatasetBuilder(
   series=encoded[1_060_000:],
   window_size=window_size,
   batch_size=32,
   buffer_size=10_000,
   seq_to_seq=True
)

In [None]:
train_set = builder_train.get_tf_dataset()
valid_set = builder_valid.get_tf_dataset()
test_set = builder_test.get_tf_dataset()

In [None]:

print(f"Vocabulary size: {n_tokens}")
print(f"Total encoded length: {len(encoded)}")
print(f"Training samples: ~{len(encoded[:1_000_000]) - window_size}")
print(f"Validation samples: ~{60_000 - window_size}")
print(f"Test samples: ~{len(encoded[1_060_000:]) - window_size}")

for batch_x, batch_y in train_set.take(1):
    print(f"Input batch shape: {batch_x.shape}")
    print(f"Target batch shape: {batch_y.shape}")
    print(f"Input example (first 10 chars): {batch_x[0][:10].numpy()}")
    print(f"Target example (first 10 predictions): {batch_y[0][:10].numpy().flatten()}")


## Building and Training the Char-RNN Model

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=n_tokens,output_dim=16),
    tf.keras.layers.GRU(128,return_sequences=True),
    tf.keras.layers.Dense(n_tokens,activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",optimizer="nadam",
              metrics=["accuracy"])
model_ckp = tf.keras.callbacks.ModelCheckpoint("my_shakespeare_model.keras",
                            monitor="val_accuracy",save_best_only=True)
history = model.fit(train_set,
                    epochs=10,
                    validation_data=(valid_set),
                     callbacks=[model_ckp])

Epoch 1/10
[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 8ms/step - accuracy: 0.5477 - loss: 1.4978 - val_accuracy: 0.5336 - val_loss: 1.6078
Epoch 2/10
[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 8ms/step - accuracy: 0.5979 - loss: 1.2897 - val_accuracy: 0.5391 - val_loss: 1.5758
Epoch 3/10
[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 8ms/step - accuracy: 0.6026 - loss: 1.2699 - val_accuracy: 0.5432 - val_loss: 1.5665
Epoch 4/10
[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 8ms/step - accuracy: 0.6053 - loss: 1.2598 - val_accuracy: 0.5440 - val_loss: 1.5662
Epoch 5/10
[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 8ms/step - accuracy: 0.6067 - loss: 1.2530 - val_accuracy: 0.5449 - val_loss: 1.5592
Epoch 6/10
[1m31247/31247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 8ms/step - accuracy: 0.6075 - loss: 1.2489 - val_accuracy: 0.5456 - val_loss:

In [None]:
shakespeare_model=tf.keras.Sequential([
    text_vector_layer,
    tf.keras.layers.Lambda(lambda x: x-2),  # no <PAD> or <UNK> tokens
    model
])

In [None]:
input_text=tf.constant(["To be or not to be"])
y_proba=shakespeare_model.predict(input_text,verbose=0)[0,-1] # prediction of last character i.e "b"
y_pred=tf.argmax(y_proba) # most probable character ID
text_vector_layer.get_vocabulary()[y_pred+2][0]

' '

## Generating Fake Shakespearean Text

In [None]:
log_probas = tf.math.log([[0.5,0.4,0.1]])
tf.random.categorical(log_probas,num_samples=8)

<tf.Tensor: shape=(1, 8), dtype=int64, numpy=array([[0, 0, 1, 1, 0, 2, 0, 0]])>

In [None]:
def next_char(text,temperature=1):
  y_proba=shakespeare_model.predict([text],verbose=0)[0,-1:]
  rescaled_logits=tf.math.log(y_proba)/temperature
  char_id=tf.random.categorical(rescaled_logits,num_samples=1)[0,0]
  return text_vector_layer.get_vocabulary()[char_id+2]

In [None]:
import time
import sys

def generate_text(text, n_chars=200, temperature=1, delay=0.05):
    print(text.numpy()[0].decode("utf-8"), end='', flush=True)  # print the initial prompt

    for _ in range(n_chars):
        char = next_char(text, temperature)
        text += char

        print(char, end='', flush=True)
        time.sleep(delay)  # typing effect



In [None]:
generate_text(input_text, n_chars=500, temperature=0.7, delay=0.0005)


To be or not to begin the prince,
to fear you but to make the thoral ever boy.

pompey:
sir, your life, and would not the duke:
the desire to make the world.

mariana:
he shall have an undone finded with our pretty house,
and then, let's as it is this delight,
and this mind cause to make your grace, if a lady.
the prison; for the devittes.

duke vincentio:
for the time; for i should die to a city.

angelo:
see, brother-book'd in one leave to see the view
by all actions will i unrion, friar, sir, i do not accompon

### Stateful RNN

In [37]:
def to_dataset_for_stateful_rnn(series,window_size):
  ds = tf.data.Dataset.from_tensor_slices(series)
  ds = ds.window(window_size+1,shift=window_size,drop_remainder=True)
  ds = ds.flat_map(lambda window: window.batch(window_size+1)).batch(1)
  return ds.map(lambda window:(window[:,:-1],window[:,1:])).prefetch(1)

In [38]:
stateful_train_set = to_dataset_for_stateful_rnn(encoded[:1_000_000],window_size)
stateful_valid_set = to_dataset_for_stateful_rnn(encoded[1_000_000:],window_size)

In [39]:
stateful_model = tf.keras.Sequential([
    tf.keras.layers.Input(batch_shape=[1, None], dtype=tf.int32),
    tf.keras.layers.Embedding(input_dim=n_tokens,output_dim=16,),
    tf.keras.layers.GRU(128,return_sequences=True,stateful=True),
    tf.keras.layers.Dense(n_tokens,activation="softmax")
])


In [42]:
class ResetStates(tf.keras.callbacks.Callback):
  def on_epoch_begin(self,epoch,logs):
    for layer in self.model.layers:
      if hasattr(layer,'reset_states'):
        layer.reset_states()

In [43]:

stateful_model.compile(loss="sparse_categorical_crossentropy",optimizer="nadam",
              metrics=["accuracy"])
model_ckp = tf.keras.callbacks.ModelCheckpoint("my_shakespeare_model.keras",
                            monitor="val_accuracy",save_best_only=True)
history = stateful_model.fit(stateful_train_set,
                    epochs=10,
                    validation_data=stateful_valid_set,
                     callbacks=[ResetStates(),model_ckp])

Epoch 1/10
   9997/Unknown [1m83s[0m 8ms/step - accuracy: 0.3899 - loss: 2.1045



[1m9999/9999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 9ms/step - accuracy: 0.3899 - loss: 2.1044 - val_accuracy: 0.4778 - val_loss: 1.7421
Epoch 2/10
[1m9999/9999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 14ms/step - accuracy: 0.5219 - loss: 1.5863 - val_accuracy: 0.5005 - val_loss: 1.6653
Epoch 3/10
[1m9999/9999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 14ms/step - accuracy: 0.5462 - loss: 1.4916 - val_accuracy: 0.5094 - val_loss: 1.6334
Epoch 4/10
[1m9999/9999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 14ms/step - accuracy: 0.5574 - loss: 1.4479 - val_accuracy: 0.5163 - val_loss: 1.6162
Epoch 5/10
[1m9999/9999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 14ms/step - accuracy: 0.5642 - loss: 1.4216 - val_accuracy: 0.5196 - val_loss: 1.6101
Epoch 6/10
[1m9999/9999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 14ms/step - accuracy: 0.5685 - los

In [63]:
stateless_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=n_tokens,output_dim=16),
    tf.keras.layers.GRU(128,return_sequences=True),
    tf.keras.layers.Dense(n_tokens,activation="softmax")
])

In [64]:
stateless_model.build(tf.TensorShape([None,None]))
stateless_model.set_weights(stateful_model.get_weights())

In [65]:
shakespeare_model=tf.keras.Sequential([
    text_vector_layer,
    tf.keras.layers.Lambda(lambda x: x-2),
    stateless_model
])

In [66]:
def next_char(text,temperature=1):
  y_proba=shakespeare_model.predict([text],verbose=0)[0,-1:]
  rescaled_logits=tf.math.log(y_proba)/temperature
  char_id=tf.random.categorical(rescaled_logits,num_samples=1)[0,0]
  return text_vector_layer.get_vocabulary()[char_id+2]

In [67]:
import time
import sys

def generate_text(text, n_chars=200, temperature=1, delay=0.05):
    print(text.numpy()[0].decode("utf-8"), end='', flush=True)  # print the initial prompt

    for _ in range(n_chars):
        char = next_char(text, temperature)
        text += char

        print(char, end='', flush=True)
        time.sleep(delay)  # typing effect


In [69]:
input_text=tf.constant(["To be or not to be"])
generate_text(input_text, n_chars=500, temperature=0.7, delay=0.0005)


To be or not to be
with suphock'd it not too more of a life,
i'll have you no lion him where is his field.

gremio:
har to good with her.

thire gentleman:
trumper, the was a blow you humy me for him:
that in my perform'd him and with me her,
if bad horse you understand with him with him
a wring in horters to know you and here:
what i have be have well good and humble us a combinds.

duke vincentio:
in incrits and babber within the hand,
to hear not the trunch a very father,
as the state, i will betim this tempes