# Text Generation with TensorFlow: Building and Using a Shakespeare Model(RNN)

### Importing Necessary Modules



In [10]:
import pickle
import tensorflow as tf

### Downloading and Reading Shakespeare Text

This step involves downloading the complete works of Shakespeare from a given URL and reading the content into a string

In [2]:
shakespeare_url = "https://homl.info/shakespeare"  # shortcut URL
filepath = tf.keras.utils.get_file("shakespeare.txt", shakespeare_url)

# Read the content of the downloaded file into a string
with open(filepath) as f:
    shakespeare_text = f.read()

# Output the first 500 characters to verify the content
print(shakespeare_text[:500])

In [3]:
print(shakespeare_text[:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


### Text Vectorization

This step involves defining a TextVectorization layer in TensorFlow to process the Shakespeare text at the character level, adapting it to the dataset, and encoding the text.

In [25]:
text_vec_layer = tf.keras.layers.TextVectorization(split="character", standardize="lower")

# Adapt the TextVectorization layer to the Shakespeare text dataset to learn the vocabulary
text_vec_layer.adapt([shakespeare_text])

# Encode the entire Shakespeare text using the adapted TextVectorization layer
encoded = text_vec_layer([shakespeare_text])[0]

# Output the first 100 encoded characters to verify the encoding
print(encoded[:100])

tf.Tensor(
[21  7 10  9  4  2 20  7  4  7 37  3 11 25 12 23  3 21  5 10  3  2 18  3
  2 24 10  5 20  3  3 14  2  6 11 17  2 21 15 10  4  8  3 10 19  2  8  3
  6 10  2 16  3  2  9 24  3  6 26 28 12 12  6 13 13 25 12  9 24  3  6 26
 19  2  9 24  3  6 26 28 12 12 21  7 10  9  4  2 20  7  4  7 37  3 11 25
 12 17  5 15], shape=(100,), dtype=int64)


In [26]:
# Adjust the encoded text by subtracting 2 from each token to remove <PAD> and <UNK> tokens
encoded -= 2

# Calculate the number of distinct characters (tokens) in the dataset
n_tokens = text_vec_layer.vocabulary_size() - 2

# Determine the total number of characters in the dataset
dataset_size = len(encoded)

# Output the number of distinct characters and the total number of characters in the dataset
print(f"Number of distinct chars: {n_tokens}")
print(f"Total number of chars: {dataset_size}")

Number of distinct chars: 39
Total number of chars: 1115394


### Creating TensorFlow Dataset from Sequence

This function converts a given sequence into a TensorFlow dataset, creating windows of a specified length, optionally shuffling the dataset, and preparing input-target pairs for training.

In [27]:
def to_dataset(sequence, length, shuffle=False, seed=None, batch_size=32):
    """
    Convert a sequence into a TensorFlow dataset with specified window length, optional shuffling, and batching.
    
    Args:
        sequence (array-like): Input sequence to be converted into a dataset.
        length (int): Length of each window in the sequence.
        shuffle (bool): Whether to shuffle the dataset. Default is False.
        seed (int): Random seed for shuffling. Default is None.
        batch_size (int): Number of samples per batch. Default is 32.

    Returns:
        tf.data.Dataset: A TensorFlow dataset with input-target pairs for training.
    """
    
    # Convert the input sequence into a TensorFlow dataset
    ds = tf.data.Dataset.from_tensor_slices(sequence)
    
    # Create windows of the specified length
    ds = ds.window(length + 1, shift=1, drop_remainder=True)
    
    # Flatten the windows into batches
    ds = ds.flat_map(lambda window: window.batch(length + 1))
    
    # Optionally shuffle the dataset
    if shuffle:
        ds = ds.shuffle(buffer_size=100_000, seed=seed)
    
    # Batch the dataset
    ds = ds.batch(batch_size)
    
    # Split each window into input and target sequences
    def split_input_target(window):
        window = tf.expand_dims(window, axis=-1) if len(window.shape) == 1 else window
        return window[:, :-1], window[:, 1:]
    
    # Map the split function and prefetch the dataset
    return ds.map(split_input_target).prefetch(1)

In [28]:
# Example usage
sequence = encoded.numpy()  # Example sequence
length = 100  # Example length of each window
dataset = to_dataset(sequence, length, shuffle=True, seed=42, batch_size=32)

# Output the first batch to verify
for batch in dataset.take(1):
    input_seq, target_seq = batch
    print("Input sequence:", input_seq.numpy())
    print("Target sequence:", target_seq.numpy())

Input sequence: [[15  0  9 ... 11 11  0]
 [25  1  0 ...  3  9  7]
 [13  7 23 ... 17  0  4]
 ...
 [25  1  0 ...  6  1  0]
 [11 12  0 ... 21 13  2]
 [ 1  0  5 ...  5  2  6]]
Target sequence: [[ 0  9  3 ... 11  0  2]
 [ 1  0  3 ...  9  7  2]
 [ 7 23 10 ...  0  4  7]
 ...
 [ 1  0 21 ...  1  0 18]
 [12  0 15 ... 13  2  0]
 [ 0  5  2 ...  2  6  0]]


### Creating Training, Validation, and Test Datasets

In [8]:
length = 100
tf.random.set_seed(42)
train_set = to_dataset(encoded[:1_000_000], length=length, shuffle=True, seed=42)
valid_set = to_dataset(encoded[1_000_000:1_060_000], length=length)
test_set = to_dataset(encoded[1_060_000:], length=length)

for input_seq, target_seq in train_set.take(1):
    print(input_seq.shape, target_seq.shape)

(32, 100) (32, 100)


### Building and Training the Model

In [8]:
# Building and Training the Model

# Define the Sequential model
model = tf.keras.Sequential([
    # Embedding layer to convert token indices into dense vectors
    tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16),
    
    # GRU layer for sequence processing
    tf.keras.layers.GRU(128, return_sequences=True),
    
    # Dense layer with softmax activation to predict the next character
    tf.keras.layers.Dense(n_tokens, activation="softmax")
])

# Compile the model
model.compile(
    loss="sparse_categorical_crossentropy",  # Loss function for multi-class classification
    optimizer="nadam",  # Optimizer for gradient-based optimization
    metrics=["accuracy"]  # Evaluation metric
)

# Define a checkpoint callback to save the best model based on validation accuracy
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    "my_shakespeare_model.keras",  # File path to save the model
    monitor="val_accuracy",  # Metric to monitor for saving the best model
    save_best_only=True  # Save only the best model
)

# Train the model with training and validation datasets
history = model.fit(
    train_set,  # Training dataset
    validation_data=valid_set,  # Validation dataset
    epochs=10,  # Number of epochs
    callbacks=[model_ckpt]  # List of callbacks
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Defining the Full Shakespeare 

This step involves defining a full model that includes text vectorization, token index adjustment, and the previously defined model for making predictions.

In [29]:
shakespeare_model = tf.keras.Sequential([
    # TextVectorization layer to process and vectorize input text
    text_vec_layer,
    
    # Lambda layer to adjust token indices by subtracting 2 to account for <PAD> and <UNK> tokens
    tf.keras.layers.Lambda(lambda X: X - 2),
    
    # The previously defined model for predicting the next character
    model
])

# Output the model summary to verify the architecture
shakespeare_model.summary()

### Saving the Model

This step involves saving the trained model to a file using the pickle module for later use.

In [10]:
import pickle
pickle.dump(model,open("model.keras","wb"))

### Loading the Model

In [15]:
model = pickle.load(open("model.keras","rb"))

### Making Predictions with the Model

This step involves making predictions using the loaded model by providing an input text, obtaining the probabilities for the next character, and decoding the predicted character.

In [31]:
# Define input text as a TensorFlow constant
input_text = tf.constant(["To be or not to b"])

# Predict the next character probabilities using the model
y_proba = shakespeare_model.predict(input_text)[0, -1]

# Get the index of the predicted character
y_pred = tf.argmax(y_proba)

# Decode the predicted character from the vocabulary
predicted_char = text_vec_layer.get_vocabulary()[y_pred + 2]

# Output the predicted character
print("Predicted character:", predicted_char)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Predicted character: e


### Generating the Next Character

This step involves defining a function to generate the next character based on the input text and a specified temperature, which controls the randomness of predictions.

In [19]:
 def next_char(text, temperature=1):
        """
    Generate the next character given an input text and temperature.

    Args:
        text (str): Input text for which the next character is to be predicted.
        temperature (float): Sampling temperature to control randomness. Higher values increase randomness.

    Returns:
        str: The predicted next character.
    """
        input_text = tf.constant([text])
        y_proba = shakespeare_model.predict(input_text)[0, -1:]
        rescaled_logits = tf.math.log(y_proba) / temperature
        char_id = tf.random.categorical(rescaled_logits, num_samples=1)[0, 0]
        return text_vec_layer.get_vocabulary()[char_id + 2]

### Extending Text Generation

In [32]:
# Extending Text Generation

def extend_text(text, n_chars=50, temperature=1):
    """
    Extend the given text by generating a specified number of characters.

    Args:
        text (str): Initial text to extend.
        n_chars (int): Number of characters to generate.
        temperature (float): Sampling temperature to control randomness. Higher values increase randomness.

    Returns:
        str: Extended text with additional characters.
    """
    for _ in range(n_chars):
        # Generate the next character and append it to the text
        text += next_char(text, temperature)
    return text



In [36]:
# Generating Extended Text with Fixed Seed


tf.random.set_seed(42)

# Generate and print extended text with a low temperature for more deterministic results
extended_text = extend_text(" i love you", temperature=0.01)
print(extended_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16

In [37]:
tf.random.set_seed(42)
print(extend_text("i love you", temperature=1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16

In [35]:
tf.random.set_seed(42)
print(extend_text("i love you", temperature=1.5))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20