# Preparing the Dataset

In [None]:
import tensorflow as tf

# Download Shakespeare's works
shakespeare_url = "https://homl.info/shakespeare"
filepath = tf.keras.utils.get_file("shakespeare.txt", shakespeare_url)

# Read the file. The encoding is stated explicitly so that decoding does not
# depend on the platform's default locale encoding.
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

# Encode text as characters using TextVectorization: every character is one
# token, and the text is lowercased first.
text_vec_layer = tf.keras.layers.TextVectorization(
    split="character",
    standardize="lower"
)
text_vec_layer.adapt([shakespeare_text])

# Convert text to integer encoding (the whole corpus is a single "sentence",
# so take row 0 of the resulting batch)
encoded = text_vec_layer([shakespeare_text])[0]

# Adjust encoding to exclude padding and unknown tokens: shift every ID down
# by 2 so the first real character gets ID 0
encoded -= 2  # Remove <PAD> and <UNK>
n_tokens = text_vec_layer.vocabulary_size() - 2  # Number of unique tokens
dataset_size = len(encoded)  # Total characters in the text

# Function to convert text sequence into input-target pairs
def to_dataset(sequence, length, shuffle=False, seed=None, batch_size=32):
    """Turn a token sequence into batched (inputs, targets) training pairs.

    Slides a window of ``length + 1`` tokens over ``sequence`` one step at a
    time; within each window the first ``length`` tokens are the inputs and
    the last ``length`` tokens (the same window shifted by one) are the
    targets.

    Args:
        sequence: Sequence of encoded tokens to slice into windows.
        length: Number of tokens in each input (and target) sequence.
        shuffle: Whether to shuffle the windows before batching.
        seed: Seed passed to the shuffle step (only used when ``shuffle``).
        batch_size: Number of windows per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, targets)`` batches.
    """
    window_size = length + 1
    windows = (
        tf.data.Dataset.from_tensor_slices(sequence)
        .window(window_size, shift=1, drop_remainder=True)
        # Each window is itself a dataset; batching it by its full size
        # flattens it into a single tensor.
        .flat_map(lambda nested: nested.batch(window_size))
    )
    if shuffle:
        windows = windows.shuffle(buffer_size=100_000, seed=seed)

    def split_inputs_targets(batch):
        # Targets are the inputs shifted one step into the future.
        return batch[:, :-1], batch[:, 1:]

    return windows.batch(batch_size).map(split_inputs_targets).prefetch(1)

# Split dataset into training, validation, and test sets
# Number of characters in each input sequence
length = 100
tf.random.set_seed(42)

# Split points: tokens before train_end are used for training, those between
# train_end and valid_end for validation, and the remainder for testing.
train_end, valid_end = 1_000_000, 1_060_000

train_set = to_dataset(
    encoded[:train_end], length=length, shuffle=True, seed=42
)
valid_set = to_dataset(encoded[train_end:valid_end], length=length)
test_set = to_dataset(encoded[valid_end:], length=length)


Downloading data from https://homl.info/shakespeare
1115394/1115394 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


# Building and Training the Model

In [None]:
# Define the model
# Character-level model: embedding -> GRU -> per-step softmax over the vocabulary
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16))
# return_sequences=True so a prediction is produced at every time step
model.add(tf.keras.layers.GRU(128, return_sequences=True))
model.add(tf.keras.layers.Dense(n_tokens, activation="softmax"))

# Targets are integer token IDs, hence the sparse categorical loss
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)




In [None]:
# Train the model
# Checkpoint callback: the model is written to disk only when the monitored
# validation accuracy improves.
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    filepath="my_shakespeare_model.keras",
    save_best_only=True,
    monitor="val_accuracy",
)

# Train for 10 epochs, evaluating on the validation set after each epoch
history = model.fit(
    train_set, epochs=10, validation_data=valid_set, callbacks=[model_ckpt]
)


Epoch 1/10
  28790/Unknown 3912s 135ms/step - accuracy: 0.5420 - loss: 1.5190

# Wrapping the Model with Preprocessing


In [None]:
# Wrap preprocessing into the model
# End-to-end model: raw strings in, next-character probabilities out
shakespeare_model = tf.keras.Sequential()
shakespeare_model.add(text_vec_layer)  # characters -> integer IDs
# Apply the same "- 2" shift that was applied to the training data
shakespeare_model.add(tf.keras.layers.Lambda(lambda X: X - 2))
shakespeare_model.add(model)


# Generating Text

In [None]:
# Predict the next character
# The prompt is wrapped in tf.constant so the model receives a string tensor;
# a plain Python list of strings is not accepted as input data by newer Keras
# versions. [0, -1] selects the first (only) sample and its last time step.
y_proba = shakespeare_model.predict(tf.constant(["To be or not to b"]))[0, -1]
y_pred = tf.argmax(y_proba)
# Undo the "- 2" shift applied to the IDs before looking up the character
predicted_char = text_vec_layer.get_vocabulary()[y_pred + 2]

print(f"Predicted next character: {predicted_char}")


# Generating Fake Shakespearean Text
To generate Shakespearean-like text using a character-level RNN model, we can predict characters sequentially.

## Decoding Strategies
Greedy decoding: predict the most likely character at each step. This is simple, but it often leads to repetitive output.
### Random Sampling:
Use tf.random.categorical() to sample the next character based on probabilities. This adds diversity to the generated text.
Implementation
1. Random Sampling with Temperature
Adjusting the temperature controls the diversity of generated text:

Low temperature: Favors high-probability characters (precise, rigid text).
High temperature: Encourages diversity (creative, diverse text).

In [None]:
import tensorflow as tf

def next_char(text, temperature=1):
    """Sample the character that should follow ``text``.

    The model's probabilities for the last time step are converted to
    log-probabilities, divided by ``temperature`` and used as logits for
    random sampling.

    Args:
        text: The prompt string to continue.
        temperature: Positive scaling factor. Values below 1 favor the most
            likely characters; values above 1 flatten the distribution.

    Returns:
        The sampled vocabulary entry (a single character).

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        # Dividing by zero or a negative value would produce inf/NaN logits or
        # invert the distribution.
        raise ValueError(f"temperature must be > 0, got {temperature!r}")

    # Pass a string tensor: a plain Python list of strings is not accepted as
    # input data by newer Keras versions. verbose=0 avoids printing one
    # progress bar per generated character.
    y_proba = shakespeare_model.predict(tf.constant([text]), verbose=0)[0, -1:]
    rescaled_logits = tf.math.log(y_proba) / temperature  # Rescale logits by temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1)[0, 0]  # Sample next char
    # Undo the "- 2" shift applied to the IDs before looking up the character
    return text_vec_layer.get_vocabulary()[char_id + 2]


In [None]:
# Text Extension
def extend_text(text, n_chars=50, temperature=1):
    """Append ``n_chars`` sampled characters to ``text`` and return the result.

    Each new character is sampled with ``next_char`` from the text generated
    so far, using the given ``temperature``.
    """
    generated = text
    for _step in range(n_chars):
        following = next_char(generated, temperature)
        generated = generated + following
    return generated


In [None]:
tf.random.set_seed(42)

# Generate text with varying temperatures, from lowest to highest:
#   0.01 -> rigid, repetitive text
#   1    -> balanced, diverse text
#   100  -> random, chaotic text
for temperature in (0.01, 1, 100):
    print(extend_text("To be or not to be", temperature=temperature))


# Stateful RNNs

A stateful RNN preserves its hidden state across training batches, enabling it to learn longer-term dependencies.

## Data Preparation for Stateful RNNs
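Ensure that each input sequence in a batch starts exactly where the corresponding sequence in the previous batch left off. The simplest way to do this is to use consecutive, non-overlapping windows and a batch size of 1. Use a custom function to create the dataset.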

In [None]:
def to_dataset_for_stateful_rnn(sequence, length):
    """Build a dataset of consecutive windows for a stateful RNN.

    Windows of ``length + 1`` tokens are taken every ``length`` tokens, so
    each window begins on the last token of the previous one. Every batch
    holds exactly one window, split into inputs (all but the last token) and
    targets (all but the first token).

    Args:
        sequence: Sequence of encoded tokens to slice into windows.
        length: Number of tokens in each input (and target) sequence.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, targets)`` batches of size 1.
    """
    window_size = length + 1
    windows = tf.data.Dataset.from_tensor_slices(sequence).window(
        window_size, shift=length, drop_remainder=True
    )
    # Flatten each nested window dataset into a single tensor
    flat_windows = windows.flat_map(lambda nested: nested.batch(window_size))
    single_batches = flat_windows.batch(1)  # Batch size = 1

    def split_inputs_targets(batch):
        # Targets are the inputs shifted one step into the future.
        return batch[:, :-1], batch[:, 1:]

    return single_batches.map(split_inputs_targets).prefetch(1)

# Prepare datasets
# Same train/validation/test boundaries as before, expressed as
# (start, stop) slice bounds; None means "open-ended".
stateful_train_set, stateful_valid_set, stateful_test_set = (
    to_dataset_for_stateful_rnn(encoded[start:stop], length)
    for start, stop in (
        (None, 1_000_000),
        (1_000_000, 1_060_000),
        (1_060_000, None),
    )
)


# Stateful RNN Model
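Define the model and set `stateful=True` on the recurrent layer. A stateful RNN must know its batch size in advance, so the model's input is declared with a fixed batch size of 1.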

In [None]:
# Stateful character-level model. The fixed batch size (1) is declared with an
# explicit Input layer rather than the `batch_input_shape` layer argument,
# which newer Keras versions no longer accept on Embedding.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[None], batch_size=1),
    tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=16),
    # stateful=True: the GRU state at the end of one batch is the initial
    # state for the next batch
    tf.keras.layers.GRU(128, return_sequences=True, stateful=True),
    tf.keras.layers.Dense(n_tokens, activation="softmax")
])


# Reset States Callback

In [None]:
class ResetStatesCallback(tf.keras.callbacks.Callback):
    """Reset the recurrent state of the model at the start of every epoch.

    Uses the model-level ``reset_states()`` when the model provides it and
    otherwise resets each stateful layer individually, since not every Keras
    version exposes ``reset_states()`` on the model itself.
    """

    def on_epoch_begin(self, epoch, logs=None):
        reset_model_states = getattr(self.model, "reset_states", None)
        if callable(reset_model_states):
            reset_model_states()
            return

        # Fallback: reset every top-level layer that is marked stateful.
        for layer in self.model.layers:
            if not getattr(layer, "stateful", False):
                continue
            reset_layer_states = (
                getattr(layer, "reset_states", None)
                or getattr(layer, "reset_state", None)
            )
            if callable(reset_layer_states):
                reset_layer_states()


# Compile and Train

In [None]:
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])

# Use a dedicated checkpoint for the stateful model. Reusing the checkpoint
# callback from the earlier training run would overwrite that model's file and
# carry over its stored best validation accuracy.
stateful_model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    "my_stateful_shakespeare_model.keras",
    monitor="val_accuracy",
    save_best_only=True
)

history = model.fit(
    stateful_train_set,
    validation_data=stateful_valid_set,
    epochs=10,
    # Recurrent state is reset at the start of every epoch
    callbacks=[ResetStatesCallback(), stateful_model_ckpt]
)
