#**Text Generation Model using Python**

##1.   Import Libraries and Load Data

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

# load the Tiny Shakespeare dataset
dataset, info = tfds.load('tiny_shakespeare', with_info=True, as_supervised=False)

Downloading and preparing dataset Unknown size (download: Unknown size, generated: 1.06 MiB, total: 1.06 MiB) to /root/tensorflow_datasets/tiny_shakespeare/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/1 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/tiny_shakespeare/incomplete.GG5TQF_1.0.0/tiny_shakespeare-train.tfrecord*.…

Generating validation examples...:   0%|          | 0/1 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/tiny_shakespeare/incomplete.GG5TQF_1.0.0/tiny_shakespeare-validation.tfrec…

Generating test examples...:   0%|          | 0/1 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/tiny_shakespeare/incomplete.GG5TQF_1.0.0/tiny_shakespeare-test.tfrecord*..…

Dataset tiny_shakespeare downloaded and prepared to /root/tensorflow_datasets/tiny_shakespeare/1.0.0. Subsequent calls will reuse this data.



##2. Text Preprocessing



In [2]:
# get the text from the dataset
text = next(iter(dataset['train']))['text'].numpy().decode('utf-8')

# create a mapping from unique characters to indices
vocab = sorted(set(text))
char2idx = {char: idx for idx, char in enumerate(vocab)}
idx2char = np.array(vocab)

# numerically represent the characters
text_as_int = np.array([char2idx[c] for c in text])

# create training examples and targets
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# create training sequences
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

##3. Input and Target Sequences



In [3]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

##4. Shuffling, Batching, and Prefetching

In [4]:
# batch size and buffer size
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

##5. Building the Model

In [5]:
# length of the vocabulary
vocab_size = len(vocab)

# the embedding dimension
embedding_dim = 256

# number of RNN units
rnn_units = 1024

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(batch_shape=(batch_size, None)),  # Specify batch size and sequence length
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)



##6. Compiling the Model

In [6]:
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

model.compile(optimizer='adam', loss=loss)

##7. Training the Model

In [7]:
import os

# directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'

# name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

# train the model
EPOCHS = 10
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 66ms/step - loss: 3.1423
Epoch 2/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 67ms/step - loss: 2.0985
Epoch 3/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 67ms/step - loss: 1.8039
Epoch 4/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 68ms/step - loss: 1.6457
Epoch 5/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 69ms/step - loss: 1.5474
Epoch 6/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 70ms/step - loss: 1.4804
Epoch 7/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 71ms/step - loss: 1.4316
Epoch 8/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 72ms/step - loss: 1.3940
Epoch 9/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 73ms/step - loss: 1.3633
Epoch 10/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13

In [10]:
import os

checkpoint_dir = './training_checkpoints'
print("Available checkpoints:")
print(os.listdir(checkpoint_dir))


Available checkpoints:
['ckpt_7.weights.h5', 'ckpt_3.weights.h5', 'ckpt_8.weights.h5', 'ckpt_1.weights.h5', 'ckpt_6.weights.h5', 'ckpt_5.weights.h5', 'ckpt_9.weights.h5', 'ckpt_2.weights.h5', 'ckpt_10.weights.h5', 'ckpt_4.weights.h5']


##8. Restoring the Model


In [11]:
# List of available checkpoints
checkpoint_files = [
    'ckpt_7.weights.h5', 'ckpt_3.weights.h5', 'ckpt_8.weights.h5', 'ckpt_1.weights.h5',
    'ckpt_6.weights.h5', 'ckpt_5.weights.h5', 'ckpt_9.weights.h5', 'ckpt_2.weights.h5',
    'ckpt_10.weights.h5', 'ckpt_4.weights.h5'
]

# Load the most recent checkpoint manually, e.g., 'ckpt_10.weights.h5'
checkpoint_file = os.path.join(checkpoint_dir, checkpoint_files[-1])

# Load the weights from the selected checkpoint
model.load_weights(checkpoint_file)
model.build(tf.TensorShape([1, None]))

print(f"Loaded weights from {checkpoint_file}.")


Loaded weights from ./training_checkpoints/ckpt_4.weights.h5.


##9. Generating Text


In [21]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    # Convert the start string to a list of integers using char2idx
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)  # Add batch dimension

    text_generated = []

    # Reset the states of the recurrent layers (only LSTM or GRU, for example)
    for layer in model.layers:
        if hasattr(layer, 'reset_states'):
            layer.reset_states()

    for i in range(num_generate):
        # Make predictions
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)  # Remove batch dimension

        # Apply temperature scaling
        predictions = predictions / temperature  # Control the randomness

        # Sample the predicted character ID
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Update the input for the next prediction
        input_eval = tf.expand_dims([predicted_id], 0)  # Prepare the next input batch

        # Append the predicted character to the output
        text_generated.append(idx2char[predicted_id])

    # Join the list of characters to form the final generated text
    return start_string + ''.join(text_generated)

# Example usage with lower temperature to reduce randomness
print(generate_text(model, start_string="QUEEN: So, lets end this", temperature=0.7))


QUEEN: So, lets end this cantass of with your leare; with else of my more
The setce from the contriest and with the tonguness; and sutching himself;
Be the pliete the mine of your but you name.

LUCIO:
If the with a king of the forth the cousin from the fest her both in me.

Plans:
Come, my bloody pates, I will sin the glay
And when you will soul manishaple sharth thee spare with his blows
Is till I well him and him, fake with chimplives:
And who heaven in the cridein words, good upon the hath must spoles, and must wastle.

CAMILLO:
With their with the take to hear eyes fither.

DUCHESS OF YORK:
There's never her with hims, allow the toon him thy charges the fiess withs,
And have be ploud with thee read on thy will what will steeplands,
Let unlike privise and sprine you and not another majest.

HENRY BOLINGBROKE:
Be ore foe now this right of strong?
I wished my lord.

RYORK:
Well such his wick both his reasue.

BUCKINGHAM:
Nay, that y with me denisbul is to well,
Brieves a this stir?

