<a href="https://colab.research.google.com/github/Abhiroop17/Text-Generator-using-Transformers/blob/main/Generating_Text_Using_a_Transformer_Decoder_Only_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Initial Setup**

In [1]:
import os
import warnings

# Quieten the notebook: lower TensorFlow's native log verbosity (set here,
# before TensorFlow is imported further down) and hide all Python warnings.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
warnings.simplefilter("ignore")

In [4]:
!pip install keras-nlp tensorflow-text

Collecting keras-nlp
  Downloading keras_nlp-0.15.1-py3-none-any.whl.metadata (6.7 kB)
Collecting tensorflow-text
  Downloading tensorflow_text-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Downloading keras_nlp-0.15.1-py3-none-any.whl (548 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m548.4/548.4 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorflow_text-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m92.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-text, keras-nlp
Successfully installed keras-nlp-0.15.1 tensorflow-text-2.17.0


In [5]:
import keras_nlp
import tensorflow as tf
from tensorflow import keras

In [6]:
# Expect "Num GPUs Available:  1" when a single GPU is attached.
gpu_devices = tf.config.list_physical_devices("GPU")
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


# **Hyperparameters**

In [7]:
# --- Data pipeline ---
BATCH_SIZE = 64  # text lines per batch
SEQ_LEN = 128  # token ids per sequence after tokenizing / packing
MIN_TRAINING_SEQ_LEN = 450  # lines must be longer than this (tf.strings.length) to be kept

# --- Model architecture ---
VOCAB_SIZE = 5000  # Limits parameters in model
EMBED_DIM = 256  # width of the token + position embeddings
NUM_LAYERS = 2  # number of stacked TransformerDecoder layers
NUM_HEADS = 3  # attention heads in each decoder layer
FEED_FORWARD_DIM = 256  # intermediate_dim of each decoder layer

# **Load the Data**

In [8]:
# Fetch the SimpleBooks archive (extract=True unpacks it after download);
# the text files are then read from the Keras datasets directory below.
keras.utils.get_file(
    extract=True,
    origin="https://storage.googleapis.com/asl-public/text/data/simplebooks.zip",
)
data_dir = os.path.expanduser("~/.keras/datasets/simplebooks/")


def _is_long_enough(line):
    """Return True for lines whose length exceeds MIN_TRAINING_SEQ_LEN."""
    return tf.strings.length(line) > MIN_TRAINING_SEQ_LEN


# simplebooks-92 train split: drop short lines, batch, then shuffle.
# NOTE(review): shuffle is applied after batch, so it reorders whole batches
# rather than individual lines.
train_lines = tf.data.TextLineDataset(data_dir + "simplebooks-92-raw/train.txt")
raw_train_ds = (
    train_lines.filter(_is_long_enough).batch(BATCH_SIZE).shuffle(buffer_size=256)
)

# simplebooks-92 validation split: same length filter, batched, not shuffled.
valid_lines = tf.data.TextLineDataset(data_dir + "simplebooks-92-raw/valid.txt")
raw_val_ds = valid_lines.filter(_is_long_enough).batch(BATCH_SIZE)

Downloading data from https://storage.googleapis.com/asl-public/text/data/simplebooks.zip
[1m282386239/282386239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


# **Train the Tokenizer**

In [10]:
# Learn a lowercased WordPiece vocabulary of VOCAB_SIZE entries from the
# training lines. The reserved tokens are listed with [PAD] first and [BOS]
# third.
print("Training the word piece tokenizer. This will take 5-10 mins...")
special_tokens = ["[PAD]", "[UNK]", "[BOS]"]
vocab = keras_nlp.tokenizers.compute_word_piece_vocabulary(
    raw_train_ds,
    lowercase=True,
    reserved_tokens=special_tokens,
    vocabulary_size=VOCAB_SIZE,
)
print("Training is complete!!")

Training the word piece tokenizer. This will take 5-10 mins...
Training is complete!!


# **Load Tokenizer**

In [11]:
# WordPiece tokenizer over the trained vocabulary: lowercases its input and
# is configured with a sequence length of SEQ_LEN.
tokenizer = keras_nlp.tokenizers.WordPieceTokenizer(
    lowercase=True,
    sequence_length=SEQ_LEN,
    vocabulary=vocab,
)

# **Tokenize Data**

In [12]:
# Packer layer that puts the [BOS] id at the start of each sequence and keeps
# the sequence length at SEQ_LEN.
bos_token_id = tokenizer.token_to_id("[BOS]")
start_packer = keras_nlp.layers.StartEndPacker(
    start_value=bos_token_id,
    sequence_length=SEQ_LEN,
)


def preprocess(inputs):
    """Turn a batch of raw text lines into (features, labels) token ids.

    The labels are the tokenized lines. The features are the same ids passed
    through `start_packer`, which puts [BOS] in front, so each feature
    position is paired with the token that follows it.
    """
    token_ids = tokenizer(inputs)
    return start_packer(token_ids), token_ids


# Tokenize both splits into (features, labels) pairs, running `preprocess`
# in parallel and prefetching the results.
train_ds, val_ds = (
    ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE)
    for ds in (raw_train_ds, raw_val_ds)
)

# **Build the model**

In [13]:
# Input: int32 token ids, any sequence length.
inputs = keras.layers.Input(shape=(None,), dtype=tf.int32)

# Token + position embeddings (mask_zero=True, i.e. id 0 is treated as padding).
embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(
    embedding_dim=EMBED_DIM,
    mask_zero=True,
    sequence_length=SEQ_LEN,
    vocabulary_size=VOCAB_SIZE,
)
hidden = embedding_layer(inputs)

# Stack NUM_LAYERS decoder blocks. Each one is called with a single argument,
# which skips cross-attention.
for _ in range(NUM_LAYERS):
    hidden = keras_nlp.layers.TransformerDecoder(
        intermediate_dim=FEED_FORWARD_DIM,
        num_heads=NUM_HEADS,
    )(hidden)

# Project every position onto the vocabulary; the outputs are raw logits.
outputs = keras.layers.Dense(VOCAB_SIZE)(hidden)
model = keras.Model(inputs=inputs, outputs=outputs)

# Loss and metric both take logits; perplexity is configured to mask token id 0.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
perplexity = keras_nlp.metrics.Perplexity(from_logits=True, mask_token_id=0)

# Compile with the Adam optimizer.
model.compile(loss=loss_fn, metrics=[perplexity], optimizer="adam")

In [14]:
# Show the summary of the model compiled in the previous cell.
model.summary()

# **Training**

In [15]:
# A single pass over the training set; increase EPOCHS for better results.
EPOCHS = 1
print("Training started, this could take 4-10 mins per epoch with a T4 GPU...")
model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    verbose=2,  # one summary line per epoch
)
print("Training is complete!!")

Training started, this could take 4-10 mins per epoch with a T4 GPU...
3169/3169 - 167s - 53ms/step - loss: 4.4790 - perplexity: 88.4934 - val_loss: 4.1088 - val_perplexity: 61.3523
Training is complete!!


# **Inference**

In [16]:
# Tokenize an empty string and let the packer layer add the [BOS] token for
# us: the prompt is [BOS] followed by padding.
empty_prompt = tokenizer([""])
prompt_tokens = start_packer(empty_prompt)
prompt_tokens

<tf.Tensor: shape=(1, 128), dtype=int32, numpy=
array([[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)>

In [17]:
def next(prompt, cache, index):
    """Sampler callback returning the logits for the token at `index`.

    Runs the full model on `prompt` and selects the logits at position
    `index - 1`. `cache` is handed back untouched, and no hidden states are
    returned (they are only needed for contrastive search).

    Note: this name shadows the built-in `next`; it is kept because the
    sampler cells pass it as `next=next`.
    """
    all_logits = model(prompt)
    step_logits = all_logits[:, index - 1, :]
    return step_logits, None, cache

# **Greedy search**

In [18]:
# Greedy sampler; index=1 starts sampling immediately after the [BOS] token.
sampler = keras_nlp.samplers.GreedySampler()
output_tokens = sampler(prompt=prompt_tokens, next=next, index=1)
txt = tokenizer.detokenize(output_tokens)
print(f"Greedy search generated text: \n{txt}\n")

Greedy search generated text: 
['[BOS] " i \' m going to do , " said the doctor , " i \' ll be glad to get you , " she said . " i \' ll have to go to the house , and i \' ll be glad to get you . i \' ll be glad to get you \' ll be careful to get the house . i \' ll go to the house , and i \' ll go to the house . i \' ll be glad to get you \' ll be careful to get the house . i \' ll go to the house , and i \' ll go to the house . i \' ll go to the house , and i \'']



# **Beam search**

In [19]:
# Beam sampler with 10 beams, generating from position 1 (right after [BOS]).
sampler = keras_nlp.samplers.BeamSampler(num_beams=10)
output_tokens = sampler(prompt=prompt_tokens, next=next, index=1)
txt = tokenizer.detokenize(output_tokens)
print(f"Beam search generated text: \n{txt}\n")

Beam search generated text: 
['[BOS] " i don \' t know what i \' ll do , " he said . " i don \' t know what i \' ll do . i \' m going to do it . i \' m goin \' to do it . i don \' t think i \' m going to do it . i don \' t think i \' m going to do it . i don \' t think i \' m going to do it . i \' m sure i \' m going to do it . i don \' t think i \' m going to do it . i \' m goin \' n \' t wantin \' em . i don \' t want']




# **Random search**

In [21]:
# Random sampler, generating from position 1 (right after [BOS]).
sampler = keras_nlp.samplers.RandomSampler()
output_tokens = sampler(prompt=prompt_tokens, next=next, index=1)
txt = tokenizer.detokenize(output_tokens)
print(f"Random search generated text: \n{txt}\n")

Random search generated text: 
["[BOS] and now i ' m gladys ourselves up stow shore will be back again a few weeks ago , i at present at anchors , as i say , they fought many trips managed to untrobek . if there is pretty likely to be chilltrons for the six o ' clock , for these two weeks fall asleep . they are all four brothers , except that they never have married to take care of them ; and the when become the first getty , curling about it , and where they don ' t be smarting at night . now i expect you to be up the same person in the"]



# **Top-K search**

In [22]:
# Top-K sampler with k=10, generating from position 1 (right after [BOS]).
sampler = keras_nlp.samplers.TopKSampler(k=10)
output_tokens = sampler(prompt=prompt_tokens, next=next, index=1)
txt = tokenizer.detokenize(output_tokens)
print(f"Top-K search generated text: \n{txt}\n")

Top-K search generated text: 
['[BOS] after this he began , and he did not know the truth . he had told the truth , and had said to it . but the little boy had been the first , that was the first of all kinds , and had been told them , he had no difficulty in making himself , and was not quite sure that he should be glad to see the peasant \' s story . so the story he said to the " image , " i \' m going to be sure that you are not to be able to tell him , but that \' s was not only a single word ; that he could not tell him how he']



# **Using callbacks for text generation**

In [20]:
class TopKTextGenerator(keras.callbacks.Callback):
    """A callback to generate text from a trained model using top-k.

    At the end of every epoch it runs a top-k sampler with the notebook-level
    `next` function and `prompt_tokens`, detokenizes the result with
    `tokenizer`, and prints it.

    Args:
        k: value passed to `keras_nlp.samplers.TopKSampler`.
    """

    def __init__(self, k):
        # Initialise the base Callback first so the attributes it manages
        # exist on this instance, not only the ones set here.
        super().__init__()
        self.sampler = keras_nlp.samplers.TopKSampler(k)

    def on_epoch_end(self, epoch, logs=None):
        """Generate one sample and print it; `epoch` and `logs` are unused."""
        output_tokens = self.sampler(
            next=next,
            prompt=prompt_tokens,
            index=1,  # Start sampling immediately after the [BOS] token.
        )
        txt = tokenizer.detokenize(output_tokens)
        print(f"Top-K search generated text: \n{txt}\n")


# Dummy training loop to demonstrate callback: two epochs over a single batch.
text_generation_callback = TopKTextGenerator(k=10)
single_batch_ds = train_ds.take(1)
model.fit(
    single_batch_ds,
    epochs=2,
    verbose=2,
    callbacks=[text_generation_callback],
)

Epoch 1/2
Top-K search generated text: 
["[BOS] there was little enough for this one that , and there wasn ' t a good deal more for them than it had ever been for a month . she was quite so much a bit afraid , that was the little boys in the same manner , that he had a nice little bit for them . and that it was just what they said , when they started out of them , he told them , and the children , who had told them that they were to be going to do it . they asked them to the house when they saw a housekeeper and the boys , for they were all in the way that they had no particular to go to"]

1/1 - 14s - 14s/step - loss: 4.0962 - perplexity: 60.2662
Epoch 2/2
Top-K search generated text: 
['[BOS] the next time the boys were on decked by a ship in the deck . the captain was interruptition ; and , in one of them had been placed in a boat that was not in a case of an officer , the ship was being of captain . he was at once the same time that he was not at the moment of an officer . but the 

<keras.src.callbacks.history.History at 0x7bc6b4b356c0>