<a href="https://colab.research.google.com/github/Dhanasree-Rajamani/SpecialTopics_DeepLearning/blob/main/Assignment%203/297_tensorflow_text_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Required Libraries

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

Hyperparameters

Defining hyperparameters like batch size, block size, learning rate, etc., that will govern the training and model architecture.

In [None]:
# Hyperparameters
# Batching: number of sequences per step and number of tokens per sequence.
batch_size, block_size = 16, 32
# Optimisation: total steps, how often the loss is reported, Adam step size.
max_iters, eval_interval = 5000, 100
learning_rate = 0.001
# Model size: embedding width, attention heads, number of Transformer blocks.
n_embd, n_head, n_layer = 64, 4, 4
# Dropout rate handed to the model (0.0 disables dropout).
dropout = 0.0

Data Loading

Loading text data, which is assumed to be a set of poems from the file 'poems_dataset.txt'.
This is not shakespere dataset, this is another custom dataset which consists of different poems of various genre

In [None]:
# Read the whole corpus into memory as one string. The encoding is given
# explicitly so decoding does not depend on the platform default.
with open('/content/poems_dataset.txt', 'r', encoding='utf-8') as f:
    text = f.read()

Data Tokenization

Creating a vocabulary by finding unique characters in the dataset. Mapping characters to unique integers for encoding and the reverse for decoding.

Encoding the Data

Converting the entire text into a sequence of integers.

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables: integer id -> character, and the inverse character -> id.
itos = dict(enumerate(chars))
stoi = {symbol: index for index, symbol in itos.items()}

# Encoding the data: one integer id per character of the corpus.
data = list(map(stoi.__getitem__, text))

Train-Validation Split

Splitting the data into a training set (90% of the data) and a validation set (the remaining 10%).

In [None]:
# Index of the first validation token: the first 90% of the ids are used for
# training, the remainder for validation.
n = int(len(data) * 0.9)

# Both splits are materialised as int32 tensors.
train_data_tensor, val_data_tensor = (
    tf.constant(split, dtype=tf.int32) for split in (data[:n], data[n:])
)

Batch Data Generator

Function get_batch randomly samples batches of data for training the model.

In [None]:
def get_batch(data_tensor, batch_size, block_size):
    """Sample a random batch of input/target windows from a 1-D token tensor.

    Args:
        data_tensor: 1-D tensor of token ids to sample from.
        batch_size: Number of windows to draw.
        block_size: Length of every window.

    Returns:
        A pair ``(x_batch, y_batch)``, each of shape
        ``(batch_size, block_size)``. ``y_batch`` is ``x_batch`` shifted one
        token to the right, i.e. the next-token target for every position.

    Raises:
        ValueError: If ``data_tensor`` does not contain more than
            ``block_size`` tokens, so no window plus its shifted target fits.
    """
    # Exclusive upper bound for a window start: the target window ends at
    # start + block_size, which must still be a valid index.
    max_start = len(data_tensor) - block_size
    if max_start <= 0:
        raise ValueError(
            f"data_tensor has {len(data_tensor)} tokens but block_size is "
            f"{block_size}; need at least block_size + 1 tokens"
        )

    start_indices = tf.random.uniform((batch_size,), 0, max_start, dtype=tf.int64)

    # Build the (batch_size, block_size) index grid once and gather both the
    # inputs and the shifted targets, instead of slicing window by window in
    # a Python loop.
    offsets = tf.range(block_size, dtype=tf.int64)
    window_indices = start_indices[:, tf.newaxis] + offsets[tf.newaxis, :]
    x_batch = tf.gather(data_tensor, window_indices)
    y_batch = tf.gather(data_tensor, window_indices + 1)
    return x_batch, y_batch

Defining Transformer Components

MultiHeadSelfAttention: This class defines the multi-head self-attention mechanism.

TransformerBlock: This class represents a block in the Transformer architecture which includes the attention mechanism and a feed-forward neural network.

Defining Transformer Components

MultiHeadSelfAttention: This class defines the multi-head self-attention mechanism.

TransformerBlock: This class represents a block in the Transformer architecture which includes the attention mechanism and a feed-forward neural network.

In [None]:
class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product attention with an optional causal mask.

    The inputs are split into ``heads`` slices of width
    ``embed_size // heads``. One bias-free Dense projection per role
    (values / keys / queries) is applied to the last axis of the split
    tensor, so its weights are shared by all heads.

    Args:
        embed_size: Width of the input and output embeddings.
        heads: Number of attention heads; must divide ``embed_size``.
        causal: When True (the default) a position may only attend to itself
            and to earlier positions. This is required for next-token
            language modelling; otherwise each position can read the very
            tokens it is trained to predict. Pass False for bidirectional
            attention.

    Raises:
        ValueError: If ``embed_size`` is not divisible by ``heads``.
    """

    def __init__(self, embed_size, heads, causal=True):
        super().__init__()
        if embed_size % heads != 0:
            raise ValueError(
                f"embed_size ({embed_size}) must be divisible by heads ({heads})"
            )
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads
        self.causal = causal

        self.values = layers.Dense(self.head_dim, use_bias=False)
        self.keys = layers.Dense(self.head_dim, use_bias=False)
        self.queries = layers.Dense(self.head_dim, use_bias=False)
        self.fc_out = layers.Dense(embed_size)

    def call(self, values, keys, query):
        """Attend from ``query`` over ``keys``/``values``.

        All three inputs have shape ``(batch, length, embed_size)``; the
        result has shape ``(batch, query_length, embed_size)``.
        """
        seq_length = query.shape[1]
        value_len, key_len = values.shape[1], keys.shape[1]

        # Split embedding into self.head pieces. The batch axis is inferred
        # (-1) so the layer does not depend on a statically known batch size.
        values = tf.reshape(values, (-1, value_len, self.heads, self.head_dim))
        keys = tf.reshape(keys, (-1, key_len, self.heads, self.head_dim))
        queries = tf.reshape(query, (-1, seq_length, self.heads, self.head_dim))

        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(queries)

        # Scaled dot-product attention scores, shape (batch, heads, q, k).
        attention = tf.einsum("nqhd,nkhd->nhqk", queries, keys)
        attention = attention / tf.math.sqrt(float(self.head_dim))

        if self.causal:
            # Lower-triangular matrix of ones: entry (q, k) is 1 when k <= q.
            visible = tf.linalg.band_part(
                tf.ones((seq_length, key_len), dtype=attention.dtype), -1, 0
            )
            # A large negative score drives the softmax weight of every
            # future position to (numerically) zero.
            attention = attention + (1.0 - visible) * -1e9

        attention = tf.nn.softmax(attention, axis=-1)

        # Weighted sum of values, then merge the heads back together.
        out = tf.einsum("nhql,nlhd->nqhd", attention, values)
        out = tf.reshape(out, (-1, seq_length, self.embed_size))
        out = self.fc_out(out)
        return out

class TransformerBlock(layers.Layer):
    """Attention sub-layer followed by a position-wise feed-forward sub-layer.

    Each sub-layer output is added to its input (residual connection), layer
    normalised, and then passed through dropout.

    Args:
        embed_size: Width of the embeddings flowing through the block.
        heads: Number of attention heads.
        dropout: Dropout rate applied after each normalisation.
        forward_expansion: Multiplier for the hidden width of the
            feed-forward network (hidden = forward_expansion * embed_size).
    """

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        self.attention = MultiHeadSelfAttention(embed_size, heads)
        self.norm1 = layers.LayerNormalization(epsilon=1e-6)
        self.norm2 = layers.LayerNormalization(epsilon=1e-6)

        # Two-layer MLP: expand with ReLU, then project back to embed_size.
        hidden_units = forward_expansion * embed_size
        expand = layers.Dense(hidden_units, activation="relu")
        project = layers.Dense(embed_size)
        self.feed_forward = keras.Sequential([expand, project])

        self.dropout = layers.Dropout(dropout)

    def call(self, value, key, query):
        """Run the block; the residual connection is taken from ``query``."""
        attended = self.attention(value, key, query)
        normed_attention = self.norm1(attended + query)
        x = self.dropout(normed_attention)

        transformed = self.feed_forward(x)
        normed_output = self.norm2(transformed + x)
        return self.dropout(normed_output)



Defining the Main Model

The BigramLanguageModel class represents the main model architecture comprising embedding layers, multiple Transformer blocks, and a final dense layer for predictions.

In [None]:
class BigramLanguageModel(keras.Model):
    """Character-level Transformer language model.

    Token ids are embedded, summed with a learned positional embedding,
    passed through a stack of ``TransformerBlock`` layers and projected to
    one logit per vocabulary entry.

    Args:
        vocab_size: Number of distinct tokens.
        embed_size: Width of the token and positional embeddings.
        heads: Number of attention heads per block.
        n_layers: Number of Transformer blocks.
        max_length: Longest sequence the positional embedding can represent.
        forward_expansion: Feed-forward width multiplier passed to every block.
        dropout: Dropout rate for the blocks and for the final dropout layer.
    """

    def __init__(self, vocab_size, embed_size, heads, n_layers, max_length, forward_expansion, dropout):
        super().__init__()
        self.max_length = max_length
        self.embedding = layers.Embedding(vocab_size, embed_size)
        self.positional_embedding = layers.Embedding(max_length, embed_size)
        self.transformer_blocks = [
            TransformerBlock(embed_size, heads, dropout, forward_expansion)
            for _ in range(n_layers)
        ]
        self.dropout = layers.Dropout(dropout)
        self.fc_out = layers.Dense(vocab_size)

    def call(self, x, training=None):
        """Return logits of shape ``(batch, seq_length, vocab_size)``.

        Args:
            x: Integer tensor of token ids with shape ``(batch, seq_length)``.
            training: Keras training flag. Exposing it in the signature lets
                an explicit ``model(x, training=True)`` reach the dropout
                layers; nested layers called without an explicit flag
                inherit it from this call.

        Raises:
            ValueError: If ``seq_length`` exceeds ``max_length``. Positions
                beyond the positional table have no embedding.
        """
        seq_length = x.shape[1]
        if seq_length is not None and seq_length > self.max_length:
            raise ValueError(
                f"sequence length {seq_length} exceeds max_length {self.max_length}"
            )

        positions = tf.range(start=0, limit=seq_length, delta=1)
        # The positional embedding is (seq_length, embed) and broadcasts
        # over the batch axis.
        out = self.embedding(x) + self.positional_embedding(positions)

        for block in self.transformer_blocks:
            out = block(out, out, out)

        out = self.dropout(out, training=training)
        out = self.fc_out(out)
        return out


Model Initialization and Training

Instantiating the model and training it using the Adam optimizer and a sparse categorical cross-entropy loss. During training, the model's loss is printed at regular intervals.

In [None]:
# Build the model. `forward_expansion` is a *multiplier*: TransformerBlock
# computes the feed-forward width as forward_expansion * embed_size, so 4
# yields the usual 4 * n_embd hidden units. Passing n_embd * 4 here would be
# multiplied by n_embd a second time.
model = BigramLanguageModel(
    vocab_size,
    n_embd,
    n_head,
    n_layer,
    block_size,
    forward_expansion=4,
    dropout=dropout
)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
# The model outputs raw logits, hence from_logits=True.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
generated_text = []  # not used by the training loop below
for iteration in range(max_iters):
    # One optimisation step on a freshly sampled random batch.
    x_batch, y_batch = get_batch(train_data_tensor, batch_size, block_size)
    with tf.GradientTape() as tape:
        # Request training-mode behaviour (e.g. dropout) for the forward pass.
        logits = model(x_batch, training=True)
        loss = loss_fn(y_batch, logits)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Report the loss of the current training batch every eval_interval steps.
    if iteration % eval_interval == 0:
        print(f"Iteration {iteration}, Loss: {loss.numpy()}")



Iteration 0, Loss: 4.8210062980651855
Iteration 100, Loss: 3.157886266708374
Iteration 200, Loss: 3.16868257522583
Iteration 300, Loss: 3.1558258533477783
Iteration 400, Loss: 3.1271755695343018
Iteration 500, Loss: 3.154634475708008
Iteration 600, Loss: 2.640578508377075
Iteration 700, Loss: 2.53776216506958
Iteration 800, Loss: 2.5078542232513428
Iteration 900, Loss: 2.3348848819732666
Iteration 1000, Loss: 2.2853941917419434
Iteration 1100, Loss: 2.428593158721924
Iteration 1200, Loss: 2.347982406616211
Iteration 1300, Loss: 2.3472790718078613
Iteration 1400, Loss: 2.2813467979431152
Iteration 1500, Loss: 2.3147552013397217
Iteration 1600, Loss: 2.3841500282287598
Iteration 1700, Loss: 2.22607421875
Iteration 1800, Loss: 2.177729606628418
Iteration 1900, Loss: 2.2903754711151123
Iteration 2000, Loss: 2.1901493072509766
Iteration 2100, Loss: 2.228200912475586
Iteration 2200, Loss: 2.1776833534240723
Iteration 2300, Loss: 2.1726367473602295
Iteration 2400, Loss: 2.0799922943115234
Ite

Text Generation

A function generate_text is provided to generate new text sequences using the trained model. Given a starting string, the model produces new text up to a specified length.

In [None]:
def generate_text(model, start_string, max_generate_length=2000, context_length=None):
    """Sample text from ``model`` one character at a time.

    At every step the model is fed the most recent tokens (the prompt plus
    everything generated so far, cropped to ``context_length``) and the next
    token is sampled from the logits of the *last* position.

    Args:
        model: Callable mapping an int tensor of shape ``(1, seq)`` to logits
            of shape ``(1, seq, vocab_size)``.
        start_string: Non-empty prompt; every character must be a key of the
            module-level ``stoi`` mapping.
        max_generate_length: Number of characters to generate.
        context_length: Maximum number of trailing tokens fed to the model.
            Defaults to the size of ``model.positional_embedding`` when the
            model has one; otherwise the context is not cropped.

    Returns:
        The generated characters as a string (the prompt is not included).

    Raises:
        ValueError: If ``start_string`` is empty.
        KeyError: If ``start_string`` contains a character missing from ``stoi``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")

    if context_length is None:
        positional = getattr(model, "positional_embedding", None)
        context_length = getattr(positional, "input_dim", None)

    # Running list of all token ids seen so far (prompt + generated).
    token_ids = [stoi[s] for s in start_string]
    generated_text = []

    for _ in range(max_generate_length):
        # Keep only as much history as the model can take.
        window = token_ids if context_length is None else token_ids[-context_length:]
        input_eval = tf.constant([window], dtype=tf.int32)

        logits = model(input_eval)
        # The logits at the last position are the prediction for the next
        # token; sample from them as a categorical distribution.
        predicted_id = int(
            tf.random.categorical(logits[:, -1, :], num_samples=1)[0, 0].numpy()
        )

        # Extend the context with the sampled token and record its character.
        token_ids.append(predicted_id)
        generated_text.append(itos[predicted_id])

    return ''.join(generated_text)

# Seed for generation. Every character of the seed is looked up in `stoi`,
# so it must only contain characters that occur in the training corpus.
start_string = " "  # You can use a space, or any other starting token
print(generate_text(model, start_string))


''aa
"aaa ------ aae âof'aaa--:-ae --aoeee-aea-----------aeeaaeai----- a:-a--âââo----eelaa-âââ'---------a---âaae---aaa----------leaeea a f'aeegâo---------aiaea:i----!a--l'i ------f--ae a---aaea---l-----
."----Iâ '---aa-----leaaaeaaeaaeeaa -I----IâT--- - 'aaaeese-aa---ade '''aae---------------- --a aeai-- aa aaia--e-----ele------------- 'aeeaa---ae-f:--f''--aaaaaaa--SV---I--a--------aadaa--a---eaaaiaagââaeeeeaaaaaaae --'-------y-fe------- --"--------------o---------------------'r--------:--"-aa a ----aaia aeiaaaaeaiia-aaalaaeaaee-e--aia-aaei aaaa â-------------as-------------a
âaaeeeee'eed'ae'''ae-----eee ee-----f-- ---f eeeaeae---e aaeei------aaaa aaaita--"-aee,a aaeaae-------a -âoaeeaaae'---.SI''aas--------------ââade-âoee 'g'aeaaaa aaâaae--- -eaeaa---aee--a eeeeaaeegâââoaaeassa leeeaae -----I'----  ---ae
''-a-I--aaae,----ââââ)aaeee aeeaaa eaa----------ad'eeeâaaaa aeaae-----a--y-Iaeaaee--aa-Oaaaa----aaaaaeeeai--------OI--------a:aaae-----"---ee:'aaeaaegâ'' 'eaae ââleelei----LI--------