<a href="https://colab.research.google.com/github/Kamani-Shivani/NLP/blob/main/Assignment_8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Example text data (you can replace this with any larger corpus) text = """ Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her. [CO5]

(i) Build the Transformer Model on above dataset

In [11]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, MultiHeadAttention, LayerNormalization, Dense, Dropout
from tensorflow.keras import Sequential, Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Sample text data
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""

# Step 1: Tokenize and Prepare Data
vocab_size = 1000  # Adjust as needed
tokenizer = Tokenizer(num_words=vocab_size, oov_token="")
tokenizer.fit_on_texts([text])

# Convert text to sequences of tokens
sequences = tokenizer.texts_to_sequences([text])[0]

# Prepare input sequences and labels
input_sequences = []
for i in range(1, len(sequences)):
    input_sequences.append(sequences[:i+1])

# Pad sequences
max_sequence_len = max(len(x) for x in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

# Split into inputs and labels
X, y = input_sequences[:,:-1], input_sequences[:,-1]
y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)

(ii) Train the model using 20, 60, 70 epochs

In [12]:
# Step 2: Build the Transformer Model
class TransformerModel(Model):
    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim):
        super(TransformerModel, self).__init__()
        self.embedding = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = Sequential([Dense(ff_dim, activation="relu"), Dense(embed_dim)])
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.dropout1 = Dropout(0.1)
        self.dropout2 = Dropout(0.1)
        self.final_layer = Dense(vocab_size, activation="softmax")

    def call(self, inputs):
        x = self.embedding(inputs)
        attn_output = self.attention(x, x)
        x = self.layernorm1(x + self.dropout1(attn_output))
        ffn_output = self.dense_proj(x)
        x = self.layernorm2(x + self.dropout2(ffn_output))
        # Return the output for the last timestep only
        return self.final_layer(x[:, -1])

# Initialize the model with parameters
embed_dim = 128  # Embedding dimension
num_heads = 4    # Number of attention heads
ff_dim = 512     # Feed forward network dimension

transformer_model = TransformerModel(vocab_size, embed_dim, num_heads, ff_dim)
transformer_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

(iii) After training, use the model to generate new text by feeding it an initial seed text

In [13]:
# Step 3: Train the model with different epochs
epochs_list = [20,60,70]
for epochs in epochs_list:
    print(f"\nTraining model with {epochs} epochs:")
    transformer_model.fit(X, y, epochs=epochs, verbose=1)


Training model with 20 epochs:
Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 238ms/step - accuracy: 0.0000e+00 - loss: 6.9713
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 147ms/step - accuracy: 0.1663 - loss: 5.9343
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step - accuracy: 0.2167 - loss: 5.2229
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 183ms/step - accuracy: 0.2080 - loss: 4.7646
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 137ms/step - accuracy: 0.3466 - loss: 4.2715
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step - accuracy: 0.4748 - loss: 3.7517
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 136ms/step - accuracy: 0.7695 - loss: 3.2668
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 138ms/step - accuracy: 0.8267 - loss: 2.8777
Epoch 9/20
[1m2/2[

(iv) Experimenting and Improving the Model by large dataset and hyper tune parameter.

In [15]:
# Step 4: Text Generation from a Seed Text
def generate_text(model, tokenizer, seed_text, max_length=50):
    for _ in range(max_length):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        predicted_word_index = np.argmax(predicted, axis=-1).item()
        output_word = tokenizer.index_word.get(predicted_word_index, "")
        seed_text += " " + output_word
    return seed_text

# Generate text using a seed
seed_text = "Once upon a time"
print("\nGenerated text:", generate_text(transformer_model, tokenizer, seed_text))


Generated text: Once upon a time there was a little girl named red riding hood she loved to visit her grandmother who lived in the woods one day her mother asked her to take a basket of goodies to her grandmother on her way through the woods she met a big bad wolf who wanted to
