Example text data (you can replace this with any larger corpus) text = """ Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her.

(i) Build the Transformer Model on above dataset

In [1]:
!pip install torchtext




In [2]:
!pip uninstall -y torch torchtext
!pip install torch==2.0.1 torchtext==0.15.2


Found existing installation: torch 2.0.1
Uninstalling torch-2.0.1:
  Successfully uninstalled torch-2.0.1
Found existing installation: torchtext 0.15.2
Uninstalling torchtext-0.15.2:
  Successfully uninstalled torchtext-0.15.2
Collecting torch==2.0.1
  Using cached torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchtext==0.15.2
  Using cached torchtext-0.15.2-cp310-cp310-manylinux1_x86_64.whl.metadata (7.4 kB)
Using cached torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
Using cached torchtext-0.15.2-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)
Installing collected packages: torch, torchtext
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.5.0+cu121 requires torch==2.5.0, but you have torch 2.0.1 which is incompatible.
torchvision 0.20.0+cu121 requires torch==2.5.0, but you have torch 2.0.1 which is incompatibl

In [3]:
from collections import Counter
import re

# Preprocess text by making lowercase and removing punctuation
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""
text = text.lower()
words = re.findall(r'\b\w+\b', text)

# Count word frequency
word_counts = Counter(words)

# Display the 10 most common words
print(word_counts.most_common(10))


[('her', 6), ('a', 4), ('to', 4), ('she', 2), ('grandmother', 2), ('who', 2), ('the', 2), ('woods', 2), ('once', 1), ('upon', 1)]


In [4]:
import spacy

# Load the pre-trained English model
nlp = spacy.load("en_core_web_sm")

# Process the text
doc = nlp(text)

# Display named entities
for entity in doc.ents:
    print(f"{entity.text} ({entity.label_})")


one day (DATE)


In [7]:
from textblob import TextBlob

# Create a TextBlob object
blob = TextBlob(text)

# Get sentiment polarity (-1 to 1, where 1 is very positive)
sentiment = blob.sentiment.polarity
print(f"Sentiment polarity: {sentiment}")


Sentiment polarity: -0.03749999999999998


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader, Dataset

# Sample dataset text
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""

# Parameters
embed_size = 64
num_heads = 2
hidden_size = 128
num_layers = 2
seq_length = 10  # sequence length for training
batch_size = 2
num_epochs = 10
learning_rate = 0.001

# Step 1: Data Preparation
tokenizer = get_tokenizer("basic_english")
tokens = tokenizer(text)
vocab = build_vocab_from_iterator([tokens], specials=["<pad>", "<sos>", "<eos>"])
vocab.set_default_index(vocab["<pad>"])

# Encode text to integers
encoded_text = [vocab["<sos>"]] + [vocab[token] for token in tokens] + [vocab["<eos>"]]

# Dataset and DataLoader
class TextDataset(Dataset):
    def __init__(self, encoded_text, seq_length):
        self.data = []
        for i in range(0, len(encoded_text) - seq_length):
            self.data.append((encoded_text[i:i+seq_length], encoded_text[i+1:i+1+seq_length]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx][0]), torch.tensor(self.data[idx][1])

dataset = TextDataset(encoded_text, seq_length)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Step 2: Transformer Model Design
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_size, num_layers):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            embed_size, num_heads, num_layers, num_layers, hidden_size
        )
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, x):
        batch_size, seq_length = x.size()
        # Create positional encoding dynamically based on input sequence length and batch size
        position = torch.arange(seq_length).unsqueeze(1).expand(seq_length, batch_size).to(x.device)
        position = self.embedding(position)  # (seq_length, batch_size, embed_size)

        x = self.embedding(x) + position.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        x = x.permute(1, 0, 2)  # (seq_length, batch_size, embed_size) for transformer input
        x = self.transformer(x, x)
        x = x.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        out = self.fc_out(x)
        return out


# Initialize model, loss, and optimizer
model = TransformerModel(len(vocab), embed_size, num_heads, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss(ignore_index=vocab["<pad>"])
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 3: Training the Model
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for src, tgt in loader:
        optimizer.zero_grad()
        output = model(src)
        loss = criterion(output.view(-1, len(vocab)), tgt.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(loader)}")

# Testing the Model (Generating a sequence)
model.eval()
start_token = torch.tensor([[vocab["<sos>"]]], dtype=torch.long)
generated_text = start_token

for _ in range(20):  # Generate 20 tokens
    with torch.no_grad():
        output = model(generated_text)
        next_token = output.argmax(dim=-1)[:, -1].unsqueeze(1)
        generated_text = torch.cat((generated_text, next_token), dim=1)

# Convert generated indices back to words
generated_words = [vocab.lookup_token(token.item()) for token in generated_text.squeeze()]
print("Generated text:", " ".join(generated_words))


Epoch 1/10, Loss: 3.6525793501308987
Epoch 2/10, Loss: 3.073973459856851
Epoch 3/10, Loss: 2.378192288534982
Epoch 4/10, Loss: 1.7735906158174788
Epoch 5/10, Loss: 1.3291738331317902
Epoch 6/10, Loss: 0.9994254857301712
Epoch 7/10, Loss: 0.7574767470359802
Epoch 8/10, Loss: 0.5657120038356099
Epoch 9/10, Loss: 0.4475465363689831
Epoch 10/10, Loss: 0.3877304904162884
Generated text: <sos> wanted wanted to to her on her to her grandmother woods her . her wolf who bad , her named


(ii) Train the model using 20, 60, 70 epochs

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader, Dataset

# Sample dataset text
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""

# Parameters
embed_size = 64
num_heads = 2
hidden_size = 128
num_layers = 2
seq_length = 10  # sequence length for training
batch_size = 2
learning_rate = 0.001

# Step 1: Data Preparation
tokenizer = get_tokenizer("basic_english")
tokens = tokenizer(text)
vocab = build_vocab_from_iterator([tokens], specials=["<pad>", "<sos>", "<eos>"])
vocab.set_default_index(vocab["<pad>"])

# Encode text to integers
encoded_text = [vocab["<sos>"]] + [vocab[token] for token in tokens] + [vocab["<eos>"]]

# Dataset and DataLoader
class TextDataset(Dataset):
    def __init__(self, encoded_text, seq_length):
        self.data = []
        for i in range(0, len(encoded_text) - seq_length):
            self.data.append((encoded_text[i:i+seq_length], encoded_text[i+1:i+1+seq_length]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx][0]), torch.tensor(self.data[idx][1])

dataset = TextDataset(encoded_text, seq_length)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Step 2: Transformer Model Design
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_size, num_layers):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            embed_size, num_heads, num_layers, num_layers, hidden_size
        )
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, x):
        batch_size, seq_length = x.size()
        # Create positional encoding dynamically based on input sequence length and batch size
        position = torch.arange(seq_length).unsqueeze(1).expand(seq_length, batch_size).to(x.device)
        position = self.embedding(position)  # (seq_length, batch_size, embed_size)

        x = self.embedding(x) + position.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        x = x.permute(1, 0, 2)  # (seq_length, batch_size, embed_size) for transformer input
        x = self.transformer(x, x)
        x = x.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        out = self.fc_out(x)
        return out

# Function to train the model
def train_model(model, loader, num_epochs):
    criterion = nn.CrossEntropyLoss(ignore_index=vocab["<pad>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for src, tgt in loader:
            optimizer.zero_grad()
            output = model(src)
            loss = criterion(output.view(-1, len(vocab)), tgt.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(loader)}")

# Initialize model
model = TransformerModel(len(vocab), embed_size, num_heads, hidden_size, num_layers)

# Step 3: Train for 20, 60, and 70 epochs
print("Training for 20 epochs:")
train_model(model, loader, num_epochs=20)

print("\nTraining for 60 epochs:")
train_model(model, loader, num_epochs=60)

print("\nTraining for 70 epochs:")
train_model(model, loader, num_epochs=70)

# Testing the Model (Generating a sequence)
model.eval()
start_token = torch.tensor([[vocab["<sos>"]]], dtype=torch.long)
generated_text = start_token

for _ in range(20):  # Generate 20 tokens
    with torch.no_grad():
        output = model(generated_text)
        next_token = output.argmax(dim=-1)[:, -1].unsqueeze(1)
        generated_text = torch.cat((generated_text, next_token), dim=1)

# Convert generated indices back to words
generated_words = [vocab.lookup_token(token.item()) for token in generated_text.squeeze()]
print("Generated text:", " ".join(generated_words))


Training for 20 epochs:
Epoch 1/20, Loss: 3.537820041179657
Epoch 2/20, Loss: 2.786639698914119
Epoch 3/20, Loss: 2.149489883865629
Epoch 4/20, Loss: 1.573677999632699
Epoch 5/20, Loss: 1.1476625459534782
Epoch 6/20, Loss: 0.8520970003945487
Epoch 7/20, Loss: 0.6490336369190898
Epoch 8/20, Loss: 0.5016217093382563
Epoch 9/20, Loss: 0.4063738211989403
Epoch 10/20, Loss: 0.32050611451268196
Epoch 11/20, Loss: 0.2712828123143741
Epoch 12/20, Loss: 0.23149434556918486
Epoch 13/20, Loss: 0.1911866326949426
Epoch 14/20, Loss: 0.1644972094467708
Epoch 15/20, Loss: 0.15363820189876215
Epoch 16/20, Loss: 0.13111823770616735
Epoch 17/20, Loss: 0.1150105729965227
Epoch 18/20, Loss: 0.09300358859556061
Epoch 19/20, Loss: 0.08949809002556972
Epoch 20/20, Loss: 0.07557534852198192

Training for 60 epochs:
Epoch 1/60, Loss: 0.10959914858852114
Epoch 2/60, Loss: 0.0807519799896649
Epoch 3/60, Loss: 0.08737738942727447
Epoch 4/60, Loss: 0.10958111179726464
Epoch 5/60, Loss: 0.11494214300598417
Epoch 6/

(iii) After training, use the model to generate new text by feeding it an initial seed text

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader, Dataset

# Sample dataset text
text = """Once upon a time, there was a little girl named Red Riding Hood. She loved to visit her grandmother, who lived in the woods. One day, her mother asked her to take a basket of goodies to her grandmother. On her way through the woods, she met a big bad wolf who wanted to eat her."""

# Parameters
embed_size = 64
num_heads = 2
hidden_size = 128
num_layers = 2
seq_length = 10  # sequence length for training
batch_size = 2
learning_rate = 0.001

# Step 1: Data Preparation
tokenizer = get_tokenizer("basic_english")
tokens = tokenizer(text)
vocab = build_vocab_from_iterator([tokens], specials=["<pad>", "<sos>", "<eos>"])
vocab.set_default_index(vocab["<pad>"])

# Encode text to integers
encoded_text = [vocab["<sos>"]] + [vocab[token] for token in tokens] + [vocab["<eos>"]]

# Dataset and DataLoader
class TextDataset(Dataset):
    def __init__(self, encoded_text, seq_length):
        self.data = []
        for i in range(0, len(encoded_text) - seq_length):
            self.data.append((encoded_text[i:i+seq_length], encoded_text[i+1:i+1+seq_length]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx][0]), torch.tensor(self.data[idx][1])

dataset = TextDataset(encoded_text, seq_length)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Step 2: Transformer Model Design
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_size, num_layers):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            embed_size, num_heads, num_layers, num_layers, hidden_size
        )
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, x):
        batch_size, seq_length = x.size()
        position = torch.arange(seq_length).unsqueeze(1).expand(seq_length, batch_size).to(x.device)
        position = self.embedding(position)  # (seq_length, batch_size, embed_size)

        x = self.embedding(x) + position.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        x = x.permute(1, 0, 2)  # (seq_length, batch_size, embed_size) for transformer input
        x = self.transformer(x, x)
        x = x.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        out = self.fc_out(x)
        return out

# Function to train the model
def train_model(model, loader, num_epochs):
    criterion = nn.CrossEntropyLoss(ignore_index=vocab["<pad>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for src, tgt in loader:
            optimizer.zero_grad()
            output = model(src)
            loss = criterion(output.view(-1, len(vocab)), tgt.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(loader)}")

# Initialize model
model = TransformerModel(len(vocab), embed_size, num_heads, hidden_size, num_layers)

# Step 3: Train for 20, 60, and 70 epochs
print("Training for 20 epochs:")
train_model(model, loader, num_epochs=20)

print("\nTraining for 60 epochs:")
train_model(model, loader, num_epochs=60)

print("\nTraining for 70 epochs:")
train_model(model, loader, num_epochs=70)

# Function to generate text given a seed
def generate_text(model, seed_text, max_length=20):
    model.eval()

    # Tokenize and encode the seed text
    seed_tokens = tokenizer(seed_text)
    seed_indices = [vocab["<sos>"]] + [vocab[token] for token in seed_tokens]
    input_tensor = torch.tensor(seed_indices).unsqueeze(0)  # (1, seq_length + 1)

    generated_text = input_tensor

    for _ in range(max_length):
        with torch.no_grad():
            output = model(generated_text)
            next_token = output.argmax(dim=-1)[:, -1].unsqueeze(1)  # Get last token prediction
            generated_text = torch.cat((generated_text, next_token), dim=1)  # Append predicted token

    # Convert generated indices back to words
    generated_words = [vocab.lookup_token(token.item()) for token in generated_text.squeeze()]
    return " ".join(generated_words)

# Example usage of the generate_text function
seed_input = "Once upon a time"
generated_output = generate_text(model, seed_input)
print("\nGenerated text from seed input:")
print(generated_output)


Training for 20 epochs:
Epoch 1/20, Loss: 3.6034443037850514
Epoch 2/20, Loss: 2.968920741762434
Epoch 3/20, Loss: 2.2996141484805515
Epoch 4/20, Loss: 1.713986839566912
Epoch 5/20, Loss: 1.2770515148128783
Epoch 6/20, Loss: 0.9161859729460308
Epoch 7/20, Loss: 0.7003728513206754
Epoch 8/20, Loss: 0.5369263525520053
Epoch 9/20, Loss: 0.43426841497421265
Epoch 10/20, Loss: 0.3448526268558843
Epoch 11/20, Loss: 0.283246819462095
Epoch 12/20, Loss: 0.2381653248199395
Epoch 13/20, Loss: 0.202905303931662
Epoch 14/20, Loss: 0.18809998274913856
Epoch 15/20, Loss: 0.18738360357071673
Epoch 16/20, Loss: 0.16752104966768197
Epoch 17/20, Loss: 0.14597733997340714
Epoch 18/20, Loss: 0.10569844833974328
Epoch 19/20, Loss: 0.09927455921258245
Epoch 20/20, Loss: 0.07589996632720743

Training for 60 epochs:
Epoch 1/60, Loss: 0.10270796649690185
Epoch 2/60, Loss: 0.10325740404160959
Epoch 3/60, Loss: 0.11446882079222373
Epoch 4/60, Loss: 0.10268178582191467
Epoch 5/60, Loss: 0.07940549217164516
Epoch 

(iv) Experimenting and Improving the Model by large dataset and hyper tune parameter.

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader, Dataset

# Sample dataset as a string
sample_data = """Once upon a time, in a faraway land, there lived a young girl named Red Riding Hood.
She often visited her grandmother who lived deep in the woods.
One day, her mother asked her to take a basket of goodies to her grandmother.
As she walked through the forest, she encountered a big bad wolf.
The wolf had a cunning plan to trick her, but Red Riding Hood was clever.
She eventually reached her grandmother's house, where a surprise awaited her."""

# Load the sample data into a text variable
text = sample_data

# Parameters for experimentation
embed_size = 128  # Experiment with different sizes
num_heads = 4     # Experiment with different number of heads
hidden_size = 256 # Adjust hidden size
num_layers = 4    # Increase layers for deeper models
seq_length = 20   # Increase sequence length if necessary
batch_size = 16   # Experiment with different batch sizes
learning_rate = 0.0005  # Adjust learning rate
num_epochs = 20  # Set the number of epochs for training

# Data Preparation
tokenizer = get_tokenizer("basic_english")
tokens = tokenizer(text)
vocab = build_vocab_from_iterator([tokens], specials=["<pad>", "<sos>", "<eos>"])
vocab.set_default_index(vocab["<pad>"])

# Encode text to integers
encoded_text = [vocab["<sos>"]] + [vocab[token] for token in tokens] + [vocab["<eos>"]]

# Custom Dataset Class for Larger Dataset
class TextDataset(Dataset):
    def __init__(self, encoded_text, seq_length):
        self.data = []
        for i in range(0, len(encoded_text) - seq_length):
            self.data.append((encoded_text[i:i + seq_length], encoded_text[i + 1:i + 1 + seq_length]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx][0]), torch.tensor(self.data[idx][1])

dataset = TextDataset(encoded_text, seq_length)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_size, num_layers):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(embed_size, num_heads, num_layers, num_layers, hidden_size)
        self.fc_out = nn.Linear(embed_size, vocab_size)

    def forward(self, x):
        batch_size, seq_length = x.size()
        position = torch.arange(seq_length).unsqueeze(1).expand(seq_length, batch_size).to(x.device)
        position = self.embedding(position)  # (seq_length, batch_size, embed_size)

        x = self.embedding(x) + position.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        x = x.permute(1, 0, 2)  # (seq_length, batch_size, embed_size)
        x = self.transformer(x, x)
        x = x.permute(1, 0, 2)  # (batch_size, seq_length, embed_size)
        out = self.fc_out(x)
        return out

# Function to train the model with early stopping
def train_model(model, loader, num_epochs, patience=5):
    criterion = nn.CrossEntropyLoss(ignore_index=vocab["<pad>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    best_loss = float('inf')
    patience_counter = 0

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for src, tgt in loader:
            optimizer.zero_grad()
            output = model(src)
            loss = criterion(output.view(-1, len(vocab)), tgt.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(loader)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")

        # Early stopping logic
        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

# Initialize model
model = TransformerModel(len(vocab), embed_size, num_heads, hidden_size, num_layers)

# Train model
train_model(model, loader, num_epochs)

# Function to generate text given a seed
def generate_text(model, seed_text, max_length=20):
    model.eval()

    # Tokenize and encode the seed text
    seed_tokens = tokenizer(seed_text)
    seed_indices = [vocab["<sos>"]] + [vocab[token] for token in seed_tokens]
    input_tensor = torch.tensor(seed_indices).unsqueeze(0)  # (1, seq_length + 1)

    generated_text = input_tensor

    for _ in range(max_length):
        with torch.no_grad():
            output = model(generated_text)
            next_token = output.argmax(dim=-1)[:, -1].unsqueeze(1)  # Get last token prediction
            generated_text = torch.cat((generated_text, next_token), dim=1)  # Append predicted token

    # Convert generated indices back to words
    generated_words = [vocab.lookup_token(token.item()) for token in generated_text.squeeze()]
    return " ".join(generated_words)

# Example usage of the generate_text function
seed_input = "Once upon a time"
generated_output = generate_text(model, seed_input)
print("\nGenerated text from seed input:")
print(generated_output)


Epoch 1/20, Loss: 4.0168
Epoch 2/20, Loss: 3.7676
Epoch 3/20, Loss: 3.5672
Epoch 4/20, Loss: 3.3141
Epoch 5/20, Loss: 3.0366
Epoch 6/20, Loss: 2.7172
Epoch 7/20, Loss: 2.4321
Epoch 8/20, Loss: 2.1609
Epoch 9/20, Loss: 1.9021
Epoch 10/20, Loss: 1.6596
Epoch 11/20, Loss: 1.4840
Epoch 12/20, Loss: 1.3177
Epoch 13/20, Loss: 1.1662
Epoch 14/20, Loss: 1.0351
Epoch 15/20, Loss: 0.9366
Epoch 16/20, Loss: 0.8411
Epoch 17/20, Loss: 0.7627
Epoch 18/20, Loss: 0.6832
Epoch 19/20, Loss: 0.6418
Epoch 20/20, Loss: 0.5861

Generated text from seed input:
<sos> once upon a time , a faraway land , she encountered a faraway land , a faraway land , there lived wolf of bad
