# Assignment 4: Simple text generation model 
**Student Name:** Lakshit Gupta  
**Enrollment No:** E23CSEU0992  
**Date:** February 3, 2026

## Objective
The objective of this lab is to understand and implement a simple text generation model  

# Component–I: RNN / LSTM Based Text Generation

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


In [2]:
text = """
artificial intelligence is transforming modern society
it is used in healthcare finance education and transportation
machine learning allows systems to improve automatically with experience
data plays a critical role in training intelligent systems
large datasets help models learn complex patterns
deep learning uses multi layer neural networks
"""


In [3]:
words = text.lower().split()
vocab = sorted(set(words))

word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = {i: word for word, i in word_to_idx.items()}
vocab_size = len(vocab)


In [4]:
seq_length = 4
X, y = [], []

for i in range(len(words) - seq_length):
    X.append([word_to_idx[w] for w in words[i:i+seq_length]])
    y.append(word_to_idx[words[i+seq_length]])

X = torch.tensor(X)
y = torch.tensor(y)


In [5]:
class LSTMTextGen(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.embedding(x)
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        return self.fc(out)


In [6]:
model = LSTMTextGen(vocab_size, 64, 128)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 200
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 3.7803
Epoch 20, Loss: 2.9147
Epoch 40, Loss: 1.2441
Epoch 60, Loss: 0.2218
Epoch 80, Loss: 0.0627
Epoch 100, Loss: 0.0320
Epoch 120, Loss: 0.0211
Epoch 140, Loss: 0.0155
Epoch 160, Loss: 0.0120
Epoch 180, Loss: 0.0097


In [7]:
def generate_text(seed_text, num_words):
    model.eval()
    words_list = seed_text.lower().split()

    for _ in range(num_words):
        seq = torch.tensor([[word_to_idx[w] for w in words_list[-seq_length:]]])
        with torch.no_grad():
            output = model(seq)
        predicted = torch.argmax(output).item()
        words_list.append(idx_to_word[predicted])

    return " ".join(words_list)

print(generate_text("artificial intelligence is transforming", 8))


artificial intelligence is transforming modern society it is used in healthcare finance


# Component–II: Transformer Based Text Generation

In [8]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=500):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]


In [9]:
class TransformerTextGen(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.pos_encoder = PositionalEncoding(embed_size)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_size,
            nhead=num_heads,
            dim_feedforward=hidden_dim
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer(x)
        x = x.mean(dim=1)
        return self.fc(x)


In [10]:
transformer_model = TransformerTextGen(vocab_size, 64, 2, 128)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(transformer_model.parameters(), lr=0.001)

epochs = 200
for epoch in range(epochs):
    optimizer.zero_grad()
    output = transformer_model(X)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 3.8751


  self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)


Epoch 20, Loss: 2.7677
Epoch 40, Loss: 1.8223
Epoch 60, Loss: 1.1853
Epoch 80, Loss: 0.7303
Epoch 100, Loss: 0.4388
Epoch 120, Loss: 0.2622
Epoch 140, Loss: 0.1751
Epoch 160, Loss: 0.1169
Epoch 180, Loss: 0.0797


In [11]:
def generate_text_transformer(seed_text, num_words):
    transformer_model.eval()
    words_list = seed_text.lower().split()

    for _ in range(num_words):
        seq = torch.tensor([[word_to_idx[w] for w in words_list[-seq_length:]]])
        with torch.no_grad():
            output = transformer_model(seq)
        predicted = torch.argmax(output).item()
        words_list.append(idx_to_word[predicted])

    return " ".join(words_list)

print(generate_text_transformer("deep learning uses", 8))


deep learning uses multi layer neural networks networks deep networks deep
