# Setup

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
# Every tensor and the model below are moved to this device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# Prepare Data

In [2]:
# Toy corpus: the model is trained to predict each next character of this word.
text = "hello"

# Vocabulary = the distinct characters in sorted order, with lookup tables
# in both directions (index -> character and character -> index).
chars = sorted(set(text))
idx_to_char = dict(enumerate(chars))
char_to_idx = {ch: i for i, ch in idx_to_char.items()}

# Next-character prediction: the input is the text without its last
# character and the target is the same text shifted left by one.
input_seq = list(map(char_to_idx.__getitem__, text[:-1]))   # "hell"
target_seq = list(map(char_to_idx.__getitem__, text[1:]))   # "ello"

# Input:  float tensor shaped (batch=1, seq_len, features=1) holding raw indices.
# Target: long tensor shaped (batch=1, seq_len) holding class indices.
input_tensor = torch.tensor(input_seq, dtype=torch.float32, device=device).reshape(1, -1, 1)
target_tensor = torch.tensor(target_seq, dtype=torch.long, device=device).unsqueeze(0)

# Number of distinct characters, i.e. the number of output classes.
vocab_size = len(chars)


# GRU Model

In [3]:
class GRUTextGen(nn.Module):
    """Single-layer GRU followed by a linear layer applied at every time step.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Width of the GRU hidden state (also stored on the
            instance as ``hidden_size``).
        output_size: Number of values produced per time step by the final
            linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: the GRU consumes and produces tensors laid out
        # as (batch, seq_len, features).
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        # Projects each hidden vector to ``output_size`` scores.
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, h=None):
        """Run the GRU over ``x`` and project every step's output.

        Args:
            x: Input tensor laid out as (batch, seq_len, input_size).
            h: Optional initial hidden state; ``None`` lets the GRU start
                from zeros.

        Returns:
            A pair ``(scores, hidden)``: the per-step linear outputs with
            layout (batch, seq_len, output_size), and the GRU's final hidden
            state, which can be passed back in as ``h`` to continue the
            sequence.
        """
        step_features, last_hidden = self.gru(x, h)
        return self.fc(step_features), last_hidden


# Train Model

In [4]:
# One scalar feature per time step in, one score per vocabulary entry out.
model = GRUTextGen(input_size=1, hidden_size=16, output_size=vocab_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-batch training on the single example sequence.
for epoch in range(300):
    # Clear gradients from the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass; the final hidden state is not needed while training.
    output, _ = model(input_tensor)

    # Flatten to (steps, vocab) scores against (steps,) class indices,
    # which is the layout CrossEntropyLoss expects.
    loss = criterion(output.reshape(-1, vocab_size), target_tensor.reshape(-1))

    loss.backward()
    optimizer.step()

    # Report progress every 50th epoch.
    if not epoch % 50:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 1.2858
Epoch 50, Loss: 0.0513
Epoch 100, Loss: 0.0036
Epoch 150, Loss: 0.0018
Epoch 200, Loss: 0.0011
Epoch 250, Loss: 0.0008


# Text Generation

In [5]:
def generate_text(model, start_char, length=5):
    """Greedily generate a string of characters from a trained model.

    Starting from ``start_char``, the model is fed one character index at a
    time (as a float tensor of shape (1, 1, 1)) together with the hidden
    state returned by the previous step. The highest-scoring index of each
    output becomes both the next generated character and the next input.

    Uses the notebook-level ``char_to_idx``, ``idx_to_char`` and ``device``.

    Args:
        model: Callable as ``model(x, hidden) -> (output, hidden)`` where
            ``output`` is indexed as (batch, seq, vocab). It is switched to
            eval mode and left in eval mode.
        start_char: First character of the result; must be a key of
            ``char_to_idx``.
        length: Total number of characters to return, including
            ``start_char``. Values below 2 return ``start_char`` alone.

    Returns:
        The generated string.

    Raises:
        KeyError: If ``start_char`` is not in ``char_to_idx``.
    """
    model.eval()
    chars_generated = [start_char]
    current_idx = char_to_idx[start_char]
    hidden = None

    # Inference only: without no_grad every step would record an autograd
    # graph, and the hidden state carried between steps would keep it alive.
    with torch.no_grad():
        for _ in range(length - 1):
            # Build the (1, 1, 1) input directly on the target device.
            input_char = torch.tensor(
                [[[current_idx]]], dtype=torch.float32, device=device
            )
            output, hidden = model(input_char, hidden)

            # Greedy decoding: take the best-scoring class at the last step.
            current_idx = output[:, -1, :].argmax(dim=-1).item()
            chars_generated.append(idx_to_char[current_idx])

    return ''.join(chars_generated)

# Demo: greedily generate five characters starting from 'h'.
print(
    "\nGenerated Text:",
    generate_text(model, start_char='h', length=5),
)



Generated Text: hello
