In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset

In [3]:
with open('Dataset.txt', 'r', encoding='utf-8') as f:
    text = f.read()
chars = list(set(text))
char_to_idx = {c: i for i, c in enumerate(chars)}
idx_to_char = {i: c for i, c in enumerate(chars)}
vocab_size = len(chars)
print(f"Vocabulary size: {vocab_size}")

Vocabulary size: 57


In [4]:
block_size = 256  # Context length
data = torch.tensor([char_to_idx[c] for c in text], dtype=torch.long)
n = int(0.9*len(data))  # 90% train, 10% val
train_data = data[:n]
val_data = data[n:]

In [5]:
def get_batch(split):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    return x, y

batch_size = 32

In [18]:
class CharRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, n_layers=1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(embedding_dim, hidden_dim, n_layers)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, h):
        embed = self.embedding(x)  # Shape: (seq_len, batch_size, embedding_dim)
        out, h = self.rnn(embed, h)
        out = self.fc(out.view(-1, out.size(2)))  # Flatten for dense layer
        return out, h

    def init_hidden(self, batch_size):
        return torch.zeros(self.rnn.num_layers, batch_size, self.rnn.hidden_size).to('cuda')

In [19]:
model = CharRNN(
    vocab_size=vocab_size,
    embedding_dim=16,
    hidden_dim=128,
    n_layers=2
)
model = model.to('cuda')  # Use GPU
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [20]:
epochs = 10
for epoch in range(epochs):
    hidden = model.init_hidden(batch_size).to('cuda')
    for step in range(100):  # Train for 100 steps per epoch
        inputs, targets = get_batch('train')
        inputs, targets = inputs.to('cuda'), targets.to('cuda')

        # Forward pass
        hidden = hidden.detach()
        logits, hidden = model(inputs.T, hidden)
        loss = criterion(logits, targets.view(-1))

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Step {step}, Loss: {loss.item():.4f}")

# Save the trained model
#torch.save(model.state_dict(), 'char_rnn.pth')

Epoch 1/10, Step 0, Loss: 4.0280
Epoch 1/10, Step 10, Loss: 3.3424
Epoch 1/10, Step 20, Loss: 3.0729
Epoch 1/10, Step 30, Loss: 3.0374
Epoch 1/10, Step 40, Loss: 3.0384
Epoch 1/10, Step 50, Loss: 3.0433
Epoch 1/10, Step 60, Loss: 3.0303
Epoch 1/10, Step 70, Loss: 3.0305
Epoch 1/10, Step 80, Loss: 3.0265
Epoch 1/10, Step 90, Loss: 3.0338
Epoch 2/10, Step 0, Loss: 3.0442
Epoch 2/10, Step 10, Loss: 3.0412
Epoch 2/10, Step 20, Loss: 3.0396
Epoch 2/10, Step 30, Loss: 3.0200
Epoch 2/10, Step 40, Loss: 3.0410
Epoch 2/10, Step 50, Loss: 3.0388
Epoch 2/10, Step 60, Loss: 3.0359
Epoch 2/10, Step 70, Loss: 3.0292
Epoch 2/10, Step 80, Loss: 3.0424
Epoch 2/10, Step 90, Loss: 3.0368
Epoch 3/10, Step 0, Loss: 3.0429
Epoch 3/10, Step 10, Loss: 3.0224
Epoch 3/10, Step 20, Loss: 3.0351
Epoch 3/10, Step 30, Loss: 3.0334
Epoch 3/10, Step 40, Loss: 3.0509
Epoch 3/10, Step 50, Loss: 3.0376
Epoch 3/10, Step 60, Loss: 3.0384
Epoch 3/10, Step 70, Loss: 3.0426
Epoch 3/10, Step 80, Loss: 3.0391
Epoch 3/10, Step 

In [22]:
import numpy as np
def generate_text(model, start_str, max_length=500):
    model.eval()
    with torch.no_grad():
        x = torch.tensor([char_to_idx[c] for c in start_str], dtype=torch.long).view(-1, 1).to('cuda')
        hidden = model.init_hidden(1).to('cuda')
        generated = start_str

        for _ in range(max_length - len(start_str)):
            input_seq = x[-block_size:] if len(x) >= block_size else x
            logits, hidden = model(input_seq, hidden)
            prob = nn.functional.softmax(logits[-1], dim=0).cpu().numpy()
            next_char_idx = np.random.choice(len(prob), p=prob)
            next_char = idx_to_char[next_char_idx]
            generated += next_char
            x = torch.cat((x, torch.tensor([[next_char_idx]]).to('cuda')), dim=0)

    return generated

print(generate_text(model, "Once upon a time", max_length=200))

Once upon a timeezo vgdh faednRinrerro dti oeaOe f slrhhestrhra edes eegketkihe teeRf rh rtsedo, tw til tursTalWTenusf    eopgtteimo gpsgsr . ee it h  ia ukag.thlu iico n ttsselegiA nhn albeeatirhteve
