In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random


In [5]:
# Check GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [6]:
# 1. Load and preprocess text
with open("shakespeare.txt", "r", encoding="utf-8") as f:
    text = f.read().lower()

In [7]:
# 2. Create character mapping
chars = sorted(list(set(text)))
vocab_size = len(chars)
char2idx = {ch: i for i, ch in enumerate(chars)}
idx2char = {i: ch for i, ch in enumerate(chars)}

In [8]:
# 3. Prepare dataset
seq_length = 100
step = 1

sequences = []
next_chars = []

In [9]:
for i in range(0, len(text) - seq_length, step):
    sequences.append(text[i:i+seq_length])
    next_chars.append(text[i+seq_length])

In [10]:
# Integer encoding
X = [[char2idx[c] for c in seq] for seq in sequences]
y = [char2idx[c] for c in next_chars]

# Convert to tensors
X = torch.tensor(X, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)

In [17]:
# Optional: reduce dataset size to test faster
X = X[:1000000]
y = y[:1000000]


In [18]:
# 4. Dataset and DataLoader
from torch.utils.data import TensorDataset, DataLoader

batch_size = 128
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

In [19]:
# 5. Define the model
class CharRNN(nn.Module):
    def __init__(self, vocab_size, embed_dim=64, hidden_dim=256):
        super(CharRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])  # Only last time step
        return out, hidden
# Clip gradients to prevent explosion

model = CharRNN(vocab_size).to(device)
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

In [20]:
# 6. Train the model
epochs = 500
for epoch in range(epochs):
    total_loss = 0
    for batch_X, batch_y in loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        out, _ = model(batch_X)
        loss = criterion(out, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

Epoch 1/500, Loss: 2096.5330
Epoch 2/500, Loss: 1709.7689
Epoch 3/500, Loss: 1582.1136
Epoch 4/500, Loss: 1502.0764
Epoch 5/500, Loss: 1447.0344
Epoch 6/500, Loss: 1405.9842
Epoch 7/500, Loss: 1372.4659
Epoch 8/500, Loss: 1345.4979
Epoch 9/500, Loss: 1320.5332
Epoch 10/500, Loss: 1300.5979
Epoch 11/500, Loss: 1281.4838
Epoch 12/500, Loss: 1265.1940
Epoch 13/500, Loss: 1250.0177
Epoch 14/500, Loss: 1235.6572
Epoch 15/500, Loss: 1222.2782
Epoch 16/500, Loss: 1209.8380
Epoch 17/500, Loss: 1198.7786
Epoch 18/500, Loss: 1187.5709
Epoch 19/500, Loss: 1178.4263
Epoch 20/500, Loss: 1168.2863
Epoch 21/500, Loss: 1158.6445
Epoch 22/500, Loss: 1149.2485
Epoch 23/500, Loss: 1140.9859
Epoch 24/500, Loss: 1132.9825
Epoch 25/500, Loss: 1124.9031
Epoch 26/500, Loss: 1116.9243
Epoch 27/500, Loss: 1109.8027
Epoch 28/500, Loss: 1102.2315
Epoch 29/500, Loss: 1095.0580
Epoch 30/500, Loss: 1087.7547
Epoch 31/500, Loss: 1081.3802
Epoch 32/500, Loss: 1074.9526
Epoch 33/500, Loss: 1068.3567
Epoch 34/500, Loss:

In [21]:
# Save the model after training
torch.save(model.state_dict(), "char_rnn_model.pth")
print("Model saved successfully!")


Model saved successfully!


In [None]:
# 7. Generate text
def generate_text(model, start_seq, gen_length=300, temperature=1.0):
    model.eval()
    input_seq = [char2idx.get(c, 0) for c in start_seq.lower()]
    input_seq = torch.tensor(input_seq, dtype=torch.long).unsqueeze(0).to(device)

    generated = start_seq

    hidden = None
    for _ in range(gen_length):
        out, hidden = model(input_seq, hidden)
        out = out.squeeze().div(temperature).exp()
        prob = out / torch.sum(out)
        next_idx = torch.multinomial(prob, 1).item()
        next_char = idx2char[next_idx]
        generated += next_char

        input_seq = torch.cat([input_seq[:, 1:], torch.tensor([[next_idx]], device=device)], dim=1)

    return generated

In [None]:
seed = "to be or not to be, "
print(generate_text(model, seed, gen_length=300, temperature=0.11))

NameError: name 'model' is not defined