<a href="https://colab.research.google.com/github/Kush-Singh-26/NLP/blob/main/ChargenerateRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Character-Level RNN Trained on Shakespeare's Text

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

In [None]:
# Download the Tiny Shakespeare dataset
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt -O input.txt

with open('input.txt', 'r') as f:
    text = f.read()

# Create character vocabulary
chars = sorted(list(set(text)))
vocab_size = len(chars)


--2025-05-07 06:23:17--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2025-05-07 06:23:17 (58.8 MB/s) - ‘input.txt’ saved [1115394/1115394]



In [None]:
# Lookup tables between characters and their integer ids.
# `chars` is sorted and duplicate-free, so position in the list is the id.
idx2char = dict(enumerate(chars))
char2idx = {ch: idx for idx, ch in idx2char.items()}

# The whole corpus as one 1-D tensor of character ids
encoded_text = torch.tensor(
    list(map(char2idx.__getitem__, text)),
    dtype=torch.long,
)

In [None]:
# Training configuration
# --- batching ---
batch_size = 64        # sequences sampled per optimisation step
block_size = 100       # characters per training sequence
# --- model ---
num_layers = 2         # stacked recurrent layers
hidden_size = 256      # recurrent state width (CharRNN also uses it as embedding width)
# --- optimisation ---
num_epochs = 500       # the training loop draws one random batch per "epoch"
learning_rate = 2e-3   # step size handed to Adam

In [None]:
# Function to generate a training batch
def get_batch(encoded_text, block_size, batch_size):
    """Sample a random batch of next-character prediction examples.

    Args:
        encoded_text: 1-D tensor of token ids for the whole corpus.
        block_size: Number of tokens in each input sequence.
        batch_size: Number of sequences to draw.

    Returns:
        A pair ``(x, y)`` of tensors, each of shape ``(batch_size, block_size)``,
        where ``y`` is ``x`` shifted one position to the right in the corpus.

    Raises:
        ValueError: If the corpus has fewer than ``block_size + 1`` tokens, so
            that not even one input/target pair fits.
    """
    # A window starting at i needs tokens i .. i + block_size (inclusive) so the
    # shifted target exists; valid starts are therefore 0 .. len - block_size - 1.
    num_starts = len(encoded_text) - block_size
    if num_starts < 1:
        raise ValueError(
            f"encoded_text has {len(encoded_text)} tokens but at least "
            f"{block_size + 1} are required for block_size={block_size}"
        )
    # torch.randint's upper bound is exclusive, so passing num_starts makes the
    # final valid window reachable (the previous bound skipped it).
    ix = torch.randint(0, num_starts, (batch_size,)).tolist()
    x = torch.stack([encoded_text[i:i + block_size] for i in ix])
    y = torch.stack([encoded_text[i + 1:i + block_size + 1] for i in ix])
    return x, y

In [None]:
# Character-level RNN model
class CharRNN(nn.Module):
    """Embedding -> multi-layer RNN -> linear projection onto the vocabulary."""

    def __init__(self, vocab_size, hidden_size, num_layers):
        """Create the three sub-modules.

        Args:
            vocab_size: Number of distinct tokens (embedding rows and output logits).
            hidden_size: Width of both the embeddings and the recurrent state.
            num_layers: Number of stacked recurrent layers.
        """
        super().__init__()
        # Attribute names double as state_dict keys, so they must stay stable.
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden_size)
        self.rnn = nn.RNN(
            hidden_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Return per-position vocabulary logits and the updated hidden state.

        Args:
            x: Tensor of token ids; the RNN is batch-first.
            hidden: Optional recurrent state to resume from; ``None`` is passed
                straight through to ``nn.RNN``.
        """
        embedded = self.embed(x)
        features, new_hidden = self.rnn(embedded, hidden)
        logits = self.fc(features)
        return logits, new_hidden


In [None]:
# Network, objective and optimiser
model = CharRNN(vocab_size, hidden_size, num_layers)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: each "epoch" is one gradient step on one freshly sampled batch
print("Training started...")
model.train()  # nothing switches the mode inside the loop, so set it once
for epoch in range(num_epochs):
    optimizer.zero_grad()
    x_batch, y_batch = get_batch(encoded_text, block_size, batch_size)

    logits, _ = model(x_batch)
    # Merge the batch and time axes so every position is one classification example
    flat_logits = logits.reshape(-1, vocab_size)
    flat_targets = y_batch.reshape(-1)
    loss = loss_fn(flat_logits, flat_targets)

    loss.backward()
    optimizer.step()

    # Report on every second step (steps are numbered from 1 in the log)
    if epoch % 2 == 1:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")


Training started...
Epoch 2/500, Loss: 3.7927
Epoch 4/500, Loss: 3.1972
Epoch 6/500, Loss: 2.9484
Epoch 8/500, Loss: 2.8168
Epoch 10/500, Loss: 2.6898
Epoch 12/500, Loss: 2.6244
Epoch 14/500, Loss: 2.5181
Epoch 16/500, Loss: 2.4964
Epoch 18/500, Loss: 2.4021
Epoch 20/500, Loss: 2.4052
Epoch 22/500, Loss: 2.3409
Epoch 24/500, Loss: 2.3031
Epoch 26/500, Loss: 2.2450
Epoch 28/500, Loss: 2.2645
Epoch 30/500, Loss: 2.2534
Epoch 32/500, Loss: 2.2131
Epoch 34/500, Loss: 2.1791
Epoch 36/500, Loss: 2.1933
Epoch 38/500, Loss: 2.1602
Epoch 40/500, Loss: 2.1563
Epoch 42/500, Loss: 2.0868
Epoch 44/500, Loss: 2.1349
Epoch 46/500, Loss: 2.1302
Epoch 48/500, Loss: 2.0805
Epoch 50/500, Loss: 2.0509
Epoch 52/500, Loss: 2.0432
Epoch 54/500, Loss: 2.0468
Epoch 56/500, Loss: 2.0701
Epoch 58/500, Loss: 2.0394
Epoch 60/500, Loss: 2.0341
Epoch 62/500, Loss: 2.0138
Epoch 64/500, Loss: 2.0227
Epoch 66/500, Loss: 1.9727
Epoch 68/500, Loss: 1.9826
Epoch 70/500, Loss: 1.9753
Epoch 72/500, Loss: 1.9547
Epoch 74/500

In [None]:
def generate(model, start_text='Once upon a ', length=400):
    """Sample ``length`` characters from ``model`` as a continuation of ``start_text``.

    Uses the notebook-level ``char2idx`` / ``idx2char`` tables to convert
    between characters and token ids.

    Args:
        model: Callable as ``model(input_ids, hidden)`` returning
            ``(logits, hidden)``, like ``CharRNN``.
        start_text: Non-empty prompt; every character must be a key of ``char2idx``.
        length: Number of characters to generate after the prompt.

    Returns:
        The prompt followed by the generated characters, as one string.

    Raises:
        ValueError: If ``start_text`` is empty (there is no position to sample from).
        KeyError: If ``start_text`` contains a character missing from ``char2idx``.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")

    model.eval()
    generated = list(start_text)
    input_seq = torch.tensor([char2idx[c] for c in generated], dtype=torch.long).unsqueeze(0)
    hidden = None

    # Sampling needs no gradients. Without no_grad the autograd graph would keep
    # growing through the carried hidden state for every generated character.
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(input_seq, hidden)
            last_logits = output[0, -1, :]  # logits at the final position
            probs = torch.softmax(last_logits, dim=0)
            next_idx = torch.multinomial(probs, num_samples=1).item()
            generated.append(idx2char[next_idx])
            # After the prompt has been consumed, feed back only the new
            # character; earlier context lives in `hidden`.
            input_seq = torch.tensor([[next_idx]], dtype=torch.long)

    return ''.join(generated)

In [None]:
# Show a sample continuation from the model trained above, under a header line
print(
    "\nGenerated text:",
    generate(model, start_text="Once upon a time,"),
    sep="\n",
)


Generated text:
Once upon a time, toes! Look
Fir my cold.

ANGELO:
Canless a honour scanch,
Now to: and dost, be have consword;
And to you am thee thine: 'tis harple: and no lift keemsions our Lair!
But or no lord against to her order stility.

Cloud you, be
Ench indost pre was,
Frugh their torthere good and the firtsh in defence: 'Coursel.

DUKE OF OFCY:
My everein any all own.

SICINIUS:
Furreason, and a heart
It
dopt anger for


In [None]:
# Persist the weights together with everything needed to rebuild the model
# (constructor arguments) and to encode/decode text (the two lookup tables).
torch.save(
    obj={
        'model_state_dict': model.state_dict(),
        'char2idx': char2idx,
        'idx2char': idx2char,
        'vocab_size': vocab_size,
        'hidden_size': hidden_size,
        'num_layers': num_layers,
    },
    f='char_rnn_full.pt',
)


## To perform inference

In [None]:
import torch
import torch.nn.functional as F
import random

# Read the checkpoint written by the save cell
checkpoint = torch.load('char_rnn_full.pt')

# Character <-> id tables stored alongside the weights
char2idx, idx2char = checkpoint['char2idx'], checkpoint['idx2char']

# Rebuild the network from its saved constructor arguments, then load the weights.
# CharRNN is the class defined earlier in this notebook and must already be in scope.
model = CharRNN(**{
    name: checkpoint[name]
    for name in ('vocab_size', 'hidden_size', 'num_layers')
})
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Function to generate text using the trained model
def generate_text(model, start_text, char2idx, idx2char, length=200, temperature=1.0):
    """Sample a continuation of ``start_text`` from a character-level model.

    Args:
        model: Callable as ``model(input_ids, hidden)`` returning
            ``(logits, hidden)`` with logits indexed ``[batch, position, vocab]``.
        start_text: Non-empty prompt; every character must be a key of ``char2idx``.
        char2idx: Mapping from character to token id.
        idx2char: Mapping from token id to character.
        length: Number of characters to generate after the prompt.
        temperature: Positive divisor applied to the logits before the softmax;
            values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        The prompt followed by the generated characters, as one string.

    Raises:
        ValueError: If ``start_text`` is empty or ``temperature`` is not positive.
        KeyError: If ``start_text`` contains a character missing from ``char2idx``.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")
    if temperature <= 0:
        # Dividing by zero or a negative value would produce NaN/inf or an
        # inverted distribution instead of a usable one.
        raise ValueError("temperature must be positive")

    model.eval()
    input_seq = torch.tensor([char2idx[c] for c in start_text], dtype=torch.long).unsqueeze(0)
    hidden = None

    generated = list(start_text)

    # Sampling needs no gradients. Without no_grad the autograd graph would keep
    # growing through the carried hidden state for every generated character.
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(input_seq, hidden)
            # Index the single batch row and last timestep explicitly; a bare
            # squeeze() would collapse to 0-dim if the vocabulary had one entry.
            last_logits = output[0, -1, :] / temperature
            probs = F.softmax(last_logits, dim=-1)
            char_idx = torch.multinomial(probs, num_samples=1).item()
            generated.append(idx2char[char_idx])
            # Feed back only the new character; earlier context lives in `hidden`.
            input_seq = torch.tensor([[char_idx]], dtype=torch.long)

    return ''.join(generated)

# Example: continue a short prompt with slightly sharpened sampling
start_prompt = "To be"
generated_text = generate_text(
    model=model,
    start_text=start_prompt,
    char2idx=char2idx,
    idx2char=idx2char,
    length=300,
    temperature=0.8,
)
print(generated_text)


To be the brother, to the poor and the handly dost have let the king,
And nothing thy peward and might in the great and did thee, how have, for these your blood myself than thou tell.

KING RICHARD III:
Why are you said at him thou good say.

CATUS:
A though some like deathn and upon so is not our straig
