In [60]:
import torch
import requests
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader


# Check for CUDA support and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [61]:
# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 50
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        #This line takes the input tensor x, which contains indices of characters, and passes it through an embedding layer (self.embedding). 
        #The embedding layer converts these indices into dense vectors of fixed size. 
        #These vectors are learned during training and can capture semantic similarities between characters. 
        #The result is a higher-dimensional representation of the input sequence, where each character index is replaced by its corresponding embedding vector. 
        self.embedding = nn.Embedding(input_size, hidden_size)
        # self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        # self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        #The RNN layer returns two outputs: 
        #1- the output tensor containing the output of the RNN at each time step for each sequence in the batch, 
        #2-the hidden state (_) of the last time step (which is not used in this line, hence the underscore).
        output, _ = self.rnn(embedded)
        #The RNN's output contains the outputs for every time step, 
        #but for this task, we're only interested in the output of the last time step because we're predicting the next character after the sequence. 
        #output[:, -1, :] selects the last time step's output for every sequence in the batch (-1 indexes the last item in Python).
        output = self.fc(output[:, -1, :])  # Get the output of the last RNN cell
        return output

def accuracy_calc(model, dataloader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for seqs, targets in dataloader:
            seqs, targets = seqs.to(device), targets.to(device)
            outputs = model(seqs)
            _, predicted = torch.max(outputs, dim=1)
            total += targets.shape[0]
            correct += int((predicted == targets).sum())

    accuracy = correct / total  
    return accuracy

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, test_loader):
    for epoch in range(1, n_epochs + 1):
        model.train()
        train_loss = 0.0
        for seqs, targets in train_loader:
            seqs, targets = seqs.to(device), targets.to(device)
            outputs = model(seqs)
            loss = loss_fn(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        if (epoch) % 1 == 0:
            train_accuracy = accuracy_calc(model, train_loader)
            test_accuracy = accuracy_calc(model, test_loader)
            print(f"Epoch {epoch}, Training Loss: {train_loss/len(train_loader):.4f}, Training Accuracy: {train_accuracy:.4f}, Validation Accuracy: {test_accuracy:.4f}")

In [None]:
# Hyperparameters
hidden_size = 128
learning_rate = 0.001
n_epochs = 20

# Model, loss, and optimizer
model = CharRNN(len(chars), hidden_size, len(chars)).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Count the parameters
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

# Training the model
training_loop (
    n_epochs = n_epochs,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
    test_loader = test_loader
)

Total number of parameters: 428097
Epoch 1, Training Loss: 1.6848, Training Accuracy: 0.5405, Validation Accuracy: 0.5350


KeyboardInterrupt: 

In [None]:
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-prediction_range:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "This simple example demonstrates"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
# print(f"Predicted string: '{test_str}'")

# # test_str = "The"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
for i in range(prediction_range):
    test_str = test_str + predicted_char
    predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)

print(f"Predicted string: '{test_str}'")


NameError: name 'char_to_ix' is not defined