# RNN

In [None]:
import re
from collections import Counter
from pprint import pprint

import numpy as np
import requests
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from tqdm import tqdm

In [None]:
# Download the plain-text corpus from Project Gutenberg.
url = "https://www.gutenberg.org/files/100/100-0.txt"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of treating an error page as the corpus.
response.raise_for_status()
shakespeare_text = response.text

# Collapse every run of whitespace (newlines, tabs, repeated spaces) into one space.
shakespeare_text = re.sub(r'\s+', ' ', shakespeare_text)

In [None]:
# Number of characters in the downloaded text.
len(shakespeare_text)

In [None]:
# Preview the first 1500 characters of the text.
pprint(shakespeare_text[:1500])

In [None]:
# Print a match object for every occurrence of the Hamlet title; each match's
# span is a pair of character offsets into shakespeare_text.
hamlet_title = "THE TRAGEDY OF HAMLET, PRINCE OF DENMARK"
pattern = re.compile(hamlet_title)
matches = pattern.finditer(shakespeare_text)
for title_hit in matches:
    print(title_hit)

In [None]:
# Print a match object for every occurrence of the Henry IV (part one) title;
# each match's span is a pair of character offsets into shakespeare_text.
henry_iv_title = "THE FIRST PART OF KING HENRY THE FOURTH"
pattern = re.compile(henry_iv_title)
matches = pattern.finditer(shakespeare_text)
for title_hit in matches:
    print(title_hit)

In [None]:
# Hard-coded character offsets into shakespeare_text.
# NOTE(review): presumably read off the title matches printed by the two
# preceding cells; they are only valid for the exact text that was downloaded,
# so re-check them whenever the source file changes.
hamlet_start, hamlet_end = 927066, 1105544
hamlet_text = shakespeare_text[hamlet_start:hamlet_end]

In [None]:
# Display the extracted slice in full for a visual check.
pprint(hamlet_text)

In [None]:
# Tokenize: lowercase the text, then take every maximal run of word characters.
words = re.compile(r'\b\w+\b').findall(hamlet_text.lower())

# Counter keeps first-seen order, so each word's id is its order of first appearance.
vocab = Counter(words)
word_to_ix = {word: index for index, word in enumerate(vocab)}
ix_to_word = dict(enumerate(vocab))

# The corpus as a flat list of integer word ids.
encoded_text = list(map(word_to_ix.__getitem__, words))

class TextDataset(Dataset):
    """Sliding-window dataset for next-token prediction.

    Sample ``i`` is the pair ``(data[i:i + seq_length],
    data[i + 1:i + seq_length + 1])``: a window of token ids and the same
    window shifted one position to the right, both as ``torch.long`` tensors.

    Args:
        data: Sequence of integer token ids.
        seq_length: Number of tokens in each window; must be at least 1.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """

    def __init__(self, data, seq_length):
        if seq_length < 1:
            raise ValueError("seq_length must be a positive integer")
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        # Every sample needs seq_length + 1 tokens. Clamp at zero so that data
        # shorter than one window yields an empty dataset instead of making
        # len() raise on a negative value.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        size = len(self)
        if idx < 0:
            # Support negative indices the way built-in sequences do.
            idx += size
        if not 0 <= idx < size:
            # Slicing never fails on its own, so without this check an
            # out-of-range index would silently return truncated tensors and
            # plain iteration over the dataset would never terminate.
            raise IndexError("TextDataset index out of range")
        stop = idx + self.seq_length
        input_seq = self.data[idx:stop]
        target_seq = self.data[idx + 1:stop + 1]
        return (
            torch.tensor(input_seq, dtype=torch.long),
            torch.tensor(target_seq, dtype=torch.long),
        )

## RNN

In [None]:
class RNNCell(nn.Module):
    """One time step of an Elman-style recurrent cell.

    Computes ``h_t = tanh(i2h([x_t ; h_{t-1}]))`` and ``o_t = h2o(h_t)``.
    The output is left un-squashed so it can be used directly as logits.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        output_size: Number of features in each output vector. Defaults to
            ``input_size`` when omitted.
    """

    def __init__(self, input_size, hidden_size, output_size=None):
        super().__init__()
        if output_size is None:
            output_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.tanh = torch.tanh

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: Tensor of shape ``(batch, 1, input_size)``.
            hidden: Tensor of shape ``(batch, 1, hidden_size)``.

        Returns:
            Tuple ``(output, hidden)`` with shapes ``(batch, 1, output_size)``
            and ``(batch, 1, hidden_size)``.
        """
        # Concatenate along the feature dimension.
        combined = torch.cat((input, hidden), 2)
        # The non-linearity belongs on the recurrent state: it keeps the state
        # bounded and makes the recurrence more than a chain of linear maps.
        hidden = self.tanh(self.i2h(combined))
        # Raw, unbounded logits: squashing them would cap how confident the
        # softmax inside a cross-entropy loss can ever become.
        output = self.h2o(hidden)
        return output, hidden


class RNN(nn.Module):
    """Token-level language model: embedding followed by an unrolled RNNCell.

    Args:
        input_size: Vocabulary size. Used both as the number of embeddings and
            as the number of output logits per time step.
        hidden_size: Number of features in the recurrent hidden state.
        embedding_dim: Size of each token embedding.
    """

    def __init__(self, input_size, hidden_size, embedding_dim):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embedding_dim)
        # The cell must emit one logit per vocabulary entry, not one per
        # embedding feature, so that targets in [0, input_size) are valid
        # class indices for the loss.
        self.rnn_cell = RNNCell(embedding_dim, hidden_size, output_size=input_size)

    def forward(self, input, hidden):
        """Run the cell over every position of a batch of token sequences.

        Args:
            input: Long tensor of token ids with shape ``(batch, seq_len)``.
            hidden: Initial hidden state of shape ``(batch, 1, hidden_size)``.

        Returns:
            Tuple ``(logits, hidden)``: logits of shape
            ``(batch, seq_len, input_size)`` and the final hidden state.
        """
        embedded = self.embedding(input)
        outputs = []
        for step in range(embedded.size(1)):  # Loop over sequence length
            step_input = embedded[:, step].unsqueeze(1)
            output, hidden = self.rnn_cell(step_input, hidden)
            outputs.append(output)
        return torch.cat(outputs, dim=1), hidden

    def initHidden(self, batch_size, device):
        """Return an all-zero hidden state of shape ``(batch_size, 1, hidden_size)``."""
        return torch.zeros(batch_size, 1, self.hidden_size, device=device)

## Training

In [None]:
# Hyperparameters
seq_length = 5          # tokens per training window
batch_size = 1024
embedding_dim = 128
hidden_size = 256
learning_rate = 0.001
epochs = 50

# Initialize the model, criterion, and optimizer
model = RNN(len(word_to_ix), hidden_size, embedding_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Detect the available device: CUDA first, then Apple MPS, otherwise CPU.
# The MPS lookup is guarded because torch.backends.mps is missing on older
# PyTorch builds.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Prepare the dataset and dataloader
dataset = TextDataset(encoded_text, seq_length)

# 80/20 train/test split.
# NOTE: windows start at every token position, so neighbouring windows share
# tokens; a random split therefore puts overlapping text in both subsets.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Make one pass over ``loader`` and return the mean per-batch loss.

    The pass trains the model when ``optimizer`` is given and only evaluates
    it (no gradients, eval mode) otherwise. Returns ``nan`` for an empty loader.
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    num_batches = 0
    with torch.set_grad_enabled(training):
        for inputs, targets in tqdm(loader, desc=desc, leave=False):
            inputs, targets = inputs.to(device), targets.to(device)
            # Each batch starts from a fresh zero hidden state.
            hidden = model.initHidden(inputs.size(0), device)
            if training:
                # Clear gradients through the optimizer so exactly the
                # parameters it is about to update are reset.
                optimizer.zero_grad()
            outputs, _ = model(inputs, hidden)
            # Flatten (batch, seq, classes) -> (batch * seq, classes) for the loss.
            loss = criterion(outputs.reshape(-1, outputs.size(2)), targets.reshape(-1))
            if training:
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
            num_batches += 1
    # Counting batches avoids a ZeroDivisionError when the loader is empty.
    return total_loss / num_batches if num_batches else float("nan")


def train_and_test(model, train_loader, test_loader, criterion, optimizer, epochs, device):
    """Train ``model`` for ``epochs`` epochs, evaluating after every epoch.

    Args:
        model: Module exposing ``initHidden(batch_size, device)`` and a forward
            pass ``model(inputs, hidden) -> (outputs, hidden)``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        criterion: Loss called as ``criterion(flat_outputs, flat_targets)``.
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of epochs to run.
        device: Device the model and every batch are moved to.

    Prints one line per epoch with the mean training and test loss.
    """
    model.to(device)
    for epoch in range(epochs):
        avg_train_loss = _run_epoch(
            model, train_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs} [Training]",
            optimizer=optimizer,
        )
        avg_test_loss = _run_epoch(
            model, test_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs} [Testing]",
        )

        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}')

# Kick off training with the objects configured in the setup cell.
train_and_test(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    device=device,
)

## Prediction

In [None]:
# Prediction function
def predict(model, input_text, word_to_ix, ix_to_word, device, predict_len=100):
    """Generate ``predict_len`` words that continue ``input_text``.

    The prompt is lowercased and split into runs of word characters; words
    missing from ``word_to_ix`` are skipped. The prompt is fed through the
    model once, after which each sampled word is fed back on its own together
    with the carried hidden state.

    Args:
        model: Module exposing ``initHidden(batch_size, device)`` and a forward
            pass ``model(tokens, hidden) -> (outputs, hidden)`` whose outputs
            have shape ``(batch, seq_len, num_classes)``.
        input_text: Prompt to continue.
        word_to_ix: Mapping from word to integer id.
        ix_to_word: Mapping from integer id to word.
        device: Device on which the input tensors are created.
        predict_len: Number of words to generate.

    Returns:
        ``input_text`` followed by the generated words, separated by spaces.

    Raises:
        ValueError: If no word of ``input_text`` is in ``word_to_ix``.
    """
    import warnings

    model.eval()  # Set the model to evaluation mode

    # Splitting on whitespace alone would drop capitalised or punctuated words
    # ("Be," / "question.") when the vocabulary holds lowercase word tokens.
    prompt_words = re.findall(r'\b\w+\b', input_text.lower())
    input_seq = [word_to_ix[word] for word in prompt_words if word in word_to_ix]
    if not input_seq:
        raise ValueError("input_text contains no words from the vocabulary")

    vocab_size = len(word_to_ix)
    step_input = torch.tensor(input_seq, dtype=torch.long, device=device).unsqueeze(0)
    hidden = model.initHidden(1, device)
    pieces = [input_text]

    with torch.no_grad():
        for _ in range(predict_len):
            output, hidden = model(step_input, hidden)
            logits = output[:, -1, :].reshape(-1)  # Output for the last time step
            # Softmax in float64 on the CPU so the probabilities sum to one
            # within the tolerance numpy's sampler checks for.
            output_dist = torch.softmax(logits.cpu().double(), dim=0).numpy()

            if output_dist.shape[0] == vocab_size and np.isfinite(output_dist).all():
                predicted_word_idx = int(np.random.choice(vocab_size, p=output_dist))
            else:
                # Keep the best-effort fallback, but make it visible: a uniform
                # draw is not a model prediction.
                warnings.warn(
                    "model output is not a valid distribution over the vocabulary; "
                    "sampling a word uniformly at random instead"
                )
                predicted_word_idx = int(np.random.choice(vocab_size))

            pieces.append(ix_to_word[predicted_word_idx])

            # The hidden state already summarises everything seen so far, so
            # only the newly sampled word is fed on the next step.
            step_input = torch.tensor([[predicted_word_idx]], dtype=torch.long, device=device)

    return ' '.join(pieces)

# Example usage: continue a prompt with the trained model and print the result.
input_text = "to be or not to be that is the question"
predicted_output = predict(
    model=model,
    input_text=input_text,
    word_to_ix=word_to_ix,
    ix_to_word=ix_to_word,
    device=device,
)
print(predicted_output)