# WordRNN

This notebook implements a simple word level neural network. Based on the sequence of words in the text, we will train an RNN to predict the next words in the sequence.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

torch.manual_seed(0)

<torch._C.Generator at 0x1747c8e2510>

**Download and prepare the data**

Prepare the text data by encoding characters as integers. Then divide the text into sequences of characters.

In [4]:
import requests

#import nltk
#nltk.download('punkt')
#from nltk.tokenize import word_tokenize

import re

# Download the file
url = "https://raw.githubusercontent.com/brunoklein99/deep-learning-notes/refs/heads/master/shakespeare.txt"
response = requests.get(url)
text = response.text[0:9999]
#text = "Hello, world! Welcome to Python's string splitting."
print(text)

# Remove everything except word characters and spaces
cleaned_text = re.sub(r'[^\w\s\n]', '', text)
word_list = cleaned_text.split(" ") # word_tokenize(text.lower())

# Create character-to-index and index-to-character mappings
words = sorted(list(set(word_list)))
word_to_idx = {w: i for i, w in enumerate(words)}
idx_to_word = {i: w for i, w in enumerate(words)}

# Prepare input and target sequences
def prepare_sequences(text, seq_length):
    input_seq = []
    target_seq = []

    for i in range(len(text) - seq_length):
        input_seq.append([word_to_idx[w] for w in text[i:i + seq_length]])
        target_seq.append(word_to_idx[text[i + seq_length]])

    return np.array(input_seq), np.array(target_seq)

seq_length = 5
X, y = prepare_sequences(word_list, seq_length)
X = torch.tensor(X, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)

THE SONNETS

by William Shakespeare

From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-substantial fuel,
Making a famine where abundance lies,
Thy self thy foe, to thy sweet self too cruel:
Thou that art now the world's fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And tender churl mak'st waste in niggarding:
Pity the world, or else this glutton be,
To eat the world's due, by the grave and thee.

When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a tattered weed of small worth held:  
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say within thine own deep sunken eyes,
Were an all-eating shame, and thriftless prais

**Define the RNN architecture**

 Use an RNN layer (nn.LSTM) and a fully connected layer for the output.

In [6]:
class WordRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size): #input size, respresnts how many words in my vocab.
        super(WordRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initialize hidden state and cell state
        h0 = (torch.zeros(1, x.size(0), self.hidden_size),
              torch.zeros(1, x.size(0), self.hidden_size))
        out, _ = self.rnn(x, h0)  # Forward pass
        out = self.fc(out[:, -1, :])  # Get the last time step
        return out


**Training Loop**

Train the model by feeding sequences and their targets (next wordss) to the RNN.

Note that the words are encoded as vectors, with one "dimension" per word identified in the text.

In [8]:
input_size = len(words)
hidden_size = 128
output_size = len(words)
num_epochs = 300
learning_rate = 0.1
batch_size = 64
num_batches = len(X) // batch_size

model = WordRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# One-hot encode the input
X_onehot = torch.zeros(X.size(0), X.size(1), input_size)
X_onehot.scatter_(2, X.unsqueeze(2), 1)

# Training loop
for epoch in range(num_epochs):
  # Shuffle the data
  permute = torch.randperm(len(X_onehot))
  X_shuffled = X_onehot[permute]  # use the permutation to index X in a random order
  y_shuffled = y[permute]  # same for y

  for i in range(num_batches):
    # Grab data in batches
    X_batch = X_shuffled[i:(i+batch_size)]
    y_batch = y_shuffled[i:(i+batch_size)]

    model.train()

    optimizer.zero_grad()
    output = model(X_batch)
    loss = criterion(output, y_batch)
    loss.backward()
    optimizer.step()

  if (epoch + 1) % 100 == 0:
      print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [100/200], Loss: 0.3511
Epoch [200/200], Loss: 0.9003


**Text Generation**

 Generate text by sampling words from the trained model.

In [10]:
def generate_text(model, start_string, gen_length=5):
    model.eval()
    input_eval = [word_to_idx[s] for s in start_string.split()]
    input_eval = torch.tensor(input_eval, dtype=torch.long).unsqueeze(0)

    generated_text = start_string + " "

    for _ in range(gen_length):
        input_eval_onehot = torch.zeros(input_eval.size(0), input_eval.size(1), input_size)
        input_eval_onehot.scatter_(2, input_eval.unsqueeze(2), 1)

        predictions = model(input_eval_onehot)
        predicted_word_idx = torch.argmax(predictions, dim=1).item()

        generated_text += idx_to_word[predicted_word_idx] + " "
        input_eval = torch.cat((input_eval[:, 1:], torch.tensor([[predicted_word_idx]])), dim=1)

    return generated_text

print(generate_text(model, start_string="fairest"))


beauty deceive
Then many decrease
And many decrease
And 
