<a href="https://colab.research.google.com/github/JackXZH/madlibs-starter/blob/main/2023_10_31_rnns.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import the packages we're going to use
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import requests

In [None]:
class CharRNN(nn.Module):
    """Character-level recurrent language model.

    Integer character indices are one-hot encoded, passed through a single
    ``nn.RNN`` layer (``batch_first=True``) and projected back to vocabulary
    size by a linear layer, giving one logit vector per input position.

    Args:
        vocab_size: Number of distinct characters. Sets both the width of the
            one-hot input and the number of output logits.
        hidden_size: Width of the recurrent hidden state.
    """
    def __init__(self, vocab_size, hidden_size):
        # Initialize the parent class
        super().__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        # Recurrent layer: consumes one-hot vectors, batch dimension first
        self.rnn = nn.RNN(vocab_size, hidden_size, batch_first=True)
        # Output layer: maps each hidden state to per-character logits
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        """Compute next-character logits for a batch of index sequences.

        Args:
            x: Integer (long) tensor of character indices, typically of shape
                ``(batch, seq_len)``; every value must be ``< vocab_size``.
            hidden: Initial hidden state of shape ``(1, batch, hidden_size)``.
                When omitted (``None``), ``nn.RNN`` starts from zeros, so
                callers no longer have to build the zero tensor themselves.

        Returns:
            Tuple ``(logits, hidden)``: ``logits`` has one ``vocab_size``-wide
            vector per input position (unnormalized scores, no softmax
            applied) and ``hidden`` is the final hidden state, suitable for
            feeding into the next call.
        """
        # One-hot encode the indices; the RNN expects floating-point input
        x = nn.functional.one_hot(x, self.vocab_size).float()
        # Run the recurrent layer (a None hidden state means "start at zero")
        out, hidden = self.rnn(x, hidden)
        # Project every time step's hidden state onto the vocabulary
        out = self.fc(out)
        return out, hidden

In [None]:
def generate_text(model, char_to_int, int_to_char, device, start='', length=100):
    """Sample text from the model one character at a time (inference only).

    Args:
        model: Module called as ``model(inp, hidden)`` that returns
            ``(logits, hidden)`` and exposes a ``hidden_size`` attribute.
        char_to_int: Mapping from character to vocabulary index.
        int_to_char: Inverse mapping from vocabulary index to character.
        device: Torch device on which the input and hidden tensors are created.
        start: Prompt to continue. If empty, a single start character is drawn
            at random from the keys of ``char_to_int``.
        length: Number of characters to sample after the prompt.

    Returns:
        The prompt followed by ``length`` sampled characters.

    Raises:
        KeyError: If ``start`` contains a character missing from
            ``char_to_int``.
    """
    # Remember the current mode so sampling in the middle of training does not
    # leave the model stuck in evaluation mode afterwards.
    was_training = model.training
    model.eval()
    try:
        # Sampling never needs gradients; skipping autograd avoids building a
        # graph for every generated character.
        with torch.no_grad():
            # If no start string is provided, randomly select a start character
            if not start:
                start = str(np.random.choice(list(char_to_int.keys())))

            hidden = torch.zeros(1, 1, model.hidden_size, device=device)

            # Feed everything but the last prompt character through the model
            # so the hidden state reflects the whole prompt, not just its end.
            if len(start) > 1:
                prefix = torch.tensor(
                    [[char_to_int[c] for c in start[:-1]]],
                    dtype=torch.long,
                    device=device,
                )
                _, hidden = model(prefix, hidden)

            # Collect pieces in a list and join once at the end
            pieces = [start]
            last_char = start[-1]
            for _ in range(length):
                # Convert the most recent character to a (1, 1) index tensor
                inp = torch.tensor(
                    [[char_to_int[last_char]]], dtype=torch.long, device=device
                )
                # Generate prediction and update hidden state
                pred, hidden = model(inp, hidden)
                # Apply softmax to get probabilities over the vocabulary
                prob = nn.functional.softmax(pred[0][0], dim=0)
                # Sample a character from the probability distribution
                last_char = int_to_char[torch.multinomial(prob, 1).item()]
                pieces.append(last_char)
    finally:
        # Restore whichever mode the caller had the model in
        model.train(was_training)
    return ''.join(pieces)

In [None]:
# Download the tiny Shakespeare dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait so a stalled connection cannot hang the notebook forever
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error rather than training on an error page's body
response.raise_for_status()
# Store the text data as a string
text = response.text

In [None]:
# Build the vocabulary as a sorted list of the unique characters.
# Iterating a bare set of strings can yield a different order in every
# interpreter session, which would change the character <-> index mapping from
# run to run; sorting pins the mapping down.
chars = sorted(set(text))
# Create a character to index mapping
char_to_int = {c: i for i, c in enumerate(chars)}
# Create the inverse index to character mapping from the same ordered list
int_to_char = dict(enumerate(chars))

# Hyperparameters
seq_length = 50      # characters per training chunk
hidden_size = 128    # width of the RNN hidden state
vocab_size = len(chars)
# Convert the text data to integer format using the char_to_int mapping
text_int = np.array([char_to_int[c] for c in text])

# Setting the device to CPU
device = torch.device('cpu')

# Initialize the model, and set it to use the CPU
model = CharRNN(vocab_size, hidden_size)
model.to(device)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert the integer text data to a PyTorch tensor of 64-bit indices
# (one-hot encoding inside the model needs an integer "long" tensor)
text_int_tensor = torch.tensor(text_int, dtype=torch.long, device=device)
# Initialize the hidden state: (num_layers=1, batch=1, hidden_size)
hidden = torch.zeros(1, 1, hidden_size, device=device)

In [None]:
# Training loop
for epoch in range(10):
    print(f"Epoch {epoch+1}")
    # Make sure the model is in training mode at the start of every epoch
    model.train()
    # Start each epoch from a zero hidden state: the state left over from the
    # end of the text (or from a previous run of this cell) says nothing about
    # the beginning of the text, and resetting makes the cell safe to re-run.
    hidden = torch.zeros(1, 1, hidden_size).to(device)
    # Loop through the text data in chunks of size seq_length
    for i in range(0, len(text_int) - seq_length, seq_length):
        # Prepare input and target sequences; the target is the input shifted
        # one character ahead, and unsqueeze(0) adds a batch dimension of 1
        inputs = text_int_tensor[i:i+seq_length].unsqueeze(0)
        targets = text_int_tensor[i+1:i+seq_length+1].unsqueeze(0)

        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass; detach() carries the hidden state forward without
        # backpropagating into earlier chunks
        outputs, hidden = model(inputs, hidden.detach())
        # Compute the loss. Drop only the batch dimension: an argument-less
        # squeeze() would also collapse the sequence axis if seq_length were 1
        loss = criterion(outputs.squeeze(0), targets.squeeze(0))
        # Backward pass
        loss.backward()
        # Update the weights
        optimizer.step()

        # Print training progress and generate text every 10000 characters
        # (i is the character offset into the text, not an update count)
        if i % 10000 == 0:
            print(f"Epoch {epoch+1}, step {i}, Loss: {loss.item()}")
            print(generate_text(model, char_to_int, int_to_char, device, start='', length=200))
            print("-" * 50)
            # Sampling runs the model in evaluation mode; make sure training
            # mode is active again before the next optimisation step
            model.train()

Epoch 1
Epoch 1, step 0, Loss: 4.184510231018066
xJ'
oAwkY,KoUyHzISUA,bUVqZdRIbjs HHhVyfa,sBWgiWoY.m?ISWoNNmzi&Xtdy,sX!doE.-.dOEaIKPc,tE?tD'qk
oDkgjmIERoCOLW?lGO.hbfYCG mUL:UPqtGMtSpq:AtoPTzRWhjhmUClZr3lPxKnoCN,iM.g&3lIPnAmXORNCkHQV LLzIgmvZXNPse
T-q
--------------------------------------------------
Epoch 1, step 10000, Loss: 3.015928030014038
xDInoe nN
ikthhriF coeanh ea uns TehetgelceSttyn  aosyvslhwk ,teneium'lyi dyneofheteurdo ouen er guneltorene
tto r
sn theeune hU?s'tFl siSt ' hwt mot d 'aeeetsnogtr Tladkd
esihhmMeilwnsikt i :eose ! md
--------------------------------------------------
Epoch 1, step 20000, Loss: 2.5597426891326904
j, uoee:d
nAdyd:t Iad yr y u 'd gitk 
re sFs,
 burEIAI oaydenIeruss tblt ,Iy
'su'rI.
s
ISLoXned

lVlarvh ur 
  CoM. ,s wounii , 
i rairhi hent n sn L y ur irs,


V&xZea; b,mcIsot  mI.fc che!r
eogo
s or
--------------------------------------------------
Epoch 1, step 30000, Loss: 2.4646565914154053
m!
!
KoLe Gy c
Mleuem t.tldodd, Phi
s;
wer uhe da' wahe

KeyboardInterrupt: ignored