In [1]:
# Load Shakespeare's text
with open('shake.txt', 'r') as f:
    text = f.read()

In [2]:
text[:1000]

"First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us kill him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be done: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citizens, the patricians good.\nWhat authority surfeits on would relieve us: if they\nwould yield us but the superfluity, while it were\nwholesome, we might guess they relieved us humanely;\nbut they think we are too dear: the leanness that\nafflicts us, the object of our misery, is as an\ninventory to particularise their abundance; our\nsufferance is a gain to them Let us revenge this with\nour pikes, ere we become rakes: for the gods know I\nspeak this in hunger 

In [3]:
text = text.lower().replace('\n', ' ').replace('$', '').replace('&', '').replace('3', '')

In [4]:
text[:1000]

"first citizen: before we proceed any further, hear me speak.  all: speak, speak.  first citizen: you are all resolved rather to die than to famish?  all: resolved. resolved.  first citizen: first, you know caius marcius is chief enemy to the people.  all: we know't, we know't.  first citizen: let us kill him, and we'll have corn at our own price. is't a verdict?  all: no more talking on't; let it be done: away, away!  second citizen: one word, good citizens.  first citizen: we are accounted poor citizens, the patricians good. what authority surfeits on would relieve us: if they would yield us but the superfluity, while it were wholesome, we might guess they relieved us humanely; but they think we are too dear: the leanness that afflicts us, the object of our misery, is as an inventory to particularise their abundance; our sufferance is a gain to them let us revenge this with our pikes, ere we become rakes: for the gods know i speak this in hunger for bread, not in thirst for revenge. 

In [5]:
vocab = sorted(set(text))
print(len(vocab))

35


In [6]:
char_to_index = {char: idx for idx, char in enumerate(vocab)}

In [7]:
index_to_char = {idx: char for idx, char in enumerate(vocab)}

In [8]:
char_to_index

{' ': 0,
 '!': 1,
 "'": 2,
 ',': 3,
 '-': 4,
 '.': 5,
 ':': 6,
 ';': 7,
 '?': 8,
 'a': 9,
 'b': 10,
 'c': 11,
 'd': 12,
 'e': 13,
 'f': 14,
 'g': 15,
 'h': 16,
 'i': 17,
 'j': 18,
 'k': 19,
 'l': 20,
 'm': 21,
 'n': 22,
 'o': 23,
 'p': 24,
 'q': 25,
 'r': 26,
 's': 27,
 't': 28,
 'u': 29,
 'v': 30,
 'w': 31,
 'x': 32,
 'y': 33,
 'z': 34}

In [9]:
len(text)

1115363

In [10]:
text = text[:100000]

In [11]:
vectorised_text = []
for i in text:
    vec = [0] * 35
    vec[char_to_index[i]] = 1
    vectorised_text.append(vec)

In [12]:
import numpy as np
vectorised_text = np.array(vectorised_text)

In [13]:
len(vectorised_text)

100000

In [19]:
sequence_length = 20
sequences = []
targets = []
for i in range(len(vectorised_text)-sequence_length):
    sequences.append(vectorised_text[i:i+sequence_length])
    targets.append(vectorised_text[i+sequence_length])

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim

In [21]:
sequences_np = np.array(sequences)
sequences_tensor = torch.tensor(sequences_np, dtype=torch.float32)

In [22]:
targets_tensor = torch.tensor(targets, dtype=torch.float32)

In [23]:
# Define the RNN model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)  # out: (batch_size, seq_length, hidden_size)
        out = out[:, -1, :]  # Get the last time step
        out = self.fc(out)  # Fully connected layer
        return out

In [34]:
# Hyperparameters
input_size = 35  # Size of one-hot encoding
hidden_size = 128  # Number of LSTM units
output_size = 35  # Same as input size for one-hot encoding
num_epochs = 250
batch_size = 32
learning_rate = 0.001

model = RNNModel(input_size, hidden_size, output_size)
criterion = nn.BCEWithLogitsLoss()  # Use BCEWithLogits for one-hot targets
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [35]:
print(sequences_tensor.shape)
print(targets_tensor.shape)

sequences_tensor = sequences_tensor.to('cuda')
targets_tensor = targets_tensor.to('cuda')

torch.Size([99980, 20, 35])
torch.Size([99980, 35])


In [36]:
from torch.utils.data import DataLoader, TensorDataset
dataset = TensorDataset(sequences_tensor, targets_tensor)

batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
model.to('cuda')
checkpoint_dir = './checkpoints/'

# Training loop
loss_log = []
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch_sequences, batch_targets in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_sequences)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()  # Accumulate loss for the epoch

    avg_loss = epoch_loss / len(dataloader)
    loss_log.append(avg_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

    if (epoch + 1) % 50 == 0:
        checkpoint_path = f'{checkpoint_dir}model_epoch_{epoch+1}.pth'
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss,
        }, checkpoint_path)
        print(f'Checkpoint saved at epoch {epoch+1}')

Epoch [1/250], Loss: 0.1139
Epoch [2/250], Loss: 0.0931
Epoch [3/250], Loss: 0.0879
Epoch [4/250], Loss: 0.0850
Epoch [5/250], Loss: 0.0827
Epoch [6/250], Loss: 0.0805
Epoch [7/250], Loss: 0.0786
Epoch [8/250], Loss: 0.0769
Epoch [9/250], Loss: 0.0754
Epoch [10/250], Loss: 0.0741
Epoch [11/250], Loss: 0.0729
Epoch [12/250], Loss: 0.0719
Epoch [13/250], Loss: 0.0709
Epoch [14/250], Loss: 0.0701
Epoch [15/250], Loss: 0.0693
Epoch [16/250], Loss: 0.0686
Epoch [17/250], Loss: 0.0679
Epoch [18/250], Loss: 0.0673
Epoch [19/250], Loss: 0.0668
Epoch [20/250], Loss: 0.0663
Epoch [21/250], Loss: 0.0658
Epoch [22/250], Loss: 0.0653
Epoch [23/250], Loss: 0.0649
Epoch [24/250], Loss: 0.0645
Epoch [25/250], Loss: 0.0642
Epoch [26/250], Loss: 0.0638
Epoch [27/250], Loss: 0.0635
Epoch [28/250], Loss: 0.0631
Epoch [29/250], Loss: 0.0628
Epoch [30/250], Loss: 0.0625
Epoch [31/250], Loss: 0.0623
Epoch [32/250], Loss: 0.0620
Epoch [33/250], Loss: 0.0617
Epoch [34/250], Loss: 0.0615
Epoch [35/250], Loss: 0

In [38]:
# After 100 epochs, 0.0458 is the loss
# After 250 epochs 0.0408 is the loss

In [40]:
# loss_log[-5:]
loss_log_file = './loss_log.txt'
with open(loss_log_file, 'w') as f:
    for epoch, loss in enumerate(loss_log):
        f.write(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss:.4f}\n')

print(f'Loss log saved to {loss_log_file}')

Loss log saved to ./loss_log.txt


In [47]:
# import seaborn as sns
# import pandas as pd

# # Convert loss_log to a pandas Series or DataFrame for better handling
# loss_series = pd.Series(loss_log)

# # Plot using Seaborn
# sns.lineplot(data=loss_series)
# plt.title('Loss Over Iterations')
# plt.xlabel('Iterations')
# plt.ylabel('Loss')
# plt.show()

In [59]:
checkpoint = torch.load('checkpoints/model_epoch_200.pth', weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [60]:
def string_to_tensor(string):
    vectorised_start = []
    for i in string:
        vec = [0] * 35
        vec[char_to_index[i]] = 1
        vectorised_start.append(vec)
    for i in range(20 - len(string)):
        padding = [0] * 35
        vectorised_start.insert(0, padding)
    vectorised_start = np.array(vectorised_start)
    start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
    start_encoded_tensor = start_encoded_tensor.unsqueeze(0)
    return vectorised_start, start_encoded_tensor

In [65]:
import torch.nn.functional as F
model.to('cpu')
def get_n_next_char(tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(tensor)
    probs = F.softmax(outputs, dim=-1)
    predicted_index = torch.argmax(probs, dim=-1).item()
    predicted_char = index_to_char[predicted_index]
    print(f"Predicted character: {predicted_char}")
    return predicted_index, predicted_char

generated = ""
vectorised_start, start_encoded_tensor = string_to_tensor('once upon a time a ')

for i in range(1000):
    model.eval()
    with torch.no_grad():
        outputs = model(start_encoded_tensor)
    probs = F.softmax(outputs, dim=-1)

    # Get the index of the highest probability
    predicted_index = torch.argmax(probs, dim=-1).item()

    # Get the predicted character
    predicted_char = index_to_char[predicted_index]

    # print(f"Predicted character: {predicted_char}")
    # print("First character of string: ",index_to_char[torch.argmax(start_encoded_tensor[0][0], dim=-1).item()])
    generated += predicted_char

    new_vec = [0] * 35
    new_vec[predicted_index] = 1
    vectorised_start = np.append(vectorised_start[1:], [new_vec], axis=0)

    start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
    start_encoded_tensor = start_encoded_tensor.unsqueeze(0)

In [66]:
generated

"gon; and for used your never mulds.  sicinius: i wist be sarect and the greatneaters tenthers of mine to the gader a mull that would be consuly: you have beg as your hast, that would be menerian in rave the gods and be not as your pridentss of all the ever were here them all the people, the grangers on the market-place.  marcius: that in the former'd he hath smuld to love him so the bears and the carring.  menenius: the romer'd, when you are nay, which he was deserve to the people, my country of the wans to marcius, the people deserve the godst with me, sarm as my mother aufidius not but the gods; and that then? do not be not but my words of them.  all: which elainty, when it within thing turnels,' the reat with exirh they shall-- beliend him to such a still he did me deed i am me so now, where i think us the worthing the honour good their rome.  volumnia: i do beain the made him that had work he will be the stredpled: you shall be sarent he dishilous marcius, are the tribunes of the 

In [67]:
import torch.nn.functional as F

def get_next_char(tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(tensor)
    probs = F.softmax(outputs, dim=-1)
    predicted_index = torch.argmax(probs, dim=-1).item()
    predicted_char = index_to_char[predicted_index]
    print(f"Predicted character: {predicted_char}")
    return predicted_index, predicted_char

In [75]:
_, x= string_to_tensor("hear")
y = get_next_char(x)
print(y)

Predicted character: i
(17, 'i')


In [91]:
# THE FOLLOWING ARE SMALL TESTS

In [45]:
import numpy as np

start = "once upon a time an "
print("Length of start string:",len(start))
vectorised_start = []
for i in start:
    vec = [0] * 35
    vec[char_to_index[i]] = 1
    vectorised_start.append(vec)
for i in range(20 - len(start)):
    padding = [0] * 35
    vectorised_start.insert(0, padding)

vectorised_start = np.array(vectorised_start)

print("Vectorised start string shape:",vectorised_start.shape)

start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
start_encoded_tensor = start_encoded_tensor.unsqueeze(0)

print("Tensor start string shape:",start_encoded_tensor.shape)

Length of start string: 20
Vectorised start string shape: (20, 35)
Tensor start string shape: torch.Size([1, 20, 35])


In [82]:
# To get the character from the tensor
pos = -1
index_to_char[torch.argmax(start_encoded_tensor[0][pos], dim=-1).item()]

' '

In [None]:
model.eval()
with torch.no_grad():
    outputs = model(start_encoded_tensor)

In [None]:
outputs

tensor([[-1.8052, -2.2018, -2.3272, -2.0821, -1.4322, -1.8945, -1.9367, -2.2215,
         -1.8308, -1.4844, -1.7142, -1.9555, -1.7564, -1.2421, -2.4002, -2.0735,
         -2.2867, -1.5462, -1.8495, -2.0603, -1.5991, -2.1289, -1.7808, -0.9559,
         -1.4678, -2.1032, -2.1087, -1.8054, -1.6473, -1.4099, -1.6015, -1.9589,
         -1.9077, -1.5520, -2.0096]])

In [None]:
import torch.nn.functional as F

probs = F.softmax(outputs, dim=-1)

# Get the index of the highest probability
predicted_index = torch.argmax(probs, dim=-1).item()

# Get the predicted character
predicted_char = index_to_char[predicted_index]

print(f"Predicted character: {predicted_char}")

Predicted character: o


In [None]:
new_vec = [0] * 35
new_vec[predicted_index] = 1
vectorised_start = np.append(vectorised_start[1:], [new_vec], axis=0)
# To check if same shape
print("Shape of vectorised start string",vectorised_start.shape)

In [104]:
generated = ""
for i in range(30):
    model.eval()
    with torch.no_grad():
        outputs = model(start_encoded_tensor)
    probs = F.softmax(outputs, dim=-1)

    # Get the index of the highest probability
    predicted_index = torch.argmax(probs, dim=-1).item()

    # Get the predicted character
    predicted_char = index_to_char[predicted_index]

    print(f"Predicted character: {predicted_char}")
    print("First character of string: ",index_to_char[torch.argmax(start_encoded_tensor[0][0], dim=-1).item()])
    generated += predicted_char

    new_vec = [0] * 35
    new_vec[predicted_index] = 1
    vectorised_start = np.append(vectorised_start[1:], [new_vec], axis=0)

    start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
    start_encoded_tensor = start_encoded_tensor.unsqueeze(0)

Predicted character:  
First character of string:  o
Predicted character:  
First character of string:  n
Predicted character:  
First character of string:  c
Predicted character:  
First character of string:  e
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  u
Predicted character:  
First character of string:  p
Predicted character:  
First character of string:  o
Predicted character:  
First character of string:  n
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  a
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  t
Predicted character:  
First character of string:  i
Predicted character:  
First character of string:  m
Predicted character:  
First character of string:  e
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  a
Predicted character:  
First character of stri

In [103]:
# SAVE MODEL
torch.save(model.state_dict(), 'shakespeare_loss_0.0461.pth')

In [None]:
# LOAD MODEL
model = RNNModel(input_size, hidden_size, output_size)

# LOAD MODEL PARAMETERS
model.load_state_dict(torch.load('rnnmodel.pth'))