In [52]:
# Load Shakespeare's text
with open('shake.txt', 'r') as f:
    text = f.read()

In [53]:
text



In [54]:
text = text.lower().replace('\n', ' ').replace('$', '').replace('&', '').replace('3', '')

In [55]:
text



In [56]:
vocab = sorted(set(text))
print(len(vocab))

35


In [57]:
char_to_index = {char: idx for idx, char in enumerate(vocab)}

In [58]:
index_to_char = {idx: char for idx, char in enumerate(vocab)}

In [59]:
char_to_index

{' ': 0,
 '!': 1,
 "'": 2,
 ',': 3,
 '-': 4,
 '.': 5,
 ':': 6,
 ';': 7,
 '?': 8,
 'a': 9,
 'b': 10,
 'c': 11,
 'd': 12,
 'e': 13,
 'f': 14,
 'g': 15,
 'h': 16,
 'i': 17,
 'j': 18,
 'k': 19,
 'l': 20,
 'm': 21,
 'n': 22,
 'o': 23,
 'p': 24,
 'q': 25,
 'r': 26,
 's': 27,
 't': 28,
 'u': 29,
 'v': 30,
 'w': 31,
 'x': 32,
 'y': 33,
 'z': 34}

In [60]:
len(text)

1115363

In [61]:
text = text[:100000]

In [62]:
vectorised_text = []
for i in text:
    vec = [0] * 35
    vec[char_to_index[i]] = 1
    vectorised_text.append(vec)

In [63]:
import numpy as np
vectorised_text = np.array(vectorised_text)

In [64]:
len(vectorised_text)

100000

In [65]:
sequence_length = 20
sequences = []
targets = []
for i in range(len(vectorised_text)-sequence_length):
    sequences.append(vectorised_text[i:i+sequence_length])
    targets.append(vectorised_text[i+sequence_length])

In [66]:
import torch
import torch.nn as nn
import torch.optim as optim

In [67]:
sequences_tensor = torch.tensor(sequences, dtype=torch.float32)

In [68]:
targets_tensor = torch.tensor(targets, dtype=torch.float32)

In [69]:
# Define the RNN model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)  # out: (batch_size, seq_length, hidden_size)
        out = out[:, -1, :]  # Get the last time step
        out = self.fc(out)  # Fully connected layer
        return out

In [87]:
# Hyperparameters
input_size = 35  # Size of one-hot encoding
hidden_size = 128  # Number of LSTM units
output_size = 35  # Same as input size for one-hot encoding
num_epochs = 100
batch_size = 32
learning_rate = 0.001

model = RNNModel(input_size, hidden_size, output_size)
criterion = nn.BCEWithLogitsLoss()  # Use BCEWithLogits for one-hot targets
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [75]:
print(sequences_tensor.shape)
print(targets_tensor.shape)

torch.Size([99980, 20, 35])
torch.Size([99980, 35])


In [90]:
from torch.utils.data import DataLoader, TensorDataset
# sequences_tensor.to('cuda')
# targets_tensor.to('cuda')
dataset = TensorDataset(sequences_tensor, targets_tensor)

batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
model.to('cuda')

# Training loop
loss_log = []
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch_sequences, batch_targets in dataloader:
        optimizer.zero_grad()
        batch_sequences = batch_sequences.to('cuda')
        batch_targets = batch_targets.to('cuda')
        outputs = model(batch_sequences)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()  # Accumulate loss for the epoch

    avg_loss = epoch_loss / len(dataloader)
    loss_log.append(avg_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

Epoch [1/100], Loss: 0.1058
Epoch [2/100], Loss: 0.0895
Epoch [3/100], Loss: 0.0848
Epoch [4/100], Loss: 0.0814
Epoch [5/100], Loss: 0.0785
Epoch [6/100], Loss: 0.0762
Epoch [7/100], Loss: 0.0744
Epoch [8/100], Loss: 0.0728
Epoch [9/100], Loss: 0.0714
Epoch [10/100], Loss: 0.0702
Epoch [11/100], Loss: 0.0691
Epoch [12/100], Loss: 0.0682
Epoch [13/100], Loss: 0.0674
Epoch [14/100], Loss: 0.0667
Epoch [15/100], Loss: 0.0660
Epoch [16/100], Loss: 0.0654
Epoch [17/100], Loss: 0.0648
Epoch [18/100], Loss: 0.0643
Epoch [19/100], Loss: 0.0638
Epoch [20/100], Loss: 0.0633
Epoch [21/100], Loss: 0.0629
Epoch [22/100], Loss: 0.0625
Epoch [23/100], Loss: 0.0621
Epoch [24/100], Loss: 0.0617
Epoch [25/100], Loss: 0.0614
Epoch [26/100], Loss: 0.0610
Epoch [27/100], Loss: 0.0607
Epoch [28/100], Loss: 0.0604
Epoch [29/100], Loss: 0.0601
Epoch [30/100], Loss: 0.0597
Epoch [31/100], Loss: 0.0594
Epoch [32/100], Loss: 0.0591
Epoch [33/100], Loss: 0.0589
Epoch [34/100], Loss: 0.0586
Epoch [35/100], Loss: 0

In [77]:
loss_log1 = loss_log

In [92]:
loss_log2 = loss_log

In [94]:
loss_log1[-5:]

[0.053882430852055546,
 0.05357464792966843,
 0.053363215676546095,
 0.053042482329010966,
 0.05283798499345779]

In [42]:
# Training loop
loss_log = []
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    outputs = model(sequences_tensor)
    loss = criterion(outputs, targets_tensor)
    loss.backward()
    optimizer.step()
    loss_log.append(loss.item())
    # if (epoch+1) % 1 == 0:  # Print every 10 epochs
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/50], Loss: 0.6817
Epoch [2/50], Loss: 0.6775
Epoch [3/50], Loss: 0.6732
Epoch [4/50], Loss: 0.6687
Epoch [5/50], Loss: 0.6638
Epoch [6/50], Loss: 0.6583
Epoch [7/50], Loss: 0.6520
Epoch [8/50], Loss: 0.6445
Epoch [9/50], Loss: 0.6351
Epoch [10/50], Loss: 0.6230
Epoch [11/50], Loss: 0.6065
Epoch [12/50], Loss: 0.5831
Epoch [13/50], Loss: 0.5485
Epoch [14/50], Loss: 0.4971
Epoch [15/50], Loss: 0.4273
Epoch [16/50], Loss: 0.3530
Epoch [17/50], Loss: 0.2946
Epoch [18/50], Loss: 0.2551
Epoch [19/50], Loss: 0.2267
Epoch [20/50], Loss: 0.2046
Epoch [21/50], Loss: 0.1865
Epoch [22/50], Loss: 0.1719
Epoch [23/50], Loss: 0.1601
Epoch [24/50], Loss: 0.1507
Epoch [25/50], Loss: 0.1433
Epoch [26/50], Loss: 0.1376
Epoch [27/50], Loss: 0.1330
Epoch [28/50], Loss: 0.1295
Epoch [29/50], Loss: 0.1268
Epoch [30/50], Loss: 0.1247
Epoch [31/50], Loss: 0.1230
Epoch [32/50], Loss: 0.1217
Epoch [33/50], Loss: 0.1206
Epoch [34/50], Loss: 0.1197
Epoch [35/50], Loss: 0.1190
Epoch [36/50], Loss: 0.1184
E

In [47]:
# import seaborn as sns
# import pandas as pd

# # Convert loss_log to a pandas Series or DataFrame for better handling
# loss_series = pd.Series(loss_log)

# # Plot using Seaborn
# sns.lineplot(data=loss_series)
# plt.title('Loss Over Iterations')
# plt.xlabel('Iterations')
# plt.ylabel('Loss')
# plt.show()

In [95]:
def string_to_tensor(string):
    vectorised_start = []
    for i in string:
        vec = [0] * 35
        vec[char_to_index[i]] = 1
        vectorised_start.append(vec)
    for i in range(20 - len(string)):
        padding = [0] * 35
        vectorised_start.insert(0, padding)
    vectorised_start = np.array(vectorised_start)
    start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
    start_encoded_tensor = start_encoded_tensor.unsqueeze(0)
    return start_encoded_tensor

In [99]:
model.to('cpu')
def get_n_next_char(tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(tensor)
    probs = F.softmax(outputs, dim=-1)
    predicted_index = torch.argmax(probs, dim=-1).item()
    predicted_char = index_to_char[predicted_index]
    print(f"Predicted character: {predicted_char}")
    return predicted_index, predicted_char

generated = ""
for i in range(1000):
    model.eval()
    with torch.no_grad():
        outputs = model(start_encoded_tensor)
    probs = F.softmax(outputs, dim=-1)

    # Get the index of the highest probability
    predicted_index = torch.argmax(probs, dim=-1).item()

    # Get the predicted character
    predicted_char = index_to_char[predicted_index]

    print(f"Predicted character: {predicted_char}")
    # print("First character of string: ",index_to_char[torch.argmax(start_encoded_tensor[0][0], dim=-1).item()])
    generated += predicted_char

    new_vec = [0] * 35
    new_vec[predicted_index] = 1
    vectorised_start = np.append(vectorised_start[1:], [new_vec], axis=0)

    start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
    start_encoded_tensor = start_encoded_tensor.unsqueeze(0)

Predicted character: t
Predicted character: h
Predicted character: e
Predicted character: m
Predicted character:  
Predicted character: t
Predicted character: h
Predicted character: a
Predicted character: t
Predicted character:  
Predicted character: i
Predicted character:  
Predicted character: m
Predicted character: a
Predicted character: y
Predicted character:  
Predicted character: u
Predicted character: s
Predicted character:  
Predicted character: h
Predicted character: i
Predicted character: s
Predicted character:  
Predicted character: p
Predicted character: r
Predicted character: e
Predicted character: s
Predicted character: e
Predicted character: n
Predicted character: t
Predicted character: .
Predicted character:  
Predicted character: t
Predicted character: h
Predicted character: e
Predicted character:  
Predicted character: s
Predicted character: t
Predicted character: o
Predicted character: o
Predicted character: d
Predicted character:  
Predicted character: t
Predicted c

In [100]:
generated

"them that i may us his present. the stood to the people, i will hath hear them honours of my love that he hath less than a gentle and not worthy not some men eall the gods of my love.  sicinius: what have hear me be speall's but the state he was his present us his prease, and the change the heart of your hather to the corn too at his preasing them.  brutus: we have been rome on your loves the war the market-plack'd the capito.  menenius: the fither to seem the people and not with all the but not the gods for the war.  coriolanus: what is then were him heart and heart, he is his prease me heard the people most that would have be the market you come, and not speakengn gone, and show them not that the people, the gods power he hath heart, here i will be the man me as from the common that i may for more to stase, how he has be home, i will be some of our reneral ment our tride the people and heart, to the people, and that we have heardons, hourant the people a ret him straind the people a

In [101]:
import torch.nn.functional as F

def get_next_char(tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(tensor)
    probs = F.softmax(outputs, dim=-1)
    predicted_index = torch.argmax(probs, dim=-1).item()
    predicted_char = index_to_char[predicted_index]
    print(f"Predicted character: {predicted_char}")
    return predicted_index, predicted_char

In [102]:
get_next_char(string_to_tensor("this is a test"))

Predicted character:  


(0, ' ')

In [91]:
# THE FOLLOWING ARE SMALL TESTS

In [45]:
import numpy as np

start = "once upon a time an "
print("Length of start string:",len(start))
vectorised_start = []
for i in start:
    vec = [0] * 35
    vec[char_to_index[i]] = 1
    vectorised_start.append(vec)
for i in range(20 - len(start)):
    padding = [0] * 35
    vectorised_start.insert(0, padding)

vectorised_start = np.array(vectorised_start)

print("Vectorised start string shape:",vectorised_start.shape)

start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
start_encoded_tensor = start_encoded_tensor.unsqueeze(0)

print("Tensor start string shape:",start_encoded_tensor.shape)

Length of start string: 20
Vectorised start string shape: (20, 35)
Tensor start string shape: torch.Size([1, 20, 35])


In [82]:
# To get the character from the tensor
pos = -1
index_to_char[torch.argmax(start_encoded_tensor[0][pos], dim=-1).item()]

' '

In [None]:
model.eval()
with torch.no_grad():
    outputs = model(start_encoded_tensor)

In [None]:
outputs

tensor([[-1.8052, -2.2018, -2.3272, -2.0821, -1.4322, -1.8945, -1.9367, -2.2215,
         -1.8308, -1.4844, -1.7142, -1.9555, -1.7564, -1.2421, -2.4002, -2.0735,
         -2.2867, -1.5462, -1.8495, -2.0603, -1.5991, -2.1289, -1.7808, -0.9559,
         -1.4678, -2.1032, -2.1087, -1.8054, -1.6473, -1.4099, -1.6015, -1.9589,
         -1.9077, -1.5520, -2.0096]])

In [None]:
import torch.nn.functional as F

probs = F.softmax(outputs, dim=-1)

# Get the index of the highest probability
predicted_index = torch.argmax(probs, dim=-1).item()

# Get the predicted character
predicted_char = index_to_char[predicted_index]

print(f"Predicted character: {predicted_char}")

Predicted character: o


In [None]:
new_vec = [0] * 35
new_vec[predicted_index] = 1
vectorised_start = np.append(vectorised_start[1:], [new_vec], axis=0)
# To check if same shape
print("Shape of vectorised start string",vectorised_start.shape)

In [104]:
generated = ""
for i in range(30):
    model.eval()
    with torch.no_grad():
        outputs = model(start_encoded_tensor)
    probs = F.softmax(outputs, dim=-1)

    # Get the index of the highest probability
    predicted_index = torch.argmax(probs, dim=-1).item()

    # Get the predicted character
    predicted_char = index_to_char[predicted_index]

    print(f"Predicted character: {predicted_char}")
    print("First character of string: ",index_to_char[torch.argmax(start_encoded_tensor[0][0], dim=-1).item()])
    generated += predicted_char

    new_vec = [0] * 35
    new_vec[predicted_index] = 1
    vectorised_start = np.append(vectorised_start[1:], [new_vec], axis=0)

    start_encoded_tensor =  torch.tensor(vectorised_start, dtype=torch.float32)
    start_encoded_tensor = start_encoded_tensor.unsqueeze(0)

Predicted character:  
First character of string:  o
Predicted character:  
First character of string:  n
Predicted character:  
First character of string:  c
Predicted character:  
First character of string:  e
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  u
Predicted character:  
First character of string:  p
Predicted character:  
First character of string:  o
Predicted character:  
First character of string:  n
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  a
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  t
Predicted character:  
First character of string:  i
Predicted character:  
First character of string:  m
Predicted character:  
First character of string:  e
Predicted character:  
First character of string:   
Predicted character:  
First character of string:  a
Predicted character:  
First character of stri

In [103]:
# SAVE MODEL
torch.save(model.state_dict(), 'shakespeare_loss_0.0461.pth')

In [None]:
# LOAD MODEL
model = RNNModel(input_size, hidden_size, output_size)

# LOAD MODEL PARAMETERS
model.load_state_dict(torch.load('rnnmodel.pth'))