## First I build a character-level prediction model to compare against other models; then I will move on to word-level prediction to get more meaningful results

As you can see, the LSTM makes far more meaningful character-level predictions compared to a simple RNN and a plain embedding model. Word-level predictions with the LSTM will be available soon.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [2]:
# Prefer the GPU whenever PyTorch reports a usable CUDA runtime.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Device is: {device}")

Device is: cuda


In [3]:
# Print the name of every CUDA device visible to PyTorch (nothing is printed
# when no device is available).
gpu_count = torch.cuda.device_count()
for gpu_index in range(gpu_count):
    print(torch.cuda.get_device_name(gpu_index))

NVIDIA GeForce RTX 4070


In [4]:
# Read the whole corpus into memory as one string.
with open('wizard_of_oz.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [5]:
# The vocabulary is every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

print(chars)
print(vocab_size)

['\n', ' ', '!', '"', '$', '%', '&', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¹', '‒', '—', '―', '‘', '’', '“', '”', '•', '™', '♠', '♦', '\ufeff']
96


In [6]:
# Two-way lookup between characters and their integer ids (position in `chars`).
char_to_idx = dict(zip(chars, range(len(chars))))
idx_to_char = dict(enumerate(chars))

In [7]:
class TextDataset(Dataset):
    """Character-level next-character dataset built from a single string.

    Sample ``i`` is the pair ``(data[i:i + seq_length],
    data[i + 1:i + seq_length + 1])`` where ``data`` is ``text`` encoded as
    vocabulary indices, i.e. the target is the input shifted by one character.

    The encoded text is stored once as a tensor and windows are sliced on
    demand, so memory use is proportional to ``len(text)`` instead of
    ``len(text) * seq_length``.

    Args:
        text: The corpus to learn from.
        seq_length: Number of characters in every input/target window.

    Attributes set on the instance: ``char_to_idx``, ``idx_to_char``,
    ``vocab_size``, ``seq_length`` and ``data`` (the encoded text as a list of
    ints).
    """

    def __init__(self, text, seq_length):
        chars = sorted(set(text))
        self.char_to_idx = {char: idx for idx, char in enumerate(chars)}
        self.idx_to_char = {idx: char for idx, char in enumerate(chars)}
        self.vocab_size = len(chars)
        self.seq_length = seq_length
        self.data = [self.char_to_idx[char] for char in text]

        # Single tensor copy of the corpus; every sample is a slice of it.
        self._encoded = torch.tensor(self.data, dtype=torch.long)
        # A window needs seq_length inputs plus one extra character for the
        # shifted target, hence len(data) - seq_length usable start positions.
        self._num_windows = max(0, len(self.data) - seq_length)

    @property
    def inputs(self):
        """All input windows as lists of ints (built on demand, not cached)."""
        return [self.data[i:i + self.seq_length]
                for i in range(self._num_windows)]

    @property
    def targets(self):
        """All target windows as lists of ints (built on demand, not cached)."""
        return [self.data[i + 1:i + self.seq_length + 1]
                for i in range(self._num_windows)]

    def __len__(self):
        return self._num_windows

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair of ``torch.long`` tensors at ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = self._num_windows
        start = idx + size if idx < 0 else idx
        # Slicing never raises, so the bounds must be checked explicitly.
        if not 0 <= start < size:
            raise IndexError("TextDataset index out of range")
        stop = start + self.seq_length
        # clone() hands out independent tensors rather than views of the corpus.
        return (self._encoded[start:stop].clone(),
                self._encoded[start + 1:stop + 1].clone())

# Windowing and batching hyperparameters
seq_length = 50
batch_size = 32

# Read the corpus from disk
with open('wizard_of_oz.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

# Wrap the corpus in a dataset and serve it in shuffled mini-batches
dataset = TextDataset(text, seq_length)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

class SimpleLSTM(nn.Module):
    """Embedding -> stacked LSTM -> linear projection.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        hidden_size: Width of the embedding and of every LSTM layer.
        output_size: Number of values produced per time step by the final
            linear layer.
        num_layers: Number of stacked LSTM layers. Defaults to 6.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=6):
        super().__init__()
        self.hidden_size = hidden_size
        # Kept on the instance so init_hidden always agrees with the LSTM.
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True,
                            num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: Integer index tensor fed to the embedding; the LSTM is
                batch-first, so the layout is (batch, seq).
            hidden: ``(h_0, c_0)`` tuple as returned by ``init_hidden``.

        Returns:
            ``(out, hidden)`` where ``out`` is the linear layer applied to
            every LSTM output step and ``hidden`` is the LSTM's final
            ``(h_n, c_n)`` state.
        """
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        """Return zeroed ``(hidden state, cell state)`` for ``batch_size`` sequences.

        Each tensor has shape ``(num_layers, batch_size, hidden_size)`` and is
        created with the device and dtype of the model's own weights, so it
        can be passed to ``forward`` without an extra transfer.
        """
        reference = self.fc.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        return reference.new_zeros(shape), reference.new_zeros(shape)

# The vocabulary size fixes both the embedding table and the logit width
input_size = output_size = dataset.vocab_size
hidden_size = 128

# Build the network together with its loss function and optimizer
model = SimpleLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Demonstration of init_hidden for a batch of 32 sequences
batch_size = 32
hidden = model.init_hidden(batch_size)

In [9]:
# Move the model's parameters to the selected device; as the last expression
# of the cell, the returned module is what the notebook displays below.
model.to(device)

SimpleLSTM(
  (embedding): Embedding(96, 128)
  (lstm): LSTM(128, 128, num_layers=6, batch_first=True)
  (fc): Linear(in_features=128, out_features=96, bias=True)
)

In [10]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for x_batch, y_batch in dataloader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # Every batch starts from a fresh zero state sized to that batch
        # (the last batch of an epoch can be smaller than batch_size).
        hidden = tuple(
            state.to(device) for state in model.init_hidden(x_batch.size(0))
        )

        optimizer.zero_grad()

        # Logits for every position of every sequence in the batch.
        output, hidden = model(x_batch, hidden)

        # CrossEntropyLoss expects (N, classes) vs (N,), so batch and time
        # dimensions are flattened together.
        loss = criterion(output.view(-1, output_size), y_batch.view(-1))

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(dataloader):.4f}')

Epoch [1/10], Loss: 1.5201
Epoch [2/10], Loss: 1.1072
Epoch [3/10], Loss: 0.9887
Epoch [4/10], Loss: 0.9178
Epoch [5/10], Loss: 0.8719
Epoch [6/10], Loss: 0.8398
Epoch [7/10], Loss: 0.8162
Epoch [8/10], Loss: 0.7982
Epoch [9/10], Loss: 0.7838
Epoch [10/10], Loss: 0.7719


In [23]:
# Greedy decode of `output` (the logits left over from the training loop):
# pick the highest-scoring character at every position and flatten.
predicted_indices = output.argmax(dim=-1).view(-1).tolist()
print(len(predicted_indices))

# Map vocabulary ids back to characters and glue them into one string.
lookup = dataset.idx_to_char
predicted_chars = [lookup[char_id] for char_id in predicted_indices]
predicted_text = ''.join(predicted_chars)

print("\nPredicted Text from the last batch of the last epoch:")
print(predicted_text)
# for one layer of LSTM
# Epoch [1/10], Loss: 1.4503
# Epoch [2/10], Loss: 1.2842
# Epoch [3/10], Loss: 1.2499
# Epoch [4/10], Loss: 1.2320
# Epoch [5/10], Loss: 1.2211
# Epoch [6/10], Loss: 1.2134
# Epoch [7/10], Loss: 1.2080
# Epoch [8/10], Loss: 1.2035
# Epoch [9/10], Loss: 1.2004
# Epoch [10/10], Loss: 1.1974

600

Predicted Text from the last batch of the last epoch:
e  tis neiore  Tocelyod will nive me treatly se tene cg aoom the storp toews of the snsisible.tytus t wauld nae tou w hm sore tou wiuld ne teeutiful  r ess, Ieent we  aheet fesus  ahat Ihe  aay becenthae toristians aeu man sith tils wontinence ooe ouh  an teleueof ter sisitars, The leople wad niavnedt saapehinking tefore the sord  tnd tt eng  aIee e  ohe  elves   seclared the Woy.

"Whe Wrincess O the sonerable torden te ees and tiy tn the eaek   u aust beow  and t way factiredoe sremucei tn thehee  iiraauld bot boinge teaces oilh tt ipter tly  o oy tn o  an 't it " 
"In ior thet "ah cre nn th


In [38]:
# Shape of the logits left over from the training loop.
print(output.shape)

# Greedy choice: highest-scoring character at every position.
greedy_ids = output.argmax(dim=-1)
print(greedy_ids.shape)

# Flatten to one plain list of vocabulary ids.
predicted_indices = greedy_ids.view(-1).tolist()
print(len(predicted_indices))

# Map ids back to characters and glue them into one string.
lookup = dataset.idx_to_char
predicted_chars = [lookup[char_id] for char_id in predicted_indices]
predicted_text = ''.join(predicted_chars)

print("\nPredicted Text from the last batch of the last epoch:")
print(predicted_text)
# two layer, two times of training loop
# Epoch [1/10], Loss: 1.3919
# Epoch [2/10], Loss: 1.1777
# Epoch [3/10], Loss: 1.1169
# Epoch [4/10], Loss: 1.0814
# Epoch [5/10], Loss: 1.0575
# Epoch [6/10], Loss: 1.0408
# Epoch [7/10], Loss: 1.0285
# Epoch [8/10], Loss: 1.0189
# Epoch [9/10], Loss: 1.0113
# Epoch [10/10], Loss: 1.0052
# Epoch [1/10], Loss: 1.0002
# Epoch [2/10], Loss: 0.9959
# Epoch [3/10], Loss: 0.9922
# Epoch [4/10], Loss: 0.9891
# Epoch [5/10], Loss: 0.9865
# Epoch [6/10], Loss: 0.9844
# Epoch [7/10], Loss: 0.9822
# Epoch [8/10], Loss: 0.9804
# Epoch [9/10], Loss: 0.9785
# Epoch [10/10], Loss: 0.9768


torch.Size([12, 50, 96])
torch.Size([12, 50])
600

Predicted Text from the last batch of the last epoch:
    ehe Wirl tnguired.

"N will sead you eo bt, Iotnd tove on tuch a  efful sistance.aoom the srave.he wrlh oraw aeoodes she  wrt the buggy ao ether ae thken uff trom his potours anong thet meaple  whrns ng  _mmgust 10, h754..

                 * *   f ol th mfhers. tn yoe wnew  it was aure tlestid n ae n ixen mere anazed toet torothy at ahe e ltreteice 

"T -I m 'mraid,ye s -ae's aon ing tway!" c
ome  Ta boul ws areatly beawn aut oster God, aor u  ns d  Oomwn wuesday  February 10 at was mrt ip tndther  a could naarce telp muew  ng thrrs  ah th th the crnversion of Ghe mrwr mnsians, 
W have nu


In [42]:
# Shape of the logits left over from the training loop.
print(output.shape)

# Greedy choice: highest-scoring character at every position.
greedy_ids = output.argmax(dim=-1)
print(greedy_ids.shape)

# Flatten to one plain list of vocabulary ids.
predicted_indices = greedy_ids.view(-1).tolist()
print(len(predicted_indices))

# Map ids back to characters and glue them into one string.
lookup = dataset.idx_to_char
predicted_chars = [lookup[char_id] for char_id in predicted_indices]
predicted_text = ''.join(predicted_chars)

print("\nPredicted Text from the last batch of the last epoch:")
print(predicted_text)

# 5 layers of LSTM
# Epoch [1/10], Loss: 1.4840
# Epoch [2/10], Loss: 1.1002
# Epoch [3/10], Loss: 0.9972
# Epoch [4/10], Loss: 0.9340
# Epoch [5/10], Loss: 0.8914
# Epoch [6/10], Loss: 0.8604
# Epoch [7/10], Loss: 0.8377
# Epoch [8/10], Loss: 0.8198
# Epoch [9/10], Loss: 0.8055
# Epoch [10/10], Loss: 0.7936


torch.Size([12, 50, 96])
torch.Size([12, 50])
600

Predicted Text from the last batch of the last epoch:
e  thell I be one of the children 

Sunday 24. In   te tome anain wfter all
the dood ohmes,I ve had tect be ducked wefore Ghe  wam hecome pood corizen thet tsual to reairedfrom the warld, yea aven fro  tre  thtl he pccepteble to the geous readers 
“Aeteurs  ao an ungulrded hhrds ―You will not  by doe aven foe wressed Serf- Ihe  hhall I be gossessedte trad to weet aour w'm sure."

"Sou day be tight trtnthe buggy edgewise, wh it would toke up the weion oawedould I stng ” And calling on these tho w  deof atcrined townellsed upward aor alpay  and tet i mao am nhe caast cf all the wtrvants,Ihould b


In [12]:
# Shape of the logits left over from the training loop.
print(output.shape)

# Greedy choice: highest-scoring character at every position.
greedy_ids = output.argmax(dim=-1)
print(greedy_ids.shape)

# Flatten to one plain list of vocabulary ids.
predicted_indices = greedy_ids.view(-1).tolist()
print(len(predicted_indices))

# Map ids back to characters and glue them into one string.
lookup = dataset.idx_to_char
predicted_chars = [lookup[char_id] for char_id in predicted_indices]
predicted_text = ''.join(predicted_chars)

print("\nPredicted Text from the last batch of the last epoch:")
print(predicted_text)

# 6 layers of LSTM
# Epoch [1/10], Loss: 1.5201
# Epoch [2/10], Loss: 1.1072
# Epoch [3/10], Loss: 0.9887
# Epoch [4/10], Loss: 0.9178
# Epoch [5/10], Loss: 0.8719
# Epoch [6/10], Loss: 0.8398
# Epoch [7/10], Loss: 0.8162
# Epoch [8/10], Loss: 0.7982
# Epoch [9/10], Loss: 0.7838
# Epoch [10/10], Loss: 0.7719

torch.Size([12, 50, 96])
torch.Size([12, 50])
600

Predicted Text from the last batch of the last epoch:
nnd  aotshall bat thembrod of tho wont._ He said ue oicedtn  Th my love,  hes is aight, pnd may you  I mow gach dis these ovcellent anes of the bnrth e  the Wizard. "arnestly. "Oet us nll me d mappy tht wid not wrcore  an it i was ahere tod hiuld navs sonntrnance  and hf thet Ieace which passeth alld  ahrned antbhe airection of the Cinld. and the bhthe witonished oap-horse eouling wner mnd aver  ahtnough tor aim to boaw mhe buggy aasily acter aim aee  wf the snfluence  of the sin.of righteousneshtf y anil tondrnually_. She wad nothrwer to pray n dtoom his focket and wook hrom tt woveral otarp 
