In [16]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import trange, tqdm

In [18]:
# Path of the plain-text corpus the character model is trained on.
filename = "data/macbeth.txt"

# Read the whole corpus into memory; the context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open(filename) as corpus_file:
    text = corpus_file.read()

In [19]:
class TextData(Dataset):
    """Character-level dataset of sliding windows over a text.

    Each item is a pair ``(inputs, targets)`` of ``LongTensor``s holding
    ``chunk_sz - 1`` character indices each; ``targets`` is ``inputs``
    shifted one character to the right within the same window.

    Args:
        text: The corpus to draw windows from.
        chunk_sz: Number of characters in each window.
    """

    def __init__(self, text, chunk_sz):
        super().__init__()
        self.__chunk_sz = chunk_sz
        # The vocabulary holds the lower- and upper-cased forms of every
        # character, plus the raw characters themselves so that a character
        # which is neither of its own case forms (e.g. a title-case letter)
        # can still be encoded.
        self.__chars = sorted(set(text) | set(text.lower()) | set(text.upper()))
        self.__char2idx = {char: i for i, char in enumerate(self.__chars)}
        self.__idx2char = {i: char for i, char in enumerate(self.__chars)}
        self.__text = text

    def encode(self, char):
        """Return the vocabulary index of ``char`` (``KeyError`` if unknown)."""
        return self.__char2idx[char]

    def decode(self, idx):
        """Return the character stored at vocabulary index ``idx``."""
        return self.__idx2char[idx]

    def decode_str(self, idxs):
        """Decode an iterable of indices into a string.

        Elements may be plain integers or zero-dimensional tensors; each one
        is converted with ``int()`` before the lookup.
        """
        return ''.join(self.decode(int(idx)) for idx in idxs)

    def encode_str(self, string):
        """Encode ``string`` into a 1-D ``LongTensor`` of vocabulary indices."""
        return torch.LongTensor([self.encode(char) for char in string])

    def get_vocab_sz(self):
        """Return the number of distinct characters in the vocabulary."""
        return len(self.__chars)

    def __len__(self):
        """Return the number of full windows that fit in the text.

        A text of length ``L`` has ``L - chunk_sz + 1`` windows; the result is
        clamped at zero so a text shorter than one window gives an empty
        dataset instead of making ``len()`` raise on a negative value.
        """
        return max(0, len(self.__text) - self.__chunk_sz + 1)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` pair for the window starting at ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is out of range. This also lets plain
                iteration over the dataset terminate.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"sample index {idx} out of range for {n_samples} samples")
        sample = self.__text[idx : idx + self.__chunk_sz]
        return (self.encode_str(sample[:-1]),
                self.encode_str(sample[1:]))

In [20]:
# Number of (input, target) sequence pairs per training batch.
batch_sz = 256

In [21]:
# Cut the corpus into overlapping 50-character windows and serve them in
# shuffled mini-batches.
dataset = TextData(text=text, chunk_sz=50)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_sz,
    shuffle=True,
)

In [22]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [23]:
class LSTM(nn.Module):
    """Embedding -> multi-layer LSTM -> linear projection onto the vocabulary.

    Args:
        num_layers: Number of stacked LSTM layers.
        hidden_sz: Size of the LSTM hidden and cell states.
        vocab_sz: Number of distinct tokens (embedding rows / output scores).
        emb_dim: Dimensionality of the token embeddings.
    """

    def __init__(self, num_layers, hidden_sz, vocab_sz, emb_dim):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_sz = hidden_sz
        # Attribute names and construction order are kept as-is: they define
        # the state_dict keys of saved checkpoints and the order in which
        # parameters are randomly initialised.
        self.__embedding = nn.Embedding(vocab_sz, emb_dim)
        # Inter-layer dropout has no effect with a single layer and makes
        # PyTorch emit a warning, so it is only requested for stacked LSTMs.
        self.__lstm = nn.LSTM(emb_dim, hidden_sz, num_layers, batch_first=True,
                              dropout=0.2 if num_layers > 1 else 0.0)
        self.__linear = nn.Linear(hidden_sz, vocab_sz)

    def forward(self, x, h=None, c=None):
        """Score the next token at every position of a batch of sequences.

        Args:
            x: Integer token indices of shape ``(batch, seq)``.
            h: Initial hidden state of shape ``(num_layers, batch, hidden_sz)``;
                zeros are used when omitted.
            c: Initial cell state of the same shape; zeros are used when omitted.

        Returns:
            A tuple ``(scores, (h, c))`` where ``scores`` has shape
            ``(batch * seq, vocab_sz)`` and ``(h, c)`` is the final LSTM state.
        """
        x = self.__embedding(x)
        if h is None or c is None:
            # Build the missing state(s) on the same device/dtype as the
            # embedded input so callers do not have to.
            state_shape = (self.num_layers, x.shape[0], self.hidden_sz)
            if h is None:
                h = x.new_zeros(state_shape)
            if c is None:
                c = x.new_zeros(state_shape)
        x, (h, c) = self.__lstm(x, (h, c))
        # Merge the batch and time axes so each row holds one position's features.
        x = x.reshape(-1, self.hidden_sz)
        x = self.__linear(x)
        return x, (h, c)

In [24]:
# Build the network with one output score per vocabulary entry, then move
# its parameters to the selected device.
model = LSTM(
    num_layers=5,
    hidden_sz=300,
    vocab_sz=dataset.get_vocab_sz(),
    emb_dim=100,
)
model = model.to(device)

In [25]:
# Adam with its default hyperparameters updates every model parameter.
optim = Adam(model.parameters())
# Cross-entropy between the per-position scores and the target indices.
loss_fn = nn.CrossEntropyLoss()

In [26]:
# Number of full passes over the training data.
epochs = 25

In [27]:
# Train for `epochs` passes, recording the loss of every batch in `loss_hist`
# and showing the mean loss of the finished epoch on the progress bar.
erange = trange(epochs)
loss_hist = []
model.train()  # enable dropout; nothing in the loop switches the mode back
for i in erange:
    running_loss = 0.0
    runs = 0
    for batch_X, batch_Y in dataloader:
        batch_X = batch_X.to(device)
        batch_Y = batch_Y.to(device)
        optim.zero_grad()
        # Every batch starts from a zero state; the last batch of an epoch
        # may be smaller, so the state is sized from the actual batch.
        h = torch.zeros((model.num_layers, len(batch_X), model.hidden_sz), device=device)
        c = torch.zeros_like(h)
        out = model(batch_X, h, c)[0]
        loss = loss_fn(out, batch_Y.reshape(-1))
        loss.backward()
        optim.step()
        # Store plain floats: keeping the loss tensors themselves would hold
        # on to autograd history and device memory for the whole run.
        loss_value = loss.item()
        loss_hist.append(loss_value)
        running_loss += loss_value
        runs += 1
    if runs:  # an empty loader would otherwise divide by zero
        erange.set_description(f'loss: {running_loss/runs:.4f}')

  0%|          | 0/25 [00:00<?, ?it/s]

In [28]:
# Move the weights to the CPU before saving so the checkpoint holds CPU tensors.
model.to('cpu')
# Create the target directory first: a missing folder would otherwise make
# the save fail after the whole training run has completed.
os.makedirs('pretrained', exist_ok=True)
torch.save(model.state_dict(), 'pretrained/model.pth')

In [29]:
# Restore the saved weights. `map_location` makes the checkpoint load onto
# the CPU regardless of the device its tensors were saved from; the model
# is moved to the working device afterwards.
model.load_state_dict(torch.load('pretrained/model.pth', map_location='cpu'))
model.to(device)

LSTM(
  (_LSTM__embedding): Embedding(68, 100)
  (_LSTM__lstm): LSTM(100, 300, num_layers=5, batch_first=True, dropout=0.2)
  (_LSTM__linear): Linear(in_features=300, out_features=68, bias=True)
)

In [31]:
# Greedy text generation: feed the prompt once, then repeatedly feed the
# most likely next character back in while carrying the LSTM state forward.
start_text = "This"
n = 1000

encoded = dataset.encode_str(start_text).unsqueeze(0).to(device)
output = []
# Switch to inference mode: the model is otherwise still in training mode,
# which leaves dropout active while sampling.
model.eval()
with torch.no_grad():
    h = torch.zeros((model.num_layers, 1, model.hidden_sz), device=device)
    c = torch.zeros_like(h)
    for i in range(n):
        out, (h, c) = model(encoded, h, c)
        # The last row holds the scores for the character after the newest input.
        out = torch.argmax(out[-1])
        # Reshape the scalar index to (batch=1, seq=1) without leaving the device.
        encoded = out.view(1, 1)
        output.append(out)

print(start_text + dataset.decode_str(output))

This the Sytor'd

   Seyw. Euery to my blood, this Rather,
Thou seest the Heauens, as they made their prechence.
Cousins, a most alle,
Which was powre in meet, and vnfolt.
The which must consent:
Where is the from them: hole of the Royall Donaland
With a mast of Thrancar, as a day againe
Goues, the pleasure of the time
With vs in Rings. Thou soysones
That truts then speake: the dayy aboulce, and our,
By his the Loues, and what crayse
Compell in the Moon'd?
  Lenox. Make crame our thine poore Mancolme, heare it be with him, but constrained thinke

   Banq. This Guest of Summer,
As basis buried; in forth, in many and fraind,
Then the Tyrants ha's mabord: they stare abiuile haue all:
And be this dead

   Macb. If Chance will haue me King:
I ha's to make them

   Banq. That trusted home,
Might yet enkindle to this Doine, and (tinde,
Then the Charme is firme and good:
Our sping is, shalt be, before the Groomes
Vnment, the good Donquo, hawne with you.

Exeunt. Lords.

Sirrha, a word with you