In [66]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from textdataset import TextDataset
from rnn import RNN

In [59]:
# Load the raw book and keep only the span between the first "Letter 1"
# heading and the Project Gutenberg footer line.
with open('../Texts/shelley.txt', 'r', encoding="utf-8") as fp:
    text = fp.read()

start_marker = 'Letter 1\n\n'
end_marker = 'End of the Project Gutenberg'
start_idx = text.find(start_marker)
end_idx = text.find(end_marker)

# str.find returns -1 when the marker is absent, which would silently turn
# the slice below into something like text[-1:end_idx] or text[start:-1].
# Fail loudly so a changed or different source file is noticed immediately.
if start_idx == -1:
    raise ValueError(f'start marker {start_marker!r} not found in text')
if end_idx == -1:
    raise ValueError(f'end marker {end_marker!r} not found in text')

text = text[start_idx:end_idx]
# Set of distinct characters in the trimmed text (the model's vocabulary).
char_set = set(text)

In [60]:
# Vocabulary: each distinct character, in sorted order, is assigned an
# integer id equal to its position. char_array is the inverse lookup
# (id -> character), usable with NumPy fancy indexing.
chars_sorted = sorted(char_set)
char2int = dict(zip(chars_sorted, range(len(chars_sorted))))
char_array = np.array(chars_sorted)

# Encode the full text as a 1-D int32 array of character ids.
text_encoded = np.array(list(map(char2int.__getitem__, text)), dtype=np.int32)

# Sanity checks: array shape, a forward encoding, and a round-trip decode.
print('Text encoded shape:', text_encoded.shape)
print(text[:15], '== Encoding ==>', text_encoded[:15])
sample_ids = text_encoded[15:21]
print(sample_ids, '== Reverse ==>', ''.join(char_array[sample_ids]))

# Show the id -> character mapping for the first five characters.
for ex, ch in zip(text_encoded[:5], char_array[text_encoded[:5]]):
    print('{} -> {}'.format(ex, ch))

Text encoded shape: (437427,)
Letter 1

_To M == Encoding ==> [36 57 72 72 57 70  1 13  0  0 52 44 67  1 37]
[70 71 10  1 43 53] == Reverse ==> rs. Sa
36 -> L
57 -> e
72 -> t
72 -> t
57 -> e


In [61]:
seq_length = 40         # sequence length
# Each chunk holds seq_length input characters plus one extra, so that the
# target sequence can be the input shifted by one position.
chunk_size = seq_length + 1
# Sliding window with stride 1 over the encoded text. The valid start
# positions are 0 .. len - chunk_size inclusive, hence the "+ 1" in the range
# bound; without it the final full-length window would be dropped.
text_chunks = [text_encoded[i:i+chunk_size]
               for i in range(len(text_encoded) - chunk_size + 1)]

In [62]:
# Stack all chunks into one 2-D array, convert to a tensor and wrap it in the
# project's TextDataset, which yields (input, target) pairs when iterated.
chunk_tensor = torch.tensor(np.array(text_chunks))
seq_dataset = TextDataset(chunk_tensor)

# Preview the first two (input, target) pairs decoded back to text.
# Label spelling is left exactly as in the recorded output below.
for i, (seq, target) in zip(range(2), seq_dataset):
    decoded_input = ''.join(char_array[seq])
    decoded_target = ''.join(char_array[target])
    print(' Input (x): ', repr(decoded_input))
    print('Traget (y): ', repr(decoded_target))
    print()

 Input (x):  'Letter 1\n\n_To Mrs. Saville, England._\n\n\n'
Traget (y):  'etter 1\n\n_To Mrs. Saville, England._\n\n\nS'

 Input (x):  'etter 1\n\n_To Mrs. Saville, England._\n\n\nS'
Traget (y):  'tter 1\n\n_To Mrs. Saville, England._\n\n\nSt'



In [63]:
# Mini-batch loader over the chunk dataset. Seeding first makes the shuffle
# order reproducible; drop_last=True discards a trailing incomplete batch so
# every batch has exactly batch_size rows.
batch_size = 64
torch.manual_seed(1)
seq_dl = DataLoader(
    seq_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

### Creating the model

In [67]:
# Model hyper-parameters. The vocabulary size is the number of distinct
# characters found in the text (one entry per element of char_array).
vocab_size = char_array.shape[0]
embed_dim = 256
rnn_hidden_size = 512

# Seed before constructing the model so that any random initialisation it
# performs is reproducible.
torch.manual_seed(1)
model = RNN(vocab_size, embed_dim, rnn_hidden_size)

# Loss function and optimizer used by the training loop.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

### Training the model

In [68]:
# NOTE: each "epoch" here is a single optimisation step on one mini-batch,
# not a full pass over the dataset.
num_epochs = 10_000
torch.manual_seed(1)

# Keep ONE iterator over the DataLoader. Calling next(iter(seq_dl)) inside the
# loop would build a new iterator on every step, which re-shuffles the whole
# dataset index and then uses only its first batch.
batch_iter = iter(seq_dl)

for epoch in range(num_epochs):
    # Fresh recurrent state for every batch.
    hidden, cell = model.init_hidden(batch_size)
    try:
        seq_batch, target_batch = next(batch_iter)
    except StopIteration:
        # Full pass over the data finished: start a new (re-shuffled) pass.
        batch_iter = iter(seq_dl)
        seq_batch, target_batch = next(batch_iter)
    optimizer.zero_grad()

    # Feed the sequence one position at a time, carrying the recurrent state
    # forward and summing the per-position losses.
    # Assumes seq_batch / target_batch are (batch_size, seq_length) -- TODO confirm
    # against TextDataset.
    loss = 0
    for c in range(seq_length):
        pred, hidden, cell = model(seq_batch[:, c], hidden, cell)
        loss += loss_fn(pred, target_batch[:, c])
    loss.backward()
    optimizer.step()

    # Report the mean loss per sequence position as a plain Python float.
    loss = loss.item() / seq_length
    if epoch % 500 == 0:
        print(f'Epoch {epoch} loss: {loss:.4f}')

AttributeError: 'RNN' object has no attribute 'embedding'