In [1]:
import unidecode
import string 
import random 
import re 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

import time, math

import matplotlib.pyplot as plt 

## load & prepare data

In [2]:
# Printable-ASCII alphabet and its size. The cells visible in this notebook
# build their vocabulary from the corpus itself and do not read these two names.
all_characters = string.printable
n_characters = len(all_characters)

# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this file exists. Consider a configurable, relative data directory.
data_path = "C:/Users/HomePC/Desktop/papers_implementation/Generating Sequences With Recurrent Neural Networks/data/william_shakepares.txt"

# Read inside a context manager so the file handle is closed as soon as the
# contents are in memory (the bare open(...).read() left it to the garbage
# collector).
with open(data_path) as corpus_file:
    text = unidecode.unidecode(corpus_file.read())

# Normalise: lowercase, flatten newlines to spaces, and keep only the first
# 100,000 characters to bound training cost.
text = text.lower()
text = text.replace('\n', ' ')
text = text[:100_000] # no gpu .... (sad face)

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Position in `chars` -> character, and the inverse lookup.
idx2char = dict(enumerate(chars))
char2idx = {symbol: position for position, symbol in idx2char.items()}

# The whole corpus as a list of integer ids.
encoded_text = list(map(char2idx.__getitem__, text))

In [4]:
class CharDataset(Dataset):
    """Sliding-window dataset over an encoded character sequence.

    Item ``i`` is the pair ``(x, y)`` where ``x`` is the ``seq_length`` ids
    starting at position ``i`` and ``y`` is the same window shifted one
    position to the right (the next-character targets).

    Args:
        encoded_text: sequence of integer ids.
        seq_length: number of ids in each window.
    """

    def __init__(self, encoded_text, seq_length):
        self.seq_length = seq_length
        self.encoded_text = encoded_text
        # Convert once up front; slicing a tensor per item is much cheaper than
        # slicing a Python list and rebuilding a tensor on every access.
        self._ids = torch.tensor(encoded_text, dtype=torch.long)

    def __len__(self):
        # Clamp at zero: a negative value would make len() raise when the
        # sequence is shorter than one window.
        return max(0, len(self.encoded_text) - self.seq_length)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` long tensors for window ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        # Raising IndexError (instead of returning a truncated slice) is what
        # lets plain iteration over the dataset terminate.
        if not 0 <= idx < size:
            raise IndexError("CharDataset index out of range")

        stop = idx + self.seq_length
        # clone() so callers get independent tensors, as before, rather than
        # views into the shared id buffer.
        x = self._ids[idx:stop].clone()
        y = self._ids[idx + 1:stop + 1].clone()
        return x, y

    
# Number of characters in each training window.
seq_length = 100
dataset = CharDataset(encoded_text=encoded_text, seq_length=seq_length)

# Reshuffle the windows every epoch and drop the trailing partial batch so that
# every batch holds exactly 128 windows.
dataloader = DataLoader(
    dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

In [5]:
class CharRNN(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear projection.

    Args:
        vocab_size: number of distinct character ids (embedding rows and
            output logits).
        embedding_dim: size of each character embedding.
        hidden_dim: LSTM hidden-state size.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.vocab_size = vocab_size

        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        """Score the next character at every position of ``x``.

        Args:
            x: long tensor of character ids, batch dimension first.
            hidden: ``(h, c)`` LSTM state, e.g. from :meth:`init_hidden`.

        Returns:
            ``(logits, hidden)``: ``logits`` has one row per (batch, time)
            position, flattened to ``(-1, vocab_size)``; ``hidden`` is the
            updated LSTM state.
        """
        embedded = self.embed(x)
        out, hidden = self.lstm(embedded, hidden)

        # Merge the batch and time axes so the linear layer produces one row of
        # logits per position.
        out = out.reshape(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zeroed ``(h, c)`` state on the model's device and dtype.

        Each tensor has shape ``(num_layers, batch_size, hidden_dim)``.
        """
        # Borrow device/dtype from an existing parameter so the state follows
        # the model wherever it has been moved.
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_dim)
        h = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        c = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return (h, c)

In [6]:
def train(model, dataloader, batch_size, optimizer, criterion):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: module exposing ``init_hidden(batch_size=...)`` and returning
            ``(logits, hidden)`` when called as ``model(x, hidden)``.
        dataloader: iterable of ``(x, y)`` tensor batches; every batch must
            contain ``batch_size`` rows, because the recurrent state is
            carried from one batch to the next.
        batch_size: batch dimension used to size the initial hidden state.
        optimizer: optimizer over ``model.parameters()``.
        criterion: loss taking ``(logits, flattened_targets)``.

    Returns:
        The mean of the per-batch losses as a Python float.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    model.train()
    # Take the device from the model itself instead of relying on a global
    # defined in a later notebook cell.
    device = next(model.parameters()).device
    hidden = model.init_hidden(batch_size=batch_size)

    total_loss = 0.0
    num_batches = 0

    for x, y in dataloader:
        x, y = x.to(device), y.to(device)

        # Cut the graph at the batch boundary so backward() does not reach into
        # previous batches.
        hidden = tuple(h.detach() for h in hidden)
        logits, hidden = model(x, hidden)
        loss = criterion(logits, y.reshape(-1))

        optimizer.zero_grad()
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

        # .item() yields a plain float, so nothing autograd-related is kept
        # alive by the running total.
        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("dataloader produced no batches")

    return total_loss / num_batches

In [7]:
def generate_text(model, start_str, char2idx, idx2char, hidden_dim, num_chars=200, temperature=1.0, device='cpu'):
    """Sample ``num_chars`` characters from ``model``, seeded with ``start_str``.

    Args:
        model: module exposing ``eval()``, ``init_hidden(batch_size=...)`` and
            returning ``(logits, hidden)`` when called as ``model(x, hidden)``.
        start_str: non-empty seed text; every character must be a key of
            ``char2idx``.
        char2idx: mapping from character to integer id.
        idx2char: mapping from integer id to character.
        hidden_dim: unused; kept so existing call sites keep working.
        num_chars: number of characters to sample after the seed.
        temperature: positive divisor applied to the logits before softmax.
        device: device on which the input tensors are created.

    Returns:
        ``start_str`` followed by the ``num_chars`` sampled characters.

    Raises:
        ValueError: if ``start_str`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_str`` contains a character missing from ``char2idx``.
    """
    if not start_str:
        raise ValueError("start_str must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model.eval()

    seed_ids = [char2idx[ch] for ch in start_str]
    seed = torch.tensor([seed_ids], dtype=torch.long).to(device)
    out_chars = list(start_str)

    # Sampling needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        hidden = model.init_hidden(batch_size=1)

        # Warm up the recurrent state on every seed character but the last.
        for i in range(len(seed_ids) - 1):
            _, hidden = model(seed[:, i:i + 1], hidden)

        # The last seed character is the first input of the sampling loop.
        current_input = seed[:, -1].unsqueeze(1)

        for _ in range(num_chars):
            logits, hidden = model(current_input, hidden)
            logits = logits.squeeze(0)

            probs = F.softmax(logits / temperature, dim=0).cpu()
            next_idx = torch.multinomial(probs, 1).item()
            out_chars.append(idx2char[next_idx])

            # Feed the sampled character back in as the next input. The
            # original assigned to a misspelled name here, so the model kept
            # receiving the last seed character.
            current_input = torch.tensor([[next_idx]], dtype=torch.long).to(device)

    return "".join(out_chars)

In [10]:
# --- Model size ---
embedding_dim, hidden_dim, num_layers = 128, 256, 2

# --- Optimisation ---
lr = 0.001
num_epochs = 2_000
batch_size = 128

# --- Reporting cadence, in epochs ---
plot_every = 10
print_every = 100


# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = CharRNN(vocab_size, embedding_dim, hidden_dim, num_layers)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [11]:
# Mean loss per `plot_every`-epoch window, kept for plotting the curve.
all_losses = []
# Running sum of epoch losses inside the current window.
loss_avg = 0

for epoch in range(1, num_epochs + 1):
    loss = train(model=model, dataloader=dataloader, batch_size=batch_size, optimizer=optimizer, criterion=criterion)
    # Reduce to a plain float so the running sum never holds on to a tensor,
    # whether train() hands back a tensor or a number.
    loss = float(loss)
    loss_avg += loss

    # Progress report and a text sample. This is gated on `print_every`; it
    # previously tested `plot_every`, which left `print_every` unused.
    if epoch % print_every == 0:
        print(f'Epoch: {epoch} | Loss: {loss:.4f}')
        start_string = 'to be'
        print(generate_text(model, start_string, char2idx, idx2char, hidden_dim, num_chars=200, temperature=1.0, device=device))

    # Close the current window: store its mean loss and start a new one.
    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

KeyboardInterrupt: 

In [1]:
# Divide each of the four weights by 0.90.
[weight / 0.90 for weight in (0.40, 0.25, 0.15, 0.10)]

[0.4444444444444445,
 0.2777777777777778,
 0.16666666666666666,
 0.11111111111111112]

In [7]:
import numpy as np
# Total of the four values 0.44, 0.28, 0.17 and 0.11.
np.array([0.44, 0.28, 0.17, 0.11]).sum()

np.float64(1.0)