In [1]:
import numpy as np
import random
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Directory holding the training corpus, and the corpus file itself.
PATH = "data/"
# Prefer the GPU, but fall back to the CPU so every later `.to(device)` call
# still works on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
file_name = f"{PATH}nietzsche.txt"
# Seed passed to the RNGs before model creation and before training.
seed = 42

In [3]:
class CharDataset(Dataset):
    """Character-level dataset that serves random fixed-length windows of a text file.

    The whole file is read into memory and every distinct character is given an
    integer id (ids follow the sorted order of the characters). The encoded
    text is stored as two aligned tensors on the module-level ``device``:
    ``x`` holds every character but the last, and ``y`` holds the same sequence
    shifted one step ahead, i.e. the next-character target for each position.

    Args:
        file_name: Path of the text file to read.
        seq_length: Number of characters in each sample.

    Raises:
        ValueError: If the text has fewer than two characters, or if
            ``seq_length`` is smaller than 1 or larger than the number of
            available (input, target) pairs.
    """

    def __init__(self, file_name, seq_length):
        # NOTE: no explicit encoding is passed, so the platform default is used.
        with open(file_name, "r") as file:
            self.text = file.read()

        # Fail fast with a clear message instead of a late, opaque error from
        # `random.randint` or a ZeroDivisionError in `__len__`.
        n_pairs = len(self.text) - 1
        if n_pairs < 1:
            raise ValueError("text must contain at least two characters")
        if not 1 <= seq_length <= n_pairs:
            raise ValueError(
                "seq_length must be between 1 and {} (got {})".format(n_pairs, seq_length)
            )

        self.vocab = sorted(set(self.text))
        self.vocab_size = len(self.vocab)

        # Two-way lookup between characters and their integer ids.
        self.vocab_indices = {c: i for i, c in enumerate(self.vocab)}
        self.indices_vocab = {i: c for i, c in enumerate(self.vocab)}
        self.idx = [self.vocab_indices[c] for c in self.text]

        # Build the encoded text once and slice it, rather than stacking one
        # tiny array per character. `clone` keeps x and y in separate storage.
        encoded = torch.tensor(self.idx, dtype=torch.long)
        self.x = encoded[:-1].clone().to(device)
        self.y = encoded[1:].clone().to(device)

        self.seq_length = seq_length

    def __len__(self):
        """Return the number of samples per epoch: text length // seq_length."""
        return len(self.text) // self.seq_length

    def random_sequence(self):
        """Return an ``(x, y)`` pair of ``seq_length`` ids from a random offset.

        The offset is drawn with the ``random`` module, so reproducible
        sampling requires ``random.seed`` (``torch.manual_seed`` has no effect
        on it).
        """
        # randint is inclusive on both ends; the largest start still leaves a
        # full-length window.
        start_index = random.randint(0, len(self.x) - self.seq_length)
        end_index = start_index + self.seq_length
        return self.x[start_index:end_index], self.y[start_index:end_index]

    def __getitem__(self, idx):
        """Return a random window; ``idx`` is ignored."""
        return self.random_sequence()

In [4]:
class RNN(nn.Module):
    """Single-step character model: embedding -> GRU -> linear -> log-softmax.

    Each call to ``forward`` consumes one character per batch element and
    returns the log-probabilities of the next character together with the
    updated hidden state, so callers drive the time dimension themselves.

    Args:
        input_size: Number of distinct input ids (embedding rows).
        hidden_size: Width of both the embedding and the GRU hidden state.
        output_size: Number of output classes.
        n_layers: Number of stacked GRU layers (defaults to 1).
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(self.input_size, self.hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, self.n_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden):
        """Advance the model by one time step.

        Args:
            input: Integer tensor of character ids, one per batch element. A
                0-dim tensor is treated as a batch of one.
            hidden: Hidden state of shape ``(n_layers, bs, hidden_size)``.

        Returns:
            Tuple ``(log_probs, hidden)`` where ``log_probs`` has shape
            ``(bs, output_size)`` and ``hidden`` is the updated state.
        """
        # Lay the ids out as (seq_len=1, bs). `reshape` also accepts 0-dim and
        # non-contiguous inputs, where reading `input.shape[0]` would not.
        step = input.reshape(1, -1)
        bs = step.shape[1]

        embedded = self.encoder(step)  # (1, bs, hidden_size)
        output, hidden = self.gru(embedded, hidden)
        output = self.decoder(output.view(bs, -1))
        output = F.log_softmax(output, dim=1)
        return output, hidden

    def init_hidden(self, bs):
        """Return a zero hidden state of shape ``(n_layers, bs, hidden_size)``.

        The tensor is created with the dtype and device of the model's own
        weights, so it matches the model wherever it has been moved.
        """
        return self.decoder.weight.new_zeros(self.n_layers, bs, self.hidden_size)

In [5]:
# Sampling configuration: characters per training window, windows per batch.
seq_length = 64
batch_size = 256

# Encode the corpus, then wrap it in a single-process loader.
char_dataset = CharDataset(file_name=file_name, seq_length=seq_length)
char_loader = DataLoader(
    dataset=char_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [6]:
# Network dimensions. NOTE(review): n_factor is not referenced by any of the
# cells shown here; only n_hidden is passed to the model.
n_hidden, n_factor = 128, 50

# Training schedule: number of epochs and the optimizer's initial learning rate.
n_epochs, lr = 100, 1e-2

In [7]:
# Seed torch's RNG first so the weight initialisation below is repeatable.
torch.manual_seed(seed)

# Inputs and outputs are both characters, so one vocabulary size serves as
# the embedding row count and as the number of output classes.
vocab_size = char_dataset.vocab_size
model = RNN(input_size=vocab_size, hidden_size=n_hidden, output_size=vocab_size)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [8]:
# Per-epoch mean loss (per character), appended once per epoch.
loss_list = []

# Seed every RNG the loop draws from: torch drives the DataLoader shuffle,
# while CharDataset picks its window offsets with the `random` module.
torch.manual_seed(seed)
random.seed(seed)

for epoch in range(0, n_epochs):
    # Plain Python float: accumulating the loss *tensor* would keep the
    # autograd history of every batch alive until the end of the epoch.
    batch_loss = 0.0
    for x, y in char_loader:
        # Fresh hidden state per batch; the last batch may be smaller.
        hidden = model.init_hidden(x.shape[0])
        model.zero_grad()
        loss = 0

        # Unroll the sequence one character at a time, summing the
        # next-character loss over all positions.
        for c in range(seq_length):
            output, hidden = model(x[:, c], hidden)
            loss += F.nll_loss(output, y[:, c])

        loss.backward()
        optimizer.step()
        batch_loss += loss.item()

    # Average over positions and batches to get a per-character loss.
    batch_loss = np.round(batch_loss / (seq_length * len(char_loader)), 5)
    loss_list.append(batch_loss)

    if epoch % 5 == 0:
        print("Loss at epoch {}: {}".format(epoch, batch_loss))

    # Halve the learning rate every 10 epochs (skipping epoch 0).
    if epoch % 10 == 0 and epoch != 0:
        for param_group in optimizer.param_groups:
            param_group["lr"] = param_group["lr"] * 0.5
        print("-----------------------------")
        # Read the rate back from the optimizer instead of relying on the
        # loop variable leaking out of the `for` above.
        print("Decreasing learning rate to: {}".format(optimizer.param_groups[-1]["lr"]))
        print("-----------------------------")

Loss at epoch 0: 2.51995
Loss at epoch 5: 1.60903
Loss at epoch 10: 1.51938
-----------------------------
Decreasing learning rate to: 0.005
-----------------------------
Loss at epoch 15: 1.46629
Loss at epoch 20: 1.44726
-----------------------------
Decreasing learning rate to: 0.0025
-----------------------------
Loss at epoch 25: 1.42147
Loss at epoch 30: 1.41101
-----------------------------
Decreasing learning rate to: 0.00125
-----------------------------
Loss at epoch 35: 1.39633
Loss at epoch 40: 1.39206
-----------------------------
Decreasing learning rate to: 0.000625
-----------------------------
Loss at epoch 45: 1.38521
Loss at epoch 50: 1.37702
-----------------------------
Decreasing learning rate to: 0.0003125
-----------------------------
Loss at epoch 55: 1.37642
Loss at epoch 60: 1.3739
-----------------------------
Decreasing learning rate to: 0.00015625
-----------------------------
Loss at epoch 65: 1.37081
Loss at epoch 70: 1.37656
----------------------------

In [9]:
def string_2_tensor(string):
    """Encode ``string`` as a 1-D long tensor of character ids.

    Ids are looked up in ``char_dataset.vocab_indices``; a character missing
    from that mapping raises ``KeyError``. The result has one entry per
    character and lives on the CPU.
    """
    encoded = [char_dataset.vocab_indices[ch] for ch in string]
    return torch.tensor(encoded, dtype=torch.long)

In [10]:
def generate_text(start_string="T", predict_len=100, temperature=0.8):
    """Sample text from the notebook-level ``model``, one character at a time.

    Args:
        start_string: Non-empty prompt. All characters but the last warm up
            the hidden state; the last one is the first input to sampling.
        predict_len: Number of characters to generate after the prompt.
        temperature: Positive divisor applied to the log-probabilities before
            sampling; lower values sharpen the distribution, higher values
            flatten it.

    Returns:
        ``start_string`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
    """
    if len(start_string) == 0:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Encode the prompt once and move it to the model's device up front, so
    # both the warm-up steps and the first sampling step see device tensors.
    start_input = string_2_tensor(start_string).to(device)
    predicted = start_string

    # Sampling needs no gradients; this also stops the hidden state from
    # dragging an ever-growing autograd graph through the loop.
    with torch.no_grad():
        hidden = model.init_hidden(bs=1)

        # Use start string to warm up the hidden state. Each character is fed
        # as a batch of one (shape (1,)), not as a 0-dim scalar.
        for p in range(len(start_string) - 1):
            _, hidden = model(start_input[p].view(1), hidden)
        inp = start_input[-1].view(1)

        for _ in range(predict_len):
            output, hidden = model(inp, hidden)

            # Sample from the network as a multinomial distribution
            output_dist = output.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0]

            # Add predicted character to string and use as next input
            predicted += char_dataset.indices_vocab[top_i.item()]
            inp = top_i.view(1)
    return predicted

In [11]:
# Sample from the model with generate_text's default prompt, length and
# temperature, and print the resulting string.
print(generate_text())

The longer, simply the art probably have continger to his low in the present of the latter the bad th
