In [1]:
import os
import random
import numpy as np
import glob
import unidecode
import string
import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
from tensorboardX import SummaryWriter

from tqdm import tqdm

%matplotlib inline
import matplotlib.pyplot as plt


In [2]:
# --- Run configuration -------------------------------------------------------
model_name = 'bable-v3'   # used to build the checkpoint / log directories and file names
step = 60000              # checkpoint step that is restored by load(model, step) further down
n_characters = 100        # placeholder; reassigned from the vocabulary string in a later cell
model_type = 'gru'        # CharRNN accepts 'gru' or 'lstm'
# n_steps / print_steps / save_steps / learning_rate / seq_len / batch_size are not
# referenced by the cells shown here -- presumably training-loop settings (TODO confirm).
n_steps = 60000
print_steps = 1000
save_steps = 1000
hidden_size = 256
n_layers = 2
learning_rate = 1e-3
seq_len = 128
batch_size = 64
cuda = True               # when True the model and the sampling tensors are moved with .cuda()
seed = 9999

# --- Paths -------------------------------------------------------------------
data_filepath = 'D:\\Data\\northernlion\\output\\nl-isaac.txt'
save_filepath = 'D:\\Models\\Bable\\' + model_name
log_filepath = 'D:\\Logs\\Bable\\' + model_name
# exist_ok=True keeps the cell safe to re-run and removes the check-then-create race
# of the "if not os.path.exists(...)" pattern.
os.makedirs(save_filepath, exist_ok=True)
os.makedirs(log_filepath, exist_ok=True)

# --- Reproducibility: seed every random number generator that is used ----------
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic=True

In [3]:
class CharRNN(nn.Module):
    """Recurrent next-symbol model: embedding -> stacked GRU/LSTM -> linear layer.

    Args:
        input_size: number of rows in the embedding table (distinct input symbols).
        hidden_size: width of the embedding and of every recurrent layer.
        output_size: number of scores produced for each input symbol.
        model_type: "gru" or "lstm" (case-insensitive).
        n_layers: number of stacked recurrent layers.

    Raises:
        ValueError: if ``model_type`` is neither "gru" nor "lstm".
    """

    def __init__(self, input_size, hidden_size, output_size, model_type="gru", n_layers=1):
        super(CharRNN, self).__init__()
        self.model_type = model_type.lower()
        # Fail fast: without this check an unknown type would leave self.rnn undefined
        # and only surface later as an AttributeError inside forward().
        if self.model_type not in ("gru", "lstm"):
            raise ValueError(
                f'Unsupported model_type {model_type!r}; expected "gru" or "lstm"'
            )
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(input_size, hidden_size)
        if self.model_type == "gru":
            self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)
        else:
            self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, inputs, hidden):
        """Advance the network by a single time step.

        Args:
            inputs: integer tensor whose first dimension is the batch; it is
                expected to hold one symbol index per batch element, because the
                embedded input is reshaped to a sequence of length 1.
            hidden: recurrent state as returned by ``init_hidden`` or by a
                previous call to ``forward``.

        Returns:
            ``(output, hidden)`` where ``output`` has shape
            ``(batch_size, output_size)`` and ``hidden`` is the updated state.
        """
        batch_size = inputs.size(0)
        encoded = self.encoder(inputs)
        # The recurrent layers take (seq_len, batch, features); seq_len is 1 here.
        output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
        output = self.out(output.view(batch_size, -1))
        return output, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero initial state for ``batch_size`` sequences.

        The state is a single ``(n_layers, batch_size, hidden_size)`` tensor for a
        GRU and a pair of such tensors for an LSTM. It is created by
        ``torch.zeros`` without a device argument, so the caller has to move it
        to wherever the model lives.
        """
        if self.model_type == "lstm":
            return (Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)),
                    Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size)))
        else:
            return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))


In [4]:
# TODO: pre-process nl-isaac.txt so that unicode which can't be printed is converted to some equivalent.
# Vocabulary: a character's position in `all_characters` is the integer id used for it.
n_characters = len(string.printable)
all_characters = string.printable

def read_file(filepath, encoding=None):
    """Read a text file and transliterate it with ``unidecode``.

    Args:
        filepath: path of the text file to read.
        encoding: optional text encoding handed to ``open``; ``None`` (the
            default) keeps the platform default encoding.

    Returns:
        ``(text, length)``: the result of ``unidecode.unidecode`` on the file
        contents, and its length in characters.
    """
    # The context manager closes the handle even if reading or decoding fails.
    with open(filepath, encoding=encoding) as handle:
        text = unidecode.unidecode(handle.read())
    return text, len(text)

# Turning a string into a tensor
def char_tensor(string):
    """Encode a string as a 1-D LongTensor of indices into ``all_characters``.

    Args:
        string: the text to encode.

    Returns:
        A LongTensor of length ``len(string)``. Characters that do not occur in
        ``all_characters`` keep the initial value 0 (best-effort encoding).
    """
    tensor = torch.zeros(len(string)).long()
    for position, char in enumerate(string):
        # str.find returns -1 for an unknown character, so no exception has to be
        # swallowed (the former bare `except` also hid KeyboardInterrupt and real bugs).
        index = all_characters.find(char)
        if index >= 0:
            tensor[position] = index
    return tensor

# Readable time elapsed
def time_since(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<minutes>m <seconds>s'``.

    Args:
        since: a start timestamp as returned by ``time.time()``.

    Returns:
        A string with whole minutes and the remaining seconds to two decimals.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return f'{minutes}m {seconds:.2f}s'

In [5]:
def save(model, step):
    """Store the model weights as ``<model_name>-<step>.pth.tar`` under ``save_filepath``.

    Only the ``state_dict`` is written (under the key ``'state_dict'``), not the
    module object itself.
    """
    checkpoint_name = f'{model_name}-{step}.pth.tar'
    model_filepath = os.path.join(save_filepath, checkpoint_name)
    checkpoint = {'state_dict': model.state_dict()}
    torch.save(checkpoint, model_filepath)
    print(f'Saved as {model_filepath}')
    
def load(model, step):
    """Restore into ``model`` the weights that ``save`` wrote for ``step``.

    Args:
        model: module whose parameters are overwritten in place.
        step: step number that is part of the checkpoint file name.
    """
    model_filepath = os.path.join(save_filepath, f'{model_name}-{step}.pth.tar')
    # Deserialize onto the CPU so the checkpoint can be read even when the device it
    # was saved from is unavailable; load_state_dict then copies the values into the
    # model's existing parameters, wherever those live.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
    checkpoint = torch.load(model_filepath, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])


In [6]:
def generate(model, prime_str='A', predict_len=100, temperature=0.8, cuda=False):
    """Sample text from the model, one character at a time.

    Args:
        model: network providing ``init_hidden(batch_size)`` and being callable as
            ``model(inputs, hidden) -> (output, hidden)``.
        prime_str: non-empty seed text; it warms up the hidden state and is the
            start of the returned string.
        predict_len: number of characters to sample after ``prime_str``.
        temperature: the scores are divided by this value before exponentiation,
            so lower values make the sampling more conservative.
        cuda: when True, the hidden state and the inputs are moved with ``.cuda()``.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty.
    """
    if not prime_str:
        raise ValueError('prime_str must contain at least one character')

    hidden = model.init_hidden(1)
    prime_input = Variable(char_tensor(prime_str).unsqueeze(0))

    if cuda:
        # An LSTM state is an (h, c) tuple, which has no .cuda() method of its own.
        if isinstance(hidden, tuple):
            hidden = tuple(state.cuda() for state in hidden)
        else:
            hidden = hidden.cuda()
        prime_input = prime_input.cuda()

    # Collect the pieces in a list; joining once avoids repeated string copies.
    pieces = [prime_str]

    # Sampling needs no gradients. Without no_grad the hidden state would carry the
    # autograd history of every earlier step, so memory would grow with predict_len.
    with torch.no_grad():
        # Use priming string to "build up" hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = model(prime_input[:, p], hidden)

        inp = prime_input[:, -1]

        for _ in range(predict_len):
            output, hidden = model(inp, hidden)

            # Sample from the network as a multinomial distribution
            output_dist = output.data.view(-1).div(temperature).exp()
            top_i = int(torch.multinomial(output_dist, 1)[0])

            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            pieces.append(predicted_char)
            inp = Variable(char_tensor(predicted_char).unsqueeze(0))
            if cuda:
                inp = inp.cuda()

    return ''.join(pieces)


In [7]:
# Build the network together with its optimizer and loss function.
# Input and output size are both the vocabulary size.
model = CharRNN(
    input_size=n_characters,
    hidden_size=hidden_size,
    output_size=n_characters,
    model_type=model_type,
    n_layers=n_layers,
)
model_optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Move the parameters to the GPU when the config cell asks for it.
if cuda:
    model.cuda()

In [8]:
# Restore the weights saved at `step` and switch the network to evaluation mode.
# model.eval() stays the last expression so the cell still displays the module summary.
load(model, step=step)
model.eval()

CharRNN(
  (encoder): Embedding(100, 256)
  (rnn): GRU(256, 256, num_layers=2)
  (out): Linear(in_features=256, out_features=100, bias=True)
)

In [11]:
# Prime the model with 'hey' and sample 30000 further characters.
generate(
    model,
    prime_str='hey',
    predict_len=30000,
    temperature=0.8,
    cuda=cuda,
)

"hey were able to go to the shop but we're not gonna be fair to get an angel probably some kind of there the day again we'll see if we did here but just for this and then it might be like a hundred times sure it's worth it I can fly but if we can get enemies lucky me kill them anyway or something on our other tears upgrade without a doubt the bear we could easily get to you know pause and then he shows up and being like you know the the first thing about in my brain speaking back there class because the guy wanted it by hits you have to take a spear at hard an ace of clubs like I like as soon as we don't get a deal with the devil on this floor only sight or decidestroy from a deal with the devil and I'm gonna you know still get one on this floor yeah we don't want to meet the larges in the haunt man and it has nothing nehas bunning to do it and then like two oster is our damage because that's a little more released but we're gonna may yeah I was going to say no problems in a row is a f