First, we load some books from project Gutenberg into strings.

In [43]:
# Setup

# Characters are represented by 1-hot vectors of size 128
char_dim = 128

In [44]:
import numpy as np
import os
from collections import Counter
import unicodedata
import string
import gc

import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import LSTM
from torch import optim

In [79]:
np.random.seed(seed=12345)

# replaces special characters with their close equivalents in order to simplify the characters that appear
def clean_text(text):
    return str(unicodedata.normalize('NFD', text).encode('ascii', 'ignore'))

gutenberg_dir = 'Gutenberg/txt/'
gutenberg_files = os.listdir(gutenberg_dir)
myfiles = np.random.choice(gutenberg_files, 5)
mystrings = []
counters = []
combined_counter = Counter()
for file in myfiles:
    print('reading file %s' % file)
    myfile = open(gutenberg_dir + file, 'r')
    file_text = clean_text(myfile.read())
    print('read %d characters' % len(file_text))
    mystrings += [file_text]
    myfile.close()
    counter = Counter(file_text)
    counters += [counter]
    combined_counter += counter

print(combined_counter)

for key in combined_counter.keys():
    if ord(key) >= 128:
        print('invalid character value found: %s has numeric value %d', key, ord(key))

reading file Thornton Waldo Burgess___The Adventures of Jimmy Skunk.txt
read 86954 characters
reading file Louisa May Alcott___Shawl-Straps.txt
read 257608 characters
reading file Andrew Lang___John Knox and the Reformation.txt
read 488156 characters
reading file Sir Richard Francis Burton___To the Gold Coast for Gold, Volume 1.txt
read 514103 characters
reading file Daniel Defoe___The History of the Devil.txt
read 721216 characters
Counter({' ': 334089, 'e': 189864, 'n': 143002, 't': 138085, 'a': 123952, 'o': 115740, 'i': 101535, 's': 95039, 'h': 93515, 'r': 91812, 'd': 63674, 'l': 61959, '\\': 43719, 'u': 40924, 'c': 36985, 'f': 34188, ',': 33755, 'm': 33514, 'w': 30568, 'g': 27188, 'y': 27143, 'p': 25581, 'b': 22758, 'v': 16037, '.': 14271, '_': 11357, 'k': 9653, "'": 7853, 'T': 5103, 'S': 4880, '-': 4789, 'I': 4730, 'A': 4378, ';': 4296, 'C': 4223, 'M': 4133, 'D': 3790, '"': 3653, 'x': 3487, 'P': 3446, 'H': 3002, 'B': 2914, 'F': 2461, 'E': 2446, 'W': 2307, '1': 2301, 'L': 2278, 'R'

Train an LSTM on this data

In [4]:
# converts a list of N strings of length T into a numpy array of 1-hot vectors
# input size: (N, T)
# output size: (T, N, 128)
i128 = np.eye(128)
def char_to_ix(texts):
    ords = np.array([[ord(char) for char in text] for text in texts], dtype=int)
    return i128[ords].transpose((1, 0, 2))

# converts a list of N strings of length T into a numpy array of length (T, N)
def char_to_array(texts):
    ords = np.array([[ord(char) for char in text] for text in texts], dtype=int)
    return ords.transpose((1, 0))

#data = char_to_ix(mystrings[0])

In [130]:
class MyLSTM(nn.Module):
    def __init__(self, hidden_dim, num_stacks):
        super(MyLSTM, self).__init__()
        self.hidden_dim = hidden_dim

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(char_dim, hidden_dim, num_layers=num_stacks)
        
        # The linear layer that maps from hidden state space to character space
        self.hidden2char = nn.Linear(hidden_dim, char_dim)
        self.init_hidden_zeros(1)
    
    def init_hidden_zeros(self, minibatch_size):
        self.init_hidden(torch.zeros((self.lstm.num_layers, minibatch_size, self.hidden_dim)), torch.zeros((self.lstm.num_layers, minibatch_size, self.hidden_dim)))
    
    def init_hidden(self, h, c):
        self.hidden = (h, c)

    def forward(self, text):
        # text should be of size (T, N, char_dim)
        # returns character scores of size (T, N, char_dim)
        
        hs, self.hidden = self.lstm(text, self.hidden)
        char_space = self.hidden2char(hs)
        return char_space

In [128]:
def model_loss(model, loss_func, data_ix, data_array):
    model.lstm.eval()
    model.init_hidden_zeros(data_ix.shape[1])
    sequence_in = data_ix[:-1, :, :]
    minibatch_size = data_ix.shape[1]
    loss = 0
    with torch.no_grad():
        for i, char_in in enumerate(sequence_in):
            char_scores = model(char_in.view(1, minibatch_size, -1))
            loss += loss_func(char_scores.view(-1, char_dim), data_array[i+1,:])
    model.lstm.train()
    return loss / len(sequence_in)

In [116]:
# free some memory if possible
train_data = None
val_data = None
test_data = None
val_data_ix = None
val_data_array = None
test_data_ix = None
test_data_array = None
gc.collect()

train_data = ''
val_data = ''
test_data = ''

for string in mystrings:
    train_data += string[:len(string) * 8 // 10]
    val_data += string[len(string) * 8 // 10:len(string) * 9 // 10]
    test_data += string[len(string) * 9 // 10:]

train_data_ix = torch.tensor(char_to_ix([train_data]), dtype=torch.float)
train_data_array = torch.tensor(char_to_array([train_data])).view(-1, 1)

val_data_ix = torch.tensor(char_to_ix([val_data]), dtype=torch.float)
val_data_array = torch.tensor(char_to_array([val_data])).view(-1, 1)

test_data_ix = torch.tensor(char_to_ix([test_data]), dtype=torch.float)
test_data_array = torch.tensor(char_to_array([test_data])).view(-1, 1)

print(len(train_data))
print(len(val_data))
print(len(test_data))


#train_data = mystrings[0][:90000]
#
## data_ix is of shape (T, N, char_dim) while data_array is of shape (T, N)
#train_data_ix = torch.tensor(char_to_ix([train_data]), dtype=torch.float)
#train_data_array = torch.tensor(char_to_array([train_data])).view(-1, 1)
#
#val_data = mystrings[0][-100000:-50000]
#
## data_ix is of shape (T, N, char_dim) while data_array is of shape (T, N)
#val_data_ix = torch.tensor(char_to_ix([val_data]), dtype=torch.float)
#val_data_array = torch.tensor(char_to_array([val_data])).view(-1, 1)
#
#test_data = mystrings[0][-50000:]
#
## data_ix is of shape (T, N, char_dim) while data_array is of shape (T, N)
#test_data_ix = torch.tensor(char_to_ix([test_data]), dtype=torch.float)
#test_data_array = torch.tensor(char_to_array([test_data])).view(-1, 1)

1654427
206804
206806


In [135]:
model = MyLSTM(64, 1)
#model.load_state_dict(torch.load('model_checkpoint_h100_3layer_epoch9'))
loss_func = torch.nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
seq_len = 1000
minibatch_size = 8

train_losses = []
val_losses = []

for epoch in range(30):
    print('on epoch %d' % epoch)
    for i in range(len(train_data) // (seq_len * minibatch_size)):
        print('\r\ton iteration %d / %d' % (i, len(train_data) // (seq_len * minibatch_size)), end='')
        model.zero_grad()
        model.init_hidden_zeros(minibatch_size)
        
        sequence_in = torch.zeros((seq_len - 1, minibatch_size, char_dim))
        sequence_out = torch.zeros((seq_len - 1, minibatch_size), dtype=torch.long)
        for b in range(minibatch_size):
            sequence_in[:,b,:] = train_data_ix[seq_len * (i * minibatch_size + b)
                                               :
                                               seq_len * (i * minibatch_size + b + 1) - 1
                                               ,0,:
                                              ]
            
            sequence_out[:,b] =  train_data_array[seq_len * (i * minibatch_size + b) + 1
                                                  :
                                                  seq_len * (i * minibatch_size + b + 1) 
                                                  ,0
                                                 ]
        #sequence_in = train_data_ix[seq_len * i:seq_len * (i + 1) - 1, :, :]
        #sequence_out = train_data_array[seq_len * i + 1:seq_len * (i + 1), :]
        
        char_scores = model(sequence_in)
        loss = loss_func(char_scores.view(-1, char_dim), sequence_out.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
    print()
    train_loss = model_loss(model, loss_func, train_data_ix[10000:20000,:,:], train_data_array[10000:20000,:])
    val_loss = model_loss(model, loss_func, val_data_ix[:10000,:,:], val_data_array[:10000,:])
    print('\ttraining loss = %f' % train_loss)
    print('\tvalidation loss = %f' % val_loss)
    train_losses += [train_loss]
    val_losses += [val_loss]
    torch.save(model.state_dict(), 'model_checkpoint_2_h64_3layer_epoch' + str(epoch))

train_loss = model_loss(model, loss_func, train_data_ix[:100000,:,:], train_data_array[:100000,:])
val_loss = model_loss(model, loss_func, val_data_ix, val_data_array)
print('\ttraining loss = %f' % train_loss)
print('\tvalidation loss = %f' % val_loss)
train_losses += [train_loss]
val_losses += [val_loss]

on epoch 0
	on iteration 205 / 206
	training loss = 2.170997
	validation loss = 2.238158
on epoch 1
	on iteration 205 / 206
	training loss = 2.015709
	validation loss = 2.088683
on epoch 2
	on iteration 205 / 206
	training loss = 1.896007
	validation loss = 1.986391
on epoch 3
	on iteration 205 / 206
	training loss = 1.838398
	validation loss = 1.927146
on epoch 4
	on iteration 205 / 206
	training loss = 1.805333
	validation loss = 1.893997
on epoch 5
	on iteration 205 / 206
	training loss = 1.773635
	validation loss = 1.856635
on epoch 6
	on iteration 205 / 206
	training loss = 1.752420
	validation loss = 1.829991
on epoch 7
	on iteration 205 / 206
	training loss = 1.737824
	validation loss = 1.809633
on epoch 8
	on iteration 205 / 206
	training loss = 1.723420
	validation loss = 1.791808
on epoch 9
	on iteration 205 / 206
	training loss = 1.716309
	validation loss = 1.781031
on epoch 10
	on iteration 205 / 206
	training loss = 1.707075
	validation loss = 1.769508
on epoch 11
	on iter

In [136]:
model = MyLSTM(64, 3)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
seq_len = 1000
minibatch_size = 8

train_losses_h64_l3 = []
val_losses_h64_l3 = []

for epoch in range(100):
    print('on epoch %d' % epoch)
    for i in range(len(train_data) // (seq_len * minibatch_size)):
        print('\r\ton iteration %d / %d' % (i, len(train_data) // (seq_len * minibatch_size)), end='')
        model.zero_grad()
        model.init_hidden_zeros(minibatch_size)
        
        sequence_in = torch.zeros((seq_len - 1, minibatch_size, char_dim))
        sequence_out = torch.zeros((seq_len - 1, minibatch_size), dtype=torch.long)
        for b in range(minibatch_size):
            sequence_in[:,b,:] = train_data_ix[seq_len * (i * minibatch_size + b)
                                               :
                                               seq_len * (i * minibatch_size + b + 1) - 1
                                               ,0,:
                                              ]
            
            sequence_out[:,b] =  train_data_array[seq_len * (i * minibatch_size + b) + 1
                                                  :
                                                  seq_len * (i * minibatch_size + b + 1) 
                                                  ,0
                                                 ]
        #sequence_in = train_data_ix[seq_len * i:seq_len * (i + 1) - 1, :, :]
        #sequence_out = train_data_array[seq_len * i + 1:seq_len * (i + 1), :]
        
        char_scores = model(sequence_in)
        loss = loss_func(char_scores.view(-1, char_dim), sequence_out.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
    print()
    train_loss = model_loss(model, loss_func, train_data_ix[10000:20000,:,:], train_data_array[10000:20000,:])
    val_loss = model_loss(model, loss_func, val_data_ix[:10000,:,:], val_data_array[:10000,:])
    print('\ttraining loss = %f' % train_loss)
    print('\tvalidation loss = %f' % val_loss)
    train_losses_h64_l3 += [train_loss]
    val_losses_h64_l3 += [val_loss]
    torch.save(model.state_dict(), 'model_checkpoint_2_h64_3layer_epoch' + str(epoch))

train_loss = model_loss(model, loss_func, train_data_ix[:100000,:,:], train_data_array[:100000,:])
val_loss = model_loss(model, loss_func, val_data_ix, val_data_array)
print('training loss = %f' % train_loss)
print('validation loss = %f' % val_loss)
train_losses_h64_l3 += [train_loss]
val_losses_h64_l3 += [val_loss]

##############################

model = MyLSTM(64, 3)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
seq_len = 1000
minibatch_size = 8

train_losses_h64_l3 = []
val_losses_h64_l3 = []

for epoch in range(100):
    print('on epoch %d' % epoch)
    for i in range(len(train_data) // (seq_len * minibatch_size)):
        print('\r\ton iteration %d / %d' % (i, len(train_data) // (seq_len * minibatch_size)), end='')
        model.zero_grad()
        model.init_hidden_zeros(minibatch_size)
        
        sequence_in = torch.zeros((seq_len - 1, minibatch_size, char_dim))
        sequence_out = torch.zeros((seq_len - 1, minibatch_size), dtype=torch.long)
        for b in range(minibatch_size):
            sequence_in[:,b,:] = train_data_ix[seq_len * (i * minibatch_size + b)
                                               :
                                               seq_len * (i * minibatch_size + b + 1) - 1
                                               ,0,:
                                              ]
            
            sequence_out[:,b] =  train_data_array[seq_len * (i * minibatch_size + b) + 1
                                                  :
                                                  seq_len * (i * minibatch_size + b + 1) 
                                                  ,0
                                                 ]
        #sequence_in = train_data_ix[seq_len * i:seq_len * (i + 1) - 1, :, :]
        #sequence_out = train_data_array[seq_len * i + 1:seq_len * (i + 1), :]
        
        char_scores = model(sequence_in)
        loss = loss_func(char_scores.view(-1, char_dim), sequence_out.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
    print()
    train_loss = model_loss(model, loss_func, train_data_ix[10000:20000,:,:], train_data_array[10000:20000,:])
    val_loss = model_loss(model, loss_func, val_data_ix[:10000,:,:], val_data_array[:10000,:])
    print('\ttraining loss = %f' % train_loss)
    print('\tvalidation loss = %f' % val_loss)
    train_losses_h64_l3 += [train_loss]
    val_losses_h64_l3 += [val_loss]
    torch.save(model.state_dict(), 'model_checkpoint_2_h64_3layer_epoch' + str(epoch))

train_loss = model_loss(model, loss_func, train_data_ix[:100000,:,:], train_data_array[:100000,:])
val_loss = model_loss(model, loss_func, val_data_ix, val_data_array)
print('training loss = %f' % train_loss)
print('validation loss = %f' % val_loss)
train_losses_h64_l3 += [train_loss]
val_losses_h64_l3 += [val_loss]

##############################


model = MyLSTM(128, 3)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
seq_len = 1000
minibatch_size = 8

train_losses_h128_l3 = []
val_losses_h128_l3 = []

for epoch in range(100):
    print('on epoch %d' % epoch)
    for i in range(len(train_data) // (seq_len * minibatch_size)):
        print('\r\ton iteration %d / %d' % (i, len(train_data) // (seq_len * minibatch_size)), end='')
        model.zero_grad()
        model.init_hidden_zeros(minibatch_size)
        
        sequence_in = torch.zeros((seq_len - 1, minibatch_size, char_dim))
        sequence_out = torch.zeros((seq_len - 1, minibatch_size), dtype=torch.long)
        for b in range(minibatch_size):
            sequence_in[:,b,:] = train_data_ix[seq_len * (i * minibatch_size + b)
                                               :
                                               seq_len * (i * minibatch_size + b + 1) - 1
                                               ,0,:
                                              ]
            
            sequence_out[:,b] =  train_data_array[seq_len * (i * minibatch_size + b) + 1
                                                  :
                                                  seq_len * (i * minibatch_size + b + 1) 
                                                  ,0
                                                 ]
        #sequence_in = train_data_ix[seq_len * i:seq_len * (i + 1) - 1, :, :]
        #sequence_out = train_data_array[seq_len * i + 1:seq_len * (i + 1), :]
        
        char_scores = model(sequence_in)
        loss = loss_func(char_scores.view(-1, char_dim), sequence_out.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
    print()
    train_loss = model_loss(model, loss_func, train_data_ix[10000:20000,:,:], train_data_array[10000:20000,:])
    val_loss = model_loss(model, loss_func, val_data_ix[:10000,:,:], val_data_array[:10000,:])
    print('\ttraining loss = %f' % train_loss)
    print('\tvalidation loss = %f' % val_loss)
    train_losses_h128_l3 += [train_loss]
    val_losses_h128_l3 += [val_loss]
    torch.save(model.state_dict(), 'model_checkpoint_2_h128_3layer_epoch' + str(epoch))

train_loss = model_loss(model, loss_func, train_data_ix[:100000,:,:], train_data_array[:100000,:])
val_loss = model_loss(model, loss_func, val_data_ix, val_data_array)
print('training loss = %f' % train_loss)
print('validation loss = %f' % val_loss)
train_losses_h128_l3 += [train_loss]
val_losses_h128_l3 += [val_loss]

#################################

model = MyLSTM(64, 2)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
seq_len = 1000
minibatch_size = 8

train_losses_h64_l2 = []
val_losses_h64_l2 = []

for epoch in range(100):
    print('on epoch %d' % epoch)
    for i in range(len(train_data) // (seq_len * minibatch_size)):
        print('\r\ton iteration %d / %d' % (i, len(train_data) // (seq_len * minibatch_size)), end='')
        model.zero_grad()
        model.init_hidden_zeros(minibatch_size)
        
        sequence_in = torch.zeros((seq_len - 1, minibatch_size, char_dim))
        sequence_out = torch.zeros((seq_len - 1, minibatch_size), dtype=torch.long)
        for b in range(minibatch_size):
            sequence_in[:,b,:] = train_data_ix[seq_len * (i * minibatch_size + b)
                                               :
                                               seq_len * (i * minibatch_size + b + 1) - 1
                                               ,0,:
                                              ]
            
            sequence_out[:,b] =  train_data_array[seq_len * (i * minibatch_size + b) + 1
                                                  :
                                                  seq_len * (i * minibatch_size + b + 1) 
                                                  ,0
                                                 ]
        #sequence_in = train_data_ix[seq_len * i:seq_len * (i + 1) - 1, :, :]
        #sequence_out = train_data_array[seq_len * i + 1:seq_len * (i + 1), :]
        
        char_scores = model(sequence_in)
        loss = loss_func(char_scores.view(-1, char_dim), sequence_out.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
    print()
    train_loss = model_loss(model, loss_func, train_data_ix[10000:20000,:,:], train_data_array[10000:20000,:])
    val_loss = model_loss(model, loss_func, val_data_ix[:10000,:,:], val_data_array[:10000,:])
    print('\ttraining loss = %f' % train_loss)
    print('\tvalidation loss = %f' % val_loss)
    train_losses_h64_l2 += [train_loss]
    val_losses_h64_l2 += [val_loss]
    torch.save(model.state_dict(), 'model_checkpoint_2_h64_2layer_epoch' + str(epoch))

train_loss = model_loss(model, loss_func, train_data_ix[:100000,:,:], train_data_array[:100000,:])
val_loss = model_loss(model, loss_func, val_data_ix, val_data_array)
print('training loss = %f' % train_loss)
print('validation loss = %f' % val_loss)
train_losses_h64_l2 += [train_loss]
val_losses_h64_l2 += [val_loss]

##############################

model = MyLSTM(128, 3)
loss_func = torch.nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
seq_len = 1000
minibatch_size = 8

train_losses_h128_l2 = []
val_losses_h128_l2 = []

for epoch in range(100):
    print('on epoch %d' % epoch)
    for i in range(len(train_data) // (seq_len * minibatch_size)):
        print('\r\ton iteration %d / %d' % (i, len(train_data) // (seq_len * minibatch_size)), end='')
        model.zero_grad()
        model.init_hidden_zeros(minibatch_size)
        
        sequence_in = torch.zeros((seq_len - 1, minibatch_size, char_dim))
        sequence_out = torch.zeros((seq_len - 1, minibatch_size), dtype=torch.long)
        for b in range(minibatch_size):
            sequence_in[:,b,:] = train_data_ix[seq_len * (i * minibatch_size + b)
                                               :
                                               seq_len * (i * minibatch_size + b + 1) - 1
                                               ,0,:
                                              ]
            
            sequence_out[:,b] =  train_data_array[seq_len * (i * minibatch_size + b) + 1
                                                  :
                                                  seq_len * (i * minibatch_size + b + 1) 
                                                  ,0
                                                 ]
        #sequence_in = train_data_ix[seq_len * i:seq_len * (i + 1) - 1, :, :]
        #sequence_out = train_data_array[seq_len * i + 1:seq_len * (i + 1), :]
        
        char_scores = model(sequence_in)
        loss = loss_func(char_scores.view(-1, char_dim), sequence_out.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
    print()
    train_loss = model_loss(model, loss_func, train_data_ix[10000:20000,:,:], train_data_array[10000:20000,:])
    val_loss = model_loss(model, loss_func, val_data_ix[:10000,:,:], val_data_array[:10000,:])
    print('\ttraining loss = %f' % train_loss)
    print('\tvalidation loss = %f' % val_loss)
    train_losses_h128_l2 += [train_loss]
    val_losses_h128_l2 += [val_loss]
    torch.save(model.state_dict(), 'model_checkpoint_2_h128_2layer_epoch' + str(epoch))

train_loss = model_loss(model, loss_func, train_data_ix[:100000,:,:], train_data_array[:100000,:])
val_loss = model_loss(model, loss_func, val_data_ix, val_data_array)
print('training loss = %f' % train_loss)
print('validation loss = %f' % val_loss)
train_losses_h128_l2 += [train_loss]
val_losses_h128_l2 += [val_loss]

on epoch 0
	on iteration 205 / 206
	training loss = 2.505617
	validation loss = 2.541927
on epoch 1
	on iteration 25 / 206

KeyboardInterrupt: 

In [73]:
torch.save(model.state_dict(), 'my_test_model')
model2 = MyLSTM(100)
model2.load_state_dict(torch.load('my_test_model'))

#train_loss = model_loss(model2, loss_func, train_data_ix[:50000,:,:], train_data_array[:50000,:])
#val_loss = model_loss(model2, loss_func, val_data_ix, val_data_array)
#print('\ttraining loss = %f' % train_loss)
#print('\tvalidation loss = %f' % val_loss)
model2 = None
gc.collect()

3714

In [40]:
softmax = torch.nn.Softmax()
chars = range(128)

def sample_char(char_scores, temp):
    char_scores = softmax(char_scores / temp)
    char = np.random.choice(chars, p=char_scores.detach().numpy())
    while not chr(char) in string.printable:
        char = np.random.choice(chars, p=char_scores.detach().numpy())
    return char

def sample(model, first_char, init_hidden, T, temp):
    result = first_char
    cur_char = ord(first_char)
    for t in range(T):
        one_hot_char = torch.tensor(i128[cur_char], dtype=torch.float).view(1, 1, -1)
        char_scores = model(one_hot_char)
        cur_char = sample_char(char_scores.view(-1), temp)
        result += chr(cur_char)
    return result

sample(model, 'A', torch.zeros((1, 1, model.hidden_dim)), 500, 0.5)

  """


'Anee and you will have let him goes so much of Hold to the King was a tall that I have never seen the haughance of the King, the King of Navarre, and not the first time to keep me the dead. We will be gone, and that you have been able to me you to the King of Nid-de-Merle, whom I will be a confused of the lady and force of the King of Nid-de-Merle, so dead. We will be a sort of the King father and her own priests were\\nhelp me to the King of Navarre and your good young horse the honour of your sa'