In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
with open('got.txt','r') as f:
    text = f.read()

In [3]:
text[0:1000]

'A Game Of Thrones \nBook One of A Song of Ice and Fire \nBy George R. R. Martin \nPROLOGUE \n"We should start back," Gared urged as the woods began to grow dark around them. "The wildlings are \ndead." \n"Do the dead frighten you?" Ser Waymar Royce asked with just the hint of a smile. \nGared did not rise to the bait. He was an old man, past fifty, and he had seen the lordlings come and go. \n"Dead is dead," he said. "We have no business with the dead." \n"Are they dead?" Royce asked softly. "What proof have we?" \n"Will saw them," Gared said. "If he says they are dead, that\'s proof enough for me." \nWill had known they would drag him into the quarrel sooner or later. He wished it had been later rather \nthan sooner. "My mother told me that dead men sing no songs," he put in. \n"My wet nurse said the same thing, Will," Royce replied. "Never believe anything you hear at a woman\'s \ntit. There are things to be learned even from the dead." His voice echoed, too loud in the twilit fores

In [4]:
# Get all unique characters in the text
chars = tuple(set(text))

# Dictionaries from char to index and index to char
idx2char = dict(enumerate(chars))
char2idx = {ch: i for i,ch in idx2char.items()}

# According to the dictionary, convert the characters in the book to integers
encoded = np.array([char2idx[ch] for ch in text])


In [5]:
encoded[0:100]

array([ 3,  1, 63, 74,  6, 70,  1, 64, 10,  1, 34,  0, 54, 30, 28, 70, 58,
        1, 16, 47, 30, 30, 43,  1, 64, 28, 70,  1, 30, 10,  1,  3,  1, 27,
       30, 28, 23,  1, 30, 10,  1, 75, 56, 70,  1, 74, 28, 11,  1, 52, 38,
       54, 70,  1, 16, 47, 62,  1, 63, 70, 30, 54, 23, 70,  1, 46, 49,  1,
       46, 49,  1, 32, 74, 54, 61, 38, 28,  1, 16, 44, 46, 64, 42, 64, 63,
       71, 53,  1, 16,  4, 15, 70,  1, 58,  0, 30, 51, 14, 11,  1])

In [6]:
# Apparently. As you can see in our char-RNN image above, our LSTM expects an input that is one-hot encoded meaning that 
# each character is converted into an integer (via our created dictionary) and then converted into a column vector where 
# only it's corresponding integer index will have the value of 1 and the rest of the vector will be filled with 0's. Since 
# we're one-hot encoding the data, let's make a function to do that!

def one_hot_encoder(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

In [7]:
one_hot_encoder(np.array([3,5,1]),10)

array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [8]:
def batch_generator(arr, batch_size, seq_length):
    # No. of characters in a batch = batch_size * seq_length
    # Therefore no. of batches => 
    
    k = arr.size//(batch_size * seq_length) # No. of batches
    arr = arr[0:(batch_size * seq_length * k)]
    
    # Reshape array into arrays of size length batch_size
    arr = arr.reshape(batch_size,-1)
    
    # Now we iterate over the array all at once with a window of batch_size x seq_length, then advance 
    # by seq_length
    
    for n in range(0,arr.shape[1],seq_length):
        x = arr[:,n:(n+seq_length)]
        
        # The targets are x shifted by one (cause we're predicting the next character)
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y


In [9]:
batches = batch_generator(encoded, 8, 50)
x, y = next(batches)

In [10]:
# printing out the first 10 items in a sequence
print('x\n', x[:, :10])
print('\ny\n', y[:, :10])

x
 [[ 3  1 63 74  6 70  1 64 10  1]
 [15  0 70 28  1  0 70  1 16 54]
 [16 32 30 54  6 30 28 61  1 67]
 [ 0 70  1 43 38 28 23 17  4  1]
 [ 1 29 70 54  1 70 62 70 58  1]
 [67  0 70 28 70 19 70 54  1 16]
 [28 49  4  1 16  4  9 30 51 54]
 [54 74  0  1 10 54 30 67 28 70]]

y
 [[ 1 63 74  6 70  1 64 10  1 34]
 [ 0 70 28  1  0 70  1 16 54 70]
 [32 30 54  6 30 28 61  1 67 74]
 [70  1 43 38 28 23 17  4  1 72]
 [29 70 54  1 70 62 70 58  1 67]
 [ 0 70 28 70 19 70 54  1 16  0]
 [49  4  1 16  4  9 30 51 54  1]
 [74  0  1 10 54 30 67 28 70 11]]


In [11]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [12]:
class GotRNN(nn.Module):
    def __init__(self, chars, hidden_size, n_layers, drop_prob = 0.8, lr = 0.001):
        
        super(GotRNN, self).__init__()

        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.drop_prob = drop_prob
        self.lr = lr
        
        # Creating dictionaries
        self.chars = chars
        self.idx2char = dict(enumerate(self.chars))
        self.char2idx = {ch: i for i,ch in self.idx2char.items()}
        
        self.lstm = nn.LSTM(len(self.chars), hidden_size, n_layers, dropout = drop_prob, batch_first = True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc = nn.Linear(hidden_size, len(self.chars)) # Output size is also the length of the text, just one forwarded.
        
    def forward(self, x, hidden):
        
        out, hidden = self.lstm(x, hidden)
        
        out = self.dropout(out)
        out = out.contiguous().view(-1, self.hidden_size)

        out = self.fc(out)
        
        return out, hidden
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_size,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_size).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_size).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_size).zero_(),
                      weight.new(self.n_layers, batch_size, self.hidden_size).zero_())
        
        return hidden  
    
    
    


In [13]:
# Hyperparameters
epochs = 20
batch_size = 64
seq_length = 64
lr = 0.001
clip = 5
val_frac = 0.2
print_every = 64
drop_prob = 0.5

hidden_size = 512
n_layers = 2
net = GotRNN(chars, hidden_size, n_layers, drop_prob=drop_prob)
print(net)

GotRNN(
  (lstm): LSTM(78, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=512, out_features=78, bias=True)
)


In [14]:
# Loss and Optimizer
opt = torch.optim.Adam(net.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [15]:
def train(net, data, epochs, batch_size, seq_length, lr, clip, val_frac, print_every):
    
    net.train()
    
    # Validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        h = net.init_hidden(batch_size)
        
        for x, y in batch_generator(data, batch_size, seq_length):
            
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encoder(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Create new variable for the hidden state
            h = tuple([each.data for each in h])
            
            # Zero accumulated grads
            net.zero_grad()
            
            output, h = net(inputs, h)
            
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()
            
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in batch_generator(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encoder(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                
                    val_losses.append(val_loss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
            
            
            
        

In [16]:
train(net, encoded, epochs, batch_size, seq_length, lr, clip, val_frac, print_every)

Epoch: 1/20... Step: 64... Loss: 3.0895... Val Loss: 3.0825
Epoch: 1/20... Step: 128... Loss: 2.8238... Val Loss: 2.7622
Epoch: 1/20... Step: 192... Loss: 2.4209... Val Loss: 2.3484
Epoch: 1/20... Step: 256... Loss: 2.2366... Val Loss: 2.1980
Epoch: 2/20... Step: 320... Loss: 2.1264... Val Loss: 2.0852
Epoch: 2/20... Step: 384... Loss: 2.0582... Val Loss: 1.9988
Epoch: 2/20... Step: 448... Loss: 1.9383... Val Loss: 1.9254
Epoch: 2/20... Step: 512... Loss: 1.9479... Val Loss: 1.8648
Epoch: 2/20... Step: 576... Loss: 1.8944... Val Loss: 1.8153
Epoch: 3/20... Step: 640... Loss: 1.8932... Val Loss: 1.7597
Epoch: 3/20... Step: 704... Loss: 1.7709... Val Loss: 1.7164
Epoch: 3/20... Step: 768... Loss: 1.7507... Val Loss: 1.6811
Epoch: 3/20... Step: 832... Loss: 1.6929... Val Loss: 1.6443
Epoch: 3/20... Step: 896... Loss: 1.7242... Val Loss: 1.6147
Epoch: 4/20... Step: 960... Loss: 1.6761... Val Loss: 1.5867
Epoch: 4/20... Step: 1024... Loss: 1.6239... Val Loss: 1.5659
Epoch: 4/20... Step: 108

In [17]:
def predict(net, char, h=None, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        
        # tensor inputs
        x = np.array([[net.char2idx[char]]])
        x = one_hot_encoder(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        if(train_on_gpu):
            inputs = inputs.cuda()
        
        # detach hidden state from history
        h = tuple([each.data for each in h])
        # get the output of the model
        out, h = net(inputs, h)

        # get the character probabilities
        p = F.softmax(out, dim=1).data
        if(train_on_gpu):
            p = p.cpu() # move to cpu
        
        # get top characters
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        # select the likely next character with some element of randomness
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        
        # return the encoded value of the predicted char and the hidden state
        return net.idx2char[char], h

In [38]:
def sample(net, size, prime='The', top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [39]:
print(sample(net,1000,prime='The',top_k=2))

There was a shoulder of the stabling blade of his sister's bed the boy with a black brother of the Kingsguard. The steward would not have been the boy when he had never seen him a second, and the word was a strength of the 
silver on the story, and her father was a stallion, a boy with the bastard of the Kingsguard. "It's not that the man's bear, the boy was the second way at her from the stables." 
"I wonder," she said, "but the man is too stand and still here." Her brother was almost as the stone stars were to start to him. "The gods were there and so much to see." 
"There's not the boy," she told him." 
"The gods are all the south, the words will be a single of your sister's stale." 
"I don't want to say, I'm to start to the 
boy and the 
bastard with a silver tower to see you," she said, her fingers and shouted to the stables. 
"The king was all a start to step and see that the stars," she said, "the boy will never be a silver than you and the 
seal was all, a man's brothers, and I

In [40]:
# Saving the model
model_name = 'GotRNN.net'

checkpoint = {'hidden_size': net.hidden_size,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars,
              'opt': opt.state_dict(),}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [41]:
# Loading the model
with open('GotRNN.net', 'rb') as f:
    checkpoint = torch.load(f)
    
loaded = GotRNN(checkpoint['tokens'], hidden_size=checkpoint['hidden_size'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])
opt.load_state_dict(checkpoint['opt'])

In [45]:
# Sample using a loaded model
# print(sample(loaded, 2000, top_k=2, prime="The Lord of the Seven Kingdoms and Protector"))
print(sample(loaded, 2000, top_k=2, prime="Bastard"))


Bastard," he told him. "I will not be a longsword as they were to the stables." 
"I will be a strong," said Ser Jorah Mormont and the 
book of her son. "I have not been the bastards and the 
battlements of all of them, the bastard man to see the 
second woman the man's been a start of a boy. I have no company of the 
boy as the boy was the stables and stars and the sept of the seven seat, a moment the man was a stranger and start and too soon of her brother and the sept, and they wore the stallion of their 
silent stores, and her father was the stony stalls, and the septa to the 
stalls of the Wall, the 
bastard would be a single of the 
stables. They said, I have not brought your 
beautiful as the sept on the stalleon and the 
stone with a silver than the 
boy when he had told the stallion where he was the boy with the stableboys, and the stars were to see them to her. 
The septa would be the story of his sons, the sept of trees and start of the 
sides and the stone was a shadow of th