In [0]:
# Importing Libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [0]:
# Open data containing Shakespear's poetry
with open('shakespear.txt') as f:
    text = f.read()

In [5]:
# Sample of the data
text[:113]

"\n                     1\n  From fairest creatures we desire increase,\n  That thereby beauty's rose might never die"

In [0]:
# Encoding the text and map each character into number and vice-versa

# We create 2 dctonaries
# int2char: maps number into characters
# char2int: maps characters into numbers
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch:ii for ii, ch in int2char.items()}

In [7]:
print(chars)

('k', 'S', '5', 'Y', '8', 'n', ':', 'm', 't', '?', 'j', '1', '9', '[', 'Z', 'e', 'N', 'o', ' ', '4', '0', 'Q', '&', 'H', 'f', 'u', '2', 'P', 'L', 'M', 'q', 'c', 'p', 'W', ')', 'T', 's', 'r', 'w', '|', 'D', 'J', 'X', 'F', 'A', 'R', 'B', 'C', 'z', 'i', 'a', 'U', '"', 'y', 'v', '_', '\n', 'V', '3', '>', "'", '`', '(', ';', '<', 'h', '6', 'g', 'G', 'l', 'b', 'K', 'I', 'd', 'O', '-', 'x', '}', '7', '!', ',', ']', 'E', '.')


In [0]:
# Encode the text
encode = np.array([char2int[ch] for ch in text])

In [9]:
encode[1]

18

In [10]:
print(char2int['\n'])

56


In [0]:
# Defining method to encode one hot labels
def one_hot_encode(arr, n_labels):
    # Initilize the one hot encoded array
    one_hot = np.zeros( (np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1
    
    # Finally reshape it to get back the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

In [0]:
# Defining method to make mini-batches for training
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    batch_size_total = batch_size * seq_length
    
    # total number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y


In [0]:
# Check gpu is available
train_on_gpu = torch.cuda.is_available()
#train_on_gpu

In [0]:
# Declaring the model
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_hidden=256, n_layers=4,
                               drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        # creating character dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        #define the LSTM
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        
        #define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        
        #define the final, fully-connected output layer
        self.fc = nn.Linear(n_hidden, len(self.chars))
      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
                
        #get the outputs and the new hidden state from the lstm
        r_output, hidden = self.lstm(x, hidden)
        
        #pass through a dropout layer
        out = self.dropout(r_output)
        
        # Stack up LSTM outputs using view
        out = out.contiguous().view(-1, self.n_hidden)
        
        #put x through the fully-connected layer
        out = self.fc(out)
        
        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden

In [0]:
# Declaring the train method
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()
            
            # get the output from the model
            output, h = net(inputs, h)
            
            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                
                    val_losses.append(val_loss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [16]:
# Define and print the net
n_hidden=512
n_layers=2

net = CharRNN(chars, n_hidden, n_layers)
print(net)

# Declaring the hyperparameters
batch_size = 128
seq_length = 100
n_epochs = 25 # start smaller if you are just testing initial behavior

# train the model
train(net, encode, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=100)

CharRNN(
  (lstm): LSTM(84, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=512, out_features=84, bias=True)
)
Epoch: 1/25... Step: 100... Loss: 3.1870... Val Loss: 3.1209
Epoch: 1/25... Step: 200... Loss: 2.6308... Val Loss: 2.5452
Epoch: 2/25... Step: 300... Loss: 2.2705... Val Loss: 2.1459
Epoch: 2/25... Step: 400... Loss: 2.0948... Val Loss: 1.9773
Epoch: 2/25... Step: 500... Loss: 1.9114... Val Loss: 1.8583
Epoch: 3/25... Step: 600... Loss: 1.8315... Val Loss: 1.7722
Epoch: 3/25... Step: 700... Loss: 1.7667... Val Loss: 1.7003
Epoch: 3/25... Step: 800... Loss: 1.7734... Val Loss: 1.6405
Epoch: 4/25... Step: 900... Loss: 1.6770... Val Loss: 1.6031
Epoch: 4/25... Step: 1000... Loss: 1.6212... Val Loss: 1.5696
Epoch: 4/25... Step: 1100... Loss: 1.6208... Val Loss: 1.5379
Epoch: 5/25... Step: 1200... Loss: 1.5793... Val Loss: 1.5183
Epoch: 5/25... Step: 1300... Loss: 1.5419... Val Loss: 1.4924
Epoch: 5/25... Step: 1400... Loss: 

In [0]:
# Defining a method to generate the next character
def predict(net, char, h=None, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        
        # tensor inputs
        x = np.array([[net.char2int[char]]])
        x = one_hot_encode(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        if(train_on_gpu):
            inputs = inputs.cuda()
        
        # detach hidden state from history
        h = tuple([each.data for each in h])
        # get the output of the model
        out, h = net(inputs, h)

        # get the character probabilities
        p = F.softmax(out, dim=1).data
        if(train_on_gpu):
            p = p.cpu() # move to cpu
        
        # get top characters
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        # select the likely next character with some element of randomness
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        
        # return the encoded value of the predicted char and the hidden state
        return net.int2char[char], h

In [0]:
# Declaring a method to generate new text
def sample(net, size, prime='The', top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [30]:
# Generating new text
print(sample(net, 1000, prime='H', top_k=5))

Hy sere trind
    As a that him hits that he word or me, to blow,
                       
                                                    Eneen I with thy more tone wises be the mast
 
                                                   Exeunt PALESAR, Angeare a doors]              "   
    And the will on that the Kert and thom sore,
                              Enent the CORES, INDIAND, INLEND OR ANTOR OF OLDOSTALE Th          THe talled are the starl, the well.
    I thou to but the mean of that so stees thou a dore
    ar we him is as the wiss bat in the pores an this and
    then thin here a thee sele the pontens thee weal me mare thee mastel of to the
    beasites be wour hiss outhing and the best in sing.
  PORIUS. I ame ald the more weer siss thee thou speit.
  PURIAN. His somenen sime to but thou and a mast.
    That with and as such it hene and bound his bare.
    Ther wise all to my sule, thee to that shell ane my buld
    And hine of me so alle wath the manter to be see

In [34]:
# Generating new text
print(sample(net, 500, prime='I', top_k=5))

IDAULES OF SOLENET, and SOMERSET,
                                       Exit SHELIND and LANDES

  SIR JUHN. A weary save your pleasuse and marry on,
    We was a must shall not be mercy'd to mischance,
    For the minister to the company of my
    That I have break a praises and such torce of man,
    What it then to survey heaven to speak a prepent
    And then with survey and such arts at these speess
    The sell of the will should heard all some speak,
    Wherein a conforn that the wish's t


In [22]:
# Generating new text
print(sample(net, 500, prime='I Like', top_k=5))

I Like trees,
    That I will stay well.
  BATTH. Take my lord, I will but most must be my son;
    To make thy soul a whell there is to make.
    The matter will not serve
    The patch of her thrist, to the whom hath say
    Antent them; fir, the warlike and a sort
    Where his own life is so tried with me.
    Words to your Grace so shall I die as still
    With trade of steep against him, who impain'd him
    That take her strangles thou and stead of speed;
    Ther thou art all my peace.
  BASSAN
