In [1]:
import numpy as np
import torch 
from torch import nn
import torch.nn.functional as F

In [2]:
# open text file and read in data as text
with open('data/data.txt', 'r') as f :
    text  = f.read()

In [3]:
# CHecking out first hundred characters
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

## Tokenization

In [4]:
# Convert text into numerical tokens as models only learn from numerical data
# Providing each ext to a unique index

# Encode the text and map each character to an integer and vice and versa
# chars is a set of all unique characters - using set
chars = tuple(set(text))

# Creating two dictionaries:
# int2char , which maps integers to characters 
# char2int , which maps characters to unique integers 
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# encode the text
encoded = np.array([char2int[ch] for ch in text])

In [5]:
encoded[:100]

array([68, 66, 67, 56, 25,  0, 37, 24, 57, 81, 81, 81, 17, 67, 56, 56, 71,
       24,  7, 67, 60, 14, 46, 14,  0, 22, 24, 67, 37,  0, 24, 67, 46, 46,
       24, 67, 46, 14, 29,  0, 13, 24,  0, 21,  0, 37, 71, 24, 38, 26, 66,
       67, 56, 56, 71, 24,  7, 67, 60, 14, 46, 71, 24, 14, 22, 24, 38, 26,
       66, 67, 56, 56, 71, 24, 14, 26, 24, 14, 25, 22, 24,  3, 73, 26, 81,
       73, 67, 71, 53, 81, 81, 39, 21,  0, 37, 71, 25, 66, 14, 26])

## Pre-Processing the data

In [6]:
def  one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

In [7]:
# check that the function works as expected
# one_hot is created with length 8 with 1 present at index [3,5,1]
test_seq = np.array([[3, 5, 1]])
one_hot = one_hot_encode(test_seq, 8)

print(one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


## Making training mini-batches

In [8]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    batch_size_total = batch_size * seq_length
    # total number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
     # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        # y has same elements of x all just shifted by 1
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        #Last element of y is first element of x
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

## Test Your Implementation

In [9]:
# Example set batch size of 8 and 50 sequence steps.

batches = get_batches(encoded, 8, 50)
x, y = next(batches)


# printing out the first 10 items in a sequence# printi 
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[68 66 67 56 25  0 37 24 57 81]
 [22  3 26 24 25 66 67 25 24 67]
 [ 0 26  4 24  3 37 24 67 24  7]
 [22 24 25 66  0 24 40 66 14  0]
 [24 22 67 73 24 66  0 37 24 25]
 [40 38 22 22 14  3 26 24 67 26]
 [24 27 26 26 67 24 66 67  4 24]
 [41 34 46  3 26 22 29 71 53 24]]

y
 [[66 67 56 25  0 37 24 57 81 81]
 [ 3 26 24 25 66 67 25 24 67 25]
 [26  4 24  3 37 24 67 24  7  3]
 [24 25 66  0 24 40 66 14  0  7]
 [22 67 73 24 66  0 37 24 25  0]
 [38 22 22 14  3 26 24 67 26  4]
 [27 26 26 67 24 66 67  4 24 22]
 [34 46  3 26 22 29 71 53 24 43]]


## Defining the network with PyTorch

In [17]:
class CharRNN(nn.Module):
    
    def __init__(self , tokens, n_hidden = 256, n_layers = 2, drop_prob = 0.5 , lr = 0.001):
        
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        
        # Creating character dictionaries 
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        # Defining a LSTM
        self.lstm = nn.LSTM(256,2, dropout = drop_prob, batch_first=True)
        # Adding a Dropout
        self.dropout = nn.Dropout(drop_prob)
        
        # Defining a fully connected output layer
        self.fc = nn.Linear(n_hidden, len(self.chars))
        
    def forward(self,x, hidden):
        
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
                
            # Outputs from the hidden state of lstm 
        r_output, hidden = self.lstm(x, hidden)
            
            # Pass output through a dropout layer 
        out = self.dropout(r_output)
            
             # Stack up LSTM outputs using view
            # you may need to use contiguous to reshape the output
        out = out.contiguous().view(-1, self.n_hidden)
        
            ## TODO: put x through the fully-connected layer
        out = self.fc(out)

            # return the final output and the hidden state
        return out, hidden
            
     
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden
        

## Time to train
