In [0]:
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F

In [0]:
import base64
import requests

In [0]:
master = "https://raw.githubusercontent.com/HimanshuSuman7/nd101_recurrent_neural_network/master/Data/anna_karenina.txt"
req = requests.get(master)
TEXT = req.text

### Loading Data

In [0]:
# with open("/content/anna_karenina.txt", "rb") as obj:
    # text = obj.read()

In [8]:
TEXT[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

### Tokenization
Creating two dictionaries:
1. int2char -> maps integers to the characters
2. char2int -> maps characters to unique integers

In [12]:
chars = tuple(set(TEXT))
print(chars)

(';', ')', 'C', '\n', 'B', '4', ' ', 'i', '"', 'K', 'x', 'Z', 'T', 'L', 'a', 'j', '`', 'F', 'y', 'X', 'I', 't', '&', '@', 'c', 'p', 'h', 'A', 'w', 'V', '$', 'z', '_', 'b', '%', 'v', '9', 'J', 'k', 's', 'R', 'P', 'M', 'E', '(', '7', '1', 'u', 'S', 'o', 'q', 'g', 'N', 'l', 'n', 'G', 'Y', '8', '?', 'm', 'Q', '!', '.', '3', '6', '/', '5', ',', '0', 'e', '*', '2', 'f', 'O', "'", 'D', 'W', 'r', 'U', 'H', '-', 'd', ':')


In [13]:
int2char = dict(enumerate(chars))
print(int2char)

{0: ';', 1: ')', 2: 'C', 3: '\n', 4: 'B', 5: '4', 6: ' ', 7: 'i', 8: '"', 9: 'K', 10: 'x', 11: 'Z', 12: 'T', 13: 'L', 14: 'a', 15: 'j', 16: '`', 17: 'F', 18: 'y', 19: 'X', 20: 'I', 21: 't', 22: '&', 23: '@', 24: 'c', 25: 'p', 26: 'h', 27: 'A', 28: 'w', 29: 'V', 30: '$', 31: 'z', 32: '_', 33: 'b', 34: '%', 35: 'v', 36: '9', 37: 'J', 38: 'k', 39: 's', 40: 'R', 41: 'P', 42: 'M', 43: 'E', 44: '(', 45: '7', 46: '1', 47: 'u', 48: 'S', 49: 'o', 50: 'q', 51: 'g', 52: 'N', 53: 'l', 54: 'n', 55: 'G', 56: 'Y', 57: '8', 58: '?', 59: 'm', 60: 'Q', 61: '!', 62: '.', 63: '3', 64: '6', 65: '/', 66: '5', 67: ',', 68: '0', 69: 'e', 70: '*', 71: '2', 72: 'f', 73: 'O', 74: "'", 75: 'D', 76: 'W', 77: 'r', 78: 'U', 79: 'H', 80: '-', 81: 'd', 82: ':'}


In [14]:
char2int = {ch: count for count, ch in int2char.items()}
print(char2int)

{';': 0, ')': 1, 'C': 2, '\n': 3, 'B': 4, '4': 5, ' ': 6, 'i': 7, '"': 8, 'K': 9, 'x': 10, 'Z': 11, 'T': 12, 'L': 13, 'a': 14, 'j': 15, '`': 16, 'F': 17, 'y': 18, 'X': 19, 'I': 20, 't': 21, '&': 22, '@': 23, 'c': 24, 'p': 25, 'h': 26, 'A': 27, 'w': 28, 'V': 29, '$': 30, 'z': 31, '_': 32, 'b': 33, '%': 34, 'v': 35, '9': 36, 'J': 37, 'k': 38, 's': 39, 'R': 40, 'P': 41, 'M': 42, 'E': 43, '(': 44, '7': 45, '1': 46, 'u': 47, 'S': 48, 'o': 49, 'q': 50, 'g': 51, 'N': 52, 'l': 53, 'n': 54, 'G': 55, 'Y': 56, '8': 57, '?': 58, 'm': 59, 'Q': 60, '!': 61, '.': 62, '3': 63, '6': 64, '/': 65, '5': 66, ',': 67, '0': 68, 'e': 69, '*': 70, '2': 71, 'f': 72, 'O': 73, "'": 74, 'D': 75, 'W': 76, 'r': 77, 'U': 78, 'H': 79, '-': 80, 'd': 81, ':': 82}


In [16]:
encode_text = np.array([char2int[ch] for ch in TEXT])
encode_text[:100]

array([ 2, 26, 14, 25, 21, 69, 77,  6, 46,  3,  3,  3, 79, 14, 25, 25, 18,
        6, 72, 14, 59,  7, 53,  7, 69, 39,  6, 14, 77, 69,  6, 14, 53, 53,
        6, 14, 53,  7, 38, 69,  0,  6, 69, 35, 69, 77, 18,  6, 47, 54, 26,
       14, 25, 25, 18,  6, 72, 14, 59,  7, 53, 18,  6,  7, 39,  6, 47, 54,
       26, 14, 25, 25, 18,  6,  7, 54,  6,  7, 21, 39,  6, 49, 28, 54,  3,
       28, 14, 18, 62,  3,  3, 43, 35, 69, 77, 18, 21, 26,  7, 54])

### Data Pre-Processing

In [0]:
def one_hot_encode(arr, n_labels):
    # initialize the encoded array
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    # fill appropriate elements with one
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    # reshape back to original array
    one_hot = one_hot.reshape(*arr.shape, n_labels)
    return one_hot

In [18]:
# test_case

test_seq = np.array([1, 2, 3, 4, 5])
one_hot = one_hot_encode(test_seq, 10)
print(one_hot)

[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]


In [0]:
def get_batches(arr, batch_size, seq_length):
    batch_size_total = batch_size * seq_length
    n_batches = len(arr) // batch_size_total
    
    arr = arr[:n_batches*batch_size_total]
    arr = arr.reshape(batch_size, -1)
    
    for n in range(0, arr.shape[1], seq_length):
        x = arr[:, n:n+seq_length]
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [20]:
# test_case

batches = get_batches(encode_text, 8, 50)
x, y = next(batches)

print("x = \n", x[:10, :10], "\n")
print("y = \n", y[:10, :10])

x = 
 [[ 2 26 14 25 21 69 77  6 46  3]
 [39 49 54  6 21 26 14 21  6 14]
 [69 54 81  6 49 77  6 14  6 72]
 [39  6 21 26 69  6 24 26  7 69]
 [ 6 39 14 28  6 26 69 77  6 21]
 [24 47 39 39  7 49 54  6 14 54]
 [ 6 27 54 54 14  6 26 14 81  6]
 [73 33 53 49 54 39 38 18 62  6]] 

y = 
 [[26 14 25 21 69 77  6 46  3  3]
 [49 54  6 21 26 14 21  6 14 21]
 [54 81  6 49 77  6 14  6 72 49]
 [ 6 21 26 69  6 24 26  7 69 72]
 [39 14 28  6 26 69 77  6 21 69]
 [47 39 39  7 49 54  6 14 54 81]
 [27 54 54 14  6 26 14 81  6 39]
 [33 53 49 54 39 38 18 62  6  8]]


### Training the Model

In [21]:
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Training on GPU.")
else:
    print("Using CPU.")

Training on GPU.


In [0]:
class CharRNN(nn.Module):
    def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        # creating character dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: count for count, ch in self.int2char.items()}
        # define LSTM
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)
        # define dropout layer
        self.dropout = nn.Dropout(drop_prob)
        # define final, fully-connected output layer
        self.fc = nn.Linear(n_hidden, len(self.chars))
    
    def forward(self, x, hidden):
        r_out, hidden = self.lstm(x, hidden)
        out = self.dropout(r_out)
        # stack up LSTM outputs
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.fc(out)
        return out, hidden
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        if use_gpu:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        return hidden

In [0]:
def train_network(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    net.train()
    optimizer = optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    # train on gpu if available
    if use_gpu:
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        hidden_state = net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            if use_gpu:
                inputs, targets = inputs.cuda(), targets.cuda()
            
            # new copy of hidden_state
            hidden_state = tuple([each.data for each in hidden_state])
            
            net.zero_grad()
            
            output, hidden_state = net(inputs, hidden_state)
            
            # calculate loss and perform backpropagation
            loss = criterion(output, targets.view(batch_size * seq_length).long())
            loss.backward()
            # using clip_grad_norm to avoid exploding gradient problem
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()
            
            # stats
            if counter % print_every == 0:
                val_hidden = net.init_hidden(batch_size)
                val_losses = []
                
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    val_hidden = tuple([each.data for each in val_hidden])
                    
                    inputs, targets = x, y
                    if use_gpu:
                        inputs, targets = inputs.cuda(), targets.cuda()
                    
                    output, val_hidden = net(inputs, val_hidden)
                    val_loss = criterion(output, targets.view(batch_size * seq_length).long())
                    val_losses.append(val_loss.item())
                
                net.train()
                
                print("epoch : {} / {}, ".format(e+1, epochs),
                      "step : {}, ".format(counter),
                      "loss : {0:.3f}, ".format(loss.item()), 
                      "validation_loss : {0:.3f}".format(np.mean(val_losses)))

In [24]:
n_hidden = 512
n_layers = 2

net = CharRNN(chars, n_hidden, n_layers)
net

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)

In [25]:
batch_size = 128
seq_length = 100
n_epochs = 20

train_network(net, encode_text, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=100)

epoch : 1 / 20,  step : 100,  loss : 3.053,  validation_loss : 3.033
epoch : 2 / 20,  step : 200,  loss : 2.392,  validation_loss : 2.379
epoch : 3 / 20,  step : 300,  loss : 2.144,  validation_loss : 2.106
epoch : 3 / 20,  step : 400,  loss : 1.930,  validation_loss : 1.930
epoch : 4 / 20,  step : 500,  loss : 1.859,  validation_loss : 1.806
epoch : 5 / 20,  step : 600,  loss : 1.726,  validation_loss : 1.713
epoch : 6 / 20,  step : 700,  loss : 1.659,  validation_loss : 1.645
epoch : 6 / 20,  step : 800,  loss : 1.609,  validation_loss : 1.587
epoch : 7 / 20,  step : 900,  loss : 1.542,  validation_loss : 1.539
epoch : 8 / 20,  step : 1000,  loss : 1.514,  validation_loss : 1.504
epoch : 8 / 20,  step : 1100,  loss : 1.452,  validation_loss : 1.474
epoch : 9 / 20,  step : 1200,  loss : 1.428,  validation_loss : 1.448
epoch : 10 / 20,  step : 1300,  loss : 1.416,  validation_loss : 1.426
epoch : 11 / 20,  step : 1400,  loss : 1.421,  validation_loss : 1.404
epoch : 11 / 20,  step : 15

### Checkpoint

In [0]:
model_name = "rnn_20_epoch.net"

check_point = {
    "n_hidden": net.n_hidden,
    "n_layers": net.n_layers,
    "state_dict": net.state_dict(),
    "tokens": net.chars
}

with open(model_name, "wb") as f:
  torch.save(check_point, f)

### Top K Sampling

In [0]:
def predict(net, char, h=None, top_k=None):
  """returns: predicted char and hidden_state"""
  # tensor inputs
  x = np.array([[net.char2int[char]]])
  x = one_hot_encode(x, len(net.chars))
  inputs = torch.from_numpy(x)
  # check for gpu
  if use_gpu:
      inputs = inputs.cuda()
  # detach hidden_state from history
  h = tuple([each.data for each in h])
  out, h = net(inputs, h)
  # get character probabilities
  p = F.softmax(out, dim=1).data
  if use_gpu:
      p = p.cpu()
  # get top characters
  if top_k is None:
      top_ch = np.arange(len(net.chars))
  else:
      p, top_ch = p.topk(top_k)
      top_ch = top_ch.numpy().squeeze()
  # select the next char
  p = p.numpy().squeeze()
  char = np.random.choice(top_ch, p=p/p.sum())
  # return encoded value of predicted char and hidden_state
  return net.int2char[char], h

### Priming and Generating Text

In [0]:
def sample(net, size, prime="The", top_k=None):
  if use_gpu:
      net.cuda()
  # eval mode
  net.eval()
  chars = [ch for ch in prime]
  h = net.init_hidden(1)
  for ch in prime:
      char, h = predict(net, ch, h, top_k=top_k)
  chars.append(char)
  # rotate chars
  for _ in range(size):
      char, h = predict(net, chars[-1], h, top_k=top_k)
      chars.append(char)
  # return string
  return "".join(chars)

In [38]:
sample(net, 500, prime="Anna", top_k=5)

'Anna. He went, trying to\nsay that they would have an offer was in\nlove. He\'s always\ndisagreeable to\nstand,\nand what to brought the\nremoments of another strange, streaghed and with a smile. "And I can do this instance and tears to make us the correct on their people\'s fine, and he has not\ntime to be the mad thim though\nhe\'s a conversation."\n\n"Well, how\'s something as the face with the most mistake."\n\n"Well, I have nothing to say. In a man\'s commind in the same time all the meadow, that is, in\nthat, as'

### Loading CheckPoint

In [39]:
with open("/content/rnn_20_epoch.net", "rb") as f:
    check_point = torch.load(f)

load_pt = CharRNN(check_point["tokens"], n_hidden=check_point["n_hidden"], n_layers=check_point["n_layers"])
load_pt.load_state_dict(check_point["state_dict"])

<All keys matched successfully>

In [40]:
sample(load_pt, 1000, top_k=5, prime="And he said")

'And he said sometimes, and his composide she thought of the same sort of all that had sumperenter with her that what was attacking her husband. He was a strange sang to\ntheir sensition. Alexey Alexandrovitch was sitting to her as he saw the men shrown in\nthe side of the reaching things\nof the same whele on the conversation. "And it was that she has\nbeen and with myself. He\'s that I shall be, to make us, and to\nbe disputed in that pancion to hurt\nhis face; there is it to me, and they were suffering all out\nof the\nparticular man from their presence. Why are so such meaning of the people, and were says to\nthe chorce of his mile when he\'s not always\nbeen so simple to the country."\n\n"If I\'ll say, they were timilly. His bed was so daughter."\n\n"Oh; no," answered Stepan Arkadyevitch, and her\nfamiliar, shrowd, and her face went. "Yes, tell you all about the meeting of his own step," his brother\nhad been say that in the same stay of his head, the chief short states of the mar