In [1]:
#Trains a neural network using RNN cells on shakespeare 
#Training example found here:
#https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt

In [5]:
#Scrape the website for data
import requests
from bs4 import BeautifulSoup
from lxml import html

In [3]:
link = '''https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt'''

In [7]:
html = requests.get(link).text
soup = BeautifulSoup(html,'html.parser')

In [14]:
text = soup.find_all(text = True)[0]
print(text[0:50])
print(text[-50:])

That, poor contempt, or claim'd thou slept so fait
HAM:
You guess: I'll take my bloody back.

BRUTUS



In [28]:
##################
#Begin making the neural network
import sys,random,math
from collections import Counter
import numpy as np
import sys
%run Tensor_Framework_with_Numpy.ipynb

In [29]:
np.random.seed(0)
vocab = list(set(text)) #Get every unique word in Shakespeare
word2index = { } #Get indices for each word
for i, word in enumerate(vocab):
    word2index[word] = i #Build indices
indices = np.array(list(map(lambda x:word2index[x],text))) #Get the indices for each word

In [30]:
print(indices[0:5])
print(word2index)
print(vocab[0:5])

[60 55 16 20  8]
{'d': 0, ';': 1, 'E': 2, '.': 3, "'": 4, 'w': 5, 'H': 6, 'C': 7, ',': 8, 'n': 9, 'O': 10, 'q': 11, 'x': 12, 's': 13, '\n': 14, ':': 15, 'a': 16, 'j': 17, 'e': 18, 'I': 19, 't': 20, 'b': 21, 'u': 22, 'o': 23, 'k': 24, 'P': 25, 'p': 26, 'Q': 27, 'Y': 28, 'X': 29, 'D': 30, 'B': 31, 'y': 32, 'f': 33, 'M': 34, '!': 35, 'N': 36, 'i': 37, 'r': 38, 'm': 39, 'J': 40, 'l': 41, '-': 42, 'Z': 43, '?': 44, 'c': 45, 'F': 46, 'A': 47, 'U': 48, 'z': 49, 'g': 50, ' ': 51, 'G': 52, 'R': 53, 'W': 54, 'h': 55, 'K': 56, 'v': 57, 'V': 58, 'L': 59, 'T': 60, 'S': 61}
['d', ';', 'E', '.', "'"]


In [32]:
embed = Embedding(vocab_size = len(vocab), dim = 512)
model = RNNCell(n_inputs = 512, n_hidden = 512, n_output = len(vocab))
criterion = CrossEntropyLoss()
optim = SGD(parameters = model.get_parameters() + embed.get_parameters(),
           alpha = 0.5)

In [33]:
batch_size = 32 #Train 32 records at a time
bptt = 16 #Specify backpropagation to stop 16 steps into the past
n_batches = int((indices.shape[0] / batch_size))

In [34]:
#Because backprop is truncated, we need to subset datasets of size bptt
trimmed_indices = indices[:n_batches * batch_size]
batched_indices = trimmed_indices.reshape(batch_size, n_batches).transpose()

input_batched_indices = batched_indices[0:-1] #Subset the data to create predicted and target data
target_batched_indices = batched_indices[1:]

n_bptt = int(((n_batches - 1) / bptt)) #Number of backprops to perform
input_batches = input_batched_indices[:n_bptt * bptt] #Of input indices, create batches
input_batches = input_batches.reshape(n_bptt,bptt,batch_size) #Reshape for input
target_batches = target_batched_indices[:n_bptt * bptt] #Create final target batches
target_batches = target_batches.reshape(n_bptt,bptt,batch_size) #Reshape the same size as input batches

In [61]:
#Aside for readability
print(trimmed_indices.shape)
print(batched_indices.shape)
print(input_batched_indices.shape)
print(target_batched_indices.shape)
print(input_batches.shape)
print(target_batches.shape)
print(n_bptt)
print()
print(text[0:5])
print(indices[0:5])
print(batched_indices[0:5,0]) 
print()
print(input_batches[0][0:5,0])
print(target_batches[0][0:5,0])#Note the target batch is the input batch offset by 1

(99968,)
(3124, 32)
(3123, 32)
(3123, 32)
(195, 16, 32)
(195, 16, 32)
195

That,
[60 55 16 20  8]
[60 55 16 20  8]

[60 55 16 20  8]
[55 16 20  8 51]


In [79]:
def generate_sample(output = None,n = 30, init_char = ' '):
    s = ''
    hidden = model.init_hidden(batch_size = 1)
    input = Tensor(np.array([word2index[init_char]]))
    for i in range(n):
        rnn_input = embed.forward(input)
        output.hidden = model.forward(input = rnn_input, hidden = hidden)
        output.data *= 10
        temp_dest = output.cross_entropy(target_batched_indices)
        temp_dist /= temp_dist.sum()
        
        m = (temp_dist > np.random.rand()).argmax()
        c = vocab[m]
        input = Tensor(np.array([m]))
        s += c
    return s


In [89]:
#Network training function
def train(iterations = 100):
    for iter in range(iterations):
        total_loss = 0 #Start loss measure
        n_loss = 0
        
        hidden = model.init_hidden(batch_size = batch_size)
        for batch_i in range(len(input_batches)):
            hidden = Tensor(hidden.data, autograd = True)#Create hidden layer
            loss = None
            losses = list()
            for t in range(bptt): #Start backprop tensors
                input = Tensor(input_batches[batch_i][t],autograd = True)
                rnn_input = embed.forward(input = input) #Embedding layer for forward prop
                output, hidden = model.forward(input = rnn_input,
                                              hidden = hidden)
                target = Tensor(target_batches[batch_i][t], autograd = True) #Define target
                batch_loss = criterion.forward(output,target) #Measure loss
                losses.append(batch_loss)
                if(t == 0):
                    loss = batch_loss #Define first loss, else increase the loss
                else:
                    loss += batch_loss
            loss.backward(Tensor(np.ones_like(loss.data)))
            optim.step()
            total_loss += loss.data
        optim.alpha *= 0.99
                    

In [None]:
train()

In [None]:
print(generate_sample(n=2000, init_char='\n'))