In [102]:
#Trains a neural network using LSTM cells on shakespeare 
#Training example found here:
#https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt

In [103]:
#Scrape the website for data
import requests
from bs4 import BeautifulSoup
from lxml import html
import sys,random,math
from collections import Counter
import numpy as np
import sys
%run Tensor_Framework_with_Numpy.ipynb
np.random.seed(0)

In [104]:
link = '''https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt'''
html = requests.get(link).text
soup = BeautifulSoup(html,'html.parser')
text = soup.find_all(text = True)[0]

In [105]:
vocab = list(set(text))
word2index = {}
for i,word in enumerate(vocab):
    word2index[word]=i
indices = np.array(list(map(lambda x:word2index[x], text)))

In [106]:
embed = Embedding(vocab_size=len(vocab),dim=512)
model = LSTMCell(n_inputs=512, n_hidden=512, n_output=len(vocab))
model.w_ho.weight.data *= 0

In [107]:
criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(),
            alpha=0.05)

In [108]:
batch_size = 16
bptt = 25
n_batches = int((indices.shape[0] / (batch_size)))

In [109]:
trimmed_indices = indices[:n_batches*batch_size]
batched_indices = trimmed_indices.reshape(batch_size, n_batches)
batched_indices = batched_indices.transpose()

In [110]:
input_batched_indices = batched_indices[0:-1]
target_batched_indices = batched_indices[1:]

In [111]:
n_bptt = int(((n_batches-1) / bptt))
input_batches = input_batched_indices[:n_bptt*bptt]
input_batches = input_batches.reshape(n_bptt,bptt,batch_size)
target_batches = target_batched_indices[:n_bptt*bptt]
target_batches = target_batches.reshape(n_bptt, bptt, batch_size)
min_loss = 1000

In [112]:
def train(iterations = 100):
    for iter in range(iterations):
        total_loss, n_loss = (0, 0)
        
        hidden = model.init_hidden(batch_size=batch_size)
        batches_to_train = len(input_batches)

        for batch_i in range(batches_to_train):

            hidden = (Tensor(hidden[0].data, autograd=True),
                   Tensor(hidden[1].data, autograd=True))
            losses = list()

            for t in range(bptt):
                input = Tensor(input_batches[batch_i][t], autograd=True)
                rnn_input = embed.forward(input=input)
                output, hidden = model.forward(input=rnn_input, hidden=hidden)

                target = Tensor(target_batches[batch_i][t], autograd=True)
                batch_loss = criterion.forward(output, target)

                if(t == 0):
                    losses.append(batch_loss)
                else:
                    losses.append(batch_loss + losses[-1])
            loss = losses[-1]

            loss.backward()
            optim.step()

            total_loss += loss.data / bptt
            epoch_loss = np.exp(total_loss / (batch_i+1))
            
            log = "\r Iter:" + str(iter)
            log += " - Alpha:" + str(optim.alpha)[0:5]
            log += " - Batch "+str(batch_i+1)+"/"+str(len(input_batches))
            log += " - Loss:" + str(epoch_loss)

            sys.stdout.write(log)
        optim.alpha *= 0.99

In [113]:
def generate_sample(n=30, init_char=' '):
    s = ""
    hidden = model.init_hidden(batch_size=1)
    input = Tensor(np.array([word2index[init_char]]))
    for i in range(n):
        rnn_input = embed.forward(input)
        output, hidden = model.forward(input=rnn_input, hidden=hidden)
        output.data *= 15
        temp_dist = output.softmax()
        temp_dist /= temp_dist.sum()

        m = output.data.argmax()           
        c = vocab[m]
        input = Tensor(np.array([m]))
        s += c
    return s

In [114]:
def train(iterations=400, min_loss = 1000):
    min_loss = min_loss
    for iter in range(iterations):
        total_loss = 0
        n_loss = 0

        hidden = model.init_hidden(batch_size=batch_size)
        batches_to_train = len(input_batches)
    #     batches_to_train = 32
        for batch_i in range(batches_to_train):

            hidden = (Tensor(hidden[0].data, autograd=True), Tensor(hidden[1].data, autograd=True))

            losses = list()
            for t in range(bptt):
                input = Tensor(input_batches[batch_i][t], autograd=True)
                rnn_input = embed.forward(input=input)
                output, hidden = model.forward(input=rnn_input, hidden=hidden)

                target = Tensor(target_batches[batch_i][t], autograd=True)    
                batch_loss = criterion.forward(output, target)

                if(t == 0):
                    losses.append(batch_loss)
                else:
                    losses.append(batch_loss + losses[-1])

            loss = losses[-1]

            loss.backward()
            optim.step()
            total_loss += loss.data / bptt

            epoch_loss = np.exp(total_loss / (batch_i+1))
            
            

            log = "\r Iter:" + str(iter)
            log += " - Alpha:" + str(optim.alpha)[0:5]
            log += " - Batch "+str(batch_i+1)+"/"+str(len(input_batches))
            log += " - Min Loss:" + str(min_loss)[0:5]
            log += " - Loss:" + str(epoch_loss)
            if(batch_i == 0):
                log += " - " + generate_sample(n=70, init_char='T').replace("\n"," ")
            if(batch_i % 10 == 0):
                sys.stdout.write(log)
        optim.alpha *= 0.99

In [None]:
train(400)

 Iter:33 - Alpha:0.035 - Batch 31/249 - Min Loss:1000 - Loss:15.462806919381231- hat, ever thee seever the seever the seever the seever the seever the 

In [None]:
print(generate_sample(n=1, init_char='\n'))