# Notebook 4 (bonus): Shakespeare Generator

In this exercise we aim to compose text just as Shakespeare might write it today. To this end, the network takes a text file as input and is trained to predict the next character in a sequence. In this case, the input data (`input.txt`) is part of Shakespeare's "The Taming of the Shrew". Once trained, the network is used to generate new text that is similar to the given input data. After some training, the generated text should look like a real Shakespearean work. A detailed description can be found here: http://karpathy.github.io/2015/05/21/rnn-effectiveness/.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

# Import Data

In [None]:
# Option B: clone the course repository to obtain the exercise files.
!git clone https://github.com/GreschAl/ML4Q_retreat22_ML_with_python

In [None]:
# Option B: directory of this exercise inside the repository cloned above;
# input.txt is read from here.
# NOTE(review): the "/content/" prefix presumably matches the Google Colab
# working directory - adjust when running elsewhere.
folder = "/content/ML4Q_retreat22_ML_with_python/exercises/4_bonus/"

In [None]:
# data I/O
# Read the whole corpus; the context manager closes the file handle as soon
# as the text has been loaded.
with open(folder + 'input.txt', 'r', encoding='utf-8') as corpus_file:
    data = corpus_file.read()  # should be simple plain text file

# Sort the vocabulary so that the character <-> index mapping is the same on
# every run (the iteration order of a set of strings changes between
# interpreter sessions because of hash randomisation).
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))

# Lookup tables between characters and their integer class index.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

In [None]:
# Model definition
# ----------------
# Hyperparameters
hidden_size = 100     # width of the LSTM hidden state
seq_length = 25       # number of time steps the RNN is unrolled for
learning_rate = 1e-1  # learning rate handed to both optimizers
optimizer_func = torch.optim.Adagrad

# Network: an LSTM that consumes vectors of size vocab_size, followed by a
# linear layer mapping the hidden state back to vocab_size scores.
model = nn.LSTM(input_size=vocab_size, hidden_size=hidden_size)
linear = nn.Linear(in_features=hidden_size, out_features=vocab_size)
print(model, linear, sep='\n')

# One optimizer per module, both built from the same optimizer class.
opt_model = optimizer_func(model.parameters(), lr=learning_rate)
opt_lin = optimizer_func(linear.parameters(), lr=learning_rate)

Helper function to calculate the loss.

In [None]:
def lossFun(model, linear, inputs, targets, cprev, hprev):
    """
    Compute the summed cross-entropy loss of one character sequence.

    model is the nn.LSTM, linear the layer that maps a hidden state to one
    score per character; the one-hot width is read from model.input_size.
    inputs,targets are both sequences (list or 1-D tensor) of integer
    character indices of equal length; targets[t] is the character expected
    after inputs[t].
    cprev,hprev are the 1xH initial cell state and initial hidden state.
    returns the loss summed over all time steps as a scalar tensor; call
    .backward() on it to obtain gradients (the final states are not returned).
    """
    inputs = torch.as_tensor(inputs, dtype=torch.long)
    targets = torch.as_tensor(targets, dtype=torch.long)
    if inputs.numel() == 0:
        return torch.zeros(())  # empty sequence: nothing to predict

    # encode in 1-of-k representation, one row per time step
    xs = F.one_hot(inputs, num_classes=model.input_size).float()

    # forward pass: a 2-D input is treated by nn.LSTM as one unbatched
    # sequence, so a single call yields the hidden state of every time step
    hs, _ = model(xs, (hprev, cprev))
    ys = linear(hs)  # unnormalized log probabilities for next chars

    # cross-entropy loss, summed (not averaged) over the time steps
    return F.cross_entropy(ys, targets, reduction='sum')

Helper function to create new samples.

In [None]:
def sample(model, linear, c, h, seed_ix, n):
    """
    sample a sequence of integers from the model

    model is the nn.LSTM and linear the layer turning a hidden state into
    one score per character; the one-hot width is read from model.input_size.
    c is the cell state and h the hidden (memory) state to start from,
    seed_ix is seed letter for first time step, n is the number of
    characters to draw.
    returns a list of n Python ints (character indices); the seed itself is
    not included. Draws use np.random, so np.random.seed controls them.
    """
    vocab = model.input_size
    x = torch.zeros((1, vocab), dtype=torch.float)
    x[0, seed_ix] = 1
    ixes = []
    with torch.no_grad():  # pure inference, no autograd graph needed
        for _step in range(n):
            _, (h, c) = model(x, (h, c))
            y = linear(h)
            p = F.softmax(y, dim=1).numpy()
            ix = int(np.random.choice(vocab, p=p.ravel()))
            # feed the sampled character back in as the next one-hot input
            x = torch.zeros((1, vocab), dtype=torch.float)
            x[0, ix] = 1
            ixes.append(ix)
    return ixes

Endless loop of training and printing samples every 1000 iterations.

In [None]:
# n counts training iterations, p is the read position inside `data`.
n, p = 0, 0
smooth_loss = -np.log(1.0 / vocab_size) * seq_length  # loss at iteration 0
while True:
    # prepare inputs (we're sweeping from left to right in steps seq_length long)
    if p + seq_length + 1 >= len(data) or n == 0:
        cprev = torch.zeros((1, hidden_size))
        hprev = torch.zeros((1, hidden_size))  # reset RNN memory
        p = 0  # go from start of data
    # NOTE(review): cprev/hprev are only assigned in the branch above and
    # lossFun returns just the loss, so every chunk starts from a zero state.
    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long)
    # targets are the inputs shifted by one character
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long)

    # sample from the model now and then
    if n % 1000 == 0:
        sample_ix = sample(model, linear, cprev, hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----' % (txt,))

    # forward seq_length characters through the net and fetch gradient
    loss = lossFun(model, linear, inputs, targets, cprev, hprev)
    # perform parameter update with optimizers
    opt_model.zero_grad()
    opt_lin.zero_grad()
    loss.backward()
    opt_model.step()
    opt_lin.step()

    # Use the plain Python number: accumulating the tensor itself would turn
    # smooth_loss into a tensor whose autograd history grows every iteration.
    smooth_loss = smooth_loss * 0.999 + loss.item() * 0.001
    if n % 1000 == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))  # print progress

    p += seq_length  # move data pointer
    n += 1  # iteration counter