In [188]:
'''This notebook trains an RNN to generate Shakespearean sonnets. It
requires a text file containing all of Shakespeare's sonnets.'''
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout
from keras.utils import np_utils

# Load the raw sonnets as a list of text lines (one entry per line of the file).
# Plain file reading is used instead of np.loadtxt: recent NumPy releases reject
# a newline as the delimiter, and loadtxt would also cut any line at a '#'.
# NOTE(review): assumes the file is UTF-8/ASCII encoded - confirm.
with open('/data/shakespeare.txt', encoding='utf-8') as sonnet_file:
  data = sonnet_file.read().splitlines()

In [189]:
# Turn the raw lines into one training string: blank lines and purely numeric
# lines are dropped, the listed punctuation is removed, text is lower-cased,
# and every kept line is wrapped as '>' + text + '\n'.
strip_punctuation = str.maketrans('', '', ',.;?!:()')
kept_lines = []
for entry in data:
  text = entry.strip()
  if len(text) < 1 or text.isdigit():
    continue
  kept_lines.append('>' + text.translate(strip_punctuation).lower() + '\n')
raw_X = "".join(kept_lines)

In [190]:
# Alphabet of the unique characters used in the cleaned sonnets. The characters
# are sorted so that every run assigns the same index to the same character;
# iterating a bare set of strings can yield a different order in each
# interpreter session, which would change the encoding fed to the model.
alphabet = sorted(set(raw_X))

# symbol -> index and index -> symbol lookup tables built from the alphabet
symbol_to_idx = {symbol: idx for idx, symbol in enumerate(alphabet)}
idx_to_symbol = dict(enumerate(alphabet))

In [191]:
# Build the training set: each sample is a window of `seq_len` consecutive
# character indices (X) and its label is the index of the character that
# immediately follows the window (Y). Windows start every `stride` characters.
seq_len = 40
stride = 4
preX = []
preY = []
for start in range(0, len(raw_X) - seq_len, stride):
  stop = start + seq_len
  preX.append([symbol_to_idx[char] for char in raw_X[start:stop]])
  preY.append(symbol_to_idx[raw_X[stop]])
# shape (samples, seq_len, 1), with indices scaled by the alphabet size
X = np.reshape(preX, (len(preX), seq_len, 1)) / len(alphabet)
# Fix the one-hot width to the alphabet size. Without num_classes the width is
# inferred from the largest label that happens to occur, which can be smaller
# than the alphabet when the highest-indexed character never follows a window.
Y = np_utils.to_categorical(preY, num_classes=len(alphabet))

In [212]:
# Two stacked 200-unit LSTM layers feed a softmax layer with one output per
# column of Y; the network is compiled with Adam and categorical cross-entropy
# and then fitted to (X, Y) for 60 epochs in batches of 64.
window_shape = (X.shape[1], X.shape[2])
n_classes = Y.shape[1]
model = Sequential([
  LSTM(200, return_sequences=True, input_shape=window_shape),
  LSTM(200, return_sequences=False),
  Dense(n_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(X, Y, epochs=60, batch_size=64)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.callbacks.History at 0x63dfc3c88>

In [265]:
# Seed for generation: the opening line, encoded as a list of character indices
seed_text = "shall i compare thee to a summer's day \n"
cur_str = [symbol_to_idx[symbol] for symbol in seed_text]

# The poem text starts out as the decoded seed and grows as characters are sampled
poem = "".join(idx_to_symbol[idx] for idx in cur_str)

temperature = 0.3
# Sample 1500 characters one at a time. This is intended to be long enough to
# produce the 14 lines a sonnet needs, but the line count is not enforced here.
for _ in range(1500):
    # scale the current index window the same way the training inputs were scaled
    extract = np.reshape(cur_str, (1, len(cur_str), 1)) / len(alphabet)
    # Flatten the prediction and switch to float64 so the renormalised
    # probabilities sum to 1 closely enough for the multinomial sampler; this
    # replaces subtracting a constant from every probability, which could push
    # very small probabilities below zero.
    probs = np.asarray(model.predict(extract, verbose=0), dtype=np.float64).ravel()
    # Temperature scaling in log space; log(0) = -inf is acceptable here
    # because it maps back to probability 0 after exponentiation.
    with np.errstate(divide='ignore'):
        scaled = np.log(probs) / temperature
    # shift by the maximum so the largest term becomes exp(0) = 1 and the sum
    # cannot underflow to zero
    scaled -= np.max(scaled)
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)
    # draw one character index from the temperature-adjusted distribution
    out_idx = int(np.argmax(np.random.multinomial(1, probs)))
    # add the sampled index to the current window and its character to the poem
    cur_str.append(out_idx)
    poem += idx_to_symbol[out_idx]
    # drop the oldest index so the window keeps its original length
    cur_str = cur_str[1:]
    
# Poem formatting: turn the generated text into at most 14 display lines.
#  - the first line (the seed) loses its last character and gets a '?'
#  - the following lines, until 13 lines are collected, get a randomly chosen
#    ending
#  - the 14th line always ends with '.'
# Lines of length 0 or 1 are skipped. Only the '>' marker is removed from a
# line; real characters are never dropped, even when the marker is missing or
# is not the first character.
poem_lines = poem.split(sep='\n')
final_poem = []
line_endings = ['', ',', '?', ';', '!', '.']
ending_probs = [0.1, 0.5, 0.1, 0.05, 0.05, 0.2]
for i, raw_line in enumerate(poem_lines):
    if len(raw_line) <= 1:
        continue
    if i == 0:
        final_poem.append(raw_line.capitalize()[0:len(raw_line) - 1] + '?')
        continue
    text = raw_line.replace('>', '').capitalize()
    if len(final_poem) < 13:
        final_poem.append(text + np.random.choice(line_endings, p=ending_probs))
    elif len(final_poem) < 14:
        final_poem.append(text + '.')
for line in final_poem:
    print(line)

Shall i compare thee to a summer's day?
Ooe aeauyy,
Th t le d yhat hate the  ie messte,
Fhen ioo and 've uet dutst tou seat'.
For io need au wletcu sor my love ioared
Bnt at that thou iive that t vanes brill ant feerth halee,
And soaeven tour mq aevutes stand bpan!
Torng myes whete the bum eress shouea!
Though iour self in me thys brr with live,
That shall ious sn eered vemwouee,
The fonloundee with of siing mnd aloueeeed,
Bnt thou art thy self thy gou wererasg boeekint oel;
Tt wings to the mie ofe wind more that hie woell his sweet,
Ihe soayngd san ncapl mlf the sum.
