In [60]:
import numpy as np
import sys
import os
import matplotlib.pyplot as plt
from nltk.tokenize import TweetTokenizer, word_tokenize, wordpunct_tokenize, sent_tokenize
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import SGD
from keras.models import load_model

# Seed NumPy's global random generator so that NumPy-based draws in this
# notebook (e.g. the np.random.multinomial sampling used when generating text)
# are repeatable across runs.
# NOTE(review): this presumably does not pin the Keras backend's own randomness
# (weight initialisation, batch shuffling) -- confirm and seed that separately
# if fully reproducible training is required.
np.random.seed(21)

In [61]:
# Read the raw sonnet corpus into memory; case is preserved (no lowercasing).
# The encoding is stated explicitly so the result does not depend on the
# platform's default text encoding.
with open('shakespeare.txt', 'r', encoding='utf-8') as f:
    data = f.read()

In [62]:
# Light preprocessing before the text is fed to the LSTM: each sonnet loses
# its leading index line and surrounding whitespace, and the cleaned lines are
# then glued back together into a single training string.
sonnets = data.split('\n\n')  # one chunk per blank-line-separated sonnet
sonnet_lens = [len(raw) for raw in sonnets]  # chunk lengths, measured before stripping
sonnets = [raw.strip() for raw in sonnets]

# One list of stripped lines per sonnet; the first line of every chunk
# (the poem's index) is dropped.
lines = [[text.strip() for text in poem.split('\n')[1:]] for poem in sonnets]

# Every line is terminated by a newline and every sonnet is followed by one
# extra newline, so consecutive poems end up separated by a blank line.
fulltext = "".join(
    "".join(text + "\n" for text in poem) + "\n"
    for poem in lines
)

In [63]:
#Formulation of data using direct dataset
seqlength = 40  # number of characters in each input window
step = 5        # stride between the starts of consecutive windows

# Slide a seqlength-wide window over fulltext, advancing `step` characters at a
# time, and record the character that follows each window as its target.
# The targets live in `next_chars` (not `nextchar`) because `nextchar` is the
# name of the sampling function defined further down; sharing the name meant
# that re-running this cell silently replaced that function with a list.
window_starts = range(0, len(fulltext) - seqlength, step)
seqarray = [fulltext[i:i + seqlength] for i in window_starts]   # input windows
next_chars = [fulltext[i + seqlength] for i in window_starts]   # char after each window

# Vocabulary: every distinct character of the corpus plus the typographic
# apostrophe, which appears in the seed text used for generation. Building it
# through a set union avoids a duplicate entry when the corpus already
# contains that apostrophe.
chars = sorted(set(fulltext) | {"’"})
print("Number of unique characters:", len(chars))
print(chars)
# Dictionary mapping each unique character to its integer index in `chars`.
char_indices = {char: index for index, char in enumerate(chars)}

# One-hot encode the windows (x) and their following characters (y).
# The builtin `bool` replaces the `np.bool` alias, which NumPy deprecated and
# later removed; the resulting dtype is the same.
x = np.zeros((len(seqarray), seqlength, len(chars)), dtype=bool)
y = np.zeros((len(seqarray), len(chars)), dtype=bool)
for i, sequence in enumerate(seqarray):
    for j, char in enumerate(sequence):
        x[i, j, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

print("Size of training sequences:", x.shape)
print("Size of training targets:", y.shape)

Number of unique characters: 62
['\n', ' ', '!', "'", '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'Y', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '’']
Size of training sequences: (18758, 40, 62)
Size of training targets: (18758, 62)


In [64]:
# Build the Keras model: a single 200-unit LSTM layer that reads one-hot
# encoded windows of shape (seqlength, len(chars)), followed by a dense softmax
# layer that outputs one probability per character in the vocabulary.
model = Sequential()
model.add(LSTM(200, input_shape=(seqlength, len(chars))))
model.add(Dense(len(chars), activation='softmax'))
model.summary()

# SGD with Nesterov momentum. `learning_rate` is the current name of the
# argument; the older `lr` spelling is deprecated.
optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
# Categorical cross-entropy matches the one-hot targets built for training.
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 200)               210400    
_________________________________________________________________
dense_5 (Dense)              (None, 62)                12462     
Total params: 222,862
Trainable params: 222,862
Non-trainable params: 0
_________________________________________________________________


In [65]:
# Train in several rounds of a fixed number of epochs each.
n_rounds = 10
epochs_per_round = 10

loss = []            # loss of the first epoch of each round (one value per round)
loss_per_epoch = []  # loss of every epoch, so the final epochs are not lost
for round_idx in range(n_rounds):
    history = model.fit(x, y, batch_size=128, epochs=epochs_per_round)
    round_losses = history.history['loss']
    loss.append(round_losses[0])
    loss_per_epoch.extend(round_losses)
print(loss)

# Persist the trained model so it can be reloaded without retraining.
model.save('LSTMShakespeare.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[3.376349994503037, 2.806515699511259, 2.3791744040964966, 2.194111976202572, 2.0770246880257077, 1.9934385002059765, 1.915225080086842, 1.8471019242742521, 1.7752014348623608, 1.7026198313985885]


In [66]:
def nextchar(preds, temperature=1.0):
    """Sample the index of the next character from a predicted distribution.

    The probabilities are reweighted as ``p ** (1 / temperature)`` and
    renormalised, then one index is drawn from the result. Temperatures below
    1 sharpen the distribution, temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of non-negative weights, one per character.
            They are renormalised here, so they need not sum exactly to 1.
        temperature: Reweighting temperature. ``0`` is treated as the limiting
            case and returns the most probable index without sampling.

    Returns:
        The sampled index into ``preds`` (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the reweighting as temperature -> 0: all mass on the mode.
        # Handled explicitly because dividing by zero below would yield NaNs.
        return np.argmax(preds)

    with np.errstate(divide='ignore'):
        # log(0) = -inf is harmless here: it maps back to probability 0 below,
        # so the divide-by-zero warning is suppressed.
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This leaves the normalised result
    # unchanged but stops every exp() from underflowing to 0 (and the
    # normalisation from becoming 0/0) at small temperatures.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)  # reweighted, renormalised probabilities
    probas = np.random.multinomial(1, preds, 1)  # one draw -> one-hot row
    return np.argmax(probas)

In [72]:
def makesonnet(temp, length=600, seed="shall i compare thee to a summer’s day?\n"):
    """Generate text character by character with the trained model.

    Starting from ``seed``, the current window of ``seqlength`` characters is
    one-hot encoded, passed to ``model``, and the next character is sampled
    with ``nextchar`` at the given temperature. The sampled character is
    appended to the output and the window slides forward by one character.

    Relies on the notebook-level names ``model``, ``seqlength``, ``chars``,
    ``char_indices`` and ``nextchar``.

    Args:
        temp: Sampling temperature passed to ``nextchar``.
        length: Number of characters to generate.
        seed: Text used to prime the model. Only its last ``seqlength``
            characters are used; every character must be in ``char_indices``.

    Returns:
        The generated text (the seed itself is not included).

    Raises:
        ValueError: If ``seed`` is shorter than ``seqlength``.
    """
    if len(seed) < seqlength:
        raise ValueError(
            "seed must contain at least %d characters, got %d"
            % (seqlength, len(seed))
        )
    # Keep exactly seqlength characters so the window always fits the model input.
    window = seed[-seqlength:]

    generated = []
    for _ in range(length):
        # Vectorize the current window as a single one-hot encoded batch entry.
        sampled = np.zeros((1, seqlength, len(chars)))
        for j, char in enumerate(window):
            sampled[0, j, char_indices[char]] = 1.

        # Predict the distribution over the next character and sample from it.
        preds = model.predict(sampled, verbose=0)[0]
        pred_idx = nextchar(preds, temperature=temp)
        next_char = chars[pred_idx]

        # Record the character and slide the window forward by one position.
        generated.append(next_char)
        window = window[1:] + next_char

    return "".join(generated)

In [76]:
print(makesonnet(0.25))

And the wild to love the will ded my deat,
And me hou to the thing the stret sun to thee,
The I be my sure the peath the free do her,
I thing a to the to thou dost do doth the sight,
So love wo then be and the resto sull,
So art my the be of thee to the tree des,
The eye the bet be the arth the deare dee.

Whe an the panter with the wine of me dot,
And to the peate the cen the least of sell,
I mants the reching the price the store dees,
The beath the canker the well dese doth dost do free,
Whe thy sell by under the for dest the ser,
So sine in the trough doth live of ment,
And the heart the wi


In [82]:
print(makesonnet(0.75))

The anow fresen tree wrat he line ould fore,
When in thy withtr sem not ave ig on my deast

Yor I sane mo no than to carper will warts,
And right he restigh purne and were to wair thee stor,
The but ne stern weave hor he wemy not,
By fould most rost retire of hear her cenvys.
elige co sulligh, I deir muse semes.
Whele to forgpring so doth love praks of,
Sears well beausit, an the with the art,
O ale thou be buthed be bush and beturn wingreent:
What ho hame selise do horg ma th un in myne dsey I hasp,
And time the still whthe ho not of love by,
Whingen I be sand and wat ot 'es bligen,
And thou 


In [83]:
print(makesonnet(1.5))

Mink pued tive en wimur gruss as solfer,
Is anksulp)rpaps, and wij, if sestraF.

Sonciken tealy,'rlicbed my nservilG?
Ay daetTotlunose:med harz pulinheWh waithes cromeldH:;nl
chal lath tBou eay'A bionftGep, peacinetARd is ly,
Uly pie,
Arsel, my ereat eath pilsed in thee lost,
ShildavH ich ir tiughid, non igry heaPI:
Whem contion weat)noteI of brose des),
Peep foublfiegoqk werks thoutgrigk inottinvoSt:
WinnPiwe-ec ' wrre sired Wgots
Men baty arm gillce, whoulto domevfrlle bib,
Then vose grae ba art is me,
Ewaing nooly thee def'rded grofsl llowai:es'q,
S
rim geveed ewfly drightq'eul aflimgn.:
Yr
