In [1]:
%%capture
# Run this!
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.callbacks import LambdaCallback, ModelCheckpoint
import numpy as np
import random, sys, io, string

In [3]:
###
# REPLACE THE <addFileName> BELOW WITH The Time Machine
###
text = io.open('Data/The Time Machine.txt', encoding = 'UTF-8').read()
###
# Let's have a look at some of the text
print(text[0:198])

# This cuts out punctuation and make all the characters lower case
text = text.lower().translate(str.maketrans("", "", string.punctuation))

# Character index dictionary
charset = sorted(list(set(text)))
index_from_char = dict((c, i) for i, c in enumerate(charset))
char_from_index = dict((i, c) for i, c in enumerate(charset))

print('text length: %s characters' %len(text))
print('unique characters: %s' %len(charset))

﻿The Time Traveller (for so it will be convenient to speak of him) was expounding a recondite matter to us. His pale grey eyes shone and twinkled, and his usually pale face was flushed and animated.
text length: 174201 characters
unique characters: 39


In [4]:
###
# REPLACE <sequenceLength> WITH 40 AND <step> WITH 4
###
sequence_length = 40
step = 4
###
sequences = []
target_chars = []
for i in range(0, len(text) - sequence_length, step):
    sequences.append([text[i: i + sequence_length]])
    target_chars.append(text[i + sequence_length])
print('number of training sequences:', len(sequences))

number of training sequences: 43541


In [6]:
# One-hot vectorise
X = np.zeros((len(sequences), sequence_length, len(charset)), dtype=np.bool)
y = np.zeros((len(sequences), len(charset)), dtype=np.bool)
###
# REPLACE THE <addSequences> BELOW WITH sequences
###
for n, sequence in enumerate(sequences):
###
    for m, character in enumerate(list(sequence[0])):
        X[n, m, index_from_char[character]] = 1
    y[n, index_from_char[target_chars[n]]] = 1

In [7]:
model = Sequential()
###
# REPLACE THE <addLSTM> BELOW WITH LSTM (use uppercase) AND <addLayerSize> WITH 128
###
model.add(LSTM(128, input_shape = (X.shape[1], X.shape[2])))
###
###
# REPLACE THE <addSoftmaxFunction> with 'softmax' (INCLUDING THE QUOTES)
###
model.add(Dense(y.shape[1], activation = 'softmax'))
###
model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam')

In [8]:
# Run this, but do not edit.
# It helps generate the text and save the model epochs.
# Generate new text
def on_epoch_end(epoch, _):
    diversity = 0.5
    print('\n### Generating text with diversity %0.2f' %(diversity))

    start = random.randint(0, len(text) - sequence_length - 1)
    seed = text[start: start + sequence_length]
    print('### Generating with seed: "%s"' %seed[:40])

    output = seed[:40].lower().translate(str.maketrans("", "", string.punctuation))
    print(output, end = '')

    for i in range(500):
        x_pred = np.zeros((1, sequence_length, len(charset)))
        for t, char in enumerate(output):
            x_pred[0, t, index_from_char[char]] = 1.

        predictions = model.predict(x_pred, verbose=0)[0]
        exp_preds = np.exp(np.log(np.asarray(predictions).astype('float64')) / diversity)
        next_index = np.argmax(np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1))
        next_char = char_from_index[next_index]

        output = output[1:] + next_char

        print(next_char, end = '')
    print()
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Save the model
checkpoint = ModelCheckpoint('Models/model-epoch-{epoch:02d}.hdf5', 
                             monitor = 'loss', verbose = 1, save_best_only = True, mode = 'min')

In [9]:
###
# REPLACE <addPrintCallback> WITH print_callback AND <addCheckpoint> WITH checkpoint
###
model.fit(X, y, batch_size = 128, epochs = 3, callbacks = [print_callback, checkpoint])
###

Epoch 1/3
### Generating text with diversity 0.50
### Generating with seed: "eed”  “agreed” said the editor and the r"
eed”  “agreed” said the editor and the rst here toy ss an csec tele toos thert ore cane there nrepestoo ene ons then thef in saaand sser nena sand tae toe the thaw noswaed tous te thee thed itin ther toaan toe sos ole ani shod ther shee ace feoe dod cahd tiomde the stiled sone tnon tyo gennt ane sthet ine  henose taseosleas tant oiu toe the thes one s toed ten ant rog osgetee the an spon sonetafthe the soas the bain isl the tire thele the ine thenol thesin thbee on the the sont tfen tee terss oathe  aand the d osandand aas the ies the

Epoch 00001: loss improved from inf to 2.74630, saving model to Models\model-epoch-01.hdf5
Epoch 2/3
### Generating text with diversity 0.50
### Generating with seed: "ght or ten of these exquisite creatures "
ght or ten of these exquisite creatures here the s aad the nerely in the band of the the the the the the mat wi t ind thi and the

<tensorflow.python.keras.callbacks.History at 0x279fed56df0>

In [10]:
from keras.models import load_model
print("loading model... ", end = '')

###
# REPLACE <addLoadModel> BELOW WITH load_model
###
model = load_model('Models/arthur-model-epoch-30.hdf5')
###
model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam')
###

print("model loaded")

loading model... model loaded


In [11]:
###
# REPLACE <addFilePath> BELOW WITH 'Data/Arthur tales.txt' (INCLUDING THE QUOTATION MARKS)
###
text = io.open('Data/Arthur tales.txt', encoding='UTF-8').read()
###

# Cut out punctuation and make lower case
text = text.lower().translate(str.maketrans("", "", string.punctuation))

# Character index dictionary
charset = sorted(list(set(text)))
index_from_char = dict((c, i) for i, c in enumerate(charset))
char_from_index = dict((i, c) for i, c in enumerate(charset))

print('text length: %s characters' %len(text))
print('unique characters: %s' %len(charset))

text length: 3645951 characters
unique characters: 43


In [12]:
# Generate text
diversity = 0.5
print('\n### Generating text with diversity %0.2f' %(diversity))
###
# REPLACE <sequenceLength> BELOW WITH 50
###
sequence_length = 50
###
# Next we'll make a starting point for our text generator
###
# REPLACE <writeSentence> WITH A SENTENCE OF AT LEAST 50 CHARACTERS
###
seed = "A SENTENCE OF AT LEAST 50 CHARACTERS"
###
seed = seed.lower().translate(str.maketrans("", "", string.punctuation))
###
# OR, ALTERNATIVELY, UNCOMMENT THE FOLLOWING TWO LINES AND GRAB A RANDOM STRING FROM THE TEXT FILE
###
#start = random.randint(0, len(text) - sequence_length - 1)
#seed = text[start: start + sequence_length]
###
print('### Generating with seed: "%s"' %seed[:40])

output = seed[:sequence_length].lower().translate(str.maketrans("", "", string.punctuation))
print(output, end = '')
###
# REPLACE THE <numCharsToGenerate> BELOW WITH THE NUMBER OF CHARACTERS WE WISH TO GENERATE, e.g. 1500
###
for i in range(1000):
###
    x_pred = np.zeros((1, sequence_length, len(charset)))
    for t, char in enumerate(output):
        x_pred[0, t, index_from_char[char]] = 1.

    predictions = model.predict(x_pred, verbose=0)[0]
    exp_preds = np.exp(np.log(np.asarray(predictions).astype('float64')) / diversity)
    next_index = np.argmax(np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1))
    next_char = char_from_index[next_index]

    output = output[1:] + next_char

    print(next_char, end = '')
print()


### Generating text with diversity 0.50
### Generating with seed: "a sentence of at least 50 characters"
a sentence of at least 50 charactersrswawwwgwwwwwwwrrwt  rwwwrw    ww  wwwwawdw    ww v w w  wwdvw      wwlwn dw   waa  aw   www  w ww wwvvwwwrs ntwrrv rwawww    wwd  wvww   wwwdw  wwd   vwwww    wdlww   dw wrwwrw  w wwdw    wdr    wwaww  ww  lwwwwwwe a     wamw w   www  www  wwwww ww w wwwww     w wwd  d www  ww w  ww ww  ww w r wrw w     wwwdl www ww  wwaow    wwrdwwwwuwaww uwww ww    www wwgw  wwwwr wwrw w w      wwwwwdw  ww aw  aw    ww   wdr    wwoewdw www  wwaww    w  wwwwwdw   udwwwynwa w     wwarr w r w  www w wwww    www www w w aw    w r    ww wwwrw d wwww     w wwwwwww  wwwnaew wwww      w wwnwdw wd  w wwwwwwwsrrw     warw  ww   wwwwnwd  wwwwwn wwd  w wwwwwwwwirw wn     rwwaww      ywwwwdw    ww wwwwwwww w w    wwrdwdw    w  a  rr     ww awwdn r r   ww  wwwaww  wwww r w wdrvw    ww a   wwwd  wwwdw   ww      ww wawww  wwr wwd    wwwww ww ww wwr   wiww    ww wwww wwwn  www