In [1]:
from __future__ import print_function

import io
import os
import pickle
import random
import sys

import numpy as np
from keras.callbacks import LambdaCallback, ModelCheckpoint, EarlyStopping
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file

Using TensorFlow backend.


In [0]:
# Corpus identifier. The dictionary and weight paths are derived from it, so
# this should be the only variable to change per corpus.
NAME = "shakespeare"

In [4]:
from google.colab import drive
drive.mount('/content/drive/')

# Project root on the mounted Drive, plus the per-corpus artefacts derived
# from NAME: the pickled char->index dictionary and the model weights file.
PATH = '/content/drive/My Drive/LA Hacks/'
DICT_PATH = ''.join([PATH, 'Dictionaries/', 'char_index_', NAME, '.p'])
MODEL_PATH = ''.join([PATH, 'Model Weights/', NAME, '.h5'])

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
# Load the corpus selected by NAME and lower-case it.
# The file name is derived from NAME so that NAME really is the only value to
# change per corpus (it used to be hard-coded to the Shakespeare file).
# NOTE(review): assumes every corpus is stored as '<NAME>_corpus.txt' directly
# under PATH -- confirm for corpora other than 'shakespeare'.
path = PATH + NAME + '_corpus.txt'
with open(path, encoding='utf-8') as f:
    text = f.read().lower()

In [7]:
# Vocabulary: every distinct character of the corpus, in sorted order, with
# lookup tables in both directions (character -> index and index -> character).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 58


In [0]:
# Persist the char -> index mapping to DICT_PATH so the same encoding can be
# reloaded later. `pickle` is imported in the notebook's top import cell
# rather than mid-notebook.
with open(DICT_PATH, 'wb') as dict_file:
    pickle.dump(char_indices, dict_file)

In [9]:
# Display the full character vocabulary for a quick visual sanity check.
print(chars)

['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '}']


In [10]:
# Cut the text into overlapping windows of `maxlen` characters, taking a new
# window every `step` characters. Each window is paired with the single
# character that immediately follows it (the prediction target).
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 1815710


In [11]:
print('Vectorization...')
# One-hot encode the training data.
#   x: (number of sentences, maxlen, vocabulary size) -- one flag per character
#      position of every input window.
#   y: (number of sentences, vocabulary size) -- one flag for the character
#      that follows each window.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [12]:
# build the model: a single 128-unit LSTM over one-hot character windows,
# followed by a softmax layer with one output per vocabulary character
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

Build model...
Instructions for updating:
Colocations handled automatically by placer.


In [0]:
# Compile with RMSprop (learning rate 0.01) and categorical cross-entropy,
# which matches the one-hot targets and the softmax output layer.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [0]:
# Load model: resume from a previously saved checkpoint when one exists.
# On the first run for a corpus there is no weights file yet, so instead of
# failing the notebook continues with the freshly initialised model.
if os.path.isfile(MODEL_PATH):
    model.load_weights(MODEL_PATH)
else:
    print('No saved weights found at', MODEL_PATH, '- training from scratch.')

In [0]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature rescaling.

    The probabilities are converted to log space, divided by `temperature`,
    renormalised with a softmax, and a single index is then drawn from the
    resulting distribution.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: positive float. Values below 1 sharpen the distribution
            towards the most likely index; values above 1 flatten it.

    Returns:
        The sampled index, as returned by `np.argmax`.

    Raises:
        ValueError: if `temperature` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    preds = np.asarray(preds).astype('float64')
    # Entries with probability 0 map to -inf (and end up with probability 0
    # again below); silence the divide-by-zero warning log() emits for them.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum so the largest term is exp(0) == 1. Without this,
    # a small temperature makes every exp() underflow to 0 and the
    # normalisation below becomes 0 / 0 = NaN.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
def on_epoch_end(epoch, _):
    """Print sample text generated by the current model.

    Intended as the `on_epoch_end` hook of a Keras `LambdaCallback`. One
    random `maxlen`-character seed is taken from the corpus; for each
    diversity (sampling temperature) value, 400 characters are generated from
    that seed and streamed to stdout as they are produced.

    Args:
        epoch: epoch index passed in by the callback; used only in the header.
        _: second callback argument, ignored.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # The same seed is reused for every diversity so the outputs are comparable.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Sliding window holding the most recent `maxlen` characters.
        window = seed
        for _step in range(400):
            # One-hot encode the current window as a batch of one.
            one_hot = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                one_hot[0, pos, char_indices[ch]] = 1.

            probs = model.predict(one_hot, verbose=0)[0]
            picked = indices_char[sample(probs, diversity)]

            # Drop the oldest character and append the newly sampled one.
            window = window[1:] + picked

            sys.stdout.write(picked)
            sys.stdout.flush()
        print()

# Write the weights to MODEL_PATH, monitoring the training loss and keeping
# only the best result seen so far.
checkpoint = ModelCheckpoint(
    filepath=MODEL_PATH,
    monitor='loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=True,
)

# Stop training early based on the training loss, with a patience of 3 epochs.
early_stopping = EarlyStopping(patience=3, monitor='loss')

# Print generated sample text at the end of every epoch.
generate_text = LambdaCallback(on_epoch_end=on_epoch_end)


In [0]:
# Train for up to 60 epochs in batches of 128. The callbacks print sample
# text, checkpoint the weights and apply early stopping, in that order.
model.fit(
    x=x,
    y=y,
    epochs=60,
    batch_size=128,
    callbacks=[generate_text, checkpoint, early_stopping],
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: ". thisby, the flowers of odious savours "
. thisby, the flowers of odious savours are the stand her the man of the love it is the bear
    and the complete within
                                                                                                                                                                                                                                                                                                                               
----- diversity: 0.5
----- Generating with seed: ". thisby, the flowers of odious savours "
. thisby, the flowers of odious savours to see him and the spirfers;
    when the world the songery the love the torner and the so letters,
    but is the father- the love the breat the death
    waster sir, and more of the commons to see the claud,
    and the priture with me 

  after removing the cwd from sys.path.


hereass

  antony. so i have fill of the confess a heard, my lord.
  deliares. the the friend shall i praise the stand.
    the pate and the shalt distributerorer in the peace.
    but i do be death,
----- diversity: 1.0
----- Generating with seed: "siness in some other fight,
    as cause"
siness in some other fight,
    as cause by his falsitment grancion of hell.
  liunt. the sell the come in lided. does. and hath thee  
    i some soul that detage, and the more boys;
    that's changly agoas ferch, therneag, come
    the houseole timout, had you the day; his foed.
  britly. let it sel shall shall, get her suffole.
  was letter. well, shepherd, buble, that,
  that sluthing thy stake not priefest's cludghy; and persut, w
----- diversity: 1.2
----- Generating with seed: "siness in some other fight,
    as cause"
siness in some other fight,
    as cause you.'
  kinghom.                                  exeunt.
sirtan.] hownshing,
    sincted keeg yours let?
  buckingham. w'd. how  vel 

In [0]:
# Shell escape: list the contents of the current working directory.
!ls

drive  gdrive  nietzsche.h5  sample_data
