In [None]:
import numpy as np
import pandas as pd
import keras
from keras import layers
from keras import models

import os
# Echo the entries of the dataset directory so the available inputs are visible in the output.
print(os.listdir(path="../input"))

In [None]:
import glob

# Gather every .txt file in the input directory; sorting keeps the book order
# (and therefore the corpus) the same from run to run.
book_filenames = glob.glob("../input/*.txt")
book_filenames.sort()

print("Found {} books".format(len(book_filenames)))

In [None]:
import codecs

def _read_book(path):
    """Return the whole text of the file at `path`.

    The file is decoded as UTF-8; bytes that cannot be decoded are dropped
    (errors='ignore') rather than raising.
    """
    with codecs.open(path, encoding="utf8", errors='ignore') as book_file:
        return book_file.read()


# Concatenate all books, in `book_filenames` order, into one string.
# Joining once avoids re-copying the growing corpus on every `+=`.
corpus_raw = u"".join(_read_book(filename) for filename in book_filenames)

print("Corpus is {} characters long".format(len(corpus_raw)))

In [None]:
# Preview only the start of the corpus; echoing the full string floods the cell
# output and bloats the saved notebook.
corpus_raw[:1000]

In [None]:
# Length of extracted character sequences
maxlen = 160

# We sample a new sequence every `step` characters
step = 9

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

# Slide a window of `maxlen` characters over the corpus; the character right
# after each window is the prediction target for that window.
for i in range(0, len(corpus_raw) - maxlen, step):
    sentences.append(corpus_raw[i: i + maxlen])
    next_chars.append(corpus_raw[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(set(corpus_raw))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
# (built with enumerate instead of a linear `chars.index` lookup per character).
char_indices = {char: index for index, char in enumerate(chars)}

# Next, one-hot encode the characters into binary arrays.
# x[i, t, c] is set when sequence i has character c at position t;
# y[i, c] is set when character c follows sequence i.
print('Vectorization...')
# The builtin `bool` is used as dtype: the `np.bool` alias was removed from NumPy.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [None]:
# Network definition: one 128-unit LSTM reading a (maxlen, len(chars)) one-hot
# sequence, followed by a softmax layer with one output per known character.
model = models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(chars))),
    layers.Dense(len(chars), activation='softmax'),
])

In [None]:
# Adam with a learning rate of 0.01. The canonical `Adam` class is used because
# the lowercase `adam` alias does not exist in every Keras release, and the rate
# is passed positionally because the keyword is `lr` in older releases and
# `learning_rate` in newer ones (it is the first parameter in both).
optimizer = keras.optimizers.Adam(0.01)
# Targets are one-hot rows, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [None]:
def sample(preds, temperature=1.0):
    """Draw one index from `preds` after reweighting it by `temperature`.

    Each probability is raised to the power ``1 / temperature`` and the result
    is renormalized, so temperatures below 1 sharpen the distribution and
    temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (here, the model's softmax output).
        temperature: reweighting factor. A value <= 0 selects the most likely
            index deterministically instead of dividing by zero.

    Returns:
        The sampled index into `preds` (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature <= 0:
        # Limit case of an infinitely sharp distribution: greedy choice.
        return np.argmax(preds)

    # log(0) is -inf, which correctly maps back to probability 0 below;
    # only the accompanying divide-by-zero warning is suppressed.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. This leaves the normalized result
    # unchanged but keeps every exponential from underflowing to 0 at low
    # temperatures, which would otherwise yield NaN probabilities.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial gives a one-hot row; its argmax is the drawn index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random
import sys

# Alternate between one epoch of training and a round of text generation so the
# quality of the generated text can be followed as training progresses.
# range(1, 60) runs epochs 1 through 59.
for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=512,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(corpus_raw) - maxlen - 1)
    seed_text = corpus_raw[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + seed_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        # Restart from the same seed for every temperature. Without this reset,
        # each temperature would continue from the text generated by the
        # previous one, and the outputs would not be comparable.
        generated_text = seed_text
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            # One-hot encode the current `maxlen`-character window.
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            # Predict the next-character distribution and draw from it.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            # Slide the window: append the new character, drop the oldest one.
            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()