# Generating Haikus with a Deep Learning Model

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import random, sys

'''
    Example script to generate haiku text.
    It is recommended to run this script on GPU, as recurrent
    networks are quite computationally intensive.
    If you try this script on new data, make sure your corpus 
    has at least ~100k characters. ~1M is better.
'''

path = "haiku_all.txt"
# Read the whole corpus lower-cased; the context manager closes the file
# handle as soon as the read is done instead of leaving it to the GC.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Sort the vocabulary so the char <-> index mapping is identical on every
# run. Iterating a bare set of strings can yield a different order in each
# interpreter process (string hash randomization), which would make a saved
# model's indices meaningless after a restart.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

# Slide a window of maxlen characters over the text, advancing `step`
# characters at a time. Each window is one training input and the character
# immediately following it is the target to predict.
maxlen = 100
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start : start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the data:
#   X[i, t, k] is set when character t of sentence i has index k,
#   y[i, k]    is set when the character following sentence i has index k.
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# print "X.shape: %s, Y.shape: %s" % (X.shape, y.shape)

# Build the network: a single LSTM layer with dropout, followed by a dense
# layer and a softmax activation over the character vocabulary.
print('Build model...')
model = Sequential()
## To use 2 stacked LSTM layers instead, replace the first two entries of the
## tuple below (the LSTM and its Dropout) with these four:
#     LSTM(len(chars), 512, return_sequences=True),
#     Dropout(0.2),
#     LSTM(512, 512, return_sequences=False),
#     Dropout(0.2),
for _layer in (
    LSTM(len(chars), 512, return_sequences=False),
    Dropout(0.2),
    Dense(512, len(chars)),
    Activation('softmax'),
):
    model.add(_layer)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')


# helper function to sample an index from a probability array
def sample(a, temperature=1.0):
    """Draw one index from the distribution `a`, reshaped by `temperature`.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalized (computed in log space), then a single multinomial draw is
    made. Temperatures below 1 sharpen the distribution towards its most
    likely entries; temperatures above 1 flatten it.

    Args:
        a: 1-D array-like of non-negative probabilities.
        temperature: positive scaling factor applied to the log-probabilities.

    Returns:
        The sampled index (NumPy integer, as returned by ``np.argmax``).
    """
    # Work in float64: float32 predictions can renormalize to a sum slightly
    # above 1, which np.random.multinomial rejects with a ValueError.
    probs = np.asarray(a, dtype=np.float64)
    # log(0) = -inf is fine here (it maps back to probability 0), so silence
    # the divide-by-zero warning instead of letting it spam the output.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature
    # Shift by the largest logit before exponentiating so small temperatures
    # do not underflow every term to 0 (which would give 0/0 = NaN).
    logits = logits - np.max(logits)
    weights = np.exp(logits)
    probs = weights / np.sum(weights)
    return np.argmax(np.random.multinomial(1, probs, 1))

# train the model, output generated text after each iteration

def generate_from_model(model, begin_sent=None, diversity_l=[0.2, 0.5, 1.0, 1.2]):
    """Write one generated character sequence per diversity value to stdout.

    For every value in `diversity_l` the seed is written out, then characters
    are produced one at a time: the current window is one-hot encoded, passed
    to ``model.predict``, and the next character is drawn with
    ``sample(preds, diversity)``. Generation for a diversity stops once more
    than 3 tab characters or more than 120 characters have been generated.

    Relies on the module-level names `text`, `maxlen`, `chars`,
    `char_indices`, `indices_char` and `sample`.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns, at index 0, an
            array accepted by ``sample``.
        begin_sent: seed string. When None, a random `maxlen`-character slice
            of `text` is chosen once and reused for every diversity.
        diversity_l: temperatures passed to ``sample``, one run each. The
            default list is only iterated here, never mutated.

    NOTE(review): a `begin_sent` longer than `maxlen` would index past axis 1
    of `x`; a shorter one leaves the trailing positions of `x` all-zero, and
    the window keeps that shorter length. A seed character missing from
    `char_indices` raises KeyError.
    NOTE(review): `generated` is accumulated but not returned within the
    portion of the function visible here.
    """
    if begin_sent is None:
        # Chosen once, before the loop, so all diversities share the same seed.
        start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in diversity_l:
#         print 
#         print '----- diversity:', diversity

        generated = ''
        if begin_sent is None:
            sentence = text[start_index : start_index + maxlen]
        else:
            sentence = begin_sent
        generated += sentence
#         print '----- Generating with seed: "' + sentence + '"'
        # Echo the seed so the output reads as seed + continuation.
        sys.stdout.write(generated)
    
        tot_lines = 0
        tot_chars = 0

        while True:
            # Stop after more than 3 generated tabs or more than 120 characters.
            if tot_lines > 3 or tot_chars > 120:
                break
            # One-hot encode the current window as a batch of size 1.
            x = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            
            tot_chars += 1
            generated += next_char
            # A tab counts as a line break here -- presumably the corpus
            # separates haiku lines with tabs; TODO confirm against the data.
            if next_char == '\t':
                tot_lines += 1
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            # Stream each character immediately rather than buffering.
            sys.stdout.write(next_char)
            sys.stdout.flush()