In [None]:
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file

from functools import partial
from bayes_opt import BayesianOptimization

import numpy as np
import random
import sys
import os
import warnings
warnings.filterwarnings('ignore')

## Reading dataset

In [2]:
# Read the corpus, lower-case it and split it into lines. The context manager
# closes the file handle as soon as the text has been read.
with open(os.path.join(os.getcwd(), 'data/shakespeare.txt')) as corpus_file:
    text = corpus_file.read().lower().split('\n')

# Group consecutive non-blank lines into poems; a blank line separates poems.
poem_list = []
raw_text = ''
for line in text:
    if line == '':
        # A blank line closes the poem collected so far (if there is one).
        if raw_text != '':
            poem_list.append(raw_text)
        raw_text = ''
    elif line[-1].isdigit():
        # Lines whose last character is a digit are dropped
        # (presumably the numeric poem headings -- TODO confirm against the data file).
        continue
    else:
        raw_text += line + '\n'

# Flush the final poem when the file does not end with a blank line. The
# non-empty guard prevents a spurious empty poem after a trailing newline.
if raw_text != '':
    poem_list.append(raw_text)
print('Number of poems:', len(poem_list))

Number of poems: 154


## Create Sequences

In [3]:
# organize into sequences of characters
def create_sequence(raw_text, length, step):
    """Cut ``raw_text`` into fixed-length windows and their following items.

    Window start offsets are ``range(0, len(raw_text) - length, step)``, so
    every window is guaranteed to have one item after it.

    Args:
        raw_text: Sliceable sequence (a string in this notebook).
        length: Number of items in each window.
        step: Distance between the starts of consecutive windows.

    Returns:
        A tuple ``(sequences, next_chars)`` of two equally long lists:
        ``sequences[k]`` is the k-th window and ``next_chars[k]`` is the item
        that immediately follows it in ``raw_text``. Both lists are empty when
        ``raw_text`` has at most ``length`` items.
    """
    offsets = range(0, len(raw_text) - length, step)
    windows = [raw_text[start:start + length] for start in offsets]
    followers = [raw_text[start + length] for start in offsets]
    return windows, followers

In [4]:
# Window size (in characters) and stride used to slice every poem.
length, step = 40, 1

# Windows are built per poem, so no window ever spans two poems.
sequences, next_chars = [], []
for poem in poem_list:
    sub_sequences, sub_next_chars = create_sequence(poem, length, step)
    sequences.extend(sub_sequences)
    next_chars.extend(sub_next_chars)
print('Total Sequences: %d' % len(sequences))

Total Sequences: 88130


## Character Mappings and Inverse Mappings

In [5]:
# The vocabulary is every distinct character that occurs in the poems, sorted
# so that the character <-> index assignment is deterministic across runs.
poem_string = "".join(poem_list)
chars = sorted(set(poem_string))

# Forward (character -> index) and inverse (index -> character) lookups.
char_index_map = {c: i for i, c in enumerate(chars)}
index_char_map = dict(enumerate(chars))

vocab_size = len(char_index_map)
print('Vocabulary Size: %d' % vocab_size)

Vocabulary Size: 38


## Vectorization

In [6]:
# One-hot encode the training data.
#   X[i, t, k] is True when character t of window i has vocabulary index k.
#   y[i, k]    is True when the character following window i has index k.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, while `bool` works on every NumPy version.
X = np.zeros((len(sequences), length, len(chars)), dtype=bool)
y = np.zeros((len(sequences), len(chars)), dtype=bool)
for i, sentence in enumerate(sequences):
    for t, char in enumerate(sentence):
        X[i, t, char_index_map[char]] = 1
    y[i, char_index_map[next_chars[i]]] = 1

## Build the RNN Training Model

In [7]:
import warnings
warnings.filterwarnings('ignore')
print('Build model...')

# Single-layer LSTM character model: a window of one-hot characters goes in,
# a softmax distribution over the vocabulary (the next character) comes out.
model = Sequential([
    LSTM(128, input_shape=(length, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

model.summary()

# The targets are one-hot vectors, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

Build model...



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               85504     
_________________________________________________________________
dense_1 (Dense)              (None, 38)                4902      
_________________________________________________________________
activation_1 (Activation)    (None, 38)                0         
Total params: 90,406
Trainable params: 90,406
Non-trainable params: 0
_________________________________________________________________




## Sampling function (combining softmax with temperature)

In [8]:
def sample(preds, temperature):
    """Draw one index from a probability array re-weighted by a temperature.

    The probabilities are rescaled as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn with
    ``np.random.multinomial``.

    Args:
        preds: Array-like of probabilities (e.g. one softmax output row).
        temperature: Non-zero scaling factor. Values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        The sampled index, as returned by ``np.argmax``.

    Raises:
        ValueError: If ``temperature`` is zero.
    """
    if temperature == 0:
        raise ValueError('temperature must be non-zero')

    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf, which correctly becomes probability 0 after exp();
    # only the divide-by-zero warning is silenced here.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. The result is mathematically unchanged,
    # but with a small temperature the unshifted exp() underflows to 0 for
    # every entry and the normalisation below would become 0/0 = NaN.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

## Training the model with outputs

In [None]:
import warnings
warnings.filterwarnings('ignore')

# Alternate between training and text generation: after every 10 epochs of
# training, print one generated poem per sampling temperature.
for iteration in range(10):
    # `epochs` is the Keras 2 name of this argument. The Keras 1 spelling
    # `nb_epoch` is only accepted through a legacy shim and is rejected by
    # later Keras releases.
    model.fit(X, y, batch_size=128, epochs=10)

    for temperature in [1.5, 0.75, 0.25]:
        print()
        print('----- temperature parameter:', temperature)

        generated = ''
        # The seed (including its trailing newline) is exactly 40 characters
        # long, i.e. one full input window for length = 40.
        sentence = "shall i compare thee to a summer's day?\n"
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        # Keep generating characters until 13 newlines have been emitted.
        line_count = 0
        while (line_count <= 12):
            # One-hot encode the current window as a batch of size one.
            x = np.zeros((1, length, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_index_map[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = index_char_map[next_index]

            if next_char == '\n':
                line_count += 1

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/10





Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

----- temperature parameter: 1.5
----- Generating with seed: "shall i compare thee to a summer's day?
"
shall i compare thee to a summer's day?
for reforgn wited slymerl of you remay,
ar eaf hell beauty's chyer recorsiou,
and wherefis, iamarded divayed ,
dow's iehaill, ks inveragnatit his oftant nugely my
  they viam expender them do bver's storn
have they glasing flies meeastore lourlest
or love ward 'a'gms, lovi seemint epent.
  chald these outknce your frommom tulls b.
thds, flower perriking hanth
incom you outblessricted erf most onbran?
ro forning me forins (ilate one eress rackd,
and she is rrcedy upons, loin wat?
leating theired's ewgreit try cupant and.


----- temperature parameter: 0.75
----- Generating with seed: "shall i compare thee to a summer's day?
"
shall i c