In [101]:
from __future__ import print_function
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import gensim
import random
import string
import os
import sys

from keras.callbacks import LambdaCallback
from keras.layers.recurrent import LSTM
from keras.layers.embeddings import Embedding
from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import keras.backend as K

## Helper function for processing data

In [102]:
# Helper function for processing each word (Capitalization and special characters)
def parse_word(word, syllable_dict):
    word = word.lower()
    if word in syllable_dict:
        return word
    else:
        special_character_list= [',', '.', '?', '!', ';', ':', '(', ')', "'"]
        for j in range(2):
            if word[-1:] in special_character_list:
                word = word[:-1]
                if word in syllable_dict:
                    return word
            if word[:1] in special_character_list:
                word = word[1:]
                if word in syllable_dict:
                    return word
        return word
    
# Helper function for reading the Syllable dictionary
def get_syllable_dict(filename):
    syllable_dict = {}
    file = open(os.path.join(os.getcwd(), filename)).read().split('\n')
    
    for line in file:
        if line.split():
            word_syllable = line.split()
            real_count = []
            end_count = []
            # Deal with the ending cases
            for i in range(1, len(word_syllable)):
                if word_syllable[i][0] == 'E':
                    end_count.append(int(word_syllable[i][1]))
                else:
                    real_count.append(int(word_syllable[i][0]))
            # Rank the syllable from the highest to the lowest        
            syllable_dict[word_syllable[0]] = [real_count[::-1], end_count[::-1]]

    return syllable_dict

# Find the syllable of an input word
def find_syllable(word, syllable_dict, remain):
        key = word.lower()
        # Extract the real and end syllable lists
        real_syllable = syllable_dict[key][0]
        end_syllable = syllable_dict[key][1]
        
        # Check if the word's real syllable satisfies our requirement
        for i in range(len(real_syllable)):
            if real_syllable[i] <= remain:
                return random.choice(real_syllable[i:])
        
        if len(end_syllable) != 0:
            for j in range(len(end_syllable)):
                if end_syllable[j] == remain:
                    return end_syllable[j]
        
        # If there's no valid syllable within the range, return 11 > 10
        return 11

## Data preprocessing

In [103]:
# Reading the file
filename = 'data/shakespeare.txt'
syllable_name = 'data/Syllable_dictionary.txt'
Shakes_poem = open(os.path.join(os.getcwd(), filename)).read().split('\n')

syllable_dict = get_syllable_dict(syllable_name)
sentences = []

# Extracting observation and syllable_dict
max_sentence_len = 0
for sentence in Shakes_poem:
    raw_list = sentence.split(' ')
    if len(raw_list) != 1:
        if raw_list[-1].isdigit():
            continue
        else:    
            word_list = []
            for word in raw_list:
                if word != '':
                    if word in syllable_dict:
                        word_list.append(word)
                    else:
                        word_list.append(parse_word(word, syllable_dict))
            sentences.append(word_list)

for sentence in sentences:
    new_length = len(sentence)
    if new_length > max_sentence_len:
        max_sentence_len = new_length
print('Maximum sentence length:', max_sentence_len)        

Maximum sentence length: 11


In [104]:
word_model = gensim.models.Word2Vec(sentences, size=100, min_count=1, window=5, iter=100)
pretrained_weights = word_model.wv.syn0
vocab_size, embedding_size = pretrained_weights.shape
print('Result embedding shape:', pretrained_weights.shape)

Result embedding shape: (3205, 100)


## Vectorization

In [105]:
def word2idx(word):
    return word_model.wv.vocab[word].index
def idx2word(idx):
    return word_model.wv.index2word[idx]

In [106]:
print('\nPreparing the data for LSTM...')
X_train = np.zeros([len(sentences), max_sentence_len], dtype=np.int32)
y_train = np.zeros([len(sentences)], dtype=np.int32)
for i, sentence in enumerate(sentences):
    for t, word in enumerate(sentence[:-1]):
        X_train[i, t] = word2idx(word)
    y_train[i] = word2idx(sentence[-1])
print('train_x shape:', X_train.shape)
print('train_y shape:', y_train.shape)


Preparing the data for LSTM...
train_x shape: (2155, 11)
train_y shape: (2155,)


## Building the LSTM model

In [107]:
import warnings
warnings.filterwarnings('ignore')
print('\nTraining LSTM...')
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size, weights=[pretrained_weights]))
model.add(LSTM(units=embedding_size))
model.add(Dense(units=vocab_size))
model.add(Activation('softmax'))

model.summary()

optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])


Training LSTM...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     (None, None, 100)         320500    
_________________________________________________________________
lstm_11 (LSTM)               (None, 100)               80400     
_________________________________________________________________
dense_11 (Dense)             (None, 3205)              323705    
_________________________________________________________________
activation_11 (Activation)   (None, 3205)              0         
Total params: 724,605
Trainable params: 724,605
Non-trainable params: 0
_________________________________________________________________


## Sampling function (combining softmax with temperature)

In [108]:
def sample(preds, temperature):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

## Generating text with trained model

In [109]:
## Sample Poem
'''
From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-substantial fuel,
Making a famine where abundance lies,
Thy self thy foe, to thy sweet self too cruel:
Thou that art now the world's fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And tender churl mak'st waste in niggarding:
  Pity the world, or else this glutton be,
  To eat the world's due, by the grave and thee.
'''

"\nFrom fairest creatures we desire increase,\nThat thereby beauty's rose might never die,\nBut as the riper should by time decease,\nHis tender heir might bear his memory:\nBut thou contracted to thine own bright eyes,\nFeed'st thy light's flame with self-substantial fuel,\nMaking a famine where abundance lies,\nThy self thy foe, to thy sweet self too cruel:\nThou that art now the world's fresh ornament,\nAnd only herald to the gaudy spring,\nWithin thine own bud buriest thy content,\nAnd tender churl mak'st waste in niggarding:\n  Pity the world, or else this glutton be,\n  To eat the world's due, by the grave and thee.\n"

In [110]:
import warnings
warnings.filterwarnings('ignore')
M_syllable = 10

model.fit(X_train, y_train, batch_size=128, nb_epoch=30)
    
for temperature in [1.5, 0.75, 0.25]:
    print()
    print('----- temperature parameter:', temperature)
        
    poem = ''
    capital_list = ['From fairest', 'That thereby', 'But as', 'His tender', 'But thou', "Feed'st thy", 'Making a',
                         'Thy self', 'Thou that', 'And only', 'Within thine', 'And tender', 'Pity the', 'To eat']
        
    for i in range(14):
        given_words = capital_list[i]
        sentence = given_words + ' '
        word_indexes = []
        syllable_remain = M_syllable
            
        given_word_list = given_words.split(' ')
        for word in given_word_list:
            word = word.lower()
            word_indexes.append(word2idx(word))
            syllable_remain -= find_syllable(word, syllable_dict, M_syllable)
            
        while syllable_remain > 1:
            prediction = model.predict(x=np.array(word_indexes), verbose=0)
            next_index = sample(prediction[-1], temperature)
            next_word = idx2word(next_index)
            next_syllable = find_syllable(word, syllable_dict, syllable_remain)
                
            if next_syllable != 11:
                syllable_remain -= next_syllable
                word_indexes.append(next_index)
            
        for j in range(len(word_indexes) - 2):
            sentence += idx2word(word_indexes[j + 2]) + ' '
    
        if i == 12 or i == 13:
            sentence = '  ' + sentence
            
        poem += sentence + '\n'
        
    print(poem)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

----- temperature parameter: 1.5
From fairest sounds polished few 
That thereby suborned intermixed gluttoning 
But as neglected kiss eternity fond basest sin death 
His tender take graven arrest 
But thou prognosticate mad silent signs home benefit o'erworn 
Feed'st thy insufficiency compeers growth vexed gently speak centre 
Making a stretched alone sluttish sparkling stormy anchored 
Thy self quiet convert distills race soft ranged pursuing 
Thou that stained tongue cured hot astronomy forsworn ornaments 
And only lose men spend 
Within thine hung unless endeared break say homage 
And tender palate years young 
  Pity the moan colour oth