# Poetry Generator

## Import Libraries

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

## Define Parameters

In [2]:
# --- Files -------------------------------------------------------------------
filepath = "poetry_laur.txt"                # corpus that is opened and read when loading data
save_file_path = 'predicted_poetry.txt'     # file the generated text is written to
model_name = "model_laur.h5"                # file name passed to model.save()

# --- Sequence preparation ----------------------------------------------------
# Both values are forwarded to pad_sequences(); 'pre' means zeros are added /
# tokens are dropped at the front of a sequence.
PADDING = "pre"
TRUNC = "pre"

# --- Model -------------------------------------------------------------------
VOCAB_SIZE = 10000      # NOTE(review): not referenced by any cell visible in this notebook
EMB_DIM = 128           # output dimension of the Embedding layer

# --- Training ----------------------------------------------------------------
OPTIMIZER = "adam"
LOSS = "categorical_crossentropy"
METRICS = ["acc"]       # the 'acc' / 'val_acc' history keys read later come from this name
EPOCHS = 500            # upper bound; the accuracy callback may stop training earlier
BATCH_SIZE = 128
VAL_SPLIT = 0.1         # fraction handed to model.fit(validation_split=...)

# --- Generation --------------------------------------------------------------
SEEDER = "sleep my child"    ## Seed text the poem generation starts from
NUM_PREDICTIONS = 100        # number of words predicted by the generation loop

# Placeholder only: `data` is overwritten with the file contents in the
# "Load Data" cell.
data = "This is some random statement \n being used as placeholder for the actual data that is to be \n imported later from a file."

## Load Data

In [3]:
# Read the whole corpus. The `with` block closes the file on exit, so no
# explicit close() is needed. The encoding is pinned so the text decodes the
# same way on every platform instead of depending on the locale default.
with open(filepath, encoding='utf-8') as f:
    data = f.read()

# Append a "<>" marker after every line break and split on it: each resulting
# sentence keeps its trailing ' \n', so the newline can later be tokenized as
# an ordinary word.
data = data.replace('\n', ' \n<>')
sentences = data.lower().split('<>')

# Quick sanity check: number of sentences, one sample line and its length.
print(len(sentences))
print(sentences[1])
print(len(sentences[1]))

1693
and you that are blooming in your prime 

41


## Tokenize Data

In [4]:
# The filter string lists the punctuation to strip but does not contain '\n',
# so the line break appended to every sentence survives as its own token
# (it is looked up in the vocabulary on the last line of this cell).
tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t')
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
# One more than the number of distinct words; presumably the extra slot is for
# index 0, which appears as padding in the padded sequences — TODO confirm.
total_words = len(word_index) + 1
print(total_words)
print(word_index['\n'])

2691
1


In [5]:
## create reverse_word_index
# Invert the tokenizer vocabulary (word -> index) into index -> word so that a
# predicted class id can be turned back into a word.
reverse_word_index = {index: word for word, index in word_index.items()}

# Preview only the first few entries; printing the full vocabulary would bury
# the rest of the notebook under thousands of dictionary items.
print(dict(list(reverse_word_index.items())[:10]))



## Change Sentences to Sequences

In [6]:
# Encode every sentence as a list of word indices.
sequences = tokenizer.texts_to_sequences(sentences)

# Expand each encoded sentence into all of its leading sub-sequences of
# length >= 2: the final element of each one becomes a prediction target and
# everything before it the input context.
new_seq = [
    encoded[:end]
    for encoded in sequences
    for end in range(2, len(encoded) + 1)
]

# Bring all sub-sequences to a common length (padding/truncation side is
# taken from the PADDING / TRUNC settings).
padded_seq = pad_sequences(new_seq, padding=PADDING, truncating=TRUNC)
print(padded_seq[0])

[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 52 13]


## Extract trainX and trainY from sequences

In [7]:
# Inputs are every column except the last one; the last column holds the word
# index to predict, which is one-hot encoded over the vocabulary size.
trainX = padded_seq[:, :-1]
trainY = tf.keras.utils.to_categorical(padded_seq[:, -1], num_classes=total_words)

# Width of one input row and of one one-hot target row.
INP_LEN, OUT_LEN = trainX.shape[1], trainY.shape[1]

for array in (trainX, trainY):
    print(array.shape)

(13730, 16)
(13730, 2691)


## Define Callback

In [8]:
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once training accuracy exceeds 90%."""

    def on_epoch_end(self, epochs, log=None):
        """Request a stop when the 'acc' entry of the epoch logs is above 0.90.

        Args:
            epochs: Index of the epoch that just finished (unused).
            log: Mapping of metric names to values for that epoch. May be
                None or lack an 'acc' entry, in which case nothing happens.
        """
        # `log` defaults to None rather than a shared mutable dict, and a
        # missing metric is skipped instead of raising on `None > 0.90`.
        acc = (log or {}).get('acc')
        if acc is not None and acc > 0.90:
            self.model.stop_training = True
            print("\n Stopped training since model reached accuracy of 90%")


callback = myCallback()

## Define Model

In [9]:
# Next-word classifier, assembled layer by layer:
#   word indices -> EMB_DIM-dimensional embeddings -> bidirectional LSTM
#   (32 units per direction) -> softmax over the whole vocabulary.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(total_words, EMB_DIM, input_length=INP_LEN))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(total_words, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
# Configure training with the optimizer, loss and metrics chosen in the
# parameters cell, then print the layer-by-layer overview of the model.
model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=METRICS)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 16, 128)           344448    
_________________________________________________________________
bidirectional (Bidirectional (None, 64)                41216     
_________________________________________________________________
dense (Dense)                (None, 2691)              174915    
Total params: 560,579
Trainable params: 560,579
Non-trainable params: 0
_________________________________________________________________


## Train Model

In [None]:
# Train for at most EPOCHS epochs; `callback` may end the run earlier once the
# accuracy target is reached. A VAL_SPLIT fraction of the samples is used for
# validation, and the returned history is kept for the plots below.
history = model.fit(
    trainX,
    trainY,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VAL_SPLIT,
    callbacks=[callback],
    verbose=1,
)

Train on 12357 samples, validate on 1373 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500


Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500

## Save Model

In [None]:
# Persist the trained model under the file name set in the parameters cell.
model.save(model_name)

## Plot Results

In [None]:
# Per-epoch metrics recorded by model.fit().
acc = history.history['acc']
val_acc = history.history['val_acc']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs that actually ran: the accuracy callback can end
# training before EPOCHS, and plt.plot() requires x and y of equal length.
epochs = range(len(acc))

# Accuracy curves.
plt.figure()
plt.plot(epochs, acc, 'r')
plt.plot(epochs, val_acc, 'b')
plt.title('Training vs. validation accuracy')
plt.xlabel('EPOCHS')
plt.ylabel('Accuracies')
# The labels have to be given as ONE list; two positional strings are not
# interpreted as two labels.
plt.legend(['Train Acc', 'Val Acc'])

# Loss curves, on their own figure.
plt.figure()
plt.plot(epochs, loss, 'r')
plt.plot(epochs, val_loss, 'b')
plt.title('Training vs. validation loss')
plt.xlabel('EPOCHS')
plt.ylabel('Losses')
plt.legend(['Train Loss', 'Val Loss'])

# Render both figures (replaces a trailing plt.figure() that only created an
# extra, empty figure).
plt.show()

## Generate Poetry by repeatedly predicting the next word with the model

In [None]:
# Generate NUM_PREDICTIONS words, one at a time. `sentence` is the running
# context fed to the model; `corpus` collects every predicted word for saving.
sentence = SEEDER.lower()
corpus = ''
print (sentence, end=' ')
for i in range(NUM_PREDICTIONS):
    # Encode the current context and pad/truncate it to the model input width.
    seq = tokenizer.texts_to_sequences([sentence])
    seq = pad_sequences(seq, padding=PADDING, truncating=TRUNC, maxlen=INP_LEN)

    # Probability distribution over the vocabulary for the next word.
    probs = model.predict(seq, verbose=0)[0]

    # Class 0 is the padding index and has no entry in reverse_word_index, so
    # the arg-max is taken over classes 1..total_words-1 only.
    new_word = reverse_word_index[int(np.argmax(probs[1:])) + 1]

    if new_word=='\n':
        # A line ended: restart the context from a random vocabulary word.
        # random.randint() includes BOTH endpoints, so the upper bound must be
        # total_words - 1 (the largest valid word index) to avoid a KeyError.
        sentence = reverse_word_index[random.randint(1, total_words - 1)]
    else:
        sentence = sentence + ' ' + new_word
    corpus = corpus + ' ' + new_word
    print (new_word, end=' ')

## Save Poetry in a file

In [None]:
# Write the generated text to disk. The `with` block closes the file on exit,
# so no explicit close() is needed. Only I/O failures are caught, and the
# reason is reported, so that unrelated errors (e.g. `corpus` not defined
# because the generation cell was not run) are not silently hidden.
try:
    with open(save_file_path, 'w', encoding='utf-8') as f:
        f.write(corpus)
except OSError as err:
    print("\nWrite was Unsuccessful:", err)