# Poetry Generation using Deep NLP 

###### We will be using the Keras API with a TensorFlow backend

#### Importing Various Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



#### Variables 

In [2]:
# Hyperparameters and preprocessing limits used throughout the notebook.
max_sequence_length = 100   # upper bound on tokens per line (later capped by the data)
embedding_dim = 50          # selects which glove.6B.<dim>d.txt file is read
latent_dim = 25             # size of the LSTM hidden/cell state
max_vocab_size = 3000       # vocabulary cap for the tokenizer and embedding matrix
validation_split1 = 0.2     # passed as validation_split to model.fit
epochs1 = 1000              # passed as epochs to model.fit
batch_size1 = 128           # passed as batch_size to model.fit



#### load in the data

In [3]:
# Build the training pairs from the corpus. For every non-empty line the
# input is the line prefixed with '<sos>' and the target is the same line
# suffixed with '<eos>'.
input_text = []
target_text = []

# The context manager guarantees the corpus file handle is closed once the
# loop finishes (or fails), instead of leaving it open for the kernel's life.
with open('robert_frost.txt') as corpus_file:
    for line in corpus_file:
        line = line.rstrip()
        if not line:
            # blank lines carry no text, so they produce no training pair
            continue

        input_line = '<sos> ' + line
        output_line = line + ' <eos>'

        input_text.append(input_line)
        target_text.append(output_line)

# Inputs and targets together, so a tokenizer fitted on this list sees both
# the '<sos>' and the '<eos>' marker.
all_text = input_text + target_text
    

In [4]:
# Inspect the prepared inputs; every entry is a corpus line prefixed with '<sos>'.
input_text

['<sos> Two roads diverged in a yellow wood,',
 '<sos> And sorry I could not travel both',
 '<sos> And be one traveler, long I stood',
 '<sos> And looked down one as far as I could',
 '<sos> To where it bent in the undergrowth;',
 '<sos> Then took the other, as just as fair,',
 '<sos> And having perhaps the better claim',
 '<sos> Because it was grassy and wanted wear,',
 '<sos> Though as for that the passing there',
 '<sos> Had worn them really about the same,',
 '<sos> And both that morning equally lay',
 '<sos> In leaves no step had trodden black.',
 '<sos> Oh, I kept the first for another day!',
 '<sos> Yet knowing how way leads on to way',
 '<sos> I doubted if I should ever come back.',
 '<sos> I shall be telling this with a sigh',
 '<sos> Somewhere ages and ages hence:',
 '<sos> Two roads diverged in a wood, and I,',
 '<sos> I took the one less traveled by,',
 '<sos> And that has made all the difference.',
 '<sos> Whose woods these are I think I know.',
 '<sos> His house is in the

In [5]:
# Number of input lines kept (blank lines were skipped while loading).
len(input_text)

1436

In [6]:
# Should equal len(input_text): one target line is appended per input line.
len(target_text)

1436

#### IMPORTING KERAS TOKENIZER

In [7]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.



##### Tokenizing the sequences

In [8]:
# Fit a single tokenizer on inputs and targets together so both share one
# vocabulary. filters='' is passed so no characters are stripped, which keeps
# the '<sos>' / '<eos>' markers (and punctuation) as part of the tokens.
tokenizer = Tokenizer(num_words=max_vocab_size, filters='')
tokenizer.fit_on_texts(all_text)

# Turn every line of text into a list of integer word indices.
input_sequences = tokenizer.texts_to_sequences(input_text)
target_sequences = tokenizer.texts_to_sequences(target_text)

# Longest tokenized input line; used afterwards to choose the padded length.
max_sequence_length_from_data = max(map(len, input_sequences))
print("max_sequence_length_from_data", max_sequence_length_from_data)
                            

max_sequence_length_from_data 12


In [9]:
# word -> integer index mapping learned by the tokenizer.
word2idx=tokenizer.word_index
# Size of the full vocabulary found in the text; this can be larger than
# max_vocab_size because word_index is not truncated.
print("max_vocab",len(word2idx))


max_vocab 3056


In [10]:
# Inspect the word -> index mapping.
word2idx

{'<sos>': 1,
 '<eos>': 2,
 'the': 3,
 'to': 4,
 'and': 5,
 'i': 6,
 'a': 7,
 'of': 8,
 'in': 9,
 'you': 10,
 'it': 11,
 'he': 12,
 'that': 13,
 'was': 14,
 'for': 15,
 'but': 16,
 'as': 17,
 'on': 18,
 'with': 19,
 'what': 20,
 'all': 21,
 'they': 22,
 'had': 23,
 'one': 24,
 'me': 25,
 'his': 26,
 'be': 27,
 'if': 28,
 'have': 29,
 'we': 30,
 'not': 31,
 'is': 32,
 'from': 33,
 'she': 34,
 'up': 35,
 'out': 36,
 'like': 37,
 'see': 38,
 'or': 39,
 'where': 40,
 'at': 41,
 'no': 42,
 'when': 43,
 'my': 44,
 'her': 45,
 'so': 46,
 'by': 47,
 'some': 48,
 "it's": 49,
 'him': 50,
 "don't": 51,
 'them': 52,
 'this': 53,
 'were': 54,
 'there': 55,
 'been': 56,
 'can': 57,
 'too': 58,
 'are': 59,
 'tell': 60,
 "he's": 61,
 'do': 62,
 'old': 63,
 "'i": 64,
 'could': 65,
 'know': 66,
 'down': 67,
 'here': 68,
 'an': 69,
 'off': 70,
 'your': 71,
 'think': 72,
 'must': 73,
 'only': 74,
 'make': 75,
 'let': 76,
 'just': 77,
 'come': 78,
 'will': 79,
 'would': 80,
 'way': 81,
 'made': 82,
 'say': 

##### Padding the sequences

In [11]:
# Pad to the smaller of the configured cap and the longest line in the data.
new_max_sequence_length = min(max_sequence_length, max_sequence_length_from_data)

# padding='post' places the padding after the real tokens of each line.
input_sequences = pad_sequences(
    input_sequences,
    maxlen=new_max_sequence_length,
    padding='post',
)
target_sequences = pad_sequences(
    target_sequences,
    maxlen=new_max_sequence_length,
    padding='post',
)

# Show the resulting (number of lines, padded length) shape.
input_sequences.shape

(1436, 12)


##### load in pre-trained word vectors


In [12]:
# Read the pre-trained GloVe vectors into a word -> vector dictionary.
print('Loading word vectors...')
word2vec = {}
# The encoding is given explicitly: the GloVe text file contains non-ASCII
# tokens, and relying on the platform default encoding can raise a decode
# error on systems where that default is not UTF-8.
with open('glove.6B/glove.6B.%sd.txt' % embedding_dim, encoding='utf-8') as f:
    for line in f:
        # each line holds a word followed by the components of its vector
        values = line.split()
        word = values[0]
        vec = np.asarray(values[1:], dtype='float32')
        word2vec[word] = vec
print('Found %s word vectors.' % len(word2vec))


# Build the matrix used to initialise the embedding layer: row i holds the
# GloVe vector of the word whose tokenizer index is i.
print('Filling pre-trained embeddings...')
# +1 because tokenizer indices start at 1, leaving row 0 unused by any word.
num_words = min(max_vocab_size, len(word2idx) + 1)
embedding_matrix = np.zeros((num_words, embedding_dim))
for word, i in word2idx.items():
    # only indices below the vocabulary cap have a row in the matrix
    if i < max_vocab_size:
        embedding_vector = word2vec.get(word)
        if embedding_vector is not None:
            # words missing from GloVe keep their all-zero row
            embedding_matrix[i] = embedding_vector

Loading word vectors...
Found 400000 word vectors.
Filling pre-trained embeddings...


##### one-hot the targets

In [13]:
# One-hot encode the targets: one_hot_target[i, j, w] is 1 when word index w
# sits at position j of target line i. Positions holding index 0 stay all-zero.
one_hot_target = np.zeros((len(input_sequences), new_max_sequence_length, num_words))

# The loop variable deliberately has its own name. Reusing `target_sequences`
# here would rebind that name to the last row once the loop ends, corrupting
# the array for any later cell and for a re-run of this cell.
for i, target_sequence in enumerate(target_sequences):
    for j, word in enumerate(target_sequence):
        if word > 0:
            one_hot_target[i, j, word] = 1
                        

In [14]:
# Expected shape: (number of lines, padded length, num_words).
one_hot_target.shape


(1436, 12, 3000)

##### Importing Keras Libraries

In [15]:
from keras.layers import Embedding,LSTM,Dropout,Input,Dense
from keras.models import Model
from keras.optimizers import Adam, SGD

In [16]:
# Embedding layer initialised from the GloVe-filled matrix. The same layer
# object is applied in both the training model and the sampling model.
# No `trainable` argument is passed, so the layer's default applies.
embedding=Embedding(num_words,embedding_dim,weights=[embedding_matrix])
    

##### Model Architecture

In [17]:
# --- Inputs: one full padded line plus explicit initial LSTM states ---------
input_1 = Input(shape=(new_max_sequence_length,))
initial_h = Input(shape=(latent_dim,))
initial_c = Input(shape=(latent_dim,))

# --- Layers ------------------------------------------------------------------
# `lstm` and `dense` are kept as named layer objects so the sampling model can
# apply the very same (trained) layers to a one-token input.
x = embedding(input_1)
lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
x, _, _ = lstm(x, initial_state=[initial_h, initial_c])
dense = Dense(num_words, activation='softmax')
output = dense(x)

# --- Model: per-position distribution over the vocabulary --------------------
model = Model([input_1, initial_h, initial_c], output)
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.01),
    metrics=['accuracy'],
)

# --- Training: all-zero initial states, one row per training line ------------
z = np.zeros((len(input_sequences), latent_dim))
model.fit(
    [input_sequences, z, z],
    one_hot_target,
    batch_size=batch_size1,
    validation_split=validation_split1,
    epochs=epochs1,
)

Train on 1148 samples, validate on 288 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000

KeyboardInterrupt: 

##### Prediction model

In [47]:
# Sampling model: takes a single token plus the current LSTM states and
# returns the next-word distribution together with the updated states.
# It applies the same embedding, lstm and dense layer objects as the
# training model.
input_2 = Input(shape=(1,))
x = embedding(input_2)
x, h, c = lstm(x, initial_state=[initial_h, initial_c])
output_2 = dense(x)

model_2 = Model(
    [input_2, initial_h, initial_c],
    [output_2, h, c],
)

# Reverse lookup (index -> word) for turning sampled indices back into text.
idx2word = {index: word for word, index in word2idx.items()}



In [None]:
def sample():
    """Generate one line of text by sampling word-by-word from `model_2`.

    Starts from the '<sos>' token with all-zero LSTM states, then repeatedly
    asks `model_2` for the next-word distribution, draws a word index from it
    and feeds that index (with the updated states) back in.

    Generation stops when '<eos>' is drawn, after `new_max_sequence_length`
    steps, or when no probability mass is left once index 0 is excluded.

    Returns:
        str: the sampled words joined by single spaces (may be empty).
    """
    np_input = np.array([[word2idx['<sos>']]])
    h = np.zeros((1, latent_dim))
    c = np.zeros((1, latent_dim))
    eos = word2idx['<eos>']
    output_sentence = []

    for _ in range(new_max_sequence_length):
        o, h, c = model_2.predict([np_input, h, c])

        # distribution over the vocabulary for the single time step
        probs = o[0, 0]
        if np.argmax(probs) == 0:
            print("wtf")

        # index 0 is not a real word (word indices start at 1), so it must
        # never be sampled: zero it out and renormalise the rest
        probs[0] = 0
        total = probs.sum()
        if not total > 0:
            # all mass was on index 0 (or the sum is NaN): dividing would
            # give an invalid distribution and np.random.choice would raise,
            # so end the line here instead
            break
        probs /= total

        idx = np.random.choice(len(probs), p=probs)
        if idx == eos:
            break

        # accumulate output
        output_sentence.append(idx2word.get(idx, '<WTF %s>' % idx))

        # make the next input into model
        np_input[0, 0] = idx

    return ' '.join(output_sentence)

      

##### Generating Poetry

In [53]:
# Interactive loop: print a four-line stanza, then ask whether to continue.
while True:
    # one sampled line per print call
    for _ in range(4):
        print(sample())

    ans = input("---generate another? [Y/n]---")
    # Any answer starting with 'n' / 'N' stops; an empty answer (just Enter)
    # or anything else keeps generating. Slicing with [:1] is safe on ''.
    if ans[:1].lower().startswith('n'):
        break

he no one street,
had checked the pace
2o3 let them stay to do not to help
a leak and emptied then. from spoiled everything
---generate another? [Y/n]---y
all i took him mow in snow
transfixed on mountain slopes almost erect.
with half an ear to the piano's vigor.
that position behind the old bathtub, what to look-
---generate another? [Y/n]---y
to make a present of me to each other.
and dangle feet
though i kept remembering
of his his crew left him on where it said
---generate another? [Y/n]---n
