Charles Bukowski Poetry Generator

Description: A generative model trained on Charles Bukowski's poetry that produces short-form poems from keywords supplied by the user.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
import re
import random

In [2]:
# Read the whole poetry corpus from disk into a single string.
with open('bukowski_poems.txt', mode='r', encoding='utf-8') as corpus_file:
    raw_txt = corpus_file.read()

# Sanity check: report the corpus size and preview its first 500 characters.
print(f"Read Test:\n\nTotal Characters: {len(raw_txt)}\n\nText from file:\n\n{raw_txt[:500]}")



Read Test:

Total Characters: 48026

Text from file:

a 340 dollar horse and a hundred dollar whore


don’t ever get the idea I am a poet; you can see me

at the racetrack any day half drunk

betting quarters, sidewheelers and straight thoroughs,

but let me tell you, there are some women there

who go where the money goes, and sometimes when you

look at these whores these onehundreddollar whores

you wonder sometimes if nature isn’t playing a joke

dealing out so much breast and ass and the way

it’s all hung together, you look and you look and




In [3]:
# Normalise the corpus: lowercase it, then delete every run of digits.
# Punctuation and line breaks are left untouched by this step.
clean_txt = re.sub(r'\d+', '', raw_txt.lower())
print(f"Cleaned Data:\n\n{clean_txt[:500]}")

# No further cleaning is applied; the tokenizer cells read the text via this name.
fully_clean_txt = clean_txt

Cleaned Data:

a  dollar horse and a hundred dollar whore


don’t ever get the idea i am a poet; you can see me

at the racetrack any day half drunk

betting quarters, sidewheelers and straight thoroughs,

but let me tell you, there are some women there

who go where the money goes, and sometimes when you

look at these whores these onehundreddollar whores

you wonder sometimes if nature isn’t playing a joke

dealing out so much breast and ass and the way

it’s all hung together, you look and you look and

you


In [4]:
# Fit a word-level tokenizer on the cleaned corpus. "<unknown>" is registered
# as the out-of-vocabulary token; num_words=None places no cap on vocabulary size.
tokenizer = Tokenizer(oov_token="<unknown>", num_words=None)
tokenizer.fit_on_texts([fully_clean_txt])

# Forward (word -> id) and reverse (id -> word) lookup tables.
word_index = tokenizer.word_index
index_word = dict((idx, wd) for wd, idx in word_index.items())

# Word ids start at 1 (see the printed sample), so one extra slot covers id 0.
size_of_vocab = 1 + len(word_index)
print(f"Size of Vocabulary:{size_of_vocab}")
print(list(word_index.items())[:20])


Size of Vocabulary:2008
[('<unknown>', 1), ('the', 2), ('and', 3), ('i', 4), ('a', 5), ('to', 6), ('in', 7), ('of', 8), ('you', 9), ('he', 10), ('that', 11), ('they', 12), ('was', 13), ('it', 14), ('on', 15), ('with', 16), ('said', 17), ('but', 18), ('my', 19), ('not', 20)]


In [5]:
# Encode the entire corpus as one flat list of integer word ids.
# texts_to_sequences returns one sequence per input text; only one text is passed.
encoded_corpus = tokenizer.texts_to_sequences([fully_clean_txt])
id_sequence = encoded_corpus[0]

print(f"Total Tokens: {len(id_sequence)}")
print(id_sequence[:20])

Total Tokens: 9092
[5, 384, 527, 3, 5, 241, 384, 290, 80, 291, 66, 2, 831, 4, 145, 5, 146, 9, 81, 59]


In [6]:
# Build (input window, next word) training pairs by sliding a fixed-size
# window over the token stream one position at a time.
len_of_seq = 40

# One window starts at every position that still leaves a following token
# available to serve as the target.
window_starts = range(len(id_sequence) - len_of_seq)

# Inputs: len_of_seq consecutive ids. Targets: the id immediately after each window.
input_seqs = [id_sequence[start:start + len_of_seq] for start in window_starts]
targ_wds = [id_sequence[start + len_of_seq] for start in window_starts]

print(f"Number of pairs created: {len(input_seqs)}")
print(f"Input Sequence Test: {input_seqs[0][:5]}")
print(f"Target Word ID Test: {targ_wds[0]}")
print(f"Target Word Test: {index_word[targ_wds[0]]}")

    
    

Number of pairs created: 9052
Input Sequence Test: [5, 384, 527, 3, 5]
Target Word ID Test: 24
Target Word Test: are


In [7]:
# Pad on the left up to len_of_seq. Every window built above is already
# len_of_seq ids long, so this mainly turns the list of lists into an array.
padded_input_seqs = pad_sequences(
    input_seqs,
    maxlen=len_of_seq,
    padding='pre',
)

In [19]:
# Assemble the training arrays.
#   x: the integer-id input windows, one row per training pair.
#   y: the next-word targets, one-hot encoded with one column per vocabulary id.
# to_categorical comes from the notebook's import cell, so it is not
# re-imported here (keeps all imports in one place for Restart & Run All).
x=np.array(padded_input_seqs)
y=to_categorical(targ_wds, num_classes=size_of_vocab)

# Each input window must line up with exactly one target row.
assert x.shape[0] == y.shape[0], "input windows and targets are misaligned"

print(f"X Shape: {x.shape} \n Y Shape{y.shape} \nVocab Size: {size_of_vocab}")

X Shape: (9052, 40) 
 Y Shape(9052, 2008) 
Vocab Size: 2008


Model Component

In [20]:
# Training / architecture hyperparameters.
size_of_batch=120        # samples per gradient step (used by model.fit)
lstm_units=260           # hidden units in each LSTM layer
epoch_num=60             # passes over the training pairs (used by model.fit)
embedding_dimension=100  # size of each learned word vector
drop_rate=0.3            # dropout rate applied after each LSTM layer

# Next-word model: word ids -> embeddings -> two stacked LSTMs -> softmax over
# the vocabulary.
#
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument: Keras 3 deprecates `input_length` and
# ignores it with a warning, which leaves the model unbuilt until first use.
# Declaring the shape up front builds the model immediately, so
# model.summary() can report shapes and parameter counts.
model=Sequential([
    tf.keras.Input(shape=(len_of_seq,), dtype="int32"),

    Embedding(input_dim=size_of_vocab, output_dim=embedding_dimension),

    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(lstm_units, return_sequences=True),
    Dropout(drop_rate),

    # Second LSTM returns only its final output.
    LSTM(lstm_units),
    Dropout(drop_rate),

    # One probability per vocabulary id.
    Dense(size_of_vocab, activation='softmax'),
])

In [21]:
# Configure training: categorical cross-entropy against one-hot targets,
# the Adam optimizer, and accuracy reported as a metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer overview of the network.
model.summary()

In [18]:
# Train the model on the full set of (window, next-word) pairs and keep the
# returned training history.
# NOTE(review): the saved output under this cell is an InvalidArgumentError
# recorded at execution count 18, i.e. before the model and compile cells
# (counts 20 and 21) were last run. It therefore reflects stale kernel state;
# restart the kernel and run all cells top to bottom before relying on it.
history = model.fit(
    x=x,
    y=y,
    batch_size=size_of_batch,
    epochs=epoch_num,
    verbose=1,
)

# Persist the trained model next to the notebook; this runs only if fit succeeds.
model.save('my_poem_writer_model.h5')
print(f"Save Location: 'my_poem_writer_model.h5'")

Epoch 1/60


InvalidArgumentError: Graph execution error:

Detected at node sequential_1_1/lstm_2_1/while/TensorListPushBack_14 defined at (most recent call last):
  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\runpy.py", line 197, in _run_module_as_main

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\runpy.py", line 87, in _run_code

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\tornado\platform\asyncio.py", line 211, in start

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\asyncio\base_events.py", line 601, in run_forever

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\asyncio\base_events.py", line 1905, in _run_once

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\asyncio\events.py", line 80, in _run

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\IPython\core\interactiveshell.py", line 3048, in run_cell

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\IPython\core\interactiveshell.py", line 3103, in _run_cell

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\IPython\core\interactiveshell.py", line 3308, in run_cell_async

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\IPython\core\interactiveshell.py", line 3490, in run_ast_nodes

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\IPython\core\interactiveshell.py", line 3550, in run_code

  File "C:\Users\Abdulaziz\AppData\Local\Temp\ipykernel_13128\4103052508.py", line 1, in <module>

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 377, in fit

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 220, in function

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 133, in multi_step_on_iterator

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 114, in one_step_on_data

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 78, in train_step

  File "C:\Users\Abdulaziz\miniconda3\envs\poetry_writer\lib\site-packages\tensorflow\core\function\capture\capture_container.py", line 154, in capture_by_value

Tried to append a tensor with incompatible shape to a list. Op element shape: [0] list shape: [260,1040]
	 [[{{node sequential_1_1/lstm_2_1/while/TensorListPushBack_14}}]] [Op:__inference_multi_step_on_iterator_5667]