In [32]:
#tutorial followed: https://github.com/aparrish/gutenberg-poetry-corpus/blob/master/quick-experiments.ipynb

In [26]:
import gzip, json
import random
import re
from collections import Counter

In [7]:
all_lines =[]
for line in gzip.open("gutenberg-poetry-v001.ndjson.gz"):
    all_lines.append(json.loads(line.strip()))

In [8]:
random.sample(all_lines, 8)

[{'s': "And shared the lawyer's food and bed,", 'gid': '17604'},
 {'s': 'And tended me with care.', 'gid': '35553'},
 {'s': 'Keep stern and still as destiny,', 'gid': '41955'},
 {'s': 'I hope that I some green here getten may."', 'gid': '2383'},
 {'s': 'Are dreams! this only _is_--', 'gid': '27739'},
 {'s': 'Few of us have the blood of kings, few are of courtly birth,',
  'gid': '5625'},
 {'s': 'A fond and gentle foster-dam--', 'gid': '38463'},
 {'s': 'These are our realms, no limit to their sway!', 'gid': '35394'}]

In [10]:
flower_lines = [line['s'] for line in all_lines if re.search(r'\bflower\b', line['s'], re.I)]

In [11]:
random.sample(flower_lines, 8)

['That are the flower of the earth?"',
 'The flower of fairy lore.',
 'From flower to star read upward; you shall see',
 'Thou breathest beauty like a flower,',
 'And flower and vine, like angel wings',
 "The very flower of Issland; 'twas a fair yet fearful scene.",
 'To make a stone a flower.',
 'we caught flower and new bramble-fruit']

In [18]:
longest = max([len(x) for x in flower_lines])# find the length of the longest line
center = longest - len("flower") # and use it to create a "center" offset that will work for all lines.

sorted_flower_lines = sorted(
    [line for line in flower_lines if re.search(r"\bflower\b\s\w", line)], #only lines with word following
    key=lambda line: line[re.search(r'\bflower\b\s', line).end():])     #sort on the substring following the match.
    
for line in sorted_flower_lines[350:400]: #change these numbers to see a different slice
    offset = center - re.search(r'\bflower\b', line, re.I).start()
    print((" "*offset)+line) #left pad the string with spaces to align on flower

                                        Or why sae sweet a flower as love
                                                So sweet a flower as she."
                                                         A flower as yet unblossomed. Warmth and light
                                           Is only half in flower as yet. But why--
                                        "To gain so fair a flower as you,
                                               Cast like a flower aside?
                              (Yon scarlet fruit-bell is a flower asleep;)
                                                 As doth a flower at Apollo's touch.
                                             'Twas a pigmy flower at best,
                                               But he, the flower at head and soil at root,
                                               But he, the flower at head and soil at root,
                                        Blooms the perfect flower at last.
                       

In [22]:
found_adj = []
for line in flower_lines:
    matches = re.findall(r"(the|a)\s(\b\w+\b)\s(\bflower\b)", line, re.I)
    for match in matches:
        found_adj.append(match[1])

In [23]:
random.sample(found_adj, 12)

['beauteous',
 'poison',
 'purple',
 'constellated',
 'happy',
 'falling',
 'little',
 'stateliest',
 'purple',
 'humblest',
 'morning',
 'humblest']

In [27]:
Counter(found_adj).most_common(12)

[('little', 26),
 ('white', 23),
 ('sweetest', 22),
 ('wild', 19),
 ('fairest', 15),
 ('tender', 13),
 ('sweet', 11),
 ('purple', 11),
 ('meanest', 11),
 ('lovely', 10),
 ('bonnie', 10),
 ('faded', 9)]

In [29]:
import pronouncing

In [30]:
source_word = "flowering"
source_word_rhymes = pronouncing.rhymes(source_word)

In [31]:
source_word_rhymes

['cowering',
 'devouring',
 'empowering',
 'glowering',
 'powering',
 'scouring',
 'showering',
 'souring',
 'towering']

In [34]:
#And then look through the lines of poetry in the corpus for lines that end with any of these words:
for line in all_lines:
    text = line['s']
    match = re.search(r'(\b\w+\b)\W*$', text)
    if match:
        last_word = match.group()
        if last_word in source_word_rhymes:
            print(text)

In the Winter you are cowering
"Oh, yes!" exclaimed John, with a towering
In the Winter you are cowering
winged things may never pass, nay, not even the cowering
Ithaca, these are wooing me against my will, and devouring
"Of Coleridge, I can not speak but with reverence. His towering
upbraid him. "Son of Tydeus," he said, "why stand you cowering
the heaviness of his heart, "why are the Achaeans again scouring
Maidens with towering
Are its waters, aye showering
In the Winter you are cowering
In the Winter you are cowering
So hunted, yet defiant, cowering
The moonlit crests of foaming waves gleam towering


In [35]:
phones = pronouncing.phones_for_word(source_word)[0] #
phones

'F L AW1 ER0 IH0 NG'

In [37]:
pronouncing.rhyming_part(phones)

'AW1 ER0 IH0 NG'

In [41]:
#The following cell builds the data structure proposed above: a dictionary that maps rhyming parts 
#to a dictionary that maps words with that rhyming part to the lines of poetry that they're found at the end of.
from collections import defaultdict
by_rhyming_part = defaultdict(lambda: defaultdict(list))
for line in all_lines:
    text = line['s']
    if not(32 < len(text) < 48): # only use lines of uniform lengths
        continue
    match = re.search(r'(\b\w+\b)\W*$', text)
    if match:
        last_word = match.group()
        pronunciations = pronouncing.phones_for_word(last_word)
        if len(pronunciations) > 0:
               rhyming_part = pronouncing.rhyming_part(pronunciations[0])
               # group by rhyming phones (for rhymes) and words (to avoid duplicate words)
               by_rhyming_part[rhyming_part][last_word.lower()].append(text)
               
               

In [58]:
#A random key/value pair from this dictionary, so you can see its structure:
random_rhyming_part = random.choice(list(by_rhyming_part.keys()))
random_rhyming_part, by_rhyming_part[random_rhyming_part]

('AO1 R W ER0 D',
 defaultdict(list,
             {'forward': ['See, he stoops, nay, shooting forward',
               'Down at the sand where the tide threw forward',
               'Time three Archangels come forward',
               'The Duke stepped rather aside than forward',
               'Throws himself head foremost, forward',
               'Throws himself head foremost, forward',
               'The maddening pendulum urges me forward',
               "Saw her mother's dwelling, and press'd forward",
               'Seize its ear, and drag it forward',
               'And the bright-hued sledge drew forward',
               'The shark and the roach dart forward',
               'Out of the stones. You saw him looking forward',
               'Each foam-curled wave darts forward',
               'No salām {36b} she gave—but hurried forward',
               'Saw her mother’s dwelling, and press’d forward',
               'Desire of joy beats mightily pulsing forward'],
       

## Markov Chain text generation

In [64]:
import markovify

In [65]:
big_poem = "\n".join([line['s'] for line in random.sample(all_lines, 250000)])

In [69]:
model = markovify.NewlineText(big_poem)
for i in range(14):
    print(model.make_sentence())

Whyl that this Moor did conceive a great wave raised me by night, the stars,
Cauf-leather shoon to thy frown.
Her hopes with free leave
And first below the doorstep
How pleasant for the manner of wood
. . . that way . . . . . .
Than the spots on the threshold stands
And, for his pains.
Owes to his quarry,--then to feed Love, but near me!
Couched in the forsaken dwelling,
On Him, who to her if she suffer not the mission of man I had a little shanty, in the house
He worked and sang beneath the waters. No!--
In the opening lines of swarthy Nubians bare up his weazon,
Swift almost as life!


#This is okay but the lines don't make a lot of sense, and are sometimes too long. You can constrain the length using Markovify's .make_short_sentence() method:

In [71]:
model.make_short_sentence(60)

'Where brightest joys and griefs; but I must leave it stay'

In [74]:
for i in range(6):
    print()
    for i in range(random.randrange(1,5)):
        print(model.make_short_sentence(40))
    #ensure the last line has a period at the end for closure.
    print(re.sub(r"(\w)[^\w.]?$", r"\1.", model. make_short_sentence(40)))
    print()
    print("～ ❀ ～")
        


My ribs are a friend;--
Of sweetness it had done it, sir.

～ ❀ ～

Since she is gone out.
My gratitude to God--
And all the day,
O'er their freedom protected
many a work no other noise than that.

～ ❀ ～

Bound at her smote,
although it rests awhile until
With mirth that still be,
particle of it, I am there,--am back.

～ ❀ ～

Trace the sad Sicilian maid,
And the lean water-worm
their beauty they would insure us,
Five times re-illum'd, as oft you see.

～ ❀ ～

We should be worn, like a flight
That falls through the dark of life.

～ ❀ ～

But it must be,
The hired man takes hound,
Thou to thy shrine.

～ ❀ ～
