## Additional Goals: Rhyming
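In this Jupyter notebook, we train an unsupervised HMM on the given Shakespearean sonnets and generate a 14-line sonnet with appropriate rhyming line pairs. To make the rhymes land on the line endings, the training lines are reversed, each generated line is seeded with its rhyme word, and the emitted words are reversed back before printing.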

In [1]:
import re, os
import string
from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)
import numpy as np
import random
import pickle

In [2]:
# Read the whole corpus. The context manager closes the file handle as soon as
# the text has been read instead of leaving it to the garbage collector.
with open(os.path.join(os.getcwd(), 'data/shakespeare_no99_no126.txt')) as corpus_file:
    original_text = corpus_file.read()

# Drop every digit character (presumably the sonnet numbers -- confirm against the data file).
original_text = "".join(ch for ch in original_text if not ch.isdigit())
# Normalise case and trim leading/trailing whitespace of the whole text.
original_text = original_text.lower().strip()
# Remove sentence punctuation; apostrophes and hyphens are intentionally left in place.
original_text = re.sub("[.,?!\";:]", "", original_text)

In [3]:
# Number of lines that make up one sonnet in the corpus.
LINES_PER_SONNET = 14

# Squeeze every run of blank / whitespace-only lines into a single blank line.
raw_text = re.sub(r'(\n\s*)+\n', '\n\n', original_text)

raw_lines = []
for text_line in raw_text.split('\n'):
    if not text_line:
        continue
    # Trim non-letter characters from both ends of the line.
    trimmed = re.sub('^[^a-zA-Z]*|[^a-zA-Z]*$|', '', text_line)
    # Remove parentheses anywhere in the line.
    without_parens = re.sub(r'[\(\)]', '', trimmed)
    # Drop apostrophes that sit directly next to a space (before or after it).
    raw_lines.append(without_parens.replace("' ", " ").replace(" '", " "))

cleaned_raw_text = "\n".join(raw_lines)

# Tokenise every line, then cut the stream of lines into consecutive sonnets.
tokenized_lines = [text_line.split() for text_line in raw_lines]
complete_sonnets = len(tokenized_lines) // LINES_PER_SONNET
sonnet_list = [
    tokenized_lines[k * LINES_PER_SONNET:(k + 1) * LINES_PER_SONNET]
    for k in range(complete_sonnets)
]
# Any trailing lines that do not fill a whole sonnet (normally none).
sonnet = tokenized_lines[complete_sonnets * LINES_PER_SONNET:]


In [4]:
# Create rhyming dictionary

import copy

# word -> set of words recorded as rhyming with it.
rhyme_dict = {}

def update_rhyme_dict(w1,w2,my_dict):
    """Record that ``w1`` and ``w2`` rhyme, updating ``my_dict`` in place.

    Args:
        w1: First word of the rhyming pair.
        w2: Second word of the rhyming pair.
        my_dict: Mapping ``word -> set of rhyming words``; missing entries
            are created on demand.

    Returns:
        None. ``my_dict`` is mutated.

    After the call each of the two words lists the other one plus every rhyme
    the other word already had. Words that were previously linked to only one
    of the two are *not* revisited, so the relation is only partially
    transitive. A word is never added to its own set unless ``w1 == w2``.
    """
    # Key the entries on this function's own arguments. The previous code used
    # the notebook globals ``word1``/``word2`` here, which only worked when the
    # caller happened to pass variables with exactly those names.
    my_dict.setdefault(w1, set())
    my_dict.setdefault(w2, set())

    my_dict[w1].add(w2)
    my_dict[w2].add(w1)

    # Share w1's known rhymes with w2, then w2's (now enlarged) rhymes with w1.
    # The set differences are computed first, so nothing is mutated while it
    # is being iterated.
    my_dict[w2].update(my_dict[w1] - {w2})
    my_dict[w1].update(my_dict[w2] - {w1})

# Zero-based indices of the line pairs that rhyme in each sonnet
# (ABAB CDCD EFEF GG).
rhyme_scheme = [(0,2),(1,3), (4, 6), (5,7), (8, 10), (9,11), (12, 13)]

for sonnet_lines in sonnet_list:
    for first_idx, second_idx in rhyme_scheme:
        # The rhyme word is the last token of each line.
        word1 = sonnet_lines[first_idx][-1]
        word2 = sonnet_lines[second_idx][-1]
        update_rhyme_dict(word1, word2, rhyme_dict)


In [5]:
# Getting syllable dictionary
# word -> [regular syllable counts, "E"-prefixed syllable counts], each list
# stored in the reverse of the order in which it appears in the file.
# (The "E" counts presumably apply only when the word ends a line -- confirm
# against the data set description.)
syllable_dict = {}

# Read the dictionary through a context manager so the file handle is closed.
with open(os.path.join(os.getcwd(),
                'data/Syllable_dictionary.txt')) as syllable_file:
    syllable_text = syllable_file.read()

# One entry per non-blank line: the word followed by one or more counts.
syllable_entries = [entry.split() for entry in syllable_text.split('\n') if entry.split()]

for entry in syllable_entries:
    real, end = [], []
    for token in entry[1:]:
        if token[0] == 'E':
            # Parse everything after the marker so multi-digit counts are not truncated.
            end.append(int(token[1:]))
        else:
            # Parse the whole token rather than only its first character.
            real.append(int(token))
    syllable_dict[entry[0]] = [real[::-1], end[::-1]]


In [6]:
# Slight modification to HMM_helper parse_observations function

def parse_observations(text):
    """Encode ``text`` as integer sequences, one per non-blank line.

    Every line is split on whitespace and its words are encoded in REVERSED
    order (last word first). Each distinct word receives the next unused
    integer id the first time it is seen.

    Args:
        text: Newline-separated lines of whitespace-separated words.

    Returns:
        A tuple ``(obs, obs_map)`` where ``obs`` is a list of lists of word
        ids (one inner list per non-blank line) and ``obs_map`` maps each
        word to its id.
    """
    obs_map = {}
    obs = []

    for raw_line in text.split('\n'):
        words = raw_line.split()
        if not words:
            # Blank or whitespace-only line: nothing to encode.
            continue

        encoded = []
        # Walk the line backwards so the sequence starts with the line's last word.
        for word in reversed(words):
            # New words get the next free id, which equals the current map size.
            encoded.append(obs_map.setdefault(word, len(obs_map)))

        obs.append(encoded)

    return obs, obs_map

In [None]:
# Train and save HMM model using pickle package
# Encode the cleaned corpus (each line reversed) into integer sequences.
obs, obs_map = parse_observations(cleaned_raw_text)
# NOTE(review): 20 and 100 are presumably the number of hidden states and the
# number of training iterations -- confirm against HMM.unsupervised_HMM.
hmm = unsupervised_HMM(obs, 20, 100)

# Write the model through a context manager so the file is flushed and closed
# before any later cell tries to read it back.
with open("hmm_rhyme.model", "wb") as model_file:
    pickle.dump(hmm, model_file)

In [10]:
# Reload the trained model. pickle.load can execute arbitrary code, so only
# load a model file that was produced locally by this notebook.
with open("hmm_rhyme.model", "rb") as model_file:
    hmm = pickle.load(model_file)

def invert_obs_map(o_map):
    """Return a new dict with the keys and values of ``o_map`` swapped.

    If several keys share a value, the key that comes last in iteration order
    wins.
    """
    return {val: key for key, val in o_map.items()}

def generate_sample_sentence(my_hmm, inv_obs_map, seed_word_idx, syl_dict, n_syl=10):
    """Generate one line of text from ``my_hmm`` and return it as a string.

    Args:
        my_hmm: Object exposing ``generate_sonnet_rhyme_emission``.
        inv_obs_map: Mapping from word id to word.
        seed_word_idx: Word id handed to the model as the seed.
        syl_dict: Syllable dictionary forwarded to the model unchanged.
        n_syl: Syllable count forwarded to the model (default 10).

    Returns:
        The emitted words in reverse emission order, joined by single spaces,
        with the first word of the result capitalised.
    """
    emission, _states = my_hmm.generate_sonnet_rhyme_emission(
        n_syl, seed_word_idx, inv_obs_map, syl_dict
    )

    # Decode ids to words, then flip the order: the emission runs backwards
    # relative to the sentence that is returned.
    words = []
    for token in emission:
        words.append(inv_obs_map[token])
    words.reverse()

    words[0] = words[0].capitalize()
    return " ".join(words)

# id -> word lookup used to decode the model's emissions.
i_obs_map = invert_obs_map(obs_map)

# Generate one line ending in each word of a known rhyming pair.
for seed_word in ('increase', 'decease'):
    print(generate_sample_sentence(hmm, i_obs_map, obs_map[seed_word], syllable_dict))

But can where very summer love increase
Thy change is a conquest relief decease


In [12]:
# Generate 14-line Shakespearean sonnet with rhyming scheme
def generate_shakespeare_sonnet(my_hmm, obs_map, rhyme_dict, syl_dict):
    """Generate a 14-line sonnet laid out as ABAB CDCD EFEF GG.

    Args:
        my_hmm: Trained model passed on to ``generate_sample_sentence``.
        obs_map: Mapping from word to word id.
        rhyme_dict: Mapping from word to the set of words that rhyme with it.
        syl_dict: Syllable dictionary passed on to ``generate_sample_sentence``.

    Returns:
        The 14 generated lines joined by newlines.

    The two rhyme pairs of a quatrain are drawn independently, so they are
    not guaranteed to differ from each other.
    """
    i_obs_map = invert_obs_map(obs_map)

    def pick_rhyme_pair():
        # A random word from the rhyme dictionary plus one of its recorded rhymes.
        first = np.random.choice(list(rhyme_dict.keys()))
        partner = np.random.choice(list(rhyme_dict[first]))
        return first, partner

    def line_seeded_with(word):
        # One generated line seeded with ``word``.
        return generate_sample_sentence(my_hmm, i_obs_map, obs_map[word], syl_dict)

    final_sonnet = []

    # Three quatrains with alternating rhymes.
    for _ in range(3):
        end_a, end_a_rhyme = pick_rhyme_pair()
        end_b, end_b_rhyme = pick_rhyme_pair()

        line_a = line_seeded_with(end_a)
        line_a_rhyme = line_seeded_with(end_a_rhyme)
        line_b = line_seeded_with(end_b)
        line_b_rhyme = line_seeded_with(end_b_rhyme)

        final_sonnet.extend([line_a, line_b, line_a_rhyme, line_b_rhyme])

    # Closing couplet.
    couplet_end, couplet_end_rhyme = pick_rhyme_pair()
    final_sonnet.append(line_seeded_with(couplet_end))
    final_sonnet.append(line_seeded_with(couplet_end_rhyme))

    return "\n".join(final_sonnet)
    
# Generate one sonnet with the trained model and show it.
generated_sonnet = generate_shakespeare_sonnet(hmm, obs_map, rhyme_dict, syllable_dict)
print(generated_sonnet)

Even thus all those nature thy whom defy
When the rich hardest space what but this lend
Make the best to please and burn that thereby
Beauteous got birds know the yet happier friend
First music or you half former report
He in executor swift character
Shames now rage to thy self and to doth sport
All lo upon thou am of register
To been night transport plagues of thine broken
To poesy with testy are in had
They my own rotten love in so open
Mine love hell add visage tomb lack me bad
Not and thou thoughts not than my alchemy
Thy and addeth another's majesty
