In [16]:
import os
import re
import numpy as np
from IPython.display import HTML

from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)

### Training Unsupervised HMM

#### Testing with different numbers of hidden states

In [2]:
text = open(os.path.join(os.getcwd(), 'data/shakespeare_cleaned.txt')).read()
obs, obs_map = parse_observations(text)

In [None]:
hmm10 = unsupervised_HMM(obs, 10, 100)

In [None]:
hmm16 = unsupervised_HMM(obs, 16, 100)

#### Sample Output

In [21]:
# Getting syllable dictionary
syllable_dict = {}
with open('data/Syllable_dictionary.txt') as f:
    for line in f.readlines():
        curr_line = line.split()
        syllable_dict[curr_line[0]] = re.sub(r'[^\w]', '', curr_line[-1]).lower()

In [61]:
def get_sample_sonnet(hmm, obs_map):
    # Total of 14 lines, 10 syllables each. There are 3 quatrains (4 lines) and 1 couplet (2 lines)
    sonnet = ""
    full = sample_sentence(hmm, obs_map, n_words=150).split()
    tot_syllable = 0
    line_syllable = 0
    line_num = 0
    for word in full:
        sonnet += word + " "
        tot_syllable += int(syllable_dict[word.lower()])
        line_syllable += int(syllable_dict[word.lower()])
        if line_syllable >= 10:
            # creating new line once we go over 10 syllables, but syllables do not carry over to next line
            line_syllable = 0
            line_num += 1
            sonnet += "\n"
            if line_num % 4 == 0:
                sonnet += "\n"
        if tot_syllable >= 140:
            # once we have over 140 syllables we return our sonnet
            break
    return sonnet

In [8]:
# For testing
hmm1 = unsupervised_HMM(obs, 1, 10)


Iteration 0 of 10


NameError: name 'hmm10' is not defined

In [70]:
print(get_sample_sonnet(hmm1, obs_map))

Not excuse so account may with thy sure 
knowing should imprisoned is methods me 
decrepit trifles bear i than thereby 
and be costs praise shall mine time my cover 

in winters new brought of hunted thee you 
my should book a steal friends me thou which none 
stand not double leave issueless cold yet 
nor promise thy can looking world wrong edge 

which taught where commend in my that world forth 
to a and despised unfair but but 
each and your the takes should like loves she thy 
by dissuade lack together to salutation 

will place is of at death longer golden 
his former father gives absence 


In [None]:
print('Sample Sonnet:\n====================')
print(sample_sentence(hmm10, obs_map, n_words=25))

In [None]:
print('Sample Sonnet:\n====================')
print(sample_sentence(hmm16, obs_map, n_words=25))

### Visualization

#### Overall Wordcloud over All Sonnets

In [None]:
wordcloud = text_to_wordcloud(text, title='Shakespeare')

#### Wordclouds for Each Hidden State

In [None]:
wordclouds = states_to_wordclouds(hmm10, obs_map)

In [None]:
wordclouds = states_to_wordclouds(hmm16, obs_map)