In [2]:
import os
import re
import numpy as np
from IPython.display import HTML

from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)

### Training Unsupervised HMM

#### Testing with different numbers of hidden states

In [3]:
# Load the cleaned Shakespeare corpus from ./data (relative to the current working
# directory). The context manager guarantees the file handle is closed once the
# text has been read, instead of leaving it open until garbage collection.
with open(os.path.join(os.getcwd(), 'data/shakespeare_cleaned.txt')) as corpus_file:
    text = corpus_file.read()

# `obs` is the training input handed to unsupervised_HMM and `obs_map` is handed to
# the sampling/visualization helpers below.
# NOTE(review): presumably obs = integer-encoded word sequences and obs_map = word -> index;
# confirm against HMM_helper.parse_observations.
obs, obs_map = parse_observations(text)

In [None]:
# Train an unsupervised HMM on the parsed corpus.
# Second argument (10): presumably the number of hidden states, per the section heading.
# Third argument (100): number of training iterations (the trainer logs "Iteration k of N").
hmm10 = unsupervised_HMM(obs, 10, 100)

In [None]:
# Same training run as the 10-state model but with 16 as the second argument
# (presumably 16 hidden states), again for 100 iterations.
hmm16 = unsupervised_HMM(obs, 16, 100)

#### Sample Output

In [8]:
# Getting syllable dictionary
# Maps a normalized word (all non-word characters stripped, lower-cased) to the LAST
# whitespace-separated token on that word's line of the dictionary file. The value is
# kept as a string; consumers convert it with int() when they need a number.
# NOTE(review): assumes the last token on each line is a plain integer syllable count;
# confirm against the data file format.
syllable_dict = {}
with open('data/Syllable_dictionary.txt') as f:
    # Iterate the file lazily instead of materializing every line with readlines().
    for line in f:
        curr_line = line.split()
        if not curr_line:
            # Blank / whitespace-only lines have no word token; indexing curr_line[0]
            # on them would raise IndexError, so skip them.
            continue
        syllable_dict[re.sub(r'[^\w]', '', curr_line[0]).lower()] = curr_line[-1]

In [16]:
def get_sample_sonnet(hmm, obs_map):
    """Sample words from an HMM and lay them out in sonnet form.

    A single 150-word sample is drawn with ``sample_sentence`` and split on
    whitespace. Words are appended to the current line until the line's
    syllable total reaches at least 10; the line is then closed. Syllables
    beyond 10 are NOT carried over to the next line, so a line may hold more
    than 10 syllables. A blank line is inserted after every 4th line
    (three quatrains followed by a couplet) and generation stops after
    14 lines.

    Syllable counts are looked up in the module-level ``syllable_dict`` using
    the lower-cased word, and converted with ``int()``.

    Args:
        hmm: Trained model, passed through unchanged to ``sample_sentence``.
        obs_map: Observation map, passed through unchanged to ``sample_sentence``.

    Returns:
        str: The sonnet text. Words within a line are separated by single
        spaces with no trailing space; every completed line ends with a
        newline. If the sample runs out of words early, the unfinished last
        line is returned without a trailing newline.

    Raises:
        KeyError: If a sampled word has no entry in ``syllable_dict``.
    """
    total_lines = 14
    syllables_per_line = 10
    lines_per_stanza = 4

    # 150 words is enough for 14 lines of 10 syllables as long as every word
    # counts for at least one syllable (14 * 10 = 140 words at most).
    sampled_words = sample_sentence(hmm, obs_map, n_words=150).split()

    pieces = []        # finished lines and stanza breaks, joined once at the end
    current_line = []  # words of the line currently being built
    syllables_so_far = 0
    lines_done = 0

    for word in sampled_words:
        if not current_line:
            # First word of each line is capitalized.
            word = word.capitalize()
        current_line.append(word)
        syllables_so_far += int(syllable_dict[word.lower()])

        if syllables_so_far >= syllables_per_line:
            # Close the line. Joining the words (instead of appending "word "
            # each time) avoids the trailing space before the newline.
            pieces.append(" ".join(current_line) + "\n")
            current_line = []
            syllables_so_far = 0
            lines_done += 1
            if lines_done % lines_per_stanza == 0:
                # Blank line between stanzas.
                pieces.append("\n")
            if lines_done == total_lines:
                break

    if current_line:
        # Sample exhausted mid-line: keep the partial line rather than dropping it.
        pieces.append(" ".join(current_line))

    return "".join(pieces)

In [6]:
# For testing
# Cheap smoke-test model: second argument 1 (presumably a single hidden state) and
# only 10 training iterations, so the sonnet-formatting code can be exercised
# without waiting for the 100-iteration runs above.
hmm1 = unsupervised_HMM(obs, 1, 10)


Iteration 0 of 10


In [17]:
# Print (rather than display the repr) so the newlines in the returned string
# render as actual line and stanza breaks. Output differs on each run because the
# words are sampled.
print(get_sample_sonnet(hmm1, obs_map))

To still lest for self more his thee thou thy 
Sound sight yhave means a day haply or our 
Me do forth within thee pourst forsake happy 
For call an minds clear looks so then surmount 

Be world thy that night loves mightst my praise in 
Not say to more shall goddess and the hence 
Him moving where hard dully full imaginary 
Will on the was hath and yet it thy madness 

To do since hath their error cannot say 
That thee then blame i even be which thou 
Go eat them shame my is not against with 
Painting bring shall mayst they from shame in bear 

To have and mistake do thou bends thy pay 
After remain rather thine thy doth affairs 



In [None]:
# Print a header followed by a 25-word sample drawn from the 10-state model.
# NOTE(review): the header says "Sample Sonnet" but this calls sample_sentence
# directly, not get_sample_sonnet, so no line/stanza layout is applied - confirm intent.
# Requires the hmm10 training cell to have been run first.
print('Sample Sonnet:\n====================')
print(sample_sentence(hmm10, obs_map, n_words=25))

In [None]:
# Print a header followed by a 25-word sample drawn from the 16-state model.
# NOTE(review): the header says "Sample Sonnet" but this calls sample_sentence
# directly, not get_sample_sonnet, so no line/stanza layout is applied - confirm intent.
# Requires the hmm16 training cell to have been run first.
print('Sample Sonnet:\n====================')
print(sample_sentence(hmm16, obs_map, n_words=25))

### Visualization

#### Overall Wordcloud over All Sonnets

In [None]:
# Build a word cloud from the full corpus text, titled 'Shakespeare'.
# NOTE(review): presumably the helper also renders the figure as a side effect;
# confirm in HMM_helper.text_to_wordcloud.
wordcloud = text_to_wordcloud(text, title='Shakespeare')

#### Wordclouds for Each Hidden State

In [None]:
# Word clouds for the 10-state model (one per hidden state, per the section heading).
# Requires the hmm10 training cell to have been run first.
wordclouds = states_to_wordclouds(hmm10, obs_map)

In [None]:
# Word clouds for the 16-state model (one per hidden state, per the section heading).
# NOTE(review): this rebinds `wordclouds`, discarding the 10-state result assigned in
# the previous cell; use a distinct name if both are needed later.
wordclouds = states_to_wordclouds(hmm16, obs_map)