In [1]:
import os
import numpy as np
from IPython.display import HTML

from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission
)

### Training Unsupervised HMM

#### Testing with different numbers of hidden states

In [2]:
# Read the whole cleaned corpus into memory. The context manager closes the
# file handle as soon as the read finishes instead of leaving it open for the
# lifetime of the kernel.
with open(os.path.join(os.getcwd(), 'data/shakespeare_cleaned.txt')) as corpus_file:
    text = corpus_file.read()

# parse_observations returns a pair: `obs` is what the training cells pass to
# unsupervised_HMM, and `obs_map` is what the sampling / visualisation helpers
# take alongside a trained model.
obs, obs_map = parse_observations(text)

In [4]:
# Smallest model, used only as a quick test of the training pipeline.
# Arguments: the parsed observations, then 1 (presumably the number of hidden
# states, per the variable name and the section heading), then 100 (the
# iteration count -- the training log reports "Iteration 0 of 100").
# NOTE: the recorded run of this cell was interrupted (KeyboardInterrupt), so
# `hmm1` only exists if this cell is allowed to run to completion.
hmm1 = unsupervised_HMM(obs, 1, 100)

Iteration 0 of 100


KeyboardInterrupt: 

In [None]:
# Train on the same observations with 10 as the second argument (presumably
# the number of hidden states) and 100 as the third (iteration count).
hmm10 = unsupervised_HMM(obs, 10, 100)

In [None]:
# Train on the same observations with 16 as the second argument (presumably
# the number of hidden states) and 100 as the third (iteration count).
hmm16 = unsupervised_HMM(obs, 16, 100)

#### Sample Output

In [None]:
# Build the word -> syllable-token lookup from the syllable dictionary file.
# Each usable line is split on whitespace: the first token is the word and the
# second token is stored as that word's value. The value is kept as the raw
# string token exactly as read, so convert it before doing arithmetic with it.
syllable_dict = {}
with open('data/Syllable_dictionary.txt') as f:
    # Iterate the file lazily instead of materialising every line with
    # readlines().
    for line in f:
        curr_line = line.split()
        # Blank or single-token lines carry no value to record; skipping them
        # avoids an IndexError on curr_line[1].
        if len(curr_line) < 2:
            continue
        syllable_dict[curr_line[0]] = curr_line[1]

In [None]:
def _syllable_count(word):
    """Return the syllable count for ``word`` using the global ``syllable_dict``.

    The lookup tries the word as sampled, then lower-cased, then lower-cased
    with surrounding sentence punctuation removed, because generated text may
    differ from the dictionary keys in case or trailing punctuation.
    Words that cannot be resolved count as one syllable so that generation
    continues instead of raising ``KeyError``.
    """
    lowered = word.lower()
    for key in (word, lowered, lowered.strip('.,;:!?()')):
        if key in syllable_dict:
            # Dictionary values may be stored as text; keep only the digits so
            # both "2" and a marker-prefixed token parse to an int.
            # NOTE(review): confirm the token format of the dictionary file.
            digits = ''.join(ch for ch in str(syllable_dict[key]) if ch.isdigit())
            if digits:
                return int(digits)
    # Every word has at least one syllable; use that as a conservative default.
    return 1


def get_sample_sonnet(hmm, obs_map, n_words=None, n_lines=14, syllables_per_line=10):
    """Generate a sonnet-shaped text sample from a trained HMM.

    Words are sampled with ``sample_sentence`` and packed greedily into lines:
    a line is closed as soon as its syllable count reaches
    ``syllables_per_line``. Syllables beyond the target do not carry over to
    the next line. Generation stops once ``n_lines`` lines are complete.

    Parameters
    ----------
    hmm : trained model to sample from (this argument is the model used; the
        function no longer reads a global model).
    obs_map : observation map forwarded unchanged to ``sample_sentence``.
    n_words : number of words to sample. Defaults to
        ``n_lines * syllables_per_line``, which is enough to fill every line
        provided each word counts for at least one syllable.
    n_lines : number of lines in the result (14 for a sonnet).
    syllables_per_line : syllable target per line (10 for a sonnet).

    Returns
    -------
    str
        The lines joined by a blank line (``"\\n\\n"``), words within a line
        separated by single spaces. If the sampled words run out early, the
        last (partial) line is still included.
    """
    if n_words is None:
        n_words = n_lines * syllables_per_line

    lines = []
    current_words = []
    current_syllables = 0
    for word in sample_sentence(hmm, obs_map, n_words=n_words).split():
        current_words.append(word)
        current_syllables += _syllable_count(word)
        if current_syllables >= syllables_per_line:
            # Close the line; the overshoot is deliberately discarded.
            lines.append(" ".join(current_words))
            current_words = []
            current_syllables = 0
            # Stop on line count rather than total syllables, so overshooting
            # lines cannot end the sonnet in the middle of a line.
            if len(lines) >= n_lines:
                break

    # Keep whatever was collected if the sample ended mid-line.
    if current_words:
        lines.append(" ".join(current_words))
    return "\n\n".join(lines)

In [None]:
# This cell prints the output of get_sample_sonnet, so the header names it a
# sonnet (it previously read "Sample Sentence").
print('\nSample Sonnet:\n====================')
print(get_sample_sonnet(hmm1, obs_map))

In [None]:
# This cell prints a 25-word sample_sentence from hmm10, not a formatted
# sonnet, so the header names it a sentence.
print('Sample Sentence:\n====================')
print(sample_sentence(hmm10, obs_map, n_words=25))

In [None]:
# This cell prints a 25-word sample_sentence from hmm16, not a formatted
# sonnet, so the header names it a sentence.
print('Sample Sentence:\n====================')
print(sample_sentence(hmm16, obs_map, n_words=25))

### Visualization

#### Overall Wordcloud over All Sonnets

In [None]:
# Word cloud over the full corpus text loaded earlier, titled 'Shakespeare'.
# The helper's return value is kept in `wordcloud`.
wordcloud = text_to_wordcloud(text, title='Shakespeare')

#### Wordclouds for Each Hidden State

In [None]:
# Word clouds for the 10-state model (presumably one per hidden state, per the
# helper's name and the section heading).
wordclouds = states_to_wordclouds(hmm10, obs_map)

In [None]:
# Word clouds for the 16-state model (presumably one per hidden state).
# NOTE: this rebinds `wordclouds`, replacing the value assigned from hmm10 in
# the previous cell.
wordclouds = states_to_wordclouds(hmm16, obs_map)