In [0]:
from google.colab import files
def getLocalFiles():
    """Prompt for a file upload in Colab and write each uploaded entry to disk.

    Every ``(key, value)`` pair returned by ``files.upload()`` is written in
    binary mode to the path given by its key.  Nothing is written when no
    entries are returned.
    """
    uploaded = files.upload()
    for name, content in uploaded.items():
        # Use a context manager so the handle is flushed and closed
        # deterministically instead of lingering until garbage collection.
        with open(name, 'wb') as out_file:
            out_file.write(content)
# Ask for the input files and write them to disk via getLocalFiles().
getLocalFiles()

# NOTE(review): this opens a second upload prompt and discards its return
# value; it looks redundant after getLocalFiles() — confirm before removing.
files.upload()

In [0]:
import numpy as np
import nltk
import graphviz as gv

from nltk.tokenize import RegexpTokenizer
from HMM import unsupervised_HMM

In [0]:
def tokenize(filename):
    """Read a text file and split every content line into lowercase tokens.

    Lines that are empty or consist solely of digits after stripping
    surrounding whitespace are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per retained line; each entry is the list of
        tokens found in that (lowercased) line.
    """
    # A token is either a run that starts and ends with a word character and
    # may contain apostrophes or hyphens in between, or a single word
    # character.  The pattern is a raw string so the backslash escapes reach
    # the regex engine without invalid-escape warnings, and '|' is kept out of
    # the character class because inside [...] it is a literal pipe rather
    # than alternation.
    tokenizer = RegexpTokenizer(r"\w[\w'-]*\w|\w")

    tokens = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line and not line.isdigit():
                tokens.append(tokenizer.tokenize(line.lower()))
    return tokens

def assign_ids(tokens):
    """Map every distinct word to an integer id, numbered by first appearance.

    Args:
        tokens: Iterable of lines, each an iterable of words.

    Returns:
        Dict from word to id; ids start at 0 and grow by one per new word.
    """
    ids_map = {}
    for words in tokens:
        for word in words:
            # An unseen word receives the next free id, which always equals
            # the number of words registered so far.
            ids_map.setdefault(word, len(ids_map))
    return ids_map

def generate_ids(tokens, ids_map):
    """Translate tokenized lines into lines of integer ids.

    Args:
        tokens: Iterable of lines, each an iterable of words.
        ids_map: Mapping from word to integer id.

    Returns:
        A list of lists holding, for each line, the ids of its words in order.

    Raises:
        KeyError: If a word is not present in ``ids_map``.
    """
    return [[ids_map[word] for word in words] for words in tokens]
        
def reverse_map(ids_map):
    """Invert a mapping so that its values become keys and its keys values.

    Args:
        ids_map: Mapping to invert (here: word -> id).

    Returns:
        A new dict from value to key.  If several keys share a value, the key
        encountered last during iteration is the one kept.
    """
    return {value: key for key, value in ids_map.items()}

def write_poem(hmm, ids_map_r, nwords, nlines=14):
    """Generate a poem by sampling one emission sequence per line from an HMM.

    Args:
        hmm: Object exposing ``generate_emission(nwords)`` that returns a pair
            ``(emission_ids, states)``.
        ids_map_r: Mapping from emission id to word.
        nwords: Number of words requested for each line.
        nlines: Number of lines to generate (default 14).

    Returns:
        The poem as one string.  Each line is the space-joined words passed
        through ``str.capitalize`` and terminated by '.' when it closes a
        group of four lines or is the final line, by ',' otherwise, followed
        by a newline.  An empty string is returned when ``nlines`` is 0.
    """
    lines = []
    for n in range(nlines):
        line_ids, _states = hmm.generate_emission(nwords)
        text = ' '.join(ids_map_r[i] for i in line_ids).capitalize()

        # Compare against nlines - 1 rather than a fixed index so the last
        # line ends with a period for any requested poem length.
        ends_sentence = (n + 1) % 4 == 0 or n == nlines - 1
        lines.append(text + ('.' if ends_sentence else ','))

    return ''.join(line + '\n' for line in lines)
  
def make_graph(matrix, threshold=0.01):
    """Build a Graphviz digraph from the larger entries of a 2-D matrix.

    One node is created per row index.  For every entry
    ``matrix[i, j] >= threshold`` a directed edge from node ``i`` to node
    ``j`` is added, labelled with the entry formatted to two decimals.

    Args:
        matrix: 2-D array-like of weights.
        threshold: Minimum weight for an edge to be drawn (default 0.01).

    Returns:
        A ``gv.Digraph`` configured for PNG output.

    Raises:
        ValueError: If ``matrix`` is not two-dimensional.
    """
    matrix = np.asarray(matrix)
    n_rows, _ = matrix.shape

    graph = gv.Digraph(format='png')
    for i in range(n_rows):
        graph.node(str(i))

    # Look the weights up with the integer indices first; indices are turned
    # into node-name strings only when each edge is emitted.
    rows, cols = np.where(matrix >= threshold)
    weights = matrix[rows, cols]
    for src, dst, weight in zip(rows.tolist(), cols.tolist(), weights.tolist()):
        graph.edge(str(src), str(dst), label="%0.2f" % weight)
    return graph

In [0]:
# Build the model inputs: tokenized lines, a word -> id vocabulary, the
# id-encoded lines, and the inverse id -> word lookup used to decode output.
# NOTE(review): 'shakespeare.txt' is presumably one of the files uploaded in
# the first cell — confirm.
tokens = tokenize('shakespeare.txt')
ids_map = assign_ids(tokens)
ids = generate_ids(tokens, ids_map)
ids_map_r = reverse_map(ids_map)

In [0]:
# Fit the HMM on the id-encoded lines.
# NOTE(review): 25 and 1000 are presumably the number of hidden states and the
# number of training iterations — confirm against HMM.unsupervised_HMM.
hmm = unsupervised_HMM(ids, 25, 1000)

In [51]:
# Sample a poem with 8 words per line; the line count is write_poem's default
# nlines (14).
poem = write_poem(hmm, ids_map_r, 8)
print(poem)

Bring looks every have fair days and like,
With whose thy dust may and to thy,
Dignifies the story of the worst of with,
In their self my trespass argument to another.
Thy speaking false with old that pride ransom,
Of thee grieved alone confess first heavy dare,
Not as i on sworn wing of such,
These love and rhyme with the sun boat.
Were me watch beauty do promise every i,
Do speed one why age by authority i,
And i let fool much then walk of,
Reigns love not too me due be my.
Torment love it will thee the rebel and,
Lest never art visage from tell my neglect.



In [84]:
O = np.array(hmm.O)

# For every row of O, list the words whose entries are the ten largest.
# argsort() sorts ascending, so each list runs from the smallest of those ten
# values up to the largest.
for state, emission_row in enumerate(O):
    top10 = emission_row.argsort()[-10:]
    print("State " + str(state))
    # Each word keeps its trailing ", " separator; the extra "\n" plus
    # print's own newline leaves one blank line between states.
    print("".join(ids_map_r[j] + ", " for j in top10) + "\n")

State 0
must, can, would, may, shall, am, should, will, do, thou, 

State 1
thus, doth, how, so, being, but, nor, are, to, thee, 

State 2
where, their, as, him, what, thy, all, so, me, thee, 

State 3
that, to, for, not, she, it, as, but, thee, and, 

State 4
whilst, ah, how, then, for, or, which, but, o, and, 

State 5
this, no, and, or, all, so, to, in, with, of, 

State 6
than, no, thy, shall, do, though, what, as, when, that, 

State 7
you, me, is, that, this, as, all, in, not, to, 

State 8
the, with, of, so, from, a, by, be, in, to, 

State 9
but, from, by, with, what, for, her, of, to, in, 

State 10
other, beauty, mind, will, worth, name, praise, love, heart, self, 

State 11
like, let, yet, but, then, so, that, how, for, if, 

State 12
that, yet, therefore, nor, when, so, who, o, but, and, 

State 13
what, beauty, still, they, it, he, which, mine, you, i, 

State 14
you, on, live, make, not, in, dost, i, art, be, 

State 15
for, which, shall, love, still, hath, are, and, that

In [77]:
# Draw hmm.A (presumably the state-transition matrix — confirm against the HMM
# module) as a graph and render it under the name 'naive_hmm'.
A = np.array(hmm.A)
make_graph(A).render('naive_hmm')

'naive_hmm.png'