In [0]:
from google.colab import files
def getLocalFiles():
    """Prompt for a file upload and write every returned file into the working directory.

    `files.upload()` is iterated as a mapping of file name -> bytes content; each
    entry is written in binary mode to a file named after its key.
    NOTE(review): Colab's `files.upload()` may already save uploads to the working
    directory itself, which would make this rewrite redundant -- confirm.

    Returns:
        None
    """
    uploaded = files.upload()
    # Iterating an empty mapping is a no-op, so no explicit emptiness check is needed.
    for name, content in uploaded.items():
        # Context manager guarantees the handle is flushed and closed after writing.
        with open(name, 'wb') as handle:
            handle.write(content)
# Prompt for an upload and write each returned file to disk through getLocalFiles.
getLocalFiles()

# Second, independent upload prompt; its result is not passed through getLocalFiles.
# NOTE(review): confirm this extra prompt is intentional (e.g. for a second file)
# rather than a leftover -- it blocks "Run All" until the user responds.
files.upload()

In [0]:
import numpy as np
import nltk
import graphviz as gv

from nltk.tokenize import RegexpTokenizer
from nltk.corpus import cmudict
from backwards_HMM import unsupervised_HMM

In [0]:
def tokenize(filename):
    """Read a text file and return its lines as reversed lists of lowercase tokens.

    Each line is stripped of surrounding whitespace. A line is skipped when it
    consists only of digits, when it is at most one character long, or when it
    yields fewer than two tokens.

    Args:
        filename: path of the text file to read.

    Returns:
        list[list[str]]: one entry per kept line, with the tokens in reverse
        order (last word of the line first).
    """
    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    # A token is a run of word characters that may contain apostrophes or hyphens
    # internally, or a single word character. The original class also contained
    # "|", which inside [...] is a literal pipe rather than alternation, so pipes
    # were silently accepted inside words; it is dropped here.
    tokenizer = RegexpTokenizer(r"\w[\w'-]*\w|\w")

    tokens = []
    with open(filename) as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.isdigit() or len(line) <= 1:
                continue
            words = tokenizer.tokenize(line.lower())
            if len(words) > 1:
                # Stored back-to-front: the last word of the line comes first.
                tokens.append(words[::-1])
    return tokens

def parse_rhyme(tokens):
    """Group the words of `tokens` by a key built from the tail of their pronunciation.

    For every word found in the CMU pronouncing dictionary, the last listed
    pronunciation is taken and its final two elements are joined with a comma to
    form the key. Words missing from the dictionary are ignored.

    Args:
        tokens: iterable of lines, each an iterable of word strings.

    Returns:
        dict[str, list[str]]: key -> distinct words sharing that key; only keys
        with at least two distinct words are kept.
    """
    pronunciations = cmudict.dict()
    groups = {}

    for line in tokens:
        for word in line:
            try:
                phonemes = pronunciations[word][-1]
            except KeyError:
                # Word is not in the pronouncing dictionary.
                continue

            key = ','.join(phonemes[-2:])
            if not key:
                continue
            groups.setdefault(key, set()).add(word)

    # A key with a single word has nothing to rhyme with, so it is dropped.
    return {
        key: list(words)
        for key, words in groups.items()
        if len(words) >= 2
    }

def assign_ids(tokens):
    """Map each distinct word to a consecutive integer id in first-seen order.

    Args:
        tokens: iterable of lines, each an iterable of hashable words.

    Returns:
        dict: word -> id, with ids starting at 0.
    """
    ids_map = {}
    for word in (w for line in tokens for w in line):
        # The next free id always equals the number of words registered so far.
        ids_map.setdefault(word, len(ids_map))
    return ids_map

def generate_ids(tokens, ids_map):
    """Translate every word of every line into its id.

    Args:
        tokens: iterable of lines, each an iterable of words.
        ids_map: mapping word -> id; a word missing from it raises KeyError.

    Returns:
        list[list]: one list of ids per line, in the same order as `tokens`.
    """
    return [[ids_map[word] for word in line] for line in tokens]
        
def reverse_map(m, value):
    """Invert a mapping, in one of two modes selected by `value`.

    Args:
        m: the mapping to invert.
        value: 'id' for a one-to-one inversion (each value of `m` becomes a key
            pointing back at its original key); anything else treats each value
            of `m` as an iterable of members and maps every member to the list
            of keys it appears under.

    Returns:
        dict: the inverted mapping.
    """
    if value != 'id':
        members_to_keys = {}
        for key in m:
            for member in m[key]:
                members_to_keys.setdefault(member, []).append(key)
        return members_to_keys

    # One-to-one inversion; on duplicate values the last key seen wins.
    return {item: key for key, item in m.items()}
  
def write_poem(hmm, ids_map, ids_map_r, rhyme_map, rhyme_map_r, nwords, nlines=14):
    """Generate a poem whose line-ending words follow a fixed rhyme pattern.

    An end word is chosen for every line first: lines listed in the pattern get
    a random word from the same rhyme group as their partner line (other than
    the partner's word itself); all other lines get a random word that has a
    rhyme group. Each line is then produced by
    `hmm.generate_emission(nwords, <id of end word>)`, mapped back to words,
    joined with spaces and capitalized.

    Args:
        hmm: object exposing `generate_emission(nwords, seed_id)` returning a
            pair whose first item is an iterable of word ids.
        ids_map: mapping word -> id.
        ids_map_r: mapping id -> word.
        rhyme_map: mapping rhyme key -> list of words. Not modified.
        rhyme_map_r: mapping word -> list of rhyme keys (the first one is used).
        nwords: forwarded unchanged to `hmm.generate_emission`.
        nlines: number of lines to generate.

    Returns:
        str: the poem; every line ends with ',\n', except that every fourth
        line and the last line end with '.\n'.
    """
    # line index -> earlier line it must rhyme with (ABAB CDCD EFEF GG).
    pattern = {2: 0, 3: 1, 6: 4, 7: 5, 10: 8, 11: 9, 13: 12}

    end_words = [''] * nlines
    for n in range(nlines):
        if n in pattern:
            partner = end_words[pattern[n]]
            group = rhyme_map_r[partner][0]
            # Build a filtered copy instead of calling .remove() on the shared
            # list: mutating rhyme_map depleted the rhyme groups and made later
            # lines / later calls fail with ValueError.
            candidates = [word for word in rhyme_map[group] if word != partner]
            # Degenerate group with no other word: reuse the partner's word
            # rather than crash on an empty choice.
            end_words[n] = np.random.choice(candidates or [partner])
        else:
            end_words[n] = np.random.choice(list(rhyme_map_r.keys()))

    lines = []
    for n, end_word in enumerate(end_words):
        line_ids, _states = hmm.generate_emission(nwords, ids_map[end_word])
        text = ' '.join(ids_map_r[i] for i in line_ids).capitalize()
        # Full stop after each group of four lines and after the final line
        # (previously hard-coded as index 13, i.e. only right for nlines=14).
        closes_sentence = (n + 1) % 4 == 0 or n == nlines - 1
        lines.append(text + ('.\n' if closes_sentence else ',\n'))

    return ''.join(lines)

def make_graph(matrix):
    """Build a directed graph from a 2-D weight matrix.

    One node is created per row index. An edge i -> j is added for every entry
    `matrix[i, j] >= 0.01`, labelled with the entry formatted to two decimals.

    Args:
        matrix: 2-D numpy array of weights.

    Returns:
        graphviz.Digraph configured for PNG output.
    """
    graph = gv.Digraph(format='png')
    # Unpacking into two names also rejects anything that is not 2-D.
    n_rows, _ = matrix.shape

    for i in range(n_rows):
        graph.node(str(i))

    # np.where walks the matrix in row-major order, matching the original edge
    # order. The former `else: graph.edge(edge)` branch was unreachable (zip
    # always yields tuples) and has been removed.
    rows, cols = np.where(matrix >= 0.01)
    for src, dst in zip(rows.tolist(), cols.tolist()):
        graph.edge(str(src), str(dst), "%0.2f" % matrix[src, dst])

    return graph

In [0]:
# Tokenize the corpus; each kept line comes back as a reversed list of lowercase words.
tokens = tokenize('shakespeare.txt')

# Word <-> integer id lookups, plus the corpus re-encoded as lists of ids.
ids_map = assign_ids(tokens)
ids = generate_ids(tokens, ids_map)
ids_map_r = reverse_map(ids_map, value='id')

# Rhyme key -> words, and the inverse word -> list of rhyme keys.
rhyme_map = parse_rhyme(tokens)
rhyme_map_r = reverse_map(rhyme_map, value='rhyme')

In [0]:
# Build the HMM from the id-encoded (reversed) lines.
# NOTE(review): 25 and 1000 are presumably the number of hidden states and the
# number of training iterations -- confirm against backwards_HMM.unsupervised_HMM.
model = unsupervised_HMM(ids, 25, 1000)

In [89]:
# Generate a poem with the default 14 lines; 8 is passed as `nwords`, which
# write_poem forwards to hmm.generate_emission for every line.
poem = write_poem(model, ids_map, ids_map_r, rhyme_map, rhyme_map_r, 8)
print(poem)

One of unset candles the beauty twire worthiness,
Year earth hast bonds i not be happy,
That with thy alone held on the this,
That of bright life in second bed copy.
Thou from her glass are of my ragged,
Name forget as what then audit love runs,
Prize this i so shines that be naked,
Consecrate murd'rous root which have why impediments affections.
Robe old to bitterness are my need substance,
In thy duty there they depend defaced remains,
Of these fair in men to hours presence,
Her is to all this second wrongs gains.
Love shines under which hand for thus survive,
She sinks back i love's to not strive.



In [24]:
# For every row of model.O, list the ten words with the largest entries,
# highest first. NOTE(review): model.O is presumably the per-state emission
# matrix indexed by word id -- confirm against backwards_HMM.
O = np.array(model.O)

for i, row in enumerate(O):
    # argsort is ascending: keep the last ten indices and flip them.
    top10 = row.argsort()[-10:][::-1]
    print("State " + str(i))
    print("".join(ids_map_r[j] + ", " for j in top10), end="")
    print("\n")

State 0
which, thou, time, world, it, night, beauty, that, why, day, 

State 1
i, and, to, do, shall, is, but, in, o, may, 

State 2
me, not, be, you, am, so, thee, time's, live, both, 

State 3
more, are, you, all, my, so, but, or, how, like, 

State 4
art, and, sweet, then, fair, o, dost, when, but, which, 

State 5
a, all, with, up, best, than, one, as, beauty, what, 

State 6
it, beauty, part, love, new, one, how, eye, he, full, 

State 7
days, face, pride, away, night, respect, set, show, state, die, 

State 8
love, self, heart, eyes, eye, own, verse, mind, name, friend, 

State 9
of, is, with, and, can, hath, nor, are, or, have, 

State 10
to, in, on, of, and, for, doth, with, from, than, 

State 11
hue, mine, time, doom, memory, rhyme, hate, skill, age, youth, 

State 12
thou, so, love, have, but, as, that, thee, for, yet, 

State 13
will, sun, day, store, self, view, heart, wrong, see, where, 

State 14
alone, spent, expressed, appear, report, knife, lies, increase, dyed, commi

In [32]:
# Draw model.A as a directed graph (make_graph keeps entries >= 0.01 and uses
# PNG output) and render it under the name 'rhyme_hmm'.
# NOTE(review): model.A is presumably the state-transition matrix -- confirm.
A = np.array(model.A)
make_graph(A).render('rhyme_hmm')

'rhyme_hmm.png'