# Robert Frost poem generator using 2nd order Markov models

Because of the curse of dimensionality, we are going to store our Markov matrices as dictionaries. Storing them as dense tensors would be inefficient, since most of their entries would be 0.

In [80]:
import numpy as np
import string

In [81]:
# Sparse model tables, filled in by the training cell further down.
# initial      -> keyed by the first word of a line (plays the role of pi0)
# first_order  -> keyed by the first word, describes the second word (plays the role of A0)
# second_order -> keyed by a (word, next word) pair, describes the word after (plays the role of A[i,j,k])
initial = dict()
first_order = dict()
second_order = dict()

In [82]:
def remove_punctuation(s):
    """Return `s` with every character listed in `string.punctuation` removed.

    Only the ASCII punctuation characters from `string.punctuation` are dropped;
    all other characters (letters, digits, whitespace, non-ASCII symbols) are kept
    in their original order.
    """
    unwanted = frozenset(string.punctuation)
    kept_chars = [ch for ch in s if ch not in unwanted]
    return ''.join(kept_chars)

In [83]:
# dataset
##!wget -nc https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/robert_frost.txt

In [84]:
# create a function to handle adding a key value pair to a dictionary. The values will be lists of words
def add2dict(dictionary, key, value):
  """Append `value` to the list stored under `key`, creating the list on first use.

  `dictionary` is modified in place; nothing is returned.
  """
  bucket = dictionary.setdefault(key, [])
  bucket.append(value)


In [85]:
# Build the raw tables from the corpus, one poem line at a time.
#
# The tables are cleared first so that this cell can be re-run safely: later cells
# replace the successor lists with probability dicts (and the counts in `initial`
# with probabilities), and appending to / counting on top of those would either
# fail or silently corrupt the model.
initial.clear()
first_order.clear()
second_order.clear()

# `with` guarantees the file handle is closed once the corpus has been read.
with open('../datasets/poems/robert_frost.txt') as poem_file:
    for line in poem_file:
        tokens = remove_punctuation(line.rstrip().lower()).split()
        T = len(tokens)  # blank lines give T == 0 and contribute nothing
        for i, t in enumerate(tokens):
            if i == 0:
                # first word of the line: count it for the initial distribution
                initial[t] = initial.get(t, 0.) + 1
                continue

            t_1 = tokens[i-1]
            if i == T - 1:
                # last word of the line: record a fake 'END' token after (t_1, t)
                # so the generator can learn when to stop
                add2dict(second_order, (t_1, t), 'END')
            if i == 1:
                # second word of the line: only one word of history is available
                add2dict(first_order, t_1, t)
            else:
                # third word onwards: condition on the two previous words
                t_2 = tokens[i-2]
                add2dict(second_order, (t_2, t_1), t)


In [86]:
# normalize the distribution
# Divide every first-word count by the grand total so the values of `initial` sum to 1.
# Only existing keys are reassigned, so walking a snapshot of the keys is safe.
initial_total = sum(initial.values())
for token in list(initial):
    initial[token] = initial[token] / initial_total

In [87]:
# now we need to convert our lists of tokens into dictionaries of {token: probability}

def list2ProbabilityDict(ts):
    """Convert a list of tokens into a {token: relative frequency} dictionary.

    Each distinct token maps to (number of occurrences) / len(ts). Keys appear in
    order of first occurrence. An empty list yields an empty dictionary.
    """
    total = len(ts)
    occurrences = {}
    for token in ts:
        if token in occurrences:
            occurrences[token] += 1.0
        else:
            occurrences[token] = 1.0
    return {token: seen / total for token, seen in occurrences.items()}

In [88]:
# let's transform our dictionaries

# Swap every successor list for its {token: probability} dictionary, in place.
# Only existing keys are reassigned, so iterating over a snapshot of the keys is safe.
for table in (first_order, second_order):
    for context in list(table):
        table[context] = list2ProbabilityDict(table[context])

In [90]:
# next we should create a function to sample words based on the dictionaries of probabilities we just calculated

def sample_word(d):
    """Draw one token from a {token: probability} dictionary.

    A single uniform number in [0, 1) is drawn with `np.random.random()` and the
    tokens are walked in dictionary order, accumulating their probabilities; the
    first token whose cumulative probability exceeds the draw is returned.

    Parameters
    ----------
    d : dict
        Maps each token to its probability. The probabilities are expected to sum
        to 1 (up to floating-point rounding).

    Returns
    -------
    The sampled token (a key of `d`).

    Raises
    ------
    ValueError
        If `d` is empty, or if its probabilities sum to noticeably less than 1 and
        the draw lands in the uncovered part of [0, 1).
    """
    if not d:
        raise ValueError("cannot sample from an empty distribution")

    # get a number between 0 and 1 from a uniform distribution
    p0 = np.random.random()

    cumulative = 0.0
    token = None
    for token, probability in d.items():
        cumulative += probability
        if p0 < cumulative:
            return token

    # Reaching this point means p0 >= sum of all probabilities. Repeated float
    # addition can leave that sum a hair below 1.0 (ten weights of 0.1 add up to
    # 0.9999999999999999), so a tiny shortfall is attributed to rounding and the
    # last token is returned. A larger shortfall means `d` is not normalised.
    rounding_tolerance = 1e-9
    if cumulative < 1.0 - rounding_tolerance:
        raise ValueError(
            "probabilities sum to %r, expected 1.0" % (cumulative,)
        )
    return token

In [91]:
def generate(n_lines=4):
    """Print `n_lines` lines of text sampled from the fitted Markov tables.

    Each line starts with a word drawn from `initial`, continues with a word drawn
    from `first_order` given the first word, and is then extended from
    `second_order` given the two most recent words until the 'END' token is drawn.

    Parameters
    ----------
    n_lines : int, optional
        Number of lines to print. Defaults to 4.

    Returns
    -------
    None. The generated lines are written with `print`.
    """
    for _ in range(n_lines):
        # sample initial word from initial dict
        w0 = sample_word(initial)
        sentence = [w0]

        # A word that was only ever seen on one-word lines has no entry in
        # first_order (training only fills it when a second word exists), so
        # emit it as a one-word line instead of raising KeyError.
        second_word_dist = first_order.get(w0)
        if second_word_dist is None:
            print(' '.join(sentence))
            continue

        # sample second word from first_order dict based on w0
        w1 = sample_word(second_word_dist)
        sentence.append(w1)

        # second-order words until we find the 'END' token
        while True:
            w2 = sample_word(second_order[(w0, w1)])
            if w2 == 'END':
                break
            sentence.append(w2)
            # slide the two-word context window forward
            w0, w1 = w1, w2
        print(' '.join(sentence))

In [92]:
# Print four generated lines. No random seed is set in the cells shown here, so the
# text differs from run to run — NOTE(review): seed NumPy first if reproducible output is wanted.
generate()

then he came at me with one leg and a trapper looking in at the door and headboard of mothers bed is pushed
and fell back from him on the world will end in fire
from the house as far as i say
beyond which god is
