# Exam 1 - Nth-Order Markov Chains

In [1]:
import os
import numpy as np

## Nth-Order Markov Models

In [2]:
import random

def build_markov_model(markov_model, text, order=1):
    """Add the word transitions found in ``text`` to an Nth-order Markov model.

    The model is a dict of dicts holding transition counts:

    * ``markov_model['*S*']`` maps each starting state (a tuple of the first
      ``order`` words of a text) to the number of texts that began with it.
    * ``markov_model[state]`` maps each word that followed ``state`` (a tuple
      of ``order`` consecutive words) to the number of times it did so. The
      marker ``'*E*'`` is recorded as the word following the final state.

    Args:
        markov_model: Dict to update in place; pass an empty dict to start a
            new model.
        text: Whitespace-separated text to learn from.
        order: Number of consecutive words that make up one state.

    Returns:
        The same ``markov_model`` object, updated with the counts from
        ``text``.
    """
    words = text.split()

    # A text with fewer than ``order`` words cannot fill a single state.
    # Recording it anyway would register a truncated start state (padded by
    # the end marker) that has no outgoing transitions, which makes text
    # generation fail with a KeyError. Blank input lines hit this case.
    if len(words) < order:
        return markov_model

    words.append("*E*")

    # Count which state this text starts with.
    start_state = tuple(words[:order])
    start_counts = markov_model.setdefault('*S*', {})
    start_counts[start_state] = start_counts.get(start_state, 0) + 1

    # Slide a window of ``order`` words over the text and count the word
    # that follows each window.
    for i in range(len(words) - order):
        state = tuple(words[i:i + order])
        next_word = words[i + order]
        transitions = markov_model.setdefault(state, {})
        transitions[next_word] = transitions.get(next_word, 0) + 1

    return markov_model

def get_next_word(current_word, markov_model):
    """Randomly pick a successor of ``current_word``, weighted by its count.

    Args:
        current_word: A key of ``markov_model`` (the state to leave).
        markov_model: Dict mapping each state to a dict of
            ``{successor: count}``.

    Returns:
        One of the successors recorded for ``current_word``; a successor
        with a larger count is proportionally more likely to be returned.
    """
    transitions = markov_model[current_word]

    # Draw an integer ticket in the inclusive range 1..(total count).
    remaining = random.randint(1, sum(transitions.values()))

    # Walk the successors, letting each one consume as many tickets as its
    # count; the successor whose share contains the drawn ticket is chosen.
    for successor, count in transitions.items():
        if remaining <= count:
            return successor
        remaining -= count

    return None
    
def generate_random_text(markov_model):
    """Generate one random text by walking ``markov_model`` from start to end.

    A starting state is drawn from the ``"*S*"`` entry of the model, then
    successors are drawn repeatedly with ``get_next_word`` until the end
    marker ``"*E*"`` comes up.

    Args:
        markov_model: Model dict containing a ``"*S*"`` entry and an entry
            for every state reachable from it.

    Returns:
        The generated words joined by single spaces; the start and end
        markers are not included.
    """
    # The walk always begins from the model's start entry, which yields a
    # tuple of words that opens the sentence.
    state = get_next_word("*S*", markov_model)
    words = list(state)

    finished = False
    while not finished:
        successor = get_next_word(state, markov_model)
        finished = successor == "*E*"

        # The end marker terminates the walk but is not part of the output.
        if not finished:
            words.append(successor)

        # Slide the state window: drop the oldest word, add the newest.
        window = list(state)
        del window[0]
        window.append(successor)
        state = tuple(window)

    return ' '.join(words)

In [4]:
markov_model = dict()

# Open the quotes file in a context manager so the handle is closed as soon
# as every line has been read (the bare open() call never closed it).
with open("data/quotes.txt", "r") as file:
    # Feed each line of the file to the model as one text, using states of
    # three consecutive words.
    for quote in file:
        markov_model = build_markov_model(markov_model, quote, 3)
#print (markov_model)

# Uncomment to make the generated output reproducible between runs.
#random.seed(3)
print (generate_random_text(markov_model))
print (generate_random_text(markov_model))

“Programming is like sex: one mistake and you’re providing support for a lifetime.”(Michael Sinz)
“There are two major products that come out of Berkeley: LSD and UNIX. We don’t believe this to be a coincidence.”(Jeremy S. Anderson)
