# Markov Chain Sentence Builder
This program builds random sentences from a corpus of example sentences fed into it. It uses simple Markov chains that choose each next word by looking at the previous one word and the previous two words, alternating between the two.

## Import Libraries

In [1]:
import random
from collections import defaultdict

## Load and Process Corpus

In [2]:
def load_training_file(file):
    """Read the whole training file and return its contents as one string.

    Args:
        file: Path of the text file to read.

    Returns:
        The complete text of the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file, encoding="utf-8") as f:
        return f.read()

def prep_training(raw_sentences):
    """Split the raw training text into a flat list of word tokens.

    Args:
        raw_sentences: The training text as a single string.

    Returns:
        A list of whitespace-separated tokens, in their original order.
        Punctuation stays attached to the word it touches.
    """
    # With no separator, split() treats any run of whitespace (newlines
    # included) as one delimiter, so no newline pre-processing is needed.
    tokens = raw_sentences.split()
    return tokens

## Build Markov Models

In [3]:
def map_word_to_word(corpus):
    """Build the one-word Markov table.

    Args:
        corpus: Sequence of word tokens in reading order.

    Returns:
        A defaultdict(list) mapping each word to every word that
        immediately follows it in the corpus. Repeats are kept, so frequent
        successors appear more than once.
    """
    followers = defaultdict(list)
    # Pair each word with its right-hand neighbour; the final word has no
    # neighbour and therefore contributes no entry of its own.
    for current, following in zip(corpus, corpus[1:]):
        followers[current].append(following)
    return followers

def map_2_words_to_word(corpus):
    """Build the two-word Markov table.

    Args:
        corpus: Sequence of word tokens in reading order.

    Returns:
        A defaultdict(list) whose keys are two consecutive words joined by
        a single space and whose values list every word that follows that
        pair in the corpus. Repeats are kept.
    """
    pair_followers = defaultdict(list)
    # Slide a three-word window over the corpus: the first two words form
    # the key and the third is the recorded successor.
    for position in range(len(corpus) - 2):
        first, second, following = corpus[position:position + 3]
        pair_followers[' '.join((first, second))].append(following)
    return pair_followers

## Select a Seed Word

In [4]:
def random_word(corpus):
    """Ask the user for a seed word and accept it only if the corpus has it.

    Args:
        corpus: Collection of word tokens the seed must belong to.

    Returns:
        The word typed by the user when it occurs in the corpus; otherwise
        None, after printing a hint to try a different word.
    """
    candidate = input("Enter a word to start a sentence: ")
    if candidate not in corpus:
        print("Try another word as a seed that exists in the corpus used.")
        return None
    return candidate

## Apply the Markov Models

In [5]:
def word_after_single(prefix, suffix_map_1):
    """Look up the words recorded as following a single word.

    Args:
        prefix: The word to look up.
        suffix_map_1: Mapping from a word to the list of its successors.

    Returns:
        A new list holding the successors of ``prefix``, or an empty list
        when ``prefix`` is not a key of the mapping.
    """
    # .get() rather than indexing, so a defaultdict is not given a new
    # empty entry for an unknown prefix.
    followers = suffix_map_1.get(prefix)
    if followers is None:
        return []
    return list(followers)

def word_after_double(prefix, suffix_map_2):
    """Look up the words recorded as following a two-word prefix.

    Args:
        prefix: Two words joined by a single space.
        suffix_map_2: Mapping from a two-word prefix to its successors.

    Returns:
        A new list holding the successors of ``prefix``; the list is empty
        when the mapping has no entry for ``prefix``.
    """
    matches = suffix_map_2.get(prefix)
    accepted = []
    if matches is not None:
        # Copy so callers never hold a reference to the table's own list.
        accepted.extend(matches)
    return accepted

## Build a Sentence

In [6]:
def sentence(suffix_map_1, suffix_map_2, corpus, end_prev_sentence):
    """Generate one sentence as a list of words, starting from a user seed.

    The seed word is obtained from ``random_word``. Further words are then
    drawn at random, alternating between the one-word table (keyed on the
    last word) and the two-word table (keyed on the last two words).
    Generation stops as soon as a drawn word ends with ".", ":", "!" or "?".

    Args:
        suffix_map_1: Mapping from a word to the words that may follow it.
        suffix_map_2: Mapping from "word1 word2" to the words that may
            follow that pair.
        corpus: Word tokens used to validate the seed word.
        end_prev_sentence: Not used; kept so existing callers keep working.

    Returns:
        The list of generated words, beginning with the seed. The list is
        empty when the seed word was rejected. If the chain reaches a
        prefix with no recorded successor, the words built so far are
        returned without a terminating punctuation mark.

    Note:
        The loop only ends on a stop character or a dead end, so a corpus
        whose reachable words never end in a stop character and never run
        out of successors will keep the loop running.
    """
    stop_characters = (".", ":", "!", "?")
    current_sentence = []
    word = random_word(corpus)
    if word is None:
        return current_sentence
    current_sentence.append(word)

    # First step uses the one-word table; after that the two tables take
    # turns, matching the original single/double alternation.
    use_pair_table = False
    while True:
        if use_pair_table:
            prefix = current_sentence[-2] + ' ' + current_sentence[-1]
            word_choices = word_after_double(prefix, suffix_map_2)
        else:
            word_choices = word_after_single(current_sentence[-1], suffix_map_1)
        if not word_choices:
            # Dead end (e.g. the prefix only occurs at the very end of the
            # corpus): random.choice would raise IndexError on an empty
            # list, so finish the sentence here instead.
            break
        word = random.choice(word_choices)
        current_sentence.append(word)
        if word.endswith(stop_characters):
            break
        use_pair_table = not use_pair_table
    return current_sentence

## Code to Generate Random Sentences

In [7]:
# Placeholder argument: sentence() accepts it but never reads it.
end_prev_sentence = ""
# Read the training text and split it into word tokens.
raw_sentences = load_training_file("sentences.txt")
corpus = prep_training(raw_sentences)
# Build the one-word and two-word prefix -> next-word lookup tables.
suffix_map_1 = map_word_to_word(corpus)
suffix_map_2 = map_2_words_to_word(corpus)

In [8]:
# Prompt for a seed word, then print the generated sentence as a list of words.
print(sentence(suffix_map_1, suffix_map_2, corpus, end_prev_sentence))

Enter a word to start a sentence: the
['the', 'man', 'eats', 'tacos.']
