# Markov Chain Sentence Builder
This program builds random sentences from a text corpus that is fed into it. It uses simple Markov chains that choose each next word from the one, two, or three words that precede it; the user chooses the number of Markov chains (1, 2, or 3) to be applied.

## Import Libraries

In [1]:
import random
from collections import defaultdict

## Load and Process Corpus

In [2]:
def load_training_file(file, encoding="utf-8"):
    """Read an entire training text file and return its contents as one string.

    Args:
        file: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that typographic quotes and dashes in the text decode the same way
            on every platform instead of depending on the locale default.

    Returns:
        The full contents of the file as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in the given encoding.
    """
    with open(file, encoding=encoding) as f:
        return f.read()

def prep_training(raw_sentences):
    """Normalise raw text into a flat list of lowercase tokens.

    The text is lowercased; commas, double quotes (straight and curly) and
    underscores are removed; and the marks ``: ; . ! ?`` are split off as
    separate tokens so they can act as words in the Markov tables.

    Args:
        raw_sentences: The raw training text as a single string.

    Returns:
        A list of tokens (words and the stand-alone marks ``: ; . ! ?``).
    """
    text = raw_sentences.lower()

    # Only affects the very first/last characters of the whole text: leading
    # or trailing characters from this set are trimmed before tokenising.
    text = text.strip(",_”“:;")

    # A straight single quote next to a space is a quotation mark, not an
    # apostrophe inside a word. Replace the pair with one space (not the empty
    # string) so the neighbouring words are not glued together.
    text = text.replace(" '", " ").replace("' ", " ")

    # One pass over the text: delete noise characters and put a space in
    # front of the marks that must become their own tokens.
    table = str.maketrans({
        ',': None,
        '"': None,
        '_': None,
        '”': None,
        '“': None,
        ':': " :",
        ';': " ;",
        '.': " .",
        '!': " !",
        '?': " ?",
    })

    # split() with no arguments splits on any whitespace run, newlines included.
    return text.translate(table).split()

## Build Markov Models

In [3]:
def map_word_to_word(corpus):
    """Build the one-word table: each word maps to every word that follows it.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by a word; each value lists, in corpus
        order and with repeats, the tokens found immediately after that word.
    """
    followers = defaultdict(list)
    # Pair every token with its right-hand neighbour; the last token has none.
    for current, following in zip(corpus, corpus[1:]):
        followers[current].append(following)
    return followers

def map_2_words_to_word(corpus):
    """Build the two-word table: each adjacent word pair maps to its followers.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by ``"w1 w2"`` (two consecutive tokens
        joined by a single space); each value lists, with repeats, the tokens
        found immediately after that pair.
    """
    pair_followers = defaultdict(list)
    # Slide a three-token window over the corpus: two for the key, one suffix.
    for start in range(len(corpus) - 2):
        first, second, following = corpus[start:start + 3]
        pair_followers[' '.join((first, second))].append(following)
    return pair_followers

def map_3_words_to_word(corpus):
    """Build the three-word table: each word triple maps to its followers.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by ``"w1 w2 w3"`` (three consecutive
        tokens joined by single spaces); each value lists, with repeats, the
        tokens found immediately after that triple.
    """
    triple_followers = defaultdict(list)
    # Four staggered views of the corpus give (w1, w2, w3, suffix) windows;
    # zip stops as soon as the shortest view (the suffixes) runs out.
    windows = zip(corpus, corpus[1:], corpus[2:], corpus[3:])
    for first, second, third, following in windows:
        triple_followers[' '.join((first, second, third))].append(following)
    return triple_followers

def map_4_words_to_word(corpus):
    """Build the four-word table: each run of four words maps to its followers.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by ``"w1 w2 w3 w4"`` (four consecutive
        tokens joined by single spaces); each value lists, with repeats, the
        tokens found immediately after that run.
    """
    prefix_length = 4
    quad_followers = defaultdict(list)
    # Every start position that still leaves room for a suffix token.
    for start in range(len(corpus) - prefix_length):
        end = start + prefix_length
        quad_followers[' '.join(corpus[start:end])].append(corpus[end])
    return quad_followers

## Select Random Seed

In [4]:
def random_word(corpus):
    """Prompt the user for a seed word and return it if the corpus contains it.

    Despite the name, nothing is picked at random: the word is read from
    standard input. Surrounding whitespace is ignored, and if the word as
    typed is not in the corpus its lowercase form is tried as well, so that
    "The" matches a lowercased corpus.

    Args:
        corpus: Collection of tokens the seed must belong to.

    Returns:
        The matching token from the corpus, or ``None`` (after printing a
        hint) when the entered word does not occur in it.
    """
    seed = input("Enter a word to start a sentence: ").strip()
    # Prefer the exact spelling; fall back to lowercase for capitalised input.
    for candidate in (seed, seed.lower()):
        if candidate in corpus:
            return candidate
    print("Try another word as a seed that exists in the corpus used.")
    return None

## Apply the Markov Models

In [5]:
def word_after_single(prefix, suffix_map_1):
    """Return the candidate next words recorded for a one-word prefix.

    Args:
        prefix: The word to look up.
        suffix_map_1: Mapping from a word to the list of words seen after it.

    Returns:
        A new list with the recorded followers of ``prefix``; empty when the
        prefix is not in the mapping.
    """
    followers = suffix_map_1.get(prefix)
    return [] if followers is None else list(followers)

def word_after_double(prefix, suffix_map_2):
    """Return the candidate next words recorded for a two-word prefix.

    Args:
        prefix: Two words joined by a single space.
        suffix_map_2: Mapping from a two-word prefix to the list of words
            seen after it.

    Returns:
        A new list with the recorded followers of ``prefix``; empty when the
        prefix is not in the mapping.
    """
    # A missing prefix yields None from .get(); iterate over nothing then.
    return [candidate for candidate in suffix_map_2.get(prefix) or ()]

def word_after_triple(prefix, suffix_map_3):
    """Return the candidate next words recorded for a three-word prefix.

    Args:
        prefix: Three words joined by single spaces.
        suffix_map_3: Mapping from a three-word prefix to the list of words
            seen after it.

    Returns:
        A new list with the recorded followers of ``prefix``; empty when the
        prefix is not in the mapping.
    """
    picked = []
    matches = suffix_map_3.get(prefix)
    if matches is None:
        # Unknown prefix: nothing to offer.
        return picked
    picked.extend(matches)
    return picked

def word_after_quadruple(prefix, suffix_map_4):
    """Return the candidate next words recorded for a four-word prefix.

    Args:
        prefix: Four words joined by single spaces.
        suffix_map_4: Mapping from a four-word prefix to the list of words
            seen after it.

    Returns:
        A new list with the recorded followers of ``prefix``; empty when the
        prefix is not in the mapping.
    """
    recorded = suffix_map_4.get(prefix)
    if recorded is not None:
        # Hand back a copy so callers never mutate the table's own list.
        return [*recorded]
    return []

## Build a Sentence

In [6]:
def _context_size(current_sentence, order):
    """Return how many trailing words form the lookup prefix for the next word."""
    length = len(current_sentence)
    # A ':' or ';' among the most recent words widens the context by one word,
    # provided enough words have been generated. For order 1 the last two
    # words are inspected; for orders 2 and 3 the last `order` words are.
    recent = current_sentence[-max(order, 2):]
    if length > order and any(token in (":", ";") for token in recent):
        return order + 1
    # Otherwise use the requested order, or fewer words while the sentence is
    # still shorter than that.
    return min(order, length)


def sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus):
    """Interactively generate text with the Markov tables and return it.

    Prompts on standard input for the number of sentences, the number of
    Markov chains (1, 2, or 3 preceding words used to pick each next word)
    and a seed word, then appends randomly chosen follower words until the
    requested number of sentence-ending tokens ('.', '!' or '?') was produced.

    Args:
        suffix_map_1: Table from one word to its recorded followers.
        suffix_map_2: Table from a two-word prefix to its recorded followers.
        suffix_map_3: Table from a three-word prefix to its recorded followers.
        suffix_map_4: Table from a four-word prefix to its recorded followers.
        corpus: Token list used to validate the seed word.

    Returns:
        The generated text with ':', ';', '.', '?' and '!' re-attached to the
        preceding word. An empty string is returned when a prompt receives a
        non-integer, when the chain count is not 1, 2 or 3, or when the seed
        word is rejected. Generation stops early if the current prefix has no
        recorded follower.
    """
    final_sentence = ""
    try:
        number_of_sentences = int(input("How many sentences do you want? "))
        number_of_markov_chains = int(input("Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? "))
    except ValueError:
        # Only a failed int() conversion is handled; Ctrl-C etc. propagate.
        print("You entered something other than integers. Enter only integers.")
        return final_sentence

    # Reject an unsupported chain count once, up front, instead of looping on
    # it forever inside the generation loop.
    if number_of_markov_chains not in (1, 2, 3):
        print("You entered an integer of Markov chains either less than 1 or more than 3, which are not available options to choose. Please only choose 1, 2, or 3 Markov chains to be applied.")
        return final_sentence

    word = random_word(corpus)
    if word is None:
        return final_sentence

    # Lookup function and table for each possible prefix length.
    lookups = {
        1: (word_after_single, suffix_map_1),
        2: (word_after_double, suffix_map_2),
        3: (word_after_triple, suffix_map_3),
        4: (word_after_quadruple, suffix_map_4),
    }
    stop_characters = (".", "!", "?")
    current_sentence = [word]
    dead_end = False

    for _ in range(number_of_sentences):
        while True:
            size = _context_size(current_sentence, number_of_markov_chains)
            lookup, suffix_map = lookups[size]
            prefix = ' '.join(current_sentence[-size:])
            word_choices = lookup(prefix, suffix_map)
            if not word_choices:
                # No recorded follower (e.g. the prefix only occurs at the end
                # of the corpus): random.choice would raise, so stop here.
                dead_end = True
                break
            word = random.choice(word_choices)
            current_sentence.append(word)
            if word.endswith(stop_characters):
                break
        if dead_end:
            break

    final_sentence = ' '.join(current_sentence)
    # Punctuation was tokenised as separate words; glue it back on.
    for mark in (":", ";", ".", "?", "!"):
        final_sentence = final_sentence.replace(' ' + mark, mark)
    return final_sentence

## Code to Generate Random Sentences

In [7]:
# Read the training text and tokenise it into the corpus word list.
raw_sentences = load_training_file("Frankenstein.txt")
corpus = prep_training(raw_sentences)

# Build the four suffix tables (1-, 2-, 3- and 4-word prefixes) in order.
suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4 = (
    build_map(corpus)
    for build_map in (
        map_word_to_word,
        map_2_words_to_word,
        map_3_words_to_word,
        map_4_words_to_word,
    )
)

In [8]:
# Interactive run: sentence_builder prompts for the number of sentences, the
# number of Markov chains and a seed word; the generated text is printed.
print(sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus))

How many sentences do you want? 30
Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? 1
Enter a word to start a sentence: the
the day and all the villain. he was something new incident happened; my ashes this suspense; yet at the sufferings i it is not help supposing all judges from my boy’s apprehensions. i shunned and wandered here some future maintenance. agatha and nights clear morning we were called to give you have lately living torture such was i will not then hanging round me with the fiend rang in a rock. but it was agonised with the streets of my desire as night wretchedly. i have exchanged our captain offered me in death; my lips and i should take my fellow beings who asked her hands a more time before dark speck upon their sympathies—as if you were directed to dawn i may have preserved my character and yet ever-changing scene has this to occupy herself of which promised gift i cannot be fulfilled and sense of returning to possess 

In [9]:
# Another interactive run with the same tables; the answers typed at the
# prompts decide how many sentences are built and how many words of context
# are used.
print(sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus))

How many sentences do you want? 30
Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? 2
Enter a word to start a sentence: the
the sanguinary laws of electricity and galvanism which was to rob me of my ability to give life to whom she had nursed from its birth and growth of our own situation. the poor animals that crossed my path. the brave fellows whom i was. the delight i felt this delay very bitterly; for i had no fear therefore that he shall have softened your despair new and mightier scenes of nature and rashly and ignorantly i had why should i by my younger protectors. i approached the road; i heard of even in these wild and enthusiastic imagination was dreadful in itself sufficient beauty to obtain the liberty of his sentence was pronounced and i feel so many men of genius however erroneously directed scarcely ever fail in my face and said ‘good night sweet safie.’ he sat absorbed in reflection. then again the frost came and made the pa

In [10]:
# Interactive run reusing the prebuilt tables and corpus; all settings
# (sentence count, chain count, seed word) are read from the prompts.
print(sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus))

How many sentences do you want? 30
Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? 3
Enter a word to start a sentence: the
the moment; and my present sensations strongly intimated that the fiend followed me and would discover himself to me when i felt the cheering warmth of summer and although not so warm as in england the southern gales which blow us speedily towards those shores which i so much desire; or must i die and he yet live? if i succeed many many months perhaps years will pass before you and have no cause for despair. but i—i have lost everything and cannot begin life anew. as he went on i felt as if he had been out the whole of our good uncle thomas’ library. my education was neglected yet i was passionately fond of reading. these volumes were my study day and night and my familiarity with them increased that regret which i had until now sought to forget: the whole train of my ideas. when i run over the frightful catalogue of m