# Markov Chain Sentence Builder
This program builds random sentences from a text corpus that is fed into it. It uses a simple Markov chain whose state is the previous one, two, or three words; the user chooses which of these chain orders to apply.

## Import Libraries

In [1]:
import random
from collections import defaultdict

## Load and Process Corpus

In [2]:
def load_training_file(file, encoding="utf-8"):
    """Read the whole training text from disk.

    Args:
        file: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that non-ASCII characters (for example the curly quotes that
            ``prep_training`` removes) decode the same way on every platform
            instead of depending on the locale's default encoding.

    Returns:
        The complete contents of the file as a single string.
    """
    with open(file, encoding=encoding) as f:
        return f.read()

def prep_training(raw_sentences):
    """Turn raw training text into a flat list of lower-case tokens.

    Commas, underscores, straight double quotes and curly double quotes are
    removed. The marks ``: ; . ! ?`` are split off into tokens of their own so
    the chain can learn where clauses and sentences end. Apostrophes inside a
    word (``don't``) are kept.

    Args:
        raw_sentences: The training text as one string.

    Returns:
        A list of word and punctuation tokens in their original order.
    """
    text = raw_sentences.lower()

    # A single quote next to a space opens or closes a quotation. Replace the
    # quote-plus-space pair with one space: replacing it with nothing would
    # glue the neighbouring words together ("said 'hi' to" -> "saidhito").
    text = text.replace(" '", " ").replace("' ", " ")

    # Drop characters that carry no meaning for the model, in one pass.
    text = text.translate(str.maketrans("", "", ',"_”“'))

    # Put a space in front of each mark so split() yields it as its own token.
    for mark in ":;.!?":
        text = text.replace(mark, " " + mark)

    # split() with no argument splits on any whitespace, newlines included.
    return text.split()

## Build Markov Models

In [3]:
def map_word_to_word(corpus):
    """Build the one-word model: each token mapped to the tokens that follow it.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` whose keys are tokens and whose values list
        every token that appears directly after the key, duplicates included
        and in corpus order.
    """
    successors = defaultdict(list)
    # Pair every token with its right-hand neighbour; the final token has no
    # neighbour and therefore contributes no entry.
    for current, following in zip(corpus, corpus[1:]):
        successors[current].append(following)
    return successors

def map_2_words_to_word(corpus):
    """Build the two-word model: each adjacent token pair mapped to what follows.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by ``"first second"`` (two tokens joined
        by one space); each value lists every token seen right after that
        pair, duplicates included and in corpus order.
    """
    followers = defaultdict(list)
    # Walk three aligned views of the corpus: the pair and the token after it.
    for first, second, after in zip(corpus, corpus[1:], corpus[2:]):
        pair = " ".join((first, second))
        followers[pair].append(after)
    return followers

def map_3_words_to_word(corpus):
    """Build the three-word model: each run of three tokens mapped to what follows.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by three tokens joined with single
        spaces; each value lists every token seen right after that run,
        duplicates included and in corpus order.
    """
    trigram_followers = defaultdict(list)
    # Only start positions that still have a token after the three-token run
    # are visited; the range is empty for a corpus of three tokens or fewer.
    for start in range(len(corpus) - 3):
        run = " ".join(corpus[start:start + 3])
        trigram_followers[run].append(corpus[start + 3])
    return trigram_followers

def map_4_words_to_word(corpus):
    """Build the four-word model: each run of four tokens mapped to what follows.

    Args:
        corpus: List of tokens in reading order.

    Returns:
        A ``defaultdict(list)`` keyed by four tokens joined with single
        spaces; each value lists every token seen right after that run,
        duplicates included and in corpus order.
    """
    table = defaultdict(list)
    # Five shifted copies of the corpus zipped together give every window of
    # five consecutive tokens: a four-token context plus the token after it.
    windows = zip(*(corpus[shift:] for shift in range(5)))
    for *context, follower in windows:
        table[" ".join(context)].append(follower)
    return table

## Select Random Seed

In [4]:
def random_word(corpus):
    """Ask the user for the word that starts the generated text.

    Despite the name, nothing here is random: the word is read from the
    keyboard and accepted only if it occurs in ``corpus``.

    The input is tried exactly as typed first. If that fails, surrounding
    whitespace is removed and the word is lower-cased before a second lookup,
    because ``prep_training`` lower-cases the whole corpus and a seed typed as
    "The" would otherwise always be rejected.

    Args:
        corpus: Collection of tokens the seed must belong to.

    Returns:
        The matching token, or ``None`` (after printing a hint) when the
        word does not occur in the corpus.
    """
    typed = input("Enter a word to start a sentence: ")
    for candidate in (typed, typed.strip().lower()):
        if candidate in corpus:
            return candidate
    print("Try another word as a seed that exists in the corpus used.")
    return None

## Apply the Markov Models

In [5]:
def word_after_single(prefix, suffix_map_1):
    """Return the words recorded as following a one-word prefix.

    Args:
        prefix: The single token to look up.
        suffix_map_1: Mapping from a token to the list of tokens seen after it.

    Returns:
        A new list holding the recorded successors (duplicates kept), or an
        empty list when ``prefix`` has no entry.
    """
    # .get() rather than indexing: indexing a defaultdict would insert an
    # empty entry for every unknown prefix.
    suffixes = suffix_map_1.get(prefix)
    if suffixes is None:
        return []
    return list(suffixes)

def word_after_double(prefix, suffix_map_2):
    """Return the words recorded as following a two-word prefix.

    Args:
        prefix: Two tokens joined by a single space.
        suffix_map_2: Mapping from a two-token prefix to the list of tokens
            seen after it.

    Returns:
        A new list holding the recorded successors (duplicates kept), or an
        empty list when ``prefix`` has no entry.
    """
    # .get() rather than indexing: indexing a defaultdict would insert an
    # empty entry for every unknown prefix.
    suffixes = suffix_map_2.get(prefix)
    if suffixes is None:
        return []
    return list(suffixes)

def word_after_triple(prefix, suffix_map_3):
    """Return the words recorded as following a three-word prefix.

    Args:
        prefix: Three tokens joined by single spaces.
        suffix_map_3: Mapping from a three-token prefix to the list of tokens
            seen after it.

    Returns:
        A new list holding the recorded successors (duplicates kept), or an
        empty list when ``prefix`` has no entry.
    """
    # .get() rather than indexing: indexing a defaultdict would insert an
    # empty entry for every unknown prefix.
    suffixes = suffix_map_3.get(prefix)
    if suffixes is None:
        return []
    return list(suffixes)

def word_after_quadruple(prefix, suffix_map_4):
    """Return the words recorded as following a four-word prefix.

    Args:
        prefix: Four tokens joined by single spaces.
        suffix_map_4: Mapping from a four-token prefix to the list of tokens
            seen after it.

    Returns:
        A new list holding the recorded successors (duplicates kept), or an
        empty list when ``prefix`` has no entry.
    """
    # .get() rather than indexing: indexing a defaultdict would insert an
    # empty entry for every unknown prefix.
    suffixes = suffix_map_4.get(prefix)
    if suffixes is None:
        return []
    return list(suffixes)

## Build a Sentence

In [6]:
def _next_word_choices(history, order, suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4):
    """Return the candidate next words for the tail of ``history``."""
    lookups = {
        1: (word_after_single, suffix_map_1),
        2: (word_after_double, suffix_map_2),
        3: (word_after_triple, suffix_map_3),
        4: (word_after_quadruple, suffix_map_4),
    }

    # Normally the context is the last `order` tokens (fewer while the text is
    # still shorter than that). When the oldest token of that context is ":"
    # or ";" and an earlier token exists, one extra token is included so the
    # context does not begin on a bare punctuation mark.
    context_size = min(order, len(history))
    if len(history) > order and history[-order] in (":", ";"):
        context_size = order + 1

    # Try the preferred context first, then back off to shorter ones so an
    # unseen prefix (for example the very end of the corpus) does not leave
    # the caller with nothing to pick from.
    for size in range(context_size, 0, -1):
        lookup, suffix_map = lookups[size]
        choices = lookup(" ".join(history[-size:]), suffix_map)
        if choices:
            return choices
    return []


def _join_tokens(tokens):
    """Join tokens with spaces and re-attach punctuation to the preceding word."""
    text = " ".join(tokens)
    for mark in (":", ";", ".", "?", "!"):
        text = text.replace(" " + mark, mark)
    return text


def sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus):
    """Interactively generate text from the Markov models.

    Prompts, in order, for the number of sentences, the chain order (1, 2 or
    3) and a seed word (via ``random_word``). Words are then drawn with
    ``random.choice`` from the successors of the current context until the
    requested number of sentence-ending tokens (".", "!" or "?") has been
    produced.

    Args:
        suffix_map_1: One-word prefix model.
        suffix_map_2: Two-word prefix model.
        suffix_map_3: Three-word prefix model.
        suffix_map_4: Four-word prefix model (used only for the extended
            context after ":" or ";" when the chain order is 3).
        corpus: Token list used to validate the seed word.

    Returns:
        The generated text with punctuation re-attached. An empty string is
        returned, after an explanatory message, when a prompt is answered
        with a non-integer, when the chain order is not 1, 2 or 3, or when
        the seed word is not in the corpus. If no successor can be found for
        the current context even after backing off to shorter contexts,
        generation stops early and the text built so far is returned.
    """
    try:
        number_of_sentences = int(input("How many sentences do you want? "))
        number_of_markov_chains = int(input("Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? "))
    except ValueError:
        # Only a failed int() conversion is handled here; KeyboardInterrupt
        # and similar are deliberately left to propagate.
        print("You entered something other than integers. Enter only integers.")
        return ""

    # Reject an unsupported order up front. Checking it inside the generation
    # loop would repeat the message forever, because nothing there could ever
    # end the loop.
    if number_of_markov_chains not in (1, 2, 3):
        print("You entered an integer of Markov chains either less than 1 or more than 3, which are not available options to choose. Please only choose 1, 2, or 3 Markov chains to be applied.")
        return ""

    word = random_word(corpus)
    if word is None:
        return ""

    stop_characters = (".", "!", "?")
    current_sentence = [word]
    for _ in range(number_of_sentences):
        while True:
            word_choices = _next_word_choices(
                current_sentence,
                number_of_markov_chains,
                suffix_map_1,
                suffix_map_2,
                suffix_map_3,
                suffix_map_4,
            )
            if not word_choices:
                # Dead end: random.choice([]) would raise IndexError, so
                # return what has been generated so far instead.
                return _join_tokens(current_sentence)
            word = random.choice(word_choices)
            current_sentence.append(word)
            if word.endswith(stop_characters):
                break
    return _join_tokens(current_sentence)

## Code to Generate Random Sentences

In [7]:
# Load and tokenize the training text once; every model is built from the same corpus.
raw_sentences = load_training_file("Frankenstein.txt")
corpus = prep_training(raw_sentences)

# Build the one-, two-, three- and four-word prefix models, in that order.
suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4 = (
    build_model(corpus)
    for build_model in (
        map_word_to_word,
        map_2_words_to_word,
        map_3_words_to_word,
        map_4_words_to_word,
    )
)

In [8]:
# Interactive run: prompts for the sentence count, the chain order and a seed word, then prints the generated text.
print(sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus))

How many sentences do you want? 30
Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? 1
Enter a word to start a sentence: the
the fair the tenets of a slight ligaments are disposed to the destruction and sympathise with new hopes yet which you will do not change of meeting with the pursuit of death of britain. he further the fire; then the hundredth part with its creator; he turned adrift homeless penniless and went out fishing tackle and how shall see you mad i escaped to me to seize the heart. it was sunk me to the light vanished when the summits are among the aged trees that he has happened to be the first deprived of science and his prize-money to me with pleasure or if there or friend that the fire. now we ought to waste and caused these good followed and causing me i found a little household. i left in justice i not sufficiently to the forest. thus far north if you follow me at length i drew near a concussion of being. i have not merely

In [9]:
# Interactive run: prompts for the sentence count, the chain order and a seed word, then prints the generated text.
print(sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus))

How many sentences do you want? 30
Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? 2
Enter a word to start a sentence: the
the bottom of the room had before charmed me; but her gentleness and pleasure that the presence of another month. in the noble art of language. my letter and i may trample you to perish on the habitable globe. its hills are covered with a pail on her deathbed the fortitude and awaken in me the remembrance of the uneasiness of this imprudence were fatal to her. the turk was fixed upon every object the most poignant grief? he meant to please and he my enemy with the friends he had directed their inquiries towards the beings concerning whom i accuse and for whose seizure and punishment i call on you only could he further the execution of his existence and events from which i had first become master of the different branches of physical science from which i have written myself into a cabriolet and bade me be at peace aroun

In [10]:
# Interactive run: prompts for the sentence count, the chain order and a seed word, then prints the generated text.
print(sentence_builder(suffix_map_1, suffix_map_2, suffix_map_3, suffix_map_4, corpus))

How many sentences do you want? 30
Choose 1, 2, or 3 Markov chains to be applied. How many Markov chains would you want to apply? 3
Enter a word to start a sentence: the
the tortures of the accused did not equal mine; she was sustained by innocence but the fangs of remorse tore my bosom and i did right in refusing to create a companion for the first creature. he showed unparalleled malignity and selfishness in evil; he destroyed my friends; he devoted to destruction beings who possessed exquisite sensations happiness and wisdom; nor do i find it blamable. in a thousand ways he smoothed for me the path of knowledge and made the paths of the mountain where the priest and his mistress were overwhelmed by an avalanche and where their dying voices are still said to be harmless; but have you not already shown a degree of renovating warmth which i had endured. after some weeks my wound healed and i continued my route. the sledge was still visible nor did i again lose sight of it would produce