In [2]:
import random
from collections import defaultdict

def build_markov_chain(sample_text: str, chain_length: int) -> dict:
    """Build a word-level Markov chain from whitespace-separated text.

    Every run of ``chain_length`` consecutive words becomes a tuple key, and
    the word that follows the run is appended to that key's list. Repeated
    followers are kept, so picking uniformly from a list reflects how often
    each follower occurred in the text.

    Args:
        sample_text: Text to learn from. It is split on whitespace only, so
            punctuation stays attached to the words.
        chain_length: Number of words per key. ``0`` produces a single
            empty-tuple key that maps to every word of the text.

    Returns:
        A ``defaultdict(list)`` mapping word tuples to follower lists. It is
        empty when the text has no more than ``chain_length`` words.

    Raises:
        ValueError: If ``sample_text`` contains no words, or if
            ``chain_length`` is negative.
    """
    # Tokenize first; an empty token list means empty or whitespace-only text.
    words = sample_text.split()
    if not words:
        raise ValueError("Sample text is empty. Cannot build Markov chain.")

    # A negative length would not fail on its own: negative slice bounds and
    # indices would silently produce meaningless keys and followers.
    if chain_length < 0:
        raise ValueError(
            f"chain_length must be non-negative, got {chain_length}."
        )

    # NOTE: the result is a defaultdict, so reading a missing key with
    # ``chain[key]`` inserts it; use ``chain.get(key)`` for lookups.
    markov_chain = defaultdict(list)
    for i in range(len(words) - chain_length):
        key = tuple(words[i:i + chain_length])
        markov_chain[key].append(words[i + chain_length])

    return markov_chain

def generate_sentence(markov_chain: dict, start_words: list[str], num_generated: int) -> str:
    """Generate text by walking a Markov chain from the given start words.

    The key length of the chain is read from the chain itself, so the start
    words do not have to match it:

    * fewer start words than the key length: a key beginning with the start
      words is chosen at random and its remaining words are appended, so the
      walk has a full key to continue from (with no start words at all, any
      key may be chosen);
    * more start words than the key length: only the trailing words are used
      as the lookup key.

    Args:
        markov_chain: Mapping of word tuples to lists of follower words.
            Assumes all keys have the same length, as produced by
            ``build_markov_chain``.
        start_words: Words the output begins with. They are always kept in
            full, even if there are more of them than ``num_generated``.
        num_generated: Upper bound on the number of words in the result
            (start words included).

    Returns:
        The generated words joined by single spaces. Generation stops early
        when the current state has no recorded follower.
    """
    generated_sentence = list(start_words)
    if not markov_chain:
        return ' '.join(generated_sentence)

    # Number of words per key; taken from the first key (see assumption above).
    order = len(next(iter(markov_chain)))

    # Complete a too-short seed to a full key, without exceeding the word limit.
    seed = tuple(generated_sentence)
    room = num_generated - len(generated_sentence)
    if len(seed) < order and room > 0:
        candidates = [key for key in markov_chain if key[:len(seed)] == seed]
        if candidates:
            completion = random.choice(candidates)[len(seed):]
            generated_sentence.extend(completion[:room])

    while len(generated_sentence) < num_generated:
        # ``[-0:]`` would slice the whole list, so order 0 is handled apart.
        state = tuple(generated_sentence[-order:]) if order else ()
        # A missing state ends the walk; no in-band sentinel word is used, so
        # every token of the corpus can be generated.
        followers = markov_chain.get(state)
        if not followers:
            break
        generated_sentence.append(random.choice(followers))

    return ' '.join(generated_sentence)

def print_markov_chain(markov_chain: dict, chain_length: int):
    """Print a header line followed by one ``key -> followers`` line per entry.

    Args:
        markov_chain: Mapping to display; entries are printed in the
            mapping's iteration order.
        chain_length: Value shown in the header line. It is only displayed,
            not checked against the keys.
    """
    header = f"Markov Chain Dictionary (Chain Length: {chain_length}):"
    print(header)
    for state in markov_chain:
        followers = markov_chain[state]
        print(f"{state} -> {followers}")

def main():
    """Run the three demo scenarios, printing each chain and generated sentence.

    Scenarios 1 and 2 build chains of length 2, 3 and 4 from the sample text
    and generate up to 20 words from a fixed set of start words. Scenario 3
    feeds empty text to ``build_markov_chain`` and prints the resulting
    ``ValueError`` message.
    """
    # Corpus used by scenarios 1 and 2.
    sample_text = """
    Alice, a young girl, sits bored by a riverbank and spots a White Rabbit with a pocket watch and waistcoat lamenting that he is late. Surprised, Alice follows him down a rabbit hole, which sends her into a lengthy plummet but to a safe landing. Inside a room with a table, she finds a key to a tiny door, beyond which is a garden. While pondering how to fit through the door, she discovers a bottle labelled "Drink me". Alice drinks some of the bottle's contents, and to her astonishment, she shrinks small enough to enter the door. However, she had left the key upon the table and cannot reach it. Alice then discovers and eats a cake labelled "Eat me", which causes her to grow to a tremendous size. Unhappy, Alice bursts into tears, and the passing White Rabbit flees in a panic, dropping a fan and two gloves. Alice uses the fan for herself, which causes her to shrink once more and leaves her swimming in a pool of her own tears. Within the pool, Alice meets various animals and birds, who convene on a bank and engage in a "Caucus Race" to dry themselves. Following the end of the race, Alice inadvertently frightens the animals away by discussing her cat.
    """

    # One row per sample-text scenario:
    # (header line, start words, chain lengths to try, maximum word count).
    scenarios = (
        ("Test Case 1:", ['Alice'], [2, 3, 4], 20),
        ("\nTest Case 2:", ('White', 'Rabbit'), [2, 3, 4], 20),
    )
    separator = "-" * 50

    for header, seed_words, orders, word_limit in scenarios:
        print(header)
        for order in orders:
            chain = build_markov_chain(sample_text, order)
            print_markov_chain(chain, order)
            sentence = generate_sentence(chain, seed_words, word_limit)
            print("\nGenerated Sentence:")
            print(sentence)
            print(separator)

    # Scenario 3: empty text makes build_markov_chain raise ValueError, so
    # only the error message is printed.
    print("\nTest Case 3 (Empty Sample Text):")
    try:
        chain = build_markov_chain("", 2)
        print_markov_chain(chain, 2)
        sentence = generate_sentence(chain, ['Alice'], 10)
        print("\nGenerated Sentence:")
        print(sentence)
        print(separator)
    except ValueError as err:
        print(err)

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Test Case 1:
Markov Chain Dictionary (Chain Length: 2):
('Alice,', 'a') -> ['young']
('a', 'young') -> ['girl,']
('young', 'girl,') -> ['sits']
('girl,', 'sits') -> ['bored']
('sits', 'bored') -> ['by']
('bored', 'by') -> ['a']
('by', 'a') -> ['riverbank']
('a', 'riverbank') -> ['and']
('riverbank', 'and') -> ['spots']
('and', 'spots') -> ['a']
('spots', 'a') -> ['White']
('a', 'White') -> ['Rabbit']
('White', 'Rabbit') -> ['with', 'flees']
('Rabbit', 'with') -> ['a']
('with', 'a') -> ['pocket', 'table,']
('a', 'pocket') -> ['watch']
('pocket', 'watch') -> ['and']
('watch', 'and') -> ['waistcoat']
('and', 'waistcoat') -> ['lamenting']
('waistcoat', 'lamenting') -> ['that']
('lamenting', 'that') -> ['he']
('that', 'he') -> ['is']
('he', 'is') -> ['late.']
('is', 'late.') -> ['Surprised,']
('late.', 'Surprised,') -> ['Alice']
('Surprised,', 'Alice') -> ['follows']
('Alice', 'follows') -> ['him']
('follows', 'him') -> ['down']
('him', 'down') -> ['a']
('down', 'a') -> ['rabbit']
('a', 'ra