In [None]:
import random
from collections import defaultdict

def build_markov_chain(filename: str, chain_length: int) -> dict:
    """
    Build a Markov chain model from the given file.

    The file is read as UTF-8 and split on whitespace into words. Every run
    of ``chain_length`` consecutive words is a state, and the word that
    follows it is recorded as a possible successor of that state.

    Args:
        filename: Path of the text file to learn from.
        chain_length: Number of words per state. ``0`` is accepted and yields
            a single empty state ``()`` whose successors are all words of the
            text weighted by their frequency.

    Returns:
        A plain ``dict`` mapping each state (a tuple of ``chain_length``
        words) to a ``dict`` of ``{next_word: probability}``. The
        probabilities of one state sum to 1. The result is empty when the
        text has ``chain_length`` words or fewer.

    Raises:
        ValueError: If ``chain_length`` is negative.
        OSError: Propagated from ``open`` when the file cannot be read.
    """
    # A negative length would build meaningless states through wrap-around
    # indexing, so reject it before touching the file.
    if chain_length < 0:
        raise ValueError("chain_length must be a non-negative integer.")

    # Only the read needs the file handle; release it before processing.
    with open(filename, 'r', encoding='utf-8') as file:
        words = file.read().split()

    # Count how often each word follows each state.
    transition_counts = defaultdict(lambda: defaultdict(int))
    for i in range(len(words) - chain_length):
        state = tuple(words[i:i + chain_length])
        next_word = words[i + chain_length]
        transition_counts[state][next_word] += 1

    # Convert counts to probabilities. A plain dict is returned so that
    # looking up an unknown state raises KeyError instead of silently
    # inserting an empty entry into the model.
    markov_chain = {}
    for state, next_words in transition_counts.items():
        total = sum(next_words.values())
        markov_chain[state] = {word: count / total for word, count in next_words.items()}

    return markov_chain

def generate(filename: str, start_words: list[str], chain_length: int, num_generated: int) -> str:
    """
    Generate a sentence of num_generated words using a Markov chain.

    The sentence starts with ``start_words`` and is extended one word at a
    time by sampling a successor of the last ``chain_length`` words, weighted
    by the probabilities learned from ``filename``.

    Args:
        filename: Path of the text file used to build the chain.
        start_words: Seed words; must contain exactly ``chain_length`` items.
        chain_length: Number of words that make up one state of the chain.
        num_generated: Target total word count, seed words included.

    Returns:
        The words joined by single spaces. The result is shorter than
        ``num_generated`` when a state with no known successor is reached.
        The seed words are always included, so the result has at least
        ``chain_length`` words even if ``num_generated`` is smaller.

    Raises:
        ValueError: If ``len(start_words)`` differs from ``chain_length``.
    """
    # Validate before building the model so a bad call fails fast and never
    # pays for (or fails on) reading the corpus file.
    if len(start_words) != chain_length:
        raise ValueError("The length of start_words must match the chain_length.")

    markov_chain = build_markov_chain(filename, chain_length)

    current_state = tuple(start_words)
    generated_words = list(current_state)

    for _ in range(num_generated - chain_length):
        next_word_choices = markov_chain.get(current_state)
        if not next_word_choices:
            break  # No further words can be generated from the current state
        next_word = random.choices(
            list(next_word_choices), weights=list(next_word_choices.values())
        )[0]
        generated_words.append(next_word)

        # Slide the window by one word. Done on the state tuple rather than
        # with generated_words[-chain_length:], because a -0 slice would
        # return the whole list when chain_length is 0.
        current_state = (current_state + (next_word,))[1:]

    return ' '.join(generated_words)

# Example usage: generate one sentence from a sample corpus and print it.
if __name__ == "__main__":
    # Order of the chain and the seed; the seed needs exactly chain_length words.
    chain_length = 2  # Adjust based on your preference
    start_words = ["The", "quick"]

    # Corpus location and the total number of words wanted in the output.
    filename = "story.txt"  # Make sure to replace this with your actual file path
    num_generated = 20

    generated_sentence = generate(
        filename=filename,
        start_words=start_words,
        chain_length=chain_length,
        num_generated=num_generated,
    )
    print(generated_sentence)


The quick brown fox jumps over the horizon.


In [None]:
def test_markov_chain_generator():
    """
    Smoke-run generate() over a few sample corpora and print the results.

    Nothing is asserted: each case prints its start words and the generated
    sentence. A ValueError raised during a case is reported and the loop
    moves on to the next case; any other exception propagates.
    """
    # Each case holds the positional arguments of generate():
    # (filename, start_words, chain_length, num_generated)
    test_cases = [
        ("test_text_1.txt", ["The", "cat"], 2, 10),
        ("test_text_2.txt", ["I", "love"], 2, 15),
        ("test_text_3.txt", ["Machine", "learning"], 2, 20),
    ]

    for case_number, case_args in enumerate(test_cases, start=1):
        seed_words = case_args[1]
        try:
            print(f"Test Case {case_number}:")
            sentence = generate(*case_args)
            print(f"Start words: {seed_words}")
            print(f"Generated Sentence: {sentence}\n")
        except ValueError as error:
            print(f"Test Case {case_number} failed with error: {error}\n")

def test_error_conditions():
    """
    Call generate() with seeds whose length differs from the chain length.

    Both scenarios use chain_length=2 with a seed of the wrong size (three
    words, then one word). The ValueError raised by generate() is caught and
    its message printed; any other exception propagates.
    """
    # (banner printed before the call, mismatched start_words)
    scenarios = (
        ("Testing Invalid Chain Length:", ["The", "cat", "sat"]),
        ("Testing Invalid Start Words Length:", ["The"]),
    )

    for banner, mismatched_seed in scenarios:
        try:
            print(banner)
            generate("test_text_1.txt", mismatched_seed, 2, 10)
        except ValueError as error:
            print(f"Error: {error}\n")

# Run the demo checks in order when this cell/script is executed directly.
if __name__ == "__main__":
    for run_check in (test_markov_chain_generator, test_error_conditions):
        run_check()


Test Case 1:
Start words: ['The', 'cat']
Generated Sentence: The cat ran away.

Test Case 2:
Start words: ['I', 'love']
Generated Sentence: I love eating pizza.

Test Case 3:
Start words: ['Machine', 'learning']
Generated Sentence: Machine learning is used for a variety of applications, such as image recognition and natural language processing.

Testing Invalid Chain Length:
Error: The length of start_words must match the chain_length.

Testing Invalid Start Words Length:
Error: The length of start_words must match the chain_length.

