In [83]:
import random

def generate(filename: str, start_words: list[str], chain_length: int, num_generated: int) -> str:
    """Generate text with a word-level Markov chain built from a text file.

    The file is split on whitespace. Every run of ``chain_length`` consecutive
    words is mapped to the list of words that follow it in the text; repeats
    are kept, so a continuation that occurs more often is drawn more often.
    Starting from ``start_words``, ``num_generated`` words are appended one at
    a time.

    Args:
        filename: Path of the UTF-8 text file used as the corpus.
        start_words: Words the output begins with. Only the last
            ``chain_length`` of them form the initial lookup state, so the
            list may be longer or shorter than ``chain_length``.
        chain_length: Number of preceding words used to pick the next word.
        num_generated: Number of words appended after ``start_words``.

    Returns:
        The start words followed by the generated words, joined by single
        spaces.

    Raises:
        ValueError: If ``chain_length`` is negative.
        IndexError: If a random fallback word is needed but the file contains
            no words (raised by ``random.choice`` on an empty list).
    """
    if chain_length < 0:
        raise ValueError(f"chain_length must be non-negative, got {chain_length}")

    with open(filename, 'r', encoding='utf-8') as file:
        words = file.read().split()

    # Build the Markov chain dictionary: prefix tuple -> list of following words.
    word_dict = {}
    for i in range(len(words) - chain_length):
        prefix = tuple(words[i:i + chain_length])
        word_dict.setdefault(prefix, []).append(words[i + chain_length])

    # Collect output words in a list and join once at the end.
    output_words = list(start_words)

    # The lookup state is at most the last `chain_length` words. Slicing with
    # -0 would return the whole list, hence the explicit zero case.
    state = output_words[-chain_length:] if chain_length > 0 else []

    for _ in range(num_generated):
        # Unknown prefix: fall back to a uniformly random word from the corpus.
        candidates = word_dict.get(tuple(state), words)
        next_word = random.choice(candidates)
        output_words.append(next_word)
        if chain_length > 0:
            # Slide the window; a too-short state grows until it is full.
            state = (state + [next_word])[-chain_length:]

    return ' '.join(output_words)

# Example usage: extend the two-word seed "She sells" with ten words
# generated from the corpus in input.txt, then show the result.
filename, start_words = 'input.txt', ['She', 'sells']
chain_length, num_generated = 2, 10

generated_sentence = generate(
    filename=filename,
    start_words=start_words,
    chain_length=chain_length,
    num_generated=num_generated,
)
print(generated_sentence)


She sells seashells by the seashore. Peter Piper picked a peck of


In [84]:
# Test cases
filename = 'input.txt'

# NOTE(review): generate() picks words with random.choice, so these exact-match
# expectations are only stable if every prefix reached below has a single
# continuation in input.txt — confirm against the corpus.
test_cases = [
    # (start words, chain length, words to generate, expected output)
    (['She', 'sells'], 2, 10,
     "She sells seashells by the seashore. Peter Piper picked a peck of"),
    (['The', 'quick'], 2, 10,
     "The quick brown fox jumps over the lazy dog. She sells seashells"),
    (['How', 'much'], 2, 4,
     "How much wood would a woodchuck"),
]

# Names are prefixed with `case_` so the loop does not overwrite the
# `start_words`, `chain_length`, ... globals defined by the example cell.
for case_number, (case_start, case_chain, case_count, case_expected) in enumerate(test_cases, start=1):
    case_actual = generate(filename, case_start, case_chain, case_count)
    assert case_actual == case_expected, (
        f"Test Case {case_number} failed: expected {case_expected!r}, got {case_actual!r}"
    )

print("All test cases passed!")



All test cases passed!
