In [5]:
import random
import re

def read_file(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        return file.read()

def preprocess_text(text):
    # Remove special characters and split the text into words
    words = re.findall(r'\b\w+\b', text.lower())
    return words

def build_markov_chain(words, chain_length):
    markov_chain = {}
    for i in range(len(words) - chain_length):
        key = tuple(words[i:i + chain_length])
        value = words[i + chain_length]
        if key in markov_chain:
            markov_chain[key].append(value)
        else:
            markov_chain[key] = [value]
    return markov_chain

def generate_sentence(markov_chain, start_words, chain_length, num_generated):
    sentence = list(start_words)
    current_state = tuple(start_words)

    for _ in range(num_generated):
        next_word = random.choice(markov_chain.get(current_state, ['']))
        sentence.append(next_word)
        current_state = tuple(sentence[-chain_length:])

    return ' '.join(sentence)

def generate(filename: str, start_words: list[str], chain_length: int, num_generated: int) -> str:
    text = read_file(filename)
    words = preprocess_text(text)
    markov_chain = build_markov_chain(words, chain_length)
    return generate_sentence(markov_chain, start_words, chain_length, num_generated)

# Example usage:
filename = "example.txt"
start_words = ["the", "quick", "brown"]
chain_length = 2  # Set to desired chain length
num_generated = 10  # Set to desired number of words in the generated sentence

generated_sentence = generate(filename, start_words, chain_length, num_generated)
print(generated_sentence)


the quick brown          
