In [12]:
import random

def read_file(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def clean(text):
    return text.replace(".", "  ").replace(",", "  ")

def build_markov_chain(text, chain_length):
    words = text.split()
    chain = {}
    key = ("~",) * chain_length
    for word in words:
        chain[key] = chain.get(key, []) + [word,]
        key = key[1:] + (word,)
    print(chain)
    return chain

def generate(filename: str, start_words: list[str], chain_length: int, num_generated: int) -> str:
    text = read_file(filename)
    text = clean(text);
    markov_chain = build_markov_chain(text, chain_length)
    output = start_words.copy()
    prefix = tuple(start_words)
    for _ in range(num_generated):
        if prefix in markov_chain:
            next_word = random.choice(markov_chain[prefix])
            output.append(next_word)
            prefix = prefix[1:] + (next_word,)
        else:
            break

    return ' '.join(output)


In [16]:
with open("sample.txt", "w") as file:
    file.write("The sun is shining. The cat is sleeping. I like to eat ice cream. The cat is cute. I want to go swimming when it is hot. The sun is right above us. The sun shines brightly. It is hot")

print(generate("sample.txt", ["The", "sun"], 2, 20))

{('~', '~'): ['The'], ('~', 'The'): ['sun'], ('The', 'sun'): ['is', 'is', 'shines'], ('sun', 'is'): ['shining', 'right'], ('is', 'shining'): ['The'], ('shining', 'The'): ['cat'], ('The', 'cat'): ['is', 'is'], ('cat', 'is'): ['sleeping', 'cute'], ('is', 'sleeping'): ['I'], ('sleeping', 'I'): ['like'], ('I', 'like'): ['to'], ('like', 'to'): ['eat'], ('to', 'eat'): ['ice'], ('eat', 'ice'): ['cream'], ('ice', 'cream'): ['The'], ('cream', 'The'): ['cat'], ('is', 'cute'): ['I'], ('cute', 'I'): ['want'], ('I', 'want'): ['to'], ('want', 'to'): ['go'], ('to', 'go'): ['swimming'], ('go', 'swimming'): ['when'], ('swimming', 'when'): ['it'], ('when', 'it'): ['is'], ('it', 'is'): ['hot'], ('is', 'hot'): ['The'], ('hot', 'The'): ['sun'], ('is', 'right'): ['above'], ('right', 'above'): ['us'], ('above', 'us'): ['The'], ('us', 'The'): ['sun'], ('sun', 'shines'): ['brightly'], ('shines', 'brightly'): ['It'], ('brightly', 'It'): ['is'], ('It', 'is'): ['hot']}
The sun is right above us The sun shines bri