In [9]:
import random
from collections import defaultdict, Counter

class MarkovChainTextGenerator:
    """First-order Markov chain text generator over whitespace-separated words.

    ``model`` maps each word to a ``Counter`` of the words that were seen
    immediately after it during training.
    """

    def __init__(self) -> None:
        """Create a generator with an empty transition table."""
        # word -> Counter({following word: number of times observed})
        self.model = defaultdict(Counter)

    def train(self, text: str) -> None:
        """Add the word-to-next-word transitions found in ``text`` to the model.

        The text is split on whitespace; every adjacent pair of words
        increments one transition count. Calling ``train`` again accumulates
        on top of the existing counts (no transition is recorded between the
        last word of one call and the first word of the next).

        Args:
            text: Training text. Empty or single-word text adds nothing.
        """
        words = text.split()
        # Pair each word with its successor; zip stops at the shorter
        # sequence, so the final word gets no outgoing transition.
        for current_word, next_word in zip(words, words[1:]):
            self.model[current_word][next_word] += 1

    def get_next_word_probabilities(self, word: str) -> dict:
        """Return the observed distribution of words following ``word``.

        Args:
            word: The word whose successors are requested.

        Returns:
            A dict mapping each successor to its relative frequency
            (count / total count). Empty if ``word`` has no recorded
            successors.
        """
        # Use .get() instead of indexing: indexing a defaultdict inserts the
        # missing key, which would silently add unseen words to the model and
        # make later ``word in self.model`` checks report them as known.
        next_words = self.model.get(word)
        if not next_words:
            return {}
        total = sum(next_words.values())
        if total == 0:
            return {}
        return {
            next_word: count / total
            for next_word, count in next_words.items()
        }

    def generate_text(self, start_word: str, length: int = 20) -> str:
        """Generate text by a weighted random walk starting at ``start_word``.

        Each next word is drawn with ``random.choices`` using the successor
        probabilities of the current word. The walk stops early when the
        current word has no recorded successors.

        Args:
            start_word: First word of the output; must be a key of the model.
            length: Maximum number of words to produce.

        Returns:
            The generated words joined by single spaces, the string
            ``"Start word not in model."`` if ``start_word`` has no entry in
            the model, or an empty string if ``length`` is less than 1.
        """
        if start_word not in self.model:
            return "Start word not in model."
        if length < 1:
            # A non-positive length asks for no words at all.
            return ""

        result = [start_word]
        current_word = start_word

        for _ in range(length - 1):
            next_words_probs = self.get_next_word_probabilities(current_word)
            if not next_words_probs:
                # Dead end: nothing was ever observed after this word.
                break
            next_word = random.choices(
                population=list(next_words_probs.keys()),
                weights=list(next_words_probs.values()),
            )[0]
            result.append(next_word)
            current_word = next_word

        return ' '.join(result)


if __name__ == "__main__":
    # Demo: train on a tiny corpus, show the successor distribution of one
    # word, then print a short randomly generated sentence.
    sample_text = """
    the cat sat on the mat the dog barked at the cat
    the cat ran away the dog chased the cat and the cat climbed the tree
    the dog could not climb the tree so the dog barked again
    """
    word_to_check = "the"

    chain = MarkovChainTextGenerator()
    chain.train(sample_text)

    print(f"\nNext word probabilities for '{word_to_check}':")
    distribution = chain.get_next_word_probabilities(word_to_check)
    if not distribution:
        print(f"No next word found for '{word_to_check}'.")
    else:
        for word, prob in distribution.items():
            print(f"{word}: {prob:.2f}")

    # Output varies from run to run because the walk is random.
    print("\nGenerated text:")
    print(chain.generate_text("the", length=15))



Next word probabilities for 'the':
cat: 0.42
mat: 0.08
dog: 0.33
tree: 0.17

Generated text:
the mat the tree so the cat the cat sat on the tree so the
