In [29]:
import random
import string
from collections import defaultdict

# Read file
def read_poem(filename):
    """Load a UTF-8 text file and return its non-blank lines.

    Each returned line has leading/trailing whitespace removed; lines that
    are empty after stripping are dropped.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of stripped, non-empty lines. If the file does not exist, a
        notice is printed and an empty list is returned.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as handle:
            raw_lines = handle.readlines()
    except FileNotFoundError:
        print(f" File '{filename}' not found")
        return []

    # Strip once, then keep only the lines that still have content.
    stripped_lines = (raw.strip() for raw in raw_lines)
    return [text for text in stripped_lines if text]

# Preprocessing
def preprocess_line(line):
    """Turn a line of text into a list of lowercase word tokens.

    The line is lowercased, every character in ``string.punctuation`` is
    deleted (not replaced by a space), and the result is split on whitespace.

    Args:
        line: The text to tokenize.

    Returns:
        A list of tokens; empty if nothing remains after cleaning.
    """
    drop_punctuation = str.maketrans('', '', string.punctuation)
    return line.lower().translate(drop_punctuation).split()

In [30]:
# Training
def train_markov(lines):
    """Estimate the Markov model's probability tables from lines of text.

    Each line is tokenized with ``preprocess_line``; lines that yield no
    tokens are skipped. Three tallies are collected and then normalized:

    * how often each word starts a line,
    * how often each word follows a given word,
    * how often each word follows a given pair of consecutive words.

    Args:
        lines: Iterable of text lines.

    Returns:
        A 3-tuple ``(initial_probs, first_order_probs, second_order_probs)``
        where the first maps word -> probability, the second maps
        word -> {next word -> probability}, and the third maps
        (word, word) -> {next word -> probability}.
    """
    start_tally = defaultdict(int)
    bigram_tally = defaultdict(lambda: defaultdict(int))
    trigram_tally = defaultdict(lambda: defaultdict(int))

    for raw in lines:
        tokens = preprocess_line(raw)
        if tokens:
            # Line-opening word
            start_tally[tokens[0]] += 1

            # word -> following word
            for prev, following in zip(tokens, tokens[1:]):
                bigram_tally[prev][following] += 1

            # (word, word) -> following word
            for first, second, following in zip(tokens, tokens[1:], tokens[2:]):
                trigram_tally[(first, second)][following] += 1

    initial_probs = normalize_probs(start_tally)
    first_order_probs = normalize_nested(bigram_tally)
    second_order_probs = normalize_nested(trigram_tally)
    return initial_probs, first_order_probs, second_order_probs

In [31]:
# Normalization helpers: convert raw counts into probabilities
def normalize_probs(counts):
    """Convert a mapping of counts into each key's share of the total.

    Args:
        counts: Mapping of key -> numeric count.

    Returns:
        A new dict with the same keys, in the same order, where each value is
        ``count / sum(all counts)``. An empty mapping yields an empty dict.
    """
    denominator = sum(counts.values())
    shares = {}
    for token, tally in counts.items():
        shares[token] = tally / denominator
    return shares

def normalize_nested(nested_counts):
    """Normalize every inner count table of a two-level mapping.

    Args:
        nested_counts: Mapping of context -> {key -> numeric count}.

    Returns:
        A new dict of context -> {key -> count / inner total}, where each
        inner table is normalized independently of the others.
    """
    def to_distribution(row):
        # Each inner table is scaled by its own total.
        row_total = sum(row.values())
        return {token: tally / row_total for token, tally in row.items()}

    return {context: to_distribution(row) for context, row in nested_counts.items()}

In [32]:
# Generation
def choose_word(prob_dict):
    """Randomly pick one key of ``prob_dict``, weighted by its probability.

    A single uniform draw from ``random.random()`` is compared against the
    running sum of the probabilities, taken in the mapping's iteration order.

    Args:
        prob_dict: Non-empty mapping of word -> probability.

    Returns:
        The selected word.

    Raises:
        ValueError: If ``prob_dict`` is empty (there is nothing to choose).
    """
    if not prob_dict:
        raise ValueError("cannot choose a word from an empty probability distribution")

    r = random.random()  # uniform in [0, 1)
    cumulative = 0
    for word, prob in prob_dict.items():
        cumulative += prob
        # Strict '<': r can be exactly 0.0, and a word whose probability is 0
        # must never be selected in that case.
        if r < cumulative:
            return word
    # Floating-point rounding can leave the running sum slightly below r;
    # fall back to the last word seen.
    return word


def _generate_line(initial_probs, first_order, second_order, max_words):
    """Generate one line as a list of at most ``max_words`` words."""
    line = []
    if max_words < 1:
        return line

    # First word comes from the line-start distribution.
    line.append(choose_word(initial_probs))
    if max_words < 2 or line[0] not in first_order:
        return line

    # Second word depends only on the first word.
    line.append(choose_word(first_order[line[0]]))

    # Every later word depends on the two words before it; stop early when
    # that pair has no recorded continuation.
    while len(line) < max_words:
        pair = (line[-2], line[-1])
        if pair not in second_order:
            break
        line.append(choose_word(second_order[pair]))
    return line


def generate_poem(initial_probs, first_order, second_order, lines=4, max_words=10):
    """Generate a poem from the trained Markov probability tables.

    Args:
        initial_probs: Mapping word -> probability of starting a line.
        first_order: Mapping word -> {next word -> probability}.
        second_order: Mapping (word, word) -> {next word -> probability}.
        lines: Number of lines to generate.
        max_words: Upper bound on the number of words per line. The bound is
            honoured for every value, including values below 2; a line may be
            shorter when the model has no continuation for its last word(s).

    Returns:
        The generated lines joined with newline characters.
    """
    return '\n'.join(
        ' '.join(_generate_line(initial_probs, first_order, second_order, max_words))
        for _ in range(lines)
    )

In [38]:
if __name__ == "__main__":
    # Train on the corpus and print one generated poem.
    lines = read_poem('robert_frost.txt')

    # read_poem already reports a missing file and returns []. Skip the rest
    # of the cell instead of calling exit(): exit() is an interactive-session
    # helper and, in a notebook, asks the kernel to shut down.
    if lines:
        init_probs, first_order, second_order = train_markov(lines)
        poem = generate_poem(init_probs, first_order, second_order)

        print(" Generated Poem:\n")
        print(poem)

 Generated Poem:

and tell him everything
the outdoor work though as for that the passing there
they were beside the track
by vegetation from above
