In [None]:
import random

def read_text(filename):
    """Load the file at ``filename`` as UTF-8 and return its full contents as one string."""
    with open(filename, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

def build_markov_chain(text, chain_length):
    """Map every run of ``chain_length`` consecutive words to the words seen right after it.

    The text is split on whitespace. Each key is a tuple of ``chain_length``
    adjacent words; its value is a list of the words that immediately followed
    that tuple, in order of appearance and with repeats kept.
    """
    tokens = text.split()
    transitions = {}
    for start in range(len(tokens) - chain_length):
        stop = start + chain_length
        prefix = tuple(tokens[start:stop])
        # setdefault creates the follower list on first sight of a prefix.
        transitions.setdefault(prefix, []).append(tokens[stop])
    return transitions

def generate(filename: str, start_words: list[str], chain_length: int, num_generated: int) -> str:
    """Generate text by walking a word-level Markov chain built from a file.

    Args:
        filename: Path of the text file used as the corpus.
        start_words: Seed words; the result always begins with these.
        chain_length: Number of preceding words used as the lookup key.
            Must equal ``len(start_words)``.
        num_generated: Maximum number of words appended after the seed.

    Returns:
        The seed words followed by up to ``num_generated`` generated words,
        joined by single spaces. Generation stops early as soon as the current
        key has no recorded follower in the chain.

    Raises:
        ValueError: If ``len(start_words)`` differs from ``chain_length``.
    """
    # Validate first: bad arguments should fail before the file is read and
    # the chain is built, and should not be masked by a missing-file error.
    if len(start_words) != chain_length:
        raise ValueError("Length of start_words must match chain_length")

    text = read_text(filename)
    chain = build_markov_chain(text, chain_length)

    # The sentence starts as the seed; the seed is also the first lookup key.
    current_words = tuple(start_words)
    generated_sentence = list(current_words)

    for _ in range(num_generated):
        followers = chain.get(current_words)
        if not followers:
            # Unknown key: nothing in the corpus ever followed these words.
            break
        generated_sentence.append(random.choice(followers))
        # Slice from an explicit start index. ``[-chain_length:]`` would return
        # the whole list when chain_length is 0 (because -0 == 0), turning the
        # key into something that can never be found in the chain.
        window_start = len(generated_sentence) - chain_length
        current_words = tuple(generated_sentence[window_start:])

    return ' '.join(generated_sentence)



In [None]:
# Demonstration cases 1-4. generate() requires len(start_words) == chain_length,
# so chain_length is derived from the seed wherever a case is meant to succeed.
# Output is random; the "Expected output" notes describe its shape only.

# Test case 1: Basic test with a short seed and a small number of generated words
filename = "example.txt"
start_words = ["I", "am"]
chain_length = len(start_words)  # a fixed 1 here made generate() raise ValueError
num_generated = 5
print(generate(filename, start_words, chain_length, num_generated))
# Expected output: text from "example.txt" starting with "I am", followed by at most 5 generated words.

# Test case 2: Test with larger chain length and number of generated words
filename = "example.txt"
start_words = ["I", "am", "not"]
chain_length = len(start_words)  # a fixed 2 here made generate() raise ValueError
num_generated = 10
print(generate(filename, start_words, chain_length, num_generated))
# Expected output: text starting with "I am not", followed by at most 10 generated words
# (only the seed itself if that word sequence never occurs in the file).

# Test case 3: start_words length deliberately different from chain_length
filename = "example.txt"
start_words = ["This", "does", "not", "exist"]
chain_length = 2
num_generated = 5
try:
    print(generate(filename, start_words, chain_length, num_generated))
except ValueError as error:
    # The mismatch is the point of this case: show the error and keep going
    # so the remaining case still runs.
    print(f"ValueError: {error}")
# Expected output: "ValueError: Length of start_words must match chain_length".

# Test case 4: Test with a large number of generated words
filename = "example.txt"
start_words = ["I", "am"]
chain_length = 2
num_generated = 100
print(generate(filename, start_words, chain_length, num_generated))
# Expected output: text starting with "I am", followed by at most 100 generated words.


ValueError: Length of start_words must match chain_length

In [None]:
# Every case in this cell draws from the same corpus file.
filename = "example.txt"

# Case 1 - one-word key, up to five generated words.
start_words, chain_length, num_generated = ["I"], 1, 5
print(generate(filename, start_words, chain_length, num_generated))

# Case 2 - two-word key, up to ten generated words.
start_words, chain_length, num_generated = ["I", "am"], 2, 10
print(generate(filename, start_words, chain_length, num_generated))

# Case 3 - seed that may not occur in the corpus; generate() stops at the
# first unknown key, so only the seed itself comes back in that situation.
start_words, chain_length, num_generated = ["This", "does"], 2, 5
print(generate(filename, start_words, chain_length, num_generated))

# Case 4 - same seed as case 2, but allowing up to 100 generated words.
start_words, chain_length, num_generated = ["I", "am"], 2, 100
print(generate(filename, start_words, chain_length, num_generated))


I am the walrus, coo coo
I am the walrus, coo coo cachoo The quick brown fox jumps
This does
I am the walrus, coo coo cachoo The quick brown fox jumps over the lazy dog To be or not to be, that is the question Lorem ipsum dolor sit amet, consectetur adipiscing elit


In [None]:
# Test case 5: Test with longer chain length and number of generated words
filename = "example1.txt"
start_words = ["I", "am", "the"]
chain_length = 3
num_generated = 15
output_5 = generate(filename, start_words, chain_length, num_generated)
print(output_5)

# Test case 6: start_words length (5) deliberately differs from chain_length (3).
# generate() rejects this with ValueError; the error is caught and shown as the
# result of this case so that test case 7 below still runs.
filename = "example1.txt"
start_words = ["These", "words", "do", "not", "exist"]
chain_length = 3
num_generated = 10
try:
    output_6 = generate(filename, start_words, chain_length, num_generated)
except ValueError as error:
    output_6 = f"ValueError: {error}"
print(output_6)

# Test case 7: Test with a large number of generated words
filename = "example1.txt"
start_words = ["The", "quick", "brown"]
chain_length = 3
num_generated = 50
output_7 = generate(filename, start_words, chain_length, num_generated)
print(output_7)


I am the walrus, coo coo cachoo This is a test sentence for generating more content An example


ValueError: Length of start_words must match chain_length

In [None]:
# Shared settings: all three cases use the second corpus with three-word keys.
filename = "example1.txt"
chain_length = 3

# Case 5 - three-word seed, up to 15 generated words.
start_words, num_generated = ["I", "am", "the"], 15
output_5 = generate(filename, start_words, chain_length, num_generated)
print(output_5)

# Case 6 - seed that may not occur in the corpus; generate() stops at the
# first unknown key, so only the seed itself comes back in that situation.
start_words, num_generated = ["These", "words", "do"], 10
output_6 = generate(filename, start_words, chain_length, num_generated)
print(output_6)

# Case 7 - up to 50 generated words from a different seed.
start_words, num_generated = ["The", "quick", "brown"], 50
output_7 = generate(filename, start_words, chain_length, num_generated)
print(output_7)


I am the walrus, coo coo cachoo This is a test sentence for generating more content An example
These words do
The quick brown fox jumps over the lazy dog To be or not to be, that is the question Lorem ipsum dolor sit amet, consectetur adipiscing elit I am the walrus, coo coo cachoo This is a test sentence for generating more content An example of additional text for testing the Markov chain
