In [44]:
import re
import random

# Sample text corpus used as the `text` argument of generate_output() in the
# examples below. The triple-quoted literal begins and ends with a newline;
# clean_text() strips that surrounding whitespace before the text is split
# into words.
sample_text = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""

def clean_text(text):
    """Return ``text`` without punctuation and with normalised whitespace.

    Every character that is neither a word character (``\\w``: letters,
    digits, underscore) nor whitespace is deleted, each run of whitespace is
    collapsed to a single space, and leading/trailing whitespace is stripped.
    """
    # Inner call drops the special characters; outer call squeezes whitespace.
    return re.sub(r'\s+', ' ', re.sub(r'[^\w\s]', '', text)).strip()

def generate_output(text, start_word, output_length, rng=None):
    """Generate text by walking a first-order word-transition table.

    ``text`` is cleaned with ``clean_text`` and split into words; every word
    is mapped to the list of words that directly follow it (duplicates are
    kept, so frequent successors are proportionally more likely to be picked).
    Starting from ``start_word``, a successor is chosen at random until the
    requested number of words is reached or the current word has no recorded
    successor.

    Args:
        text: Corpus the transition table is built from.
        start_word: First word of the output. It is used verbatim (the lookup
            is case-sensitive and the word is not cleaned), so a word that
            does not occur in the cleaned corpus yields just that word.
        output_length: Maximum number of words to produce; anything accepted
            by ``int()`` (e.g. ``10`` or ``"10"``).
        rng: Optional object exposing ``choice(seq)``, such as a seeded
            ``random.Random`` instance, for reproducible output. Defaults to
            the module-level ``random`` generator.

    Returns:
        The generated words joined by single spaces. The result always
        contains ``start_word``, even when ``output_length`` is 1 or less.

    Raises:
        ValueError: If ``output_length`` cannot be converted to an integer.
    """
    # Convert once up front instead of on every loop iteration; this also
    # rejects an invalid length before any work is done.
    target_length = int(output_length)
    chooser = random if rng is None else rng

    words = clean_text(text).split()

    # Map each word to every word that immediately follows it in the corpus.
    transitions = {}
    for current_word, next_word in zip(words, words[1:]):
        transitions.setdefault(current_word, []).append(next_word)

    output_text = [start_word]
    while len(output_text) < target_length:
        next_word_options = transitions.get(output_text[-1])
        if not next_word_options:
            # Dead end: the current word never has a successor in the corpus.
            break
        output_text.append(chooser.choice(next_word_options))

    return ' '.join(output_text)

# Example usage: build a chain of up to ten words that begins with "Lorem".
start_word, output_length = "Lorem", "10"

output_text = generate_output(sample_text, start_word, output_length)
# Header and generated text go on separate lines.
print("Generated Text:", output_text, sep="\n")


Generated Text:
Lorem ipsum dolor sit amet consectetur adipiscing elit Sed do


In [45]:
# Test cases 1-3 use fixed (start word, requested length) pairs.
fixed_cases = [
    ("Nonexistent", "10"),  # Test case 1: starting word not in sample text
    ("Lorem", "5"),         # Test case 2: short output length
    ("Lorem", "100"),       # Test case 3: long output length
]
for case_number, (start_word, output_length) in enumerate(fixed_cases, start=1):
    output_text = generate_output(sample_text, start_word, output_length)
    # Every header except the first is preceded by a blank line.
    separator = "" if case_number == 1 else "\n"
    print(separator + "Test Case {} Output:".format(case_number))
    print(output_text)

# Test case 4: random starting word and output length.
# The word is drawn from the cleaned corpus: generate_output() builds its
# transition table from cleaned text, so a raw token that still carries
# punctuation (e.g. "amet," or "elit.") would never be found and the output
# would be that single token.
start_word = random.choice(clean_text(sample_text).split())
output_length = str(random.randint(5, 30))
output_text = generate_output(sample_text, start_word, output_length)
print("\nTest Case 4 Output:")
print(output_text)


Test Case 1 Output:
Nonexistent

Test Case 2 Output:
Lorem ipsum dolor sit amet

Test Case 3 Output:
Lorem ipsum dolor sit amet consectetur adipiscing elit Sed do eiusmod tempor incididunt ut aliquip ex ea commodo consequat Duis aute irure dolor sit amet consectetur adipiscing elit Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut labore et dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt in voluptate velit esse cillum dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum

Test Case 4 Output:
in culpa qui officia deserunt mollit
