In [2]:
import random

def create_transition_list(corpus):
    transitions = []

    # Iterate through each sentence in the corpus
    for sentence in corpus:
        words = sentence.split()
        # Iterate through each word in the sentence
        for i in range(len(words) - 1):
            current_word = words[i]
            next_word = words[i + 1]
            # Append a tuple representing the transition (current_word, next_word)
            transitions.append((current_word, next_word))

    return transitions

def generate_text(transitions, start_word, output_length):
    output_text = []

    # If the starting word is empty, select a random starting word from the corpus
    if start_word == '':
        start_word = random.choice([t[0] for t in transitions])

    output_text.append(start_word)
    current_word = start_word

    # Generate text
    for _ in range(output_length - 1):
        possible_transitions = [t for t in transitions if t[0] == current_word]
        if possible_transitions:
            next_word = random.choice(possible_transitions)[1]
            output_text.append(next_word)
            current_word = next_word
        else:
            break

    return ' '.join(output_text)

# Example usage
corpus = [
    "The quick brown fox jumps over the lazy dog.",
    "A stitch in time saves nine.",
    "All that glitters is not gold.",
    "Actions speak louder than words.",
    "Beauty is in the eye of the beholder.",
    "Birds of a feather flock together."
]

transition_list = create_transition_list(corpus)
start_word = 'eye'  # Starting word from the corpus
output_length = 9  # Desired output length

output_text = generate_text(transition_list, start_word, output_length)
print(output_text)


eye of a feather flock together.


In [3]:
import random

def generate_text(word_dict, start_word, output_length, chain_length=10):
  """
  Generates text recursively using a Markov chain model, with a maximum chain length.

  Args:
      word_dict: A dictionary representing word transitions.
      start_word: The word to start the generated text.
      output_length: The desired length of the generated text.
      chain_length: The maximum length of the word chain to consider (default 10).

  Returns:
      A string containing the generated text.
  """


  if output_length <= 0:
    return ""

  if chain_length <= 0:  # Allow chain_length to be 0
    next_word = random.choice(list(word_dict[start_word].keys()))  # Choose a random next word
    return f"{start_word} {generate_text(word_dict, next_word, output_length - 1, chain_length)}"  # Recursive call

  next_word = select_next_word(word_dict, start_word)
  # Limit chain length by stopping recursion if chain_length is reached
  if chain_length == 1:
    return f"{start_word}"
  else:
    return f"{start_word} {generate_text(word_dict, next_word, output_length - 1, chain_length - 1)}"



def select_next_word(word_dict, current_word):
  """Selects the next word based on probabilities or a random choice."""

  if current_word not in word_dict:
    return random.choice(list(word_dict.keys()))  # Random word if not in dict

  word_choices = list(word_dict[current_word].keys())
  word_probs = [word_dict[current_word][word] / sum(word_dict[current_word].values()) for word in word_choices]
  return random.choices(word_choices, word_probs)[0]

# Sample text corpus (replace with your own corpus or file reading logic)
text_corpus = "This is a sample text corpus. It contains various words and phrases that can be used to generate new text. The model will learn the probabilities of word transitions and use them to create a sequence of words that resembles the original text."

# Clean the corpus text
corpus_text = text_corpus.lower()
corpus_text = corpus_text.replace(",", "").replace(".", "").replace("!", "").replace("?", "")
words = corpus_text.split()

# Create a dictionary to store word transitions
word_dict = {}
i = 0
while i < len(words) - 1:
  current_word = words[i]
  if current_word not in word_dict:
    word_dict[current_word] = {}
  next_word = words[i + 1]
  if next_word in word_dict[current_word]:
    word_dict[current_word][next_word] += 1
  else:
    word_dict[current_word][next_word] = 1
  i += 1

# Example usage
text = generate_text(word_dict, start_word="the", output_length=10)
print(text)


the model will learn the probabilities of words and use


In [4]:
# Test case 1: Basic Functionality
text = generate_text(word_dict, start_word="sample", output_length=10, chain_length=10)
print(text)

# Verify the output starts with "sample" and is 10 words long
if text.startswith("sample") and len(text.split()) == 10:
    print("Test case 1 passed: Basic functionality works!")
else:
    print("Test case 1 failed: Output doesn't meet expectations.")

sample text corpus it contains various words that can be
Test case 1 passed: Basic functionality works!


In [5]:
text = generate_text(word_dict, start_word="text", output_length=50, chain_length=10)
print(text)

# Verify the output starts with "text" and is 50 words long
if text.startswith("text") and len(text.split()) == 50:
    print("Test case 2 passed: Longer output generated successfully!")
else:
    print("Test case 2 failed: Output doesn't meet expectations.")

text the probabilities of word transitions and use them to
Test case 2 failed: Output doesn't meet expectations.


In [7]:
# Test case 3: Edge Case - Empty Corpus
empty_word_dict = {}  # Create an empty dictionary to simulate an empty corpus

# Handle the empty corpus case gracefully (e.g., return an error or default text)
try:
  text = generate_text(empty_word_dict, start_word="any", output_length=3)
  print(text)
except (IndexError, KeyError) as e:  # Catch potential errors related to empty dictionary access
  print(f"Test case 3 passed: Code handles empty corpus  (error: {e}).")

Test case 3 passed: Code handles empty corpus  (error: list index out of range).


In [9]:
single_word_corpus = "Hello"
word_dict_single_word = {"Hello": {"World": 1}}  # A dictionary with one word
text = generate_text(word_dict_single_word, start_word="Hello", output_length=10, chain_length=10)
print(text)

# Verify that the output consists of repeated single word
if text == "Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello":
    print("Test case 4 passed: Single word corpus handled correctly!")
else:
    print("Test case 4 failed: Single word corpus not handled properly.")

Hello World Hello World Hello World Hello World Hello World
Test case 4 failed: Single word corpus not handled properly.


In [10]:
# Test case 5: Edge Case - Chain Length 0
text = generate_text(word_dict, start_word="text", output_length=10, chain_length=0)
print(text)

# Verify that the output is random, not considering word transitions
if len(text.split()) == 10:
    print("Test case 5 passed: Chain length 0 handled correctly!")
else:
    print("Test case 5 failed: Chain length 0 not handled properly.")

text corpus it contains various words that can be used 
Test case 5 passed: Chain length 0 handled correctly!


In [11]:
# Test case 6: Edge Case - Output Length 0
text = generate_text(word_dict, start_word="text", output_length=0, chain_length=10)
print(text)

# Verify that an empty string is returned
if text == "":
    print("Test case 6 passed: Output length 0 handled correctly!")
else:
    print("Test case 6 failed: Output length 0 not handled properly.")


Test case 6 passed: Output length 0 handled correctly!
