<a href="https://colab.research.google.com/github/SofiaAkhtar/WE-Module3/blob/main/NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import random

def preprocess_text(text):
  """Preprocesses the text by splitting it into words and converting them to lowercase."""
  words = text.lower().split()
  return words

def build_transitions(words):
  """Builds a dictionary to store the word transitions.

  Args:
      words: A list of words representing the preprocessed text.

  Returns:
      A dictionary where keys are current words and values are lists of following words.
  """
  transitions = {}
  prev_word = None
  for current_word in words:
    if prev_word is not None:
      if prev_word not in transitions:
        transitions[prev_word] = []
      transitions[prev_word].append(current_word)
    prev_word = current_word
  return transitions

def generate_chain(transitions, start_word, output_length):
  """Generates a markov chain of length output_length starting with start_word.

  Args:
      transitions: A dictionary where keys are current words and values are lists of following words.
      start_word: The word to start the markov chain with.
      output_length: The length of the markov chain to generate.

  Returns:
      A list of words representing the generated markov chain.
  """
  markov_chain = [start_word]
  for _ in range(output_length - 1):
    if start_word not in transitions:
      # If the current word doesn't have any following words, choose a random word
      next_word = random.choice(words)
    else:
      # Choose a random word from the following words based on their probabilities
      next_word = random.choice(transitions[start_word])
    markov_chain.append(next_word)
    start_word = next_word
  return markov_chain

# Example usage
text = "This is an example text to generate a markov chain."
words = preprocess_text(text)
transitions = build_transitions(words)
start_word = "This"
output_length = 10
markov_chain = generate_chain(transitions, start_word, output_length)
print("Markov chain:", markov_chain)


Markov chain: ['This', 'this', 'is', 'an', 'example', 'text', 'to', 'generate', 'a', 'markov']
