In [13]:
import random
import re

def preprocess_text(text):
    if not text:
        raise ValueError("Input text is empty.")

    # Tokenize the text into words
    words = text.split()
    # Clean the text by removing punctuation and special characters
    clean_words = [re.sub(r'[^\w\s]', '', word) for word in words]
    # Remove empty strings
    clean_words = [word for word in clean_words if word]

    if not clean_words:
        raise ValueError("No valid words found in input text.")

    return clean_words

def build_markov_model(words, chain_length):
    if chain_length > len(words):
        raise ValueError("Chain length is greater than the length of the text.")

    markov_model = {}
    for i in range(len(words) - chain_length):
        state = tuple(words[i:i + chain_length])
        next_word = words[i + chain_length]
        if state not in markov_model:
            markov_model[state] = []
        markov_model[state].append(next_word)
    return markov_model

def generate_sentence(markov_model, num_generated):
    current_state = random.choice(list(markov_model.keys()))
    generated_sentence = list(current_state)

    while len(generated_sentence) < num_generated:
        if current_state not in markov_model:
            break
        next_word = random.choice(markov_model[current_state])
        generated_sentence.append(next_word)
        current_state = tuple(generated_sentence[-len(current_state):])

    return ' '.join(generated_sentence)


def main(filename, chain_length, start_words, num_generated):
    try:
        with open(filename, 'r') as file:
            text = file.read()

        words = preprocess_text(text)
        markov_model = build_markov_model(words, chain_length)
        generated_sentence = generate_sentence(markov_model, num_generated)  # Remove start_words argument
        print(generated_sentence)

    except FileNotFoundError:
        print("Error: File not found.")

    except ValueError as e:
        print(f"Error: {e}")



In [17]:
'''sample text that was given in the file ->
The quick brown fox jumps over the lazy dog.
A Markov chain is a stochastic model describing a sequence of possible events in which the probability of each event depends only on the state attained in the previous event.
It is named after the Russian mathematician Andrey Markov.
Markov chains have many applications as statistical models of real-world processes.
'''
filename = 'test.txt'
chain_length = 2
start_words = ['The', 'quick']
num_generated = 10
main(filename, chain_length, start_words, num_generated)

lazy dog A Markov chain is a stochastic model describing


In [18]:
filename = 'test.txt'
chain_length = 2
start_words = ['The', 'quick']
num_generated = 10
main(filename, chain_length, start_words, num_generated)

each event depends only on the state attained in the


In [19]:
filename = 'test.txt'
chain_length = 2
start_words = ['The', 'quick']
num_generated = 10
main(filename, chain_length, start_words, num_generated)

attained in the previous event It is named after the


In [20]:
filename = 'test.txt'
chain_length = 2
start_words = ['The', 'quick']
num_generated = 10
main(filename, chain_length, start_words, num_generated)

over the lazy dog A Markov chain is a stochastic
