<a href="https://colab.research.google.com/github/Nivriti-ctrl/CODECRAFT_GA_03/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
from collections import defaultdict

In [2]:
training_text = """
Markov chains are a powerful statistical tool.
They predict the next state based on the current state.
This can be used for text generation and many other applications.
"""

In [4]:
def build_markov_chain(text, n=2):
    """
    Build a Markov Chain model.

    Parameters:
        text (str): The input text.
        n (int): The size of the n-gram (default: 2 for bigrams).

    Returns:
        dict: A dictionary representing the Markov Chain.
    """
    words = text.split()
    markov_chain = defaultdict(list)

    # Create n-grams
    for i in range(len(words) - n):
        key = tuple(words[i:i+n-1])  # Create a tuple for the key
        next_word = words[i+n-1]    # Get the next word
        markov_chain[key].append(next_word)

    return markov_chain

# Build the model
markov_model = build_markov_chain(training_text, n=2)

In [6]:
def generate_text(chain, n=2, length=50):
    """
    Generate text using a Markov Chain model.

    Parameters:
        chain (dict): The Markov Chain model.
        n (int): The size of the n-gram (default: 2 for bigrams).
        length (int): Number of words to generate.

    Returns:
        str: Generated text.
    """
    # Choose a random starting point
    key = random.choice(list(chain.keys()))
    result = list(key)

    for _ in range(length - n + 1):
        if key not in chain:
            break
        next_word = random.choice(chain[key])
        result.append(next_word)
        key = tuple(result[-(n-1):])  # Update key with the latest n-1 words

    return ' '.join(result)

# Generate text
generated_text = generate_text(markov_model, n=2, length=50)
print("Generated Text:\n", generated_text)

Generated Text:
 based on the next state based on the current state. This can be used for text generation and many other


In [7]:
# Example: Downloading a public domain book (e.g., "Alice's Adventures in Wonderland")
!wget https://www.gutenberg.org/files/11/11-0.txt -O training_text.txt

# Read the downloaded file
with open('training_text.txt', 'r', encoding='utf-8') as file:
    training_text = file.read()

--2025-01-26 09:20:38--  https://www.gutenberg.org/files/11/11-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 154573 (151K) [text/plain]
Saving to: ‘training_text.txt’


2025-01-26 09:20:39 (837 KB/s) - ‘training_text.txt’ saved [154573/154573]



In [8]:
markov_model = build_markov_chain(training_text, n=3)
generated_text = generate_text(markov_model, n=3, length=100)

In [9]:
import re

def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\s+', ' ', text)  # Remove extra whitespace
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    return text

training_text = preprocess_text(training_text)

In [10]:
def build_weighted_markov_chain(text, n=2):
    words = text.split()
    markov_chain = defaultdict(lambda: defaultdict(int))

    # Create n-grams
    for i in range(len(words) - n):
        key = tuple(words[i:i+n-1])
        next_word = words[i+n-1]
        markov_chain[key][next_word] += 1

    # Convert counts to probabilities
    weighted_chain = {key: dict(value) for key, value in markov_chain.items()}
    return weighted_chain

markov_model = build_weighted_markov_chain(training_text, n=3)

def generate_text_weighted(chain, n=2, length=50):
    key = random.choice(list(chain.keys()))
    result = list(key)

    for _ in range(length - n + 1):
        if key not in chain:
            break
        choices, weights = zip(*chain[key].items())
        next_word = random.choices(choices, weights=weights)[0]
        result.append(next_word)
        key = tuple(result[-(n-1):])

    return ' '.join(result)

generated_text = generate_text_weighted(markov_model, n=3, length=100)
print("Generated Text:\n", generated_text)

Generated Text:
 of serpent thats all i almost wish id gone to see it pop down a large plate came skimming out straight at the top of his pocket and pulled out a box of comfits luckily the salt water had not gone we know it to her in an undertone importantunimportantunimportantimportant as if she should meet the real mary ann and be quick about it and as the things i used to come out among the people near the three gardeners but she was not easy to take the place of the _what_ said the hatter you _must_ have meant some


In [11]:
with open('generated_text.txt', 'w') as file:
    file.write(generated_text)

In [12]:
from google.colab import files
files.download('generated_text.txt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>