In [1]:
# STEP 1: Load and clean text data

# Sample text (You can later replace this with your own)
sample_text = """
Once upon a time, there was a little girl named Red Riding Hood. She lived in a village near the forest.
Whenever she went out, the little girl wore a red riding cloak, so everyone in the village called her Little Red Riding Hood.
"""

# Convert to lowercase and remove line breaks for simplicity
def clean_text(text):
    text = text.lower()
    text = text.replace("\n", " ")
    return text

cleaned_text = clean_text(sample_text)
print("Cleaned text:\n", cleaned_text[:200])


Cleaned text:
  once upon a time, there was a little girl named red riding hood. she lived in a village near the forest. whenever she went out, the little girl wore a red riding cloak, so everyone in the village cal


In [2]:
# STEP 2: Build the Markov Chain Model (word-based)

import random

def build_markov_chain(text):
    words = text.split()
    markov_chain = {}

    for i in range(len(words) - 1):
        curr_word = words[i]
        next_word = words[i + 1]

        if curr_word not in markov_chain:
            markov_chain[curr_word] = []

        markov_chain[curr_word].append(next_word)

    return markov_chain

# Build the model
markov_model = build_markov_chain(cleaned_text)

# Show part of the model
for word in list(markov_model)[:10]:  # Show only 10 entries
    print(f"{word} -> {markov_model[word]}")


once -> ['upon']
upon -> ['a']
a -> ['time,', 'little', 'village', 'red']
time, -> ['there']
there -> ['was']
was -> ['a']
little -> ['girl', 'girl', 'red']
girl -> ['named', 'wore']
named -> ['red']
red -> ['riding', 'riding', 'riding']


In [5]:
# STEP 3: Generate text using the Markov Chain model

def generate_text(chain, start_word, length=20):
    word = start_word
    result = [word]

    for _ in range(length - 1):
        if word in chain:
            next_words = chain[word]
            word = random.choice(next_words)
            result.append(word)
        else:
            break  # Stop if no next word is found

    return ' '.join(result)

# Example generation
generated = generate_text(markov_model, start_word="little", length=25)
print("Generated text:\n", generated)


Generated text:
 little girl wore a red riding hood. she lived in the village called her little girl named red riding hood. she lived in a time,


In [6]:
from google.colab import files

uploaded = files.upload()

# Read the uploaded text file
filename = list(uploaded.keys())[0]

with open(filename, 'r', encoding='utf-8') as f:
    sample_text = f.read()

cleaned_text = clean_text(sample_text)
markov_model = build_markov_chain(cleaned_text)

# Generate text
generated = generate_text(markov_model, start_word="the", length=30)
print("Generated text:\n", generated)


Saving sampledata.txt to sampledata.txt
Generated text:
 the history of valor, invention, and traditions. people of valor, invention, and traditions. people of valor, invention, and traditions. people of india is rich with events of valor, invention, and


In [7]:
# STEP 5: Character-based Markov Chain

def build_char_markov_chain(text):
    markov_chain = {}
    for i in range(len(text) - 1):
        curr_char = text[i]
        next_char = text[i + 1]

        if curr_char not in markov_chain:
            markov_chain[curr_char] = []

        markov_chain[curr_char].append(next_char)

    return markov_chain

def generate_char_text(chain, start_char, length=100):
    char = start_char
    result = [char]

    for _ in range(length - 1):
        if char in chain:
            next_chars = chain[char]
            char = random.choice(next_chars)
            result.append(char)
        else:
            break

    return ''.join(result)

# Build char-based chain
char_markov_model = build_char_markov_chain(cleaned_text)

# Generate character text
generated_chars = generate_char_text(char_markov_model, start_char='l', length=200)
print("Generated character-based text:\n", generated_chars)


Generated character-based text:
 lelond tindiorseres ond va is les. es ioulivereth veth alond hes adiondinstr, h diveveogr, onsstures iad helts. alets diva cureltitivendint a anve rmondigry plopltoff hinverogiof of re peve tiny tithe


In [8]:
import ipywidgets as widgets
from IPython.display import display

# Widgets
text_area = widgets.Textarea(
    value=sample_text,
    placeholder='Paste or type your text here...',
    description='Text:',
    layout=widgets.Layout(width='100%', height='150px')
)

start_word_widget = widgets.Text(
    value='little',
    description='Start word/char:'
)

length_slider = widgets.IntSlider(
    value=30,
    min=5,
    max=300,
    step=5,
    description='Length:'
)

mode_selector = widgets.ToggleButtons(
    options=['Word-based', 'Char-based'],
    description='Mode:',
)

output = widgets.Output()

def on_generate_clicked(b):
    output.clear_output()
    user_text = clean_text(text_area.value)
    length = length_slider.value
    start = start_word_widget.value.strip()

    with output:
        if mode_selector.value == 'Word-based':
            model = build_markov_chain(user_text)
            if start not in model:
                print("Start word not in model.")
                return
            generated = generate_text(model, start, length)
        else:
            model = build_char_markov_chain(user_text)
            if start[0] not in model:
                print("Start character not in model.")
                return
            generated = generate_char_text(model, start[0], length)

        print("Generated Text:\n", generated)

generate_button = widgets.Button(description="Generate Text")
generate_button.on_click(on_generate_clicked)

# Display UI
display(text_area, start_word_widget, length_slider, mode_selector, generate_button, output)


Textarea(value='\nIndia is a vast country with diverse cultures and traditions. People of different religions …

Text(value='little', description='Start word/char:')

IntSlider(value=30, description='Length:', max=300, min=5, step=5)

ToggleButtons(description='Mode:', options=('Word-based', 'Char-based'), value='Word-based')

Button(description='Generate Text', style=ButtonStyle())

Output()