In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere beneath /kaggle/input.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
pip install gradio


Collecting gradio
  Downloading gradio-5.35.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.14-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.10.4 (from gradio)
  Downloading gradio_client-1.10.4-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.12.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Col

In [5]:
import gradio as gr

# Location of the plain-text training corpus; trainMarkovModel() reads it
# line by line and treats every line as one word sequence.
train_data = "/kaggle/input/markov-set/markov_training_data.txt"

In [6]:

# Model state, filled in by trainMarkovModel():
#   first_possible_words  : word -> probability that the word opens a line
#   second_possible_words : first word -> {second word: probability}
#   transitions           : (word, next word) pair -> {following word: probability}
first_possible_words = dict()
second_possible_words = dict()
transitions = dict()

In [7]:
def expandDict(dictionary, key, value):
    """Append ``value`` to the list stored under ``key``.

    The list is created on first use of ``key``. ``dictionary`` is mutated in
    place and nothing is returned.
    """
    dictionary.setdefault(key, []).append(value)

In [8]:
def get_next_probability(word_list):
    """Convert a list of observed words into relative frequencies.

    Args:
        word_list: list of words, duplicates allowed.

    Returns:
        dict mapping each distinct word to ``occurrences / len(word_list)``,
        keyed in order of first appearance. An empty list yields ``{}``.
    """
    size = len(word_list)
    counts = {}
    for item in word_list:
        if item in counts:
            counts[item] += 1
        else:
            counts[item] = 1
    return {item: seen / size for item, seen in counts.items()}

In [9]:
def trainMarkovModel():
    """Build the second-order Markov tables from the corpus at ``train_data``.

    Each line of the file is lower-cased and split on whitespace, then used to
    fill three module-level dictionaries:

    * ``first_possible_words``  -- ``{word: probability the word starts a line}``
    * ``second_possible_words`` -- ``{first_word: {second_word: probability}}``
    * ``transitions``           -- ``{(w1, w2): {following_word: probability}}``;
      the sentinel ``'END'`` is recorded as the successor of the last two
      words of every line that has at least two tokens.

    Raw observations are collected in local dictionaries and only afterwards
    copied into the module-level tables, which are cleared first and updated
    in place. This keeps the function safe to call repeatedly (for example when
    the notebook cell is re-run): accumulating straight into tables that have
    already been normalised would add counts to probabilities and attempt to
    ``append`` to probability dicts.

    Returns:
        None. The three module-level dictionaries are overwritten in place.

    Raises:
        OSError: if ``train_data`` cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    first_counts = {}
    second_observed = {}
    transition_observed = {}

    # NOTE(review): assumes the corpus is UTF-8 encoded; the explicit encoding
    # avoids depending on the platform default.
    with open(train_data, 'r', encoding='utf-8') as file:
        for line in file:
            tokens = line.strip().lower().split()
            last_index = len(tokens) - 1
            for i, token in enumerate(tokens):
                if i == 0:
                    first_counts[token] = first_counts.get(token, 0) + 1
                elif i == 1:
                    expandDict(second_observed, tokens[0], token)
                else:
                    expandDict(transition_observed, (tokens[i - 2], tokens[i - 1]), token)
                # Mark the end of the line; single-word lines have no pair to key on.
                if i == last_index and i > 0:
                    expandDict(transition_observed, (tokens[i - 1], token), 'END')

    total_first_words = sum(first_counts.values())

    # Replace the contents of the shared tables in place so that any existing
    # reference to them sees the freshly trained model.
    first_possible_words.clear()
    first_possible_words.update(
        {word: count / total_first_words for word, count in first_counts.items()}
    )

    second_possible_words.clear()
    second_possible_words.update(
        {key: get_next_probability(words) for key, words in second_observed.items()}
    )

    transitions.clear()
    transitions.update(
        {key: get_next_probability(words) for key, words in transition_observed.items()}
    )


In [10]:
def next_word(context):
    """Return the candidate next words for ``context``, most probable first.

    Args:
        context: either a single word (``str``), looked up in
            ``second_possible_words``, or a ``(word, word)`` tuple, looked up
            in ``transitions``.

    Returns:
        list of candidate words ordered by descending probability; words with
        equal probability keep the order in which they were stored. An unknown
        context, or a context of any other type, yields ``[]``.
    """
    if isinstance(context, str):
        candidates = second_possible_words.get(context, {})
    elif isinstance(context, tuple):
        candidates = transitions.get(context, {})
    else:
        return []
    # sorted() is stable (also with reverse=True), so ties keep insertion order.
    return sorted(candidates, key=candidates.get, reverse=True)

In [11]:
def get_suggestions(user_input, limit=5):
    """Suggest up to ``limit`` next words for the text typed so far.

    The input is stripped, lower-cased and split on whitespace. With no words
    typed, the suggestions are the possible first words ranked by descending
    probability. With one word, candidates come from ``next_word(word)``; with
    two or more, from ``next_word`` called on the last two words. Candidates
    from ``next_word`` are kept in the order it returns them.

    The internal ``'END'`` end-of-line sentinel is never offered as a
    suggestion. It cannot collide with a real corpus word because training
    lower-cases every token.

    Args:
        user_input: text typed by the user; ``None`` is treated as empty.
        limit: maximum number of suggestions to return (default 5).

    Returns:
        list of suggested words, or ``["No suggestions found."]`` when there
        is nothing to suggest.
    """
    end_marker = 'END'
    tokens = (user_input or "").strip().lower().split()

    if not tokens:
        candidates = sorted(
            first_possible_words, key=first_possible_words.get, reverse=True
        )
    elif len(tokens) == 1:
        candidates = next_word(tokens[0])
    else:
        candidates = next_word((tokens[-2], tokens[-1]))

    # Drop the sentinel before truncating so it cannot use up a suggestion slot.
    suggestions = [word for word in candidates if word != end_marker][:limit]
    return suggestions if suggestions else ["No suggestions found."]

# Train the model: reads the corpus at train_data and fills the module-level
# first_possible_words / second_possible_words / transitions tables.
trainMarkovModel()

In [13]:
# Gradio UI: a single text box whose content drives a row of suggestion "pills".
# The custom CSS below styles the page, the suggestion container and the pills.
with gr.Blocks(css="""
    body {
    background: #f8fafc;
    font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
    color: #1e293b;
    width:600px;
    margin-left: 200px;
}

.suggestion-box {
    display: flex;
    flex-wrap: wrap;
    gap: 12px;
    margin-top: 18px;
    padding: 12px;
    background: #ffffff;
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0, 0, 0, 0.06);
    border: 1px solid #e2e8f0;
    justify-content: flex-start;
    align-items: center;
    width:600px;
}

.suggestion-pill {
    background: linear-gradient(135deg, #4f46e5, #6366f1);
    color: white;
    padding: 10px 16px;
    border-radius: 24px;
    font-weight: 600;
    font-size: 15px;
    box-shadow: 0 2px 8px rgba(79, 70, 229, 0.3);
    transition: transform 0.2s ease-in-out;
    cursor: pointer;
}

.suggestion-pill:hover {
    transform: scale(1.05);
    box-shadow: 0 4px 12px rgba(79, 70, 229, 0.4);
}

textarea, input[type="text"] {
    border-radius: 10px !important;
    padding: 12px !important;
    border: 1px solid #cbd5e1 !important;
    background-color: #ffffff !important;
    font-size: 16px !important;
    box-shadow: none !important;
}

textarea:focus, input[type="text"]:focus {
    outline: none !important;
    border: 1px solid #6366f1 !important;
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
}

.gradio-container, .main, .interface, .wrap, .output_class {
    all: unset;
    width:600px;
    margin-left: 200px;
    margin-top: 100px;
}



""") as demo:
    gr.Markdown("""
    # 🧠 Markov Chain Text Predictor  
    Enter a sentence below and receive AI-generated word predictions based on Markov Chains.
    """)

    with gr.Column():
        text_input = gr.Textbox(
            label="Type your sentence here",
            placeholder="e.g., once upon a",
            show_label=True,
            lines=1
        )

        # Starts as an empty container; update_ui() replaces its HTML on every edit.
        suggestions_output = gr.HTML("<div class='suggestion-box'></div>")

    def update_ui(user_input):
        """Render the suggestions for ``user_input`` as HTML pills.

        Args:
            user_input: current content of the text box.

        Returns:
            str: a ``suggestion-box`` div containing one ``suggestion-pill``
            div per suggestion returned by ``get_suggestions``.
        """
        # Local import so this cell does not depend on an import elsewhere.
        from html import escape

        # Suggestions originate from the training corpus; escape them so a
        # token containing '<', '>' or '&' cannot inject markup into the page.
        pills = "".join(
            f"<div class='suggestion-pill'>{escape(word)}</div>"
            for word in get_suggestions(user_input)
        )
        return "<div class='suggestion-box'>" + pills + "</div>"

    # Re-render the suggestions whenever the text box content changes.
    text_input.change(fn=update_ui, inputs=text_input, outputs=[suggestions_output])

demo.launch()


* Running on local URL:  http://127.0.0.1:7862
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://c48546681ce668ca06.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


