In [None]:
#@title 1. Download dataset tinyshakespeare
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt

--2025-07-03 00:53:51--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2025-07-03 00:53:51 (126 MB/s) - ‘input.txt’ saved [1115394/1115394]



In [None]:
#@title Install necessary libraries
!pip install spacy markovify nltk -q
!python -m spacy download en_core_web_sm
!pip install streamlit pyngrok

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/235.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.8/235.8 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecti

In [None]:
#@title Import required libraries
import spacy
import re
import markovify
import nltk
from nltk.corpus import gutenberg
import warnings
warnings.filterwarnings('ignore')

In [None]:
#@title Load spaCy English model
# Load the small English pipeline once and keep it as the notebook-wide `nlp`
# object; the preprocessing cell calls it to split the corpus into sentences.
# Requires the en_core_web_sm package from the install cell (the installer
# output notes that a runtime restart may be needed before it can be loaded).
nlp = spacy.load("en_core_web_sm")

In [None]:
#@title Load the TinyShakespeare dataset
# Read the whole downloaded corpus into memory as a single UTF-8 string.
with open("input.txt", mode="r", encoding="utf-8") as corpus_file:
    raw_text = corpus_file.read()

In [None]:
#@title Preprocessing function
def preprocess(text):
    """
    Clean the input text with regex and split it into sentences with spaCy.

    Bracketed spans such as ``[Exit]`` are removed and every run of whitespace
    (newlines included) is collapsed to a single space. The result is then
    passed through the notebook-level ``nlp`` pipeline for sentence
    segmentation.

    Args:
        text: Raw corpus text.

    Returns:
        The detected sentences, each stripped of surrounding whitespace and
        joined with newlines. Sentences whose stripped text is one character
        or shorter are dropped.
    """
    # Drop bracketed spans. "." does not match a newline here, so only brackets
    # that open and close on the same line are removed.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'\s+', ' ', text)

    # Make room for this text in spaCy's input-length limit. Only ever raise
    # the limit: assigning len(text) + 100 unconditionally would shrink it on
    # the shared `nlp` object whenever a short text is processed.
    nlp.max_length = max(nlp.max_length, len(text) + 100)

    # Use spaCy for sentence segmentation; strip each sentence once and keep
    # only those longer than a single character.
    doc = nlp(text)
    stripped = (sent.text.strip() for sent in doc.sents)
    return "\n".join(s for s in stripped if len(s) > 1)

In [None]:
#@title Preprocess the text & Build the Markov chain model
# Clean the raw corpus and split it into newline-separated sentences.
clean_text = preprocess(raw_text)

# state_size=2: each next word is chosen based on the two words before it.
# NOTE(review): markovify.Text presumably applies its own sentence splitting to
# the input; if the newline-delimited spaCy sentences are meant to be used
# as-is, markovify.NewlineText may be the intended class — confirm.
text_model = markovify.Text(clean_text, state_size=2)  # Trigram model

In [None]:
#@title Generate sample text
print("📜 Generated Text Sample:\n")
for _ in range(5):
    # make_sentence returns None when no acceptable sentence is produced within
    # `tries` attempts; show a placeholder instead of printing the word "None".
    sentence = text_model.make_sentence(tries=100)
    print(sentence if sentence is not None else "[Failed to generate sentence]")

📜 Generated Text Sample:

Servant: Here is a worthy choice?
But stay, here come the Lords of York, be patient.
First if all obstacles were cut away, And that you two are sped.
MENENIUS: You have made peace with God, And art thou slain Tybalt? wilt thou hunt?
But let it come.


In [None]:
#@title Write app.py as streamlit web code for implement Text generation with markov chains
%%writefile app.py
import streamlit as st
import spacy
import re
import markovify
import warnings

warnings.filterwarnings("ignore")

# Load the spaCy pipeline once per server process. Streamlit re-executes this
# script on every widget interaction; st.cache_resource hands back the same
# pipeline object on those reruns instead of loading it from disk again.
@st.cache_resource
def _load_nlp():
    """Return the shared en_core_web_sm pipeline."""
    return spacy.load("en_core_web_sm")


nlp = _load_nlp()

@st.cache_data
def load_and_preprocess_text():
    """
    Read ``input.txt`` and return it as newline-separated sentences.

    Bracketed spans are removed, whitespace runs are collapsed to single
    spaces, and the module-level ``nlp`` pipeline splits the text into
    sentences. ``st.cache_data`` stores the returned string so script reruns
    do not repeat the work.

    Returns:
        The detected sentences, each stripped of surrounding whitespace and
        joined with newlines. Sentences whose stripped text is one character
        or shorter are dropped.
    """
    # Load dataset
    with open("input.txt", "r", encoding="utf-8") as f:
        raw_text = f.read()

    # Remove bracketed stage directions (single-line only: "." does not match
    # a newline) and collapse extra whitespace.
    text = re.sub(r'\[.*?\]', '', raw_text)
    text = re.sub(r'\s+', ' ', text)

    # Make room for this text in spaCy's input-length limit. Only ever raise
    # the limit so a short input cannot shrink it on the shared `nlp` object.
    nlp.max_length = max(nlp.max_length, len(text) + 100)

    # Use spaCy to split into clean sentences; strip each one only once.
    doc = nlp(text)
    stripped = (sent.text.strip() for sent in doc.sents)

    # Join into a single clean text
    return "\n".join(s for s in stripped if len(s) > 1)

# Page header
st.title("🎭 Markov Chain Shakespeare Generator")
st.markdown("Generate Shakespeare-style text using a simple Markov Chain model.")


@st.cache_resource
def _build_model(corpus):
    """Build the Markov model for `corpus` once and reuse it on later reruns.

    Streamlit re-executes the whole script on every widget interaction; without
    caching, the model would be rebuilt from the full corpus each time.
    """
    return markovify.Text(corpus, state_size=2)  # Trigram Markov model


def _generate_sentence(model, seed, tries):
    """Return one generated sentence, or None when generation fails.

    With an empty `seed` the sentence is unconstrained. Otherwise it must
    start with `seed`; a seed that the model cannot start from (not in the
    corpus, or an unsupported number of words) is reported as a failure (None)
    instead of raising, so the page keeps rendering.
    """
    if not seed:
        return model.make_sentence(tries=tries)
    try:
        return model.make_sentence_with_start(seed, strict=False, tries=tries)
    except (KeyError, markovify.text.ParamError):
        return None


# Load and preprocess dataset
with st.spinner("Loading and preprocessing dataset..."):
    clean_text = load_and_preprocess_text()
    model = _build_model(clean_text)

# Input controls
st.sidebar.header("🔧 Generation Settings")
num_sentences = st.sidebar.slider("Number of sentences", 1, 10, 5)
tries = st.sidebar.slider("Tries per sentence", 10, 200, 100)
# Strip the seed so a whitespace-only entry is treated as "no seed".
seed = st.sidebar.text_input("Seed word (optional)", value="").strip()

# Generate text
if st.button("🪄 Generate Text"):
    st.subheader("📝 Generated Text")
    for _ in range(num_sentences):
        sentence = _generate_sentence(model, seed, tries)
        if sentence:
            st.write(sentence)
        else:
            st.write("_[Failed to generate sentence]_")

Writing app.py


In [None]:
#@title run streamlit + ngrok running
from pyngrok import ngrok
import os

NGROK_AUTH_TOKEN = "YOUR_NGROK_AUTHTOKEN" #@param {type:"string"}
os.environ["NGROK_AUTH_TOKEN"] = NGROK_AUTH_TOKEN

!ngrok authtoken $NGROK_AUTH_TOKEN

# Hentikan ngrok sebelumnya
ngrok.kill()

# Jalankan streamlit di background
!nohup streamlit run app.py --server.port 8501 > /dev/null 2>&1 &

# Ngrok forwarding
public_url = ngrok.connect(8501)
print("🌐 Web UI:", public_url)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
🌐 Web UI: NgrokTunnel: "https://b657-34-150-152-14.ngrok-free.app" -> "http://localhost:8501"
