# Repo → Live Microsite (MCP + Gemini)

**Goal:** Turn any public GitHub repo into a simple **microsite** you can view **outside the notebook**.

Workflow:
1) **Discover** docs from a repo (optionally tries *DeepWiki MCP* first when enabled; otherwise falls back to the GitHub API).
2) Ask **Gemini** to **plan** the site structure.
3) **Generate HTML** (one-page site with your branding).
4) **Publish** locally (open in browser) or attempt **EdgeOne Pages MCP** deploy.
5) Save a **site.zip** for easy upload to any static host.

> Minimal setup: a Gemini API key. MCP servers are optional and can be toggled.

## Quick setup

- **Install** dependencies (uncomment and run if needed):
```python
# %pip install google-generativeai ipywidgets httpx rich markdown
# Optional MCP client(s), if you have a preferred library:
# %pip install mcp
# %pip install ipywidgets
# Only if your notebook front end still needs the classic widgets extension enabled:
# !jupyter nbextension enable --py widgetsnbextension
```

In [None]:
import os

# Resolve the API key: prefer Colab's secret store, fall back to the
# GOOGLE_API_KEY environment variable so the notebook also runs outside Colab.
try:
    from google.colab import userdata
    _GEMINI_API_KEY = userdata.get('GOOGLE_API_KEY')
except Exception:  # not on Colab, or the secret is missing / not accessible
    _GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Import the SDK separately so a missing package and a missing key are
# reported as two distinct problems.
try:
    import google.generativeai as genai
except ImportError:
    genai = None  # keeps the name defined; live calls will fail until installed
    print("Install google-generativeai to enable live calls.")
else:
    if _GEMINI_API_KEY:
        genai.configure(api_key=_GEMINI_API_KEY)
    else:
        print("GOOGLE_API_KEY not found (Colab secret or environment variable); Gemini calls will fail.")

# Model name used for the planning step.
GEMINI_MODEL = "gemini-2.5-flash"

# Config
# Feature toggles and endpoints for the optional MCP integrations.
USE_MCP_DEEPWIKI = False     # True to try DeepWiki MCP first, else use GitHub fallback
USE_MCP_EDGEONE  = False     # True to try EdgeOne Pages MCP for public deploy
DEEPWIKI_MCP_URL = "https://mcp.deepwiki.com/sse"         # change if your endpoint differs
EDGEONE_MCP_URL  = "https://pages.edgeone.dev/mcp"        # change if your endpoint differs

# Default lab inputs
# These seed the input widgets of the stepper UI and can be changed there.
DEFAULT_REPO  = "pydantic/pydantic"   # format: owner/repo
SITE_TITLE    = "DeepWiki Microsite"  # initial value of the "Site title" field
TONE          = "executive-brief"     # forwarded to the Gemini planning prompt
THEME         = "light"               # the UI dropdown offers "light" or "dark"

# Location to write the site
# Each run gets its own output directory, named with a random 8-hex-digit id
# and created relative to the current working directory.
import uuid, os
RUN_ID = f"labA-{uuid.uuid4().hex[:8]}"
SITE_DIR = f"./site_{RUN_ID}"
os.makedirs(SITE_DIR, exist_ok=True)
print({"RUN_ID": RUN_ID, "SITE_DIR": SITE_DIR})

## Utilities — robust JSON parsing for Gemini responses

In [None]:
import json, base64, re

def parse_gemini_json(resp):
    """Extract the first parseable JSON document from a Gemini response.

    Sources are tried in order: ``resp.text``, then for every candidate part
    its ``text`` followed by its ``inline_data`` (only when the MIME type is
    ``application/json``; the payload is base64-decoded before parsing).
    Text is parsed strictly first and, failing that, a trailing ``{...}``
    span is tried. A source that cannot be parsed is skipped so that later
    parts still get a chance.

    Args:
        resp: Response-like object exposing ``text`` and/or ``candidates``.

    Returns:
        The decoded JSON value.

    Raises:
        RuntimeError: If no source yields valid JSON; the message lists the
            candidates' ``finish_reason`` values.
    """
    missing = object()  # sentinel: JSON ``null`` legitimately decodes to None

    def _loads_lenient(text):
        """Parse *text* strictly, then as a trailing ``{...}`` span; else ``missing``."""
        text = (text or "").strip()
        if not text:
            return missing
        try:
            return json.loads(text)
        except ValueError:
            pass
        m = re.search(r"\{[\s\S]*\}\s*$", text)
        if m:
            try:
                return json.loads(m.group(0))
            except ValueError:
                pass
        return missing

    # The ``text`` accessor may itself raise (e.g. ValueError when the
    # response holds no usable part); treat that as "no top-level text".
    try:
        top_text = getattr(resp, "text", None)
    except (AttributeError, ValueError):
        top_text = None
    parsed = _loads_lenient(top_text)
    if parsed is not missing:
        return parsed

    candidates = getattr(resp, "candidates", []) or []
    for c in candidates:
        content = getattr(c, "content", None)
        parts = getattr(content, "parts", []) if content else []
        for p in parts:
            parsed = _loads_lenient(getattr(p, "text", None))
            if parsed is not missing:
                return parsed
            inline = getattr(p, "inline_data", None)
            if inline is None:
                continue
            mime = getattr(inline, "mime_type", "")
            data = getattr(inline, "data", "")
            if mime == "application/json" and data:
                try:
                    return json.loads(base64.b64decode(data).decode("utf-8"))
                except (ValueError, TypeError):
                    # Bad base64 / UTF-8 / JSON: keep looking in other parts.
                    continue

    reasons = [getattr(c, "finish_reason", None) for c in candidates]
    raise RuntimeError(f"No JSON found in Gemini response. finish_reasons={reasons}")

## Utilities — HTML generator & file helpers

In [None]:
from pathlib import Path
from datetime import datetime

def build_html(site_title, sections, theme="light"):
    """Render a complete single-page HTML document for the microsite.

    Args:
        site_title: Plain-text title; HTML-escaped before it is placed in
            ``<title>`` and the page header.
        sections: Iterable of dicts. ``"title"`` is treated as plain text
            (escaped, and slugified via ``re_slug`` for the heading ``id``);
            ``"html"`` is inserted verbatim and is expected to already be
            HTML.
        theme: ``"light"`` selects the light palette; any other value
            selects the dark palette.

    Returns:
        The document as one string, ending with a UTC "Generated" timestamp
        footer inside ``<main>``.
    """
    # Local imports keep this cell runnable on its own.
    from datetime import timezone
    from html import escape

    palette_bg = "#ffffff" if theme == "light" else "#0b0f14"
    palette_fg = "#0b0f14" if theme == "light" else "#e6edf3"
    accent = "#3b82f6"
    css = f"""
    body {{ font-family: -apple-system, Segoe UI, Roboto, sans-serif; background:{palette_bg}; color:{palette_fg}; margin:0; }}
    header {{ padding: 24px 16px; border-bottom: 1px solid #e5e7eb; }}
    h1 {{ margin:0; font-size: 28px; }}
    .container {{ max-width: 980px; margin: 0 auto; padding: 16px; }}
    .section {{ margin: 32px 0; }}
    .section h2 {{ color: {accent}; margin-bottom: 8px; }}
    .footer {{ margin-top: 48px; font-size: 12px; color:#6b7280; }}
    a {{ color: {accent}; text-decoration: none; }}
    a:hover {{ text-decoration: underline; }}
    pre {{ background: rgba(0,0,0,.05); padding: 12px; overflow:auto; border-radius:8px; }}
    code {{ font-family: ui-monospace, Menlo, Monaco, monospace; }}
    """

    # Titles come from user input / model output, so escape them; otherwise a
    # stray "<" or "&" would corrupt the markup.
    safe_title = escape(str(site_title))
    body = [f"<header><div class='container'><h1>{safe_title}</h1></div></header>", "<main class='container'>"]
    for sec in sections:
        title = str(sec.get("title") or "Untitled")
        section_html = sec.get("html", "")
        body.append(
            f"<section class='section'><h2 id='{re_slug(title)}'>{escape(title)}</h2>\n{section_html}\n</section>"
        )

    # Timezone-aware replacement for the deprecated datetime.utcnow(); tzinfo
    # is dropped so the rendered text keeps the "<naive ISO timestamp>Z" form.
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    body.append(f"<div class='footer'>Generated {stamp}Z</div>")
    body.append("</main>")

    head = (
        "<!doctype html><html><head><meta charset='utf-8'>"
        "<meta name='viewport' content='width=device-width,initial-scale=1'>"
        f"<title>{safe_title}</title><style>{css}</style></head>"
    )
    return f"{head}<body>{''.join(body)}</body></html>"

import re, os, zipfile
def re_slug(s):
    """Return a lowercase slug of *s* suitable for an HTML ``id``.

    Every run of characters outside ``a-z0-9`` (after lowercasing) becomes a
    single hyphen; leading and trailing hyphens are removed.
    """
    lowered = s.lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    return hyphenated.strip("-")

def write_site_files(site_dir, index_html, extra_files=None):
    """Write ``index.html`` and any extra files into *site_dir*.

    Args:
        site_dir: Output directory; created (with parents) if missing.
        index_html: Text written to ``index.html`` as UTF-8.
        extra_files: Optional mapping of relative path -> content. ``bytes``
            and ``bytearray`` values are written verbatim; anything else is
            written as text.

    Returns:
        The path of the written ``index.html`` as a string.
    """
    root = Path(site_dir)
    root.mkdir(parents=True, exist_ok=True)
    index_path = root / "index.html"
    index_path.write_text(index_html, encoding="utf-8")
    for rel, content in (extra_files or {}).items():
        target = root / rel
        target.parent.mkdir(parents=True, exist_ok=True)
        if isinstance(content, (bytes, bytearray)):
            target.write_bytes(content)
        else:
            # Pin UTF-8 so text assets match index.html instead of depending
            # on the platform's default encoding.
            target.write_text(content, encoding="utf-8")
    return str(index_path)

def zip_site(site_dir, zip_path):
    """Zip every file under *site_dir* into a deflate-compressed archive.

    Archive member names are relative to *site_dir*. Files are added in
    sorted order so the member list is deterministic.

    Args:
        site_dir: Directory tree to archive.
        zip_path: Destination ``.zip`` path (overwritten if it exists).

    Returns:
        *zip_path*, unchanged.
    """
    archive_abs = os.path.abspath(zip_path)
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, dirs, files in os.walk(site_dir):
            dirs.sort()  # in-place sort fixes the traversal order
            for name in sorted(files):
                full = os.path.join(root, name)
                # The archive already exists on disk by now; if it lives
                # inside site_dir, never add it to itself.
                if os.path.abspath(full) == archive_abs:
                    continue
                zf.write(full, arcname=os.path.relpath(full, site_dir))
    return zip_path

## Discovery — DeepWiki MCP (optional) → GitHub fallback

In [None]:
import httpx

def discover_repo_docs(repo_slug:str, use_mcp:bool=True):
    """Collect README and top-level ``docs/`` text files from a GitHub repo.

    The first README variant that returns non-empty content is kept, then
    every ``.md``/``.rst``/``.txt`` file listed directly under ``docs/`` by
    the GitHub contents API is downloaded. Network problems are reported via
    ``print`` and skipped, so the result may be partial or empty.

    Args:
        repo_slug: Repository in ``owner/repo`` form.
        use_mcp: When true, the (placeholder) DeepWiki MCP hook runs first.

    Returns:
        ``{"repo": repo_slug, "items": [{"path": str, "content": str}, ...]}``.

    Raises:
        ValueError: If *repo_slug* is not of the form ``owner/repo``.
    """
    owner, sep, repo = repo_slug.strip().partition("/")
    if not (sep and owner and repo):
        raise ValueError(f"Invalid repo {repo_slug!r}; expected format: owner/repo")

    # (Optional) DeepWiki MCP attempt — placeholder for your preferred MCP client.
    if use_mcp:
        try:
            # Placeholder: swap in real MCP client calls here if available.
            pass
        except Exception as e:
            print(f"[DeepWiki MCP] unavailable: {e} — using GitHub fallback.")

    # Fallback to GitHub raw + API
    base = f"https://raw.githubusercontent.com/{owner}/{repo}/HEAD"
    doc_suffixes = (".md", ".rst", ".txt")
    fetch_errors = (httpx.HTTPError, httpx.InvalidURL)
    items = []

    # One client reuses connections across requests; redirects are followed
    # so a redirected URL still resolves to its content.
    with httpx.Client(timeout=20, follow_redirects=True) as client:
        for candidate in ("README.md", "README.rst", "README.txt"):
            try:
                r = client.get(f"{base}/{candidate}")
            except fetch_errors as e:
                print(f"[GitHub] could not fetch {candidate}: {e}")
                continue
            if r.status_code == 200 and r.text.strip():
                items.append({"path": candidate, "content": r.text})
                break

        try:
            listing = client.get(f"https://api.github.com/repos/{owner}/{repo}/contents/docs")
            entries = listing.json() if listing.status_code == 200 else []
        except (*fetch_errors, ValueError) as e:  # ValueError: body was not JSON
            print(f"[GitHub] could not list docs/: {e}")
            entries = []
        if not isinstance(entries, list):
            # Only a JSON array is treated as a directory listing.
            entries = []

        for file in entries:
            if not isinstance(file, dict) or file.get("type") != "file":
                continue
            name = file.get("name", "")
            raw_url = file.get("download_url")
            if not raw_url or not name.lower().endswith(doc_suffixes):
                continue
            try:
                fr = client.get(raw_url)
            except fetch_errors as e:
                # Skip just this file; the remaining docs are still fetched.
                print(f"[GitHub] could not fetch docs/{name}: {e}")
                continue
            if fr.status_code == 200 and fr.text.strip():
                items.append({"path": f"docs/{name}", "content": fr.text})

    return {"repo": repo_slug, "items": items}

## Planning — use Gemini to propose site sections

In [None]:
def plan_sections_with_gemini(repo_slug, discovered_items, site_title, tone):
    """Ask Gemini to propose a section outline for the microsite.

    Up to 8 discovered items are sent, each truncated to a 1200-character
    preview, together with the repo slug, site title and tone. The model is
    configured to answer with JSON.

    Args:
        repo_slug: Repository identifier included in the prompt.
        discovered_items: List of ``{"path": str, "content": str}`` dicts.
        site_title: Title included in the prompt.
        tone: Writing tone included in the prompt.

    Returns:
        The parsed JSON object; guaranteed to contain a ``"sections"`` list.

    Raises:
        RuntimeError: If no JSON can be parsed from the response, or the
            JSON lacks a ``"sections"`` list.
    """
    previews = [
        {"path": it["path"], "preview": it["content"][:1200]}
        for it in discovered_items[:8]
    ]
    prompt = {
        "instruction": (
            "You are a content planner for a microsite. "
            "Given a repository and short previews of its docs, propose a site outline. "
            # Plain string (not an f-string): braces must be single, otherwise
            # the model is shown a schema with literal doubled braces.
            "Return STRICT JSON: {\"sections\": [{\"title\": str, \"source_path\": str, \"instructions\": str}]} "
            "Choose 3-6 sections; source_path must come from the provided previews."
        ),
        "repo": repo_slug,
        "site_title": site_title,
        "tone": tone,
        "previews": previews
    }
    # Use the configured model name; the previous code passed the local
    # ``model`` variable before it was assigned (UnboundLocalError).
    model = genai.GenerativeModel(
                GEMINI_MODEL,
                generation_config={
                    "response_mime_type": "application/json",
                    "temperature": 0.2,
                    "max_output_tokens": 1536,
                })
    resp = model.generate_content(json.dumps(prompt))
    js = parse_gemini_json(resp)
    if not isinstance(js, dict) or not isinstance(js.get("sections"), list):
        raise RuntimeError("Gemini returned JSON without a 'sections' list.")
    return js

## Synthesis — build HTML from planned sections

In [None]:
import markdown as mdx

def md_to_html(md_text:str)->str:
    """Convert Markdown to HTML, falling back to an escaped ``<pre>`` block.

    The ``markdown`` package is used with the ``fenced_code`` and ``tables``
    extensions. If conversion fails for any reason the raw text is returned
    inside ``<pre>`` with ``&``, ``<`` and ``>`` escaped.

    NOTE(review): the converted HTML is not sanitized here — confirm that is
    acceptable for content pulled from arbitrary repositories.
    """
    try:
        return mdx.markdown(md_text, extensions=["fenced_code", "tables"])
    except Exception:
        # Deliberately broad: rendering is best-effort and must not abort the
        # build. Escape "&" too, so entity-like text in the source is shown
        # literally instead of being interpreted by the browser.
        from html import escape
        return "<pre>" + escape(md_text, quote=False) + "</pre>"

def synthesize_sections(plan, discovered_items):
    """Turn planned sections into renderable section dicts.

    For each entry in ``plan["sections"]`` the source document is looked up
    by its (stripped) ``source_path`` — defaulting to ``README.md`` — and
    converted with ``md_to_html``. A missing or empty source yields a
    "Source not found" placeholder paragraph instead.

    Returns:
        A list of dicts with keys ``title``, ``html``, ``source_path`` and
        ``instructions``, in plan order.
    """
    sources = {}
    for item in discovered_items:
        sources[item["path"]] = item["content"]

    def _render(entry):
        """Build the output dict for a single planned section."""
        source_path = (entry.get("source_path") or "README.md").strip()
        markdown_text = sources.get(source_path, "")
        if markdown_text:
            rendered = md_to_html(markdown_text)
        else:
            rendered = f"<p><em>Source not found: {source_path}</em></p>"
        return {
            "title": entry.get("title", "Untitled"),
            "html": rendered,
            "source_path": source_path,
            "instructions": entry.get("instructions", ""),
        }

    return [_render(entry) for entry in plan.get("sections", [])]

## (Optional) Deployment — EdgeOne Pages MCP (best effort)

In [None]:
def try_edgeone_deploy(edgeone_url, site_dir):
    """Best-effort deploy: zip *site_dir* in memory and POST it to *edgeone_url*.

    The archive is uploaded as the multipart field ``site`` (``site.zip``).
    Nothing is raised: every failure is folded into the returned dict.

    Returns:
        ``{"ok": True, "url": ...}`` when the endpoint answers 200 with a
        JSON object containing a truthy ``url``; otherwise
        ``{"ok": False, "error": <message>}``.
    """
    try:
        import httpx, io, zipfile, os

        archive = io.BytesIO()
        with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
            for folder, _subdirs, filenames in os.walk(site_dir):
                for filename in filenames:
                    src = os.path.join(folder, filename)
                    bundle.write(src, arcname=os.path.relpath(src, site_dir))
        payload = archive.getvalue()

        response = httpx.post(
            edgeone_url,
            files={"site": ("site.zip", payload, "application/zip")},
            timeout=60,
        )
        if response.status_code == 200:
            result = response.json()
            if isinstance(result, dict) and result.get("url"):
                return {"ok": True, "url": result["url"]}
        return {"ok": False, "error": f"unexpected status {response.status_code}: {response.text[:200]}"}
    except Exception as exc:
        return {"ok": False, "error": str(exc)}

## Run the Lab — Stepper UI

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML, Markdown
from pathlib import Path
import os, socket, threading, http.server

# Mutable state shared by the button callbacks below. Each step stores its
# result here so the next step can pick it up.
_state = {
    "repo": DEFAULT_REPO,
    "site_title": SITE_TITLE,
    "tone": TONE,
    "theme": THEME,
    "discovery": None,   # result of discover_repo_docs()
    "plan": None,        # result of plan_sections_with_gemini()
    "sections": None,    # result of synthesize_sections()
    "index_path": None,  # path returned by write_site_files()
    "public_url": None,  # URL reported by a successful deploy
}

# Input widgets, pre-filled from the config cell's defaults.
repo_in   = widgets.Text(value=DEFAULT_REPO, description="GitHub repo", placeholder="owner/repo")
title_in  = widgets.Text(value=SITE_TITLE, description="Site title")
tone_in   = widgets.Text(value=TONE, description="Tone")
theme_dd  = widgets.Dropdown(options=["light","dark"], value=THEME, description="Theme")

# One button per workflow step, plus a ZIP export.
discover_btn = widgets.Button(description="1) Discover", button_style="info")
plan_btn     = widgets.Button(description="2) Plan with Gemini", button_style="info")
build_btn    = widgets.Button(description="3) Build HTML", button_style="success")
serve_btn    = widgets.Button(description="4) Serve Locally", button_style="warning")
deploy_btn   = widgets.Button(description="5) Deploy (EdgeOne)", button_style="warning")
zip_btn      = widgets.Button(description="Download ZIP", button_style="")

# Output areas: status messages and the rendered HTML preview.
log_out = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", min_height="140px"))
preview_out = widgets.Output(layout=widgets.Layout(border="1px solid #ddd", min_height="200px"))

def log(msg):
    """Display *msg*, rendered as Markdown, inside the ``log_out`` widget."""
    with log_out:
        display(Markdown(msg))

def on_discover(_):
    """Button handler for step 1: discover docs for the repo in ``repo_in``.

    Clears both output areas, runs ``discover_repo_docs`` and stores the
    result in ``_state["discovery"]``. Any plan or sections from an earlier
    run are dropped, since they refer to the previously discovered files.
    Errors are logged rather than raised.
    """
    log_out.clear_output(); preview_out.clear_output()
    _state["repo"] = repo_in.value.strip()
    try:
        info = discover_repo_docs(_state["repo"], use_mcp=USE_MCP_DEEPWIKI)
        _state["discovery"] = info
        # A plan built for the previous discovery would point at paths that
        # may no longer exist; force the user to re-plan.
        _state["plan"] = None
        _state["sections"] = None
        log(f"**Discovered items:** {len(info['items'])} files")
        if not info["items"]:
            log("> No docs found. Try another repo or ensure it has README/docs.")
        else:
            lines = [f"- {it['path']}" for it in info["items"][:8]]
            # Blank line after the header so Markdown renders a real list
            # instead of gluing the first bullet onto the header text.
            log("**Sample files:**\n\n" + "\n".join(lines))
    except Exception as e:
        log(f"**Discovery error:** {e}")

def on_plan(_):
    """Button handler for step 2: ask Gemini for a section plan.

    Requires a non-empty discovery result. Reads the title and tone inputs
    (substituting defaults for blank values), stores the plan in
    ``_state["plan"]`` and logs it as a JSON code block. Errors are logged.
    """
    import json as _json

    preview_out.clear_output()
    discovery = _state.get("discovery")
    if not discovery or not discovery["items"]:
        log("Run **Discover** first.")
        return

    entered_title = title_in.value.strip()
    _state["site_title"] = entered_title if entered_title else "Microsite"
    entered_tone = tone_in.value.strip()
    _state["tone"] = entered_tone if entered_tone else "executive-brief"

    try:
        proposed = plan_sections_with_gemini(
            _state["repo"],
            discovery["items"],
            _state["site_title"],
            _state["tone"],
        )
        _state["plan"] = proposed
        log("**Plan:**")
        log("```json\n" + _json.dumps(proposed, indent=2) + "\n```")
    except Exception as err:
        log(f"**Planning error:** {err}")

def on_build(_):
    """Button handler for step 3: render the planned sections to HTML.

    Requires a plan. Synthesizes the sections, builds the page with the
    selected theme, writes it under ``SITE_DIR`` and shows (at most the
    first 200000 characters of) the HTML in the preview area. Errors are
    logged.
    """
    preview_out.clear_output()
    plan = _state.get("plan")
    if not plan:
        log("Run **Plan with Gemini** first.")
        return

    _state["theme"] = theme_dd.value
    try:
        rendered_sections = synthesize_sections(plan, _state["discovery"]["items"])
        _state["sections"] = rendered_sections
        page = build_html(_state["site_title"], rendered_sections, _state["theme"])
        index_path = write_site_files(SITE_DIR, page)
        _state["index_path"] = index_path
        log(f"**Built site** at: `{index_path}`")
        with preview_out:
            display(HTML(page[:200000]))
    except Exception as err:
        log(f"**Build error:** {err}")

def on_serve(_):
    """Button handler for step 4: serve the built site on 127.0.0.1.

    Requires a built site. The first click starts a background HTTP server
    rooted at ``SITE_DIR`` on an OS-chosen port; later clicks reuse it. The
    URL is logged. Errors are logged rather than raised.
    """
    if not _state.get("index_path"):
        log("Build the site first."); return
    try:
        port = _state.get("server_port")
        if port is None:
            import functools

            class QuietHandler(http.server.SimpleHTTPRequestHandler):
                def log_message(self, format, *args): pass

            # Root the handler at an absolute path instead of os.chdir():
            # changing the process cwd would break the relative SITE_DIR used
            # by later build / zip / deploy steps.
            site_root = os.path.abspath(SITE_DIR)
            handler = functools.partial(QuietHandler, directory=site_root)
            # Binding to port 0 lets the OS pick a free port atomically,
            # avoiding the race of probing a port and re-binding it later.
            httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 0), handler)
            port = httpd.server_address[1]

            def _run():
                with httpd:
                    httpd.serve_forever()

            threading.Thread(target=_run, daemon=True).start()
            _state["server_port"] = port  # reuse on subsequent clicks
        log(f"**Local server:** open http://127.0.0.1:{port}")
    except Exception as e:
        log(f"**Server error:** {e}")

def on_deploy(_):
    """Button handler for step 5: attempt an EdgeOne deploy of the built site.

    Requires a built site and ``USE_MCP_EDGEONE`` to be enabled. On success
    the public URL is stored in ``_state["public_url"]`` and logged;
    otherwise the reported error is logged.
    """
    if not _state.get("index_path"):
        log("Build the site first.")
        return
    if not USE_MCP_EDGEONE:
        log("Enable `USE_MCP_EDGEONE = True` in the config cell to attempt MCP deploy.")
        return

    outcome = try_edgeone_deploy(EDGEONE_MCP_URL, SITE_DIR)
    if not outcome.get("ok"):
        log(f"**Deploy failed:** {outcome.get('error')}")
        return

    public_url = outcome["url"]
    _state["public_url"] = public_url
    log(f"**Deployed!** Public URL: {public_url}")

def on_zip(_):
    """Button handler: zip ``SITE_DIR`` into ``./<RUN_ID>_site.zip``.

    The outcome (archive path or error message) is written to the log.
    """
    # Path is relative to the current working directory.
    zp = f"./{RUN_ID}_site.zip"
    try:
        zip_site(SITE_DIR, zp)
        log(f"**ZIP ready:** {zp}")
    except Exception as e:
        log(f"**ZIP error:** {e}")

# Wire each button to its step handler.
discover_btn.on_click(on_discover)
plan_btn.on_click(on_plan)
build_btn.on_click(on_build)
serve_btn.on_click(on_serve)
deploy_btn.on_click(on_deploy)
zip_btn.on_click(on_zip)

# Box children must be ipywidgets Widget instances. IPython's Markdown display
# object is not a Widget and is rejected by the ``children`` trait, so the
# section labels are rendered with widgets.HTML instead.
display(widgets.VBox([
    widgets.HBox([repo_in, title_in]),
    widgets.HBox([tone_in, theme_dd]),
    widgets.HBox([discover_btn, plan_btn, build_btn, serve_btn, deploy_btn, zip_btn]),
    widgets.HTML("<b>Logs</b>"),
    log_out,
    widgets.HTML("<b>Preview</b>"),
    preview_out
]))