# LayoutScribe Quickstart

Run LayoutScribe locally to parse documents into Markdown, plain text, and layout JSON, and then visualize the overlays it produces.

Prerequisites:
- Python 3.10+
- Provider API key(s) set in your environment (OpenAI, Azure OpenAI, Anthropic, or Google via LiteLLM)
- A sample document (`.pdf`, `.pptx`, or `.docx`) on disk

In [5]:
# Install in editable mode (run once in your environment)
# !pip install -e .[dev]

import os
from pathlib import Path
import nest_asyncio

# Patch asyncio so that asyncio.run() can be called from inside the notebook's
# already-running event loop (the programmatic API cell below relies on this).
nest_asyncio.apply()

# Set provider keys here or rely on your shell environment.
# setdefault() only fills in the placeholder when the variable is missing, so a
# real key already exported in your shell is never overwritten by "sk-***".
os.environ.setdefault("OPENAI_API_KEY", "sk-***")

#project_root = Path("..").resolve()
#print("Project root:", project_root)


In [6]:
# Basic CLI run (replace the sample path with your own file)
# You can run the CLI from a notebook cell by prefixing the command with `!`

# Document to parse; the prerequisites list .pdf, .pptx, and .docx as sample types.
SAMPLE_PATH = "./samples/promo.pdf"  # change to your file
# Model identifier in "<provider>/<model>" form, used as the --llm / llm= value.
LLM_MODEL = "openai/gpt-4.1"          # or azure/<deployment_name>

#!layoutscribe parse "$SAMPLE_PATH" --llm "$LLM_MODEL" --outputs markdown text layout_json --dpi 180 --parallel-pages 4 --trace-mlflow --budget-usd 0.50

In [7]:
# Programmatic API usage

import asyncio
from layoutscribe import api

async def run_parse(path: str, model: str):
  """Parse one document through ``api.parse`` with this notebook's fixed settings.

  Args:
    path: Path of the document to parse; forwarded as ``path``.
    model: Model identifier; forwarded as ``llm``.

  Returns:
    Whatever the awaited ``api.parse`` call returns.
  """
  # Settings are fixed for the quickstart: three output formats, dpi=180,
  # parallel_pages=4, trace_mlflow=True and budget_usd=0.50.
  parse_kwargs = {
    "path": path,
    "outputs": ["markdown", "text", "layout_json"],
    "llm": model,
    "dpi": 180,
    "parallel_pages": 4,
    "trace_mlflow": True,
    "budget_usd": 0.50,
  }
  return await api.parse(**parse_kwargs)

# Parse the sample selected by SAMPLE_PATH (defined in the CLI cell above) so
# that changing the path there takes effect here instead of being ignored.
doc = asyncio.run(run_parse(SAMPLE_PATH, LLM_MODEL))
print(doc.markdown[:500])  # preview the first 500 characters of the Markdown
doc.layout_json.pages[0]  # last expression: shown by the notebook as cell output


FileNotFoundError: [Errno 2] No such file or directory: '/opt/anaconda3/lib/python3.13/site-packages/layoutscribe/schema/layout_page.schema.json'

In [None]:
# Visualize overlays (if produced)
from pathlib import Path
from IPython.display import display
from PIL import Image

# Show up to five overlay PNGs from the most recent run under ./artifacts.
artifacts_dir = Path("./artifacts").resolve()
if artifacts_dir.is_dir():
  # Only sub-directories can be run folders; stray files must not be picked.
  # Choose the most recently modified one (name breaks ties) and fall back to
  # artifacts_dir itself when it contains no sub-directories.
  run_dirs = [entry for entry in artifacts_dir.iterdir() if entry.is_dir()]
  latest = max(
    run_dirs,
    key=lambda entry: (entry.stat().st_mtime, entry.name),
    default=artifacts_dir,
  )
  overlays_dir = latest / "overlays"
  # Sorted so that the five images shown are the same on every run and OS.
  overlays = sorted(overlays_dir.glob("*.png")) if overlays_dir.is_dir() else []
  if not overlays:
    print("No overlays found in", overlays_dir)
  for ov in overlays[:5]:
    print("Overlay:", ov)
    # Close each image file once it has been rendered.
    with Image.open(ov) as img:
      display(img)
else:
  print("No artifacts yet. Run the CLI or API cell above.")

