# Example (CLI): Extract indicators from a PDF to XLSX\n\nThis is the notebook version of `examples/example_cli_extract_pdf_xlsx.py`.\nIt calls the CLI programmatically so the behavior matches the `vsme-extract` command.\n\n## Prerequisites\n\n- Install dependencies: `pip install .`\n- Provide `SCW_API_KEY` (via environment variable or a `.env` file at repo root).\n

In [None]:
from __future__ import annotations

import sys
from pathlib import Path

from dotenv import find_dotenv, load_dotenv


In [None]:
# Ensure we import the local repo package (useful when you also have an installed version).
# The repo root is the nearest directory -- starting at the current working directory and
# walking up through its ancestors -- that contains the `vsme_extractor` package directory.
# When no such directory is found, fall back to the parent of the working directory.
_CWD = Path.cwd().resolve()
_REPO_ROOT = next(
    (p for p in (_CWD, *_CWD.parents) if (p / "vsme_extractor").is_dir()),
    _CWD.parent,
)
# Prepend (rather than append) so the local checkout is found before other sys.path entries.
if str(_REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(_REPO_ROOT))

# Load .env if present (do NOT override explicit environment variables).
# `dotenv_path` is falsy when no .env file was located, hence the bool() in the report below.
dotenv_path = find_dotenv(usecwd=True)
load_dotenv(dotenv_path, override=False)
print("Repo root:", _REPO_ROOT)
print(".env found:", bool(dotenv_path), dotenv_path)


## 1) Select a PDF\n\nThe path below is a placeholder: edit it to point to an existing PDF. The repo may ship example PDFs under `./data/test/` that you can use.\n

In [None]:
# Path of the PDF to process. The default value is a placeholder and must be edited.
pdf_path = Path("/your_path/your_file.pdf")  # <-- edit this
# Fail fast, before the extractor is invoked. is_file() also rejects a directory,
# which a plain exists() check would let through.
if not pdf_path.is_file():
    raise FileNotFoundError(
        f"PDF file does not exist (or is not a regular file): {pdf_path}"
    )
# Last expression of the cell: shown as the cell output for a quick visual confirmation.
pdf_path.is_file(), pdf_path


## 2) Run the extractor (CLI) and export XLSX\n\nNotes:\n- `--codes` lets you restrict to a small subset for faster testing.\n- Output is written next to the PDF as `*.vsme.xlsx`.\n

In [None]:
# The CLI entry point is imported here rather than at the top of the notebook
# (hence the E402 suppression).
from vsme_extractor.cli import main as cli_main  # noqa: E402

# Comma-separated indicator codes passed to `--codes`.
# Optional: keep the list short for faster test runs (edit as needed).
codes = "B3_1,B3_2"

# Invoke the CLI in-process with an explicit argument list:
# the PDF path first, then flags, each option shown next to its value.
cli_main([
    str(pdf_path),
    "--no-log-stdout",
    "--output-format", "xlsx",
    "--codes", codes,
])

# Expected export location: same directory and base name as the PDF,
# with the `.pdf` suffix swapped for `.vsme.xlsx`.
out_path = pdf_path.with_suffix(".vsme.xlsx")
print("Export:", out_path)
# Last expression of the cell: displays whether the export file is present.
out_path.exists()
