In [None]:
import sys
import subprocess
from pathlib import Path

# The directory this script is launched from is treated as the project root.
ROOT = Path.cwd().resolve()

# The three notebooks may sit in notebooks/ or directly in the root.
# The notebooks/ layout is listed (and therefore tried) first.
CANDIDATE_PIPELINES = [
    [base / name for name in ("Main.ipynb", "Merger.ipynb", "Models.ipynb")]
    for base in (ROOT / "notebooks", ROOT)
]

# Pick the first layout in which every notebook exists; None if no layout is complete.
PIPELINE = next(
    (layout for layout in CANDIDATE_PIPELINES if all(nb.exists() for nb in layout)),
    None,
)

if PIPELINE is None:
    not_found_message = "\n".join(
        [
            "Cannot find the notebooks to run.",
            "Expected either:",
            "  - notebooks/Main.ipynb, notebooks/Merger.ipynb, notebooks/Models.ipynb",
            "or:",
            "  - Main.ipynb, Merger.ipynb, Models.ipynb",
            f"Current working directory: {ROOT}",
        ]
    )
    raise FileNotFoundError(not_found_message)

print("✅ Pipeline notebooks found:")
for found in PIPELINE:
    print("  -", found)

# Input and intermediate data folders, both located under the project root.
DATA_RAW = ROOT.joinpath("data", "raw")
DATA_INTER = ROOT.joinpath("data", "intermediate")

# data/raw must already be present: stop early with a clear message if it is not.
if not DATA_RAW.exists():
    raise FileNotFoundError(f"Missing folder: {DATA_RAW}. Your notebooks expect data/raw to exist.")

# data/intermediate is created on demand (including missing parents).
DATA_INTER.mkdir(parents=True, exist_ok=True)

# Probe the exact tool the pipeline relies on. `jupyter --version` alone can
# succeed when only jupyter_core is installed, while the notebooks are executed
# through the `nbconvert` subcommand; asking nbconvert for its version fails
# here, up front, if that subcommand is missing.
try:
    subprocess.run(
        [sys.executable, "-m", "jupyter", "nbconvert", "--version"],
        check=True,
        capture_output=True,
        text=True,
    )
except (subprocess.SubprocessError, OSError) as e:
    # CalledProcessError (non-zero exit) is a SubprocessError; OSError covers a
    # Python executable that cannot be launched.
    raise RuntimeError(
        "Jupyter nbconvert is not available in this Python environment.\n"
        "Fix:\n"
        "  pip install jupyter nbconvert ipykernel\n"
        "Then re-run this notebook."
    ) from e

# Executed copies of the notebooks are written to this folder under the root.
EXECUTED_DIR = ROOT.joinpath("executed_notebooks")
EXECUTED_DIR.mkdir(exist_ok=True)

# One hour, in seconds; handed to nbconvert as ExecutePreprocessor.timeout.
TIMEOUT_SECONDS = 3600

def run_nb(nb_path: Path) -> None:
    """Execute one notebook with nbconvert and save the executed copy.

    The notebook is run via ``python -m jupyter nbconvert --to notebook
    --execute`` using the current interpreter. The result is written to
    ``EXECUTED_DIR`` as ``<stem>_EXECUTED.ipynb``; the source notebook is not
    overwritten.

    Args:
        nb_path: Path of the notebook to execute.

    Raises:
        RuntimeError: If nbconvert exits with a non-zero status. The tails of
            its stdout and stderr are printed before raising.
    """
    executed_path = EXECUTED_DIR / f"{nb_path.stem}_EXECUTED.ipynb"

    print("\n" + "=" * 80)
    print(f"▶ Running: {nb_path}")
    print("=" * 80)

    cmd = [
        sys.executable, "-m", "jupyter", "nbconvert",
        "--to", "notebook",
        "--execute",
        # Use the `--Class.trait=value` form (the one shown in nbconvert's
        # documentation) so the option does not depend on the config parser
        # accepting the value as a separate argv token.
        f"--ExecutePreprocessor.timeout={TIMEOUT_SECONDS}",
        "--output", executed_path.name,
        "--output-dir", str(EXECUTED_DIR),
        str(nb_path),
    ]

    # errors="replace": undecodable bytes in the child's output must not raise
    # UnicodeDecodeError here and hide the actual notebook failure.
    # check=False: the return code is inspected below so the output tails can
    # be printed before raising.
    res = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        errors="replace",
        check=False,
    )

    if res.returncode != 0:
        # Only the last 5000 characters of each stream are shown; the
        # traceback of the failing cell is normally at the end.
        print("\n--- nbconvert STDOUT (tail) ---\n", (res.stdout or "")[-5000:])
        print("\n--- nbconvert STDERR (tail) ---\n", (res.stderr or "")[-5000:])
        raise RuntimeError(f"❌ Failed executing notebook: {nb_path.name}")

    print(f"✅ Done. Executed notebook saved to: {executed_path}")

# Execute the notebooks in pipeline order. run_nb raises on failure, so a
# failing notebook stops the run before any later notebook starts.
for notebook in PIPELINE:
    run_nb(notebook)

print("\n✅ Full pipeline executed successfully.")
print("Executed notebooks are in:", EXECUTED_DIR.resolve())