# [ParslFest 2025](https://parsl-project.org/parslfest/parslfest2025.html)

# [Accelerating QMCpy Notebook Tests with Parsl](https://www.figma.com/slides/k7EUosssNluMihkYTLuh1F/Parsl-Testbook-Speedup?node-id=1-37&t=WnKcu2QYO8JXvtpP-0)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QMCSoftware/QMCSoftware/blob/develop/demos/talk_paper_demos/parsel_fest_2025/parsl_fest_2025.ipynb)

Joshua Herman, Brandon Sharp, and Sou-Cheng Choi, QMCPy Developers

Aug 28 -- 29, 2025

Updated: Dec 1, 2025


**Requirements**:

* testbook: `pip install testbook==0.4.2`
* Parsl: `pip install parsl==2025.7.28`

In [1]:
try:
    import parsl as pl
except ModuleNotFoundError:
    !pip install -q parsl

## 2. Parsl

1. Install and set up Parsl
2. Run the tests in parallel with Parsl

### 2.1 Parsl Setup and Baseline

In [11]:
import sys
import os
import time

import parsl as pl
from parsl.configs.htex_local import config
import importlib

# ----------------------------------------------------------------------
# Find repo root (directory containing pyproject.toml)
# ----------------------------------------------------------------------
def _find_repo_root(start=None):
    if start is None:
        start = os.getcwd()
    cur = start
    while True:
        if os.path.exists(os.path.join(cur, "pyproject.toml")):
            return cur
        parent = os.path.dirname(cur)
        if parent == cur:
            raise FileNotFoundError("repo root not found")
        cur = parent

# Repository root: nearest ancestor of the working directory with pyproject.toml.
repo_root = _find_repo_root()

# Results live in ./output, resolved against the notebook's working directory
# (the parsel_fest_2025 folder); create it on first use.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

# The sequential baseline is a single number of seconds written by
# 01_sequential.ipynb; every speedup in this notebook is measured against it.
seq_fname = os.path.join(output_dir, "sequential_time.csv")
with open(seq_fname, "r") as f:
    sequential_time = float(f.read().strip())

# Expose test/booktests on sys.path so parsl_test_runner can be imported.
tests_dir = os.path.join(repo_root, "test", "booktests")
if tests_dir not in sys.path:
    sys.path.append(tests_dir)

# Import, then reload, so edits made to the runner since the kernel started
# are picked up without restarting.
import parsl_test_runner
parsl_test_runner = importlib.reload(parsl_test_runner)

# Echo the resolved configuration so the saved notebook records it.
print("parsl_test_runner loaded from:", parsl_test_runner.__file__)
print(
    f"Repo root detected as: {repo_root}",
    f"Tests dir: {tests_dir}",
    f"Sequential baseline time: {sequential_time:.2f} s",
    sep="\n",
)




parsl_test_runner loaded from: /workspaces/QMCSoftware/test/booktests/parsl_test_runner.py
Repo root detected as: /workspaces/QMCSoftware
Tests dir: /workspaces/QMCSoftware/test/booktests
Sequential baseline time: 730.49 s


### 2.2 Create a Parsl Test Runner

In [12]:
import parsl_test_runner
import inspect

# Collect every function object exposed by the runner module as
# (name, function) pairs, sorted by name.
functions = inspect.getmembers(parsl_test_runner, inspect.isfunction)

# One "- name" line per function under a "Functions:" heading.
print("Functions:", *(f"- {name}" for name, _func in functions), sep="\n")
print("\n" + "="*50)

# Show the docstring of the entry point used for the parallel runs.
print("Help for execute_parallel_tests:")
help(parsl_test_runner.execute_parallel_tests)

Functions:
- _derive_skipped_modules_from_notebooks
- bash_app
- execute_parallel_tests
- generate_summary_report
- get_runtime
- main
- optimal_schedule
- print_notebook_booktest_coverage

Help for execute_parallel_tests:
Help on function execute_parallel_tests in module parsl_test_runner:

execute_parallel_tests()
    Execute all tb_*.py booktests in parallel using Parsl.



### 2.3 Run the Notebooks in Parallel with Parsl

In [15]:
import os
import subprocess
import sys
import time

# Time the Parsl booktest runner at several worker counts and cache each
# result as output/parallel_times_<workers>.csv.
# Relies on repo_root, output_dir and sequential_time from the setup cell.
tests_dir = os.path.join(repo_root, "test", "booktests")

# Optional override: set force_compute = True in an earlier cell to re-time
# even when a cached CSV exists. Defaulting it here keeps the cell runnable on
# a fresh kernel (otherwise the cache check raises NameError as soon as a
# cached file is present).
force_compute = globals().get("force_compute", False)

# Worker counts whose run exited non-zero (no CSV is written for these).
failed_workers = []

for workers in [1, 2, 4, 8]:
    print("\n" + "=" * 60)
    print(f"=== RUNNING PARSL BOOKTESTS WITH {workers} WORKER(S) ===")

    par_fname = os.path.join(output_dir, f"parallel_times_{workers}.csv")

    if not os.path.exists(par_fname) or force_compute:
        cmd = [
            # Use the kernel's own interpreter so the child process sees the
            # same installed packages (parsl, testbook) as this notebook.
            sys.executable,
            "-u",                 # unbuffered, so output can be streamed line by line
            "parsl_test_runner.py",
            "--workers",
            str(workers),
            "--print-coverage",   # prints skipped notebooks + totals once per run
        ]
        print("Command:", " ".join(cmd))
        print("CWD:", tests_dir)

        start_time = time.time()
        # The context manager closes the pipe and waits for the child on exit,
        # including when streaming is interrupted.
        with subprocess.Popen(
            cmd,
            cwd=tests_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr with stdout in one stream
            text=True,
            bufsize=1,                 # line-buffered
        ) as proc:
            # Stream output live
            for line in proc.stdout:
                print(line, end="")
        parallel_time = time.time() - start_time

        if proc.returncode != 0:
            print(f"\nParsl test runner failed for workers={workers} "
                  f"with return code {proc.returncode}")
            failed_workers.append(workers)
            continue

        speedup = sequential_time / parallel_time
        print(f"\nMeasured wall-clock for workers={workers}: {parallel_time:.2f} s")
        print(f"Measured speedup: {speedup:.2f}x")

        with open(par_fname, "w") as f:
            f.write("workers,times,speedup\n")
            f.write(f"{workers},{parallel_time:.2f},{speedup:.2f}\n")

        print(f"Saved results to {par_fname}")
    else:
        print(f"Skipping run for {workers} workers "
              f"(file already exists: {par_fname})")

if failed_workers:
    # Do not claim success when timing files are missing for some worker counts.
    print(f"\nParsl runs finished with failures for workers={failed_workers}; "
          f"no timing file was written for those runs.")
else:
    print("\nAll Parsl runs completed. You can now run 03_visualize_speedup.ipynb.")




=== RUNNING PARSL BOOKTESTS WITH 1 WORKER(S) ===
Command: python -u parsl_test_runner.py --workers 1 --print-coverage
CWD: /workspaces/QMCSoftware/test/booktests
Checking notebook ↔ booktest coverage...
    Skipping demos/DAKOTA_Genz/dakota_genz.ipynb (requires large manual file / heavy memory use)
    Skipping demos/talk_paper_demos/Argonne_Talk_2023_May/Argonne_2023_Talk_Figures.ipynb (heavy LaTeX + many figures; skipped in booktests_no_docker)
    Skipping demos/talk_paper_demos/MCQMC2022_Article_Figures/MCQMC2022_Article_Figures.ipynb (MCQMC 2022 article figures; not run in CI)
    Skipping demos/talk_paper_demos/ProbFailureSorokinRao/prob_failure_gp_ci.ipynb (prob_failure_gp_ci talk demo; heavy GP / prob. failure example)
    Skipping demos/talk_paper_demos/Purdue_Talk_2023_March/Purdue_Talk_Figures.ipynb (Purdue 2023 talk figures; not a CI booktest target)
    Skipping demos/talk_paper_demos/parsel_fest_2025/01_sequential.ipynb (helper notebook for parsl_fest_2025; not a standal

In [16]:
# Print the current date/time, then list the files in output/ sorted by
# modification time (oldest first), so the saved notebook shows which timing
# files exist and when each was written.
!date
!ls -ltr output

Wed Dec  3 02:43:01 UTC 2025
total 24
-rw-rw-rw- 1 codespace codespace    8 Dec  3 02:06 sequential_time.csv
-rw-rw-rw- 1 codespace codespace 1708 Dec  3 02:12 sequential_times_detail.csv
-rw-rw-rw- 1 codespace codespace   36 Dec  3 02:20 parallel_times_1.csv
-rw-rw-rw- 1 codespace codespace   36 Dec  3 02:27 parallel_times_2.csv
-rw-rw-rw- 1 codespace codespace   36 Dec  3 02:35 parallel_times_4.csv
-rw-rw-rw- 1 codespace codespace   36 Dec  3 02:42 parallel_times_8.csv


In [17]:
import platform

# Report the machine the timings were taken on. These commands are
# Linux-specific, so other platforms print nothing.
if platform.system().lower() == 'linux':
    # Kernel / OS identification.
    !uname -a
    # Number of installed processors.
    !nproc --all
    # Total RAM: MemTotal in /proc/meminfo is given in kB, so dividing by
    # 1024 twice converts it to GB.
    !awk '/MemTotal/ {printf "%.2f GB\n", $2/1024/1024}' /proc/meminfo

Linux codespaces-927f92 6.8.0-1030-azure #35~22.04.1-Ubuntu SMP Mon May 26 18:08:30 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
16
62.80 GB
