
# [ParslFest 2025](https://parsl-project.org/parslfest/parslfest2025.html)

# [Accelerating QMCPy Notebook Tests with Parsl](https://www.figma.com/slides/k7EUosssNluMihkYTLuh1F/Parsl-Testbook-Speedup?node-id=1-37&t=WnKcu2QYO8JXvtpP-0)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QMCSoftware/QMCSoftware/blob/develop/demos/parsl_fest_2025.ipynb)

Joshua Herman and Sou-Cheng Choi, QMCPy Developers

Aug 28 -- 29, 2025

Updated: Sep 9, 2025

**Requirements**:

* QMCPy: `pip install qmcpy==2.1`
* LaTeX: `sudo apt update && sudo apt install -y texlive-full`
* testbook: `pip install testbook==0.4.2`
* Parsl: `pip install parsl==2025.7.28`

This notebook can be run interactively or from the command line. To run it from the command line, use:
```bash
    conda activate qmcpy && cd demos && jupyter execute --kernel_name qmcpy parsl_fest_2025.ipynb
```

Our presentation slides for ParslFest are available at [Figma](https://www.figma.com/slides/k7EUosssNluMihkYTLuh1F/Parsl-Testbook-Speedup?node-id=174-95&t=t3jENVMltXWwdLdb-0).

In [1]:
try:
    import qmcpy as qp
except ModuleNotFoundError:
    !pip install -q qmcpy

try:
    import testbook as tb
except ModuleNotFoundError:
    !pip install -q testbook

In [2]:
import sys
import os
import time
import inspect
import parsl as pl
from parsl.configs.htex_local import config

## 1. Sequential Execution

In [None]:
import subprocess
import re

start_time = time.time()

# Run the sequential notebook tests from the parent directory and capture output
result = subprocess.run(
    ['make', 'booktests_no_docker'],
    cwd='..',
    capture_output=True,
    text=True
)

# Stop the clock as soon as the command returns so that the filtering and
# printing below are not counted as test execution time.
sequential_time = time.time() - start_time

# Keep only the lines that report test results or the final summary;
# lines containing 'for f in' are excluded.
output_lines = result.stdout.split('\n')
summary_keywords = (
    'Total notebooks:', 'Total test files:', 'Ran ', 'OK', 'FAILED',
    'test_', '...', 'Memory used:', 'Test time:'
)
important_lines = [
    line for line in output_lines
    if 'for f in' not in line
    and any(keyword in line for keyword in summary_keywords)
]

# Show concise output
print('\n'.join(important_lines[-50:]))  # Last 50 important lines

print(f"\nTotal sequential execution time: {sequential_time:.2f} seconds")

# Check for failures: a non-zero exit code means the measured time does not
# correspond to a complete, successful test run.
if result.returncode != 0:
    print(f"\nWarning: Command exited with code {result.returncode}")
    # Show error output if available
    if result.stderr:
        print("Error output (last 20 lines):")
        print('\n'.join(result.stderr.split('\n')[-20:]))

## 2. Parsl

1. Install and Configure Parsl
2. Create a Parsl Test Runner
3. Run the notebook tests in parallel with Parsl

### 2.1 Install and Configure Parsl

In [3]:
try:
    import parsl as pl
except ModuleNotFoundError:
    !pip install -q parsl
    
config.max_workers = 8  
if not pl.config:
    pl.load(config)
    print("Parsl loaded with memory limits")
else:
    print("Parsl already configured")

Parsl already configured


### 2.2 Create a Parsl Test Runner

In [5]:
# Make the modules under ../test/booktests importable from this notebook
booktests_dir = os.path.join(os.getcwd(), '..', 'test', 'booktests')
sys.path.append(booktests_dir)

In [6]:
import parsl_test_runner
import inspect

# List the name of every function found in the test-runner module
functions = inspect.getmembers(parsl_test_runner, inspect.isfunction)
print("Functions:")
for name in (member[0] for member in functions):
    print(f"- {name}")
print("\n" + "="*50)

# Display the built-in help text of the parallel entry point
print("Help for execute_parallel_tests:")
help(parsl_test_runner.execute_parallel_tests)

Functions:
- bash_app
- execute_parallel_tests
- generate_summary_report
- main

Help for execute_parallel_tests:
Help on function execute_parallel_tests in module parsl_test_runner:

execute_parallel_tests()
    Execute all testbook tests in parallel using Parsl



### 2.3 Run the Notebooks in Parallel with Parsl

In [None]:
import subprocess
import re

start_time2 = time.time()

# Run the parallel notebook tests from the parent directory and capture output
result = subprocess.run(
    ['make', 'booktests_parallel_no_docker'],
    cwd='..',
    capture_output=True,
    text=True
)

# Stop the clock as soon as the command returns so that the filtering and
# printing below are not counted as test execution time.
parallel_time = time.time() - start_time2

# Keep only the lines that show test progress or the final summary;
# lines containing 'for f in' are excluded.
output_lines = result.stdout.split('\n')
progress_keywords = (
    'Total notebooks:', 'Total test files:', 'Ran ', 'OK', 'FAILED',
    '[', '/32]', 'PASSED', 'Parsl', 'Found '
)
important_lines = [
    line for line in output_lines
    if 'for f in' not in line
    and any(keyword in line for keyword in progress_keywords)
]

# Show concise output
print('\n'.join(important_lines))  # All important lines

print(f"\nTotal parallel execution time: {parallel_time:.2f} seconds")

# Check for failures
if result.returncode != 0:
    print(f"\nWarning: Command exited with code {result.returncode}")
    # Show error output if available
    if result.stderr:
        print("Error output (last 20 lines):")
        print('\n'.join(result.stderr.split('\n')[-20:]))

In [None]:
# Ratio of the two wall-clock measurements taken in the earlier cells
speedup = sequential_time / parallel_time
summary = [
    f"Sequential time:  {sequential_time:.2f} seconds",
    f"Parallel time:    {parallel_time:.2f} seconds",
    f"Speedup by Parsl: {speedup:.2f}",
]
print("\n".join(summary))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Worker count used for the "ideal speedup" reference line and the efficiency
# figure. It is a plain constant here; keep it in sync with the Parsl setup.
max_workers = 8
speedup_val = sequential_time / parallel_time
time_saved = sequential_time - parallel_time

# Create speedup visualization: execution times on the left, speedup on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: Execution times comparison
methods = ['Sequential', 'Parallel\n(Parsl)']
times = [sequential_time, parallel_time]
colors = ['#1f77b4', '#ff7f0e']

bars = ax1.bar(methods, times, color=colors, alpha=0.7, edgecolor='black')
ax1.set_ylabel('Execution Time (seconds)', fontsize=12, fontweight='bold')
ax1.set_title('Notebook Test Execution Times', fontsize=14, fontweight='bold')
ax1.grid(axis='y', alpha=0.3, linestyle='--')

# Add value labels (seconds and minutes) centered on top of each bar
for bar, time_val in zip(bars, times):
    ax1.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
             f'{time_val:.1f}s\n({time_val/60:.1f}min)',
             ha='center', va='bottom', fontsize=11, fontweight='bold')

# Plot 2: Speedup visualization
ax2.barh(['Speedup'], [speedup_val], color='#2ca02c', alpha=0.7, edgecolor='black')
ax2.set_xlabel('Speedup Factor', fontsize=12, fontweight='bold')
ax2.set_title(f'Parsl Speedup: {speedup_val:.2f}x', fontsize=14, fontweight='bold')
ax2.grid(axis='x', alpha=0.3, linestyle='--')

# Add value label at the end of the speedup bar
ax2.text(speedup_val, 0, f'  {speedup_val:.2f}x',
         va='center', fontsize=14, fontweight='bold', color='#2ca02c')

# Add ideal speedup reference line; the label is derived from max_workers so
# it cannot drift from the value that positions the line.
ax2.axvline(x=max_workers, color='red', linestyle='--', alpha=0.5,
            label=f'Ideal ({max_workers} workers)')
ax2.legend()

plt.tight_layout()

# Save the plot
output_path = 'parsl_speedup_plot.png'
plt.savefig(output_path, dpi=300, bbox_inches='tight')
print(f"\nSpeedup plot saved to: {output_path}")
plt.show()

# Print summary statistics
rule = '=' * 60
print(f"\n{rule}")
print("PERFORMANCE SUMMARY")
print(rule)
print(f"Sequential execution: {sequential_time:.2f}s ({sequential_time/60:.2f} minutes)")
print(f"Parallel execution:   {parallel_time:.2f}s ({parallel_time/60:.2f} minutes)")
print(f"Time saved:           {time_saved:.2f}s ({time_saved/60:.2f} minutes)")
print(f"Speedup factor:       {speedup_val:.2f}x")
print(f"Efficiency:           {(speedup_val/max_workers)*100:.1f}% (vs {max_workers} workers)")
print(rule)

In [9]:
import platform

# Print basic information about the host machine. The shell commands below
# are only run when platform.system() reports Linux.
if platform.system().lower() == 'linux':
    # Kernel name, release, and architecture
    !uname -a
    # Number of installed processing units
    !nproc --all
    # MemTotal from /proc/meminfo, divided by 1024 twice and printed as GB
    !awk '/MemTotal/ {printf "%.2f GB\n", $2/1024/1024}' /proc/meminfo

Linux codespaces-2c03bb 6.8.0-1030-azure #35~22.04.1-Ubuntu SMP Mon May 26 18:08:30 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
16
62.80 GB
