# Overview

This notebook illustrates how to use Prefect to schedule and keep track of WarpX simulations running at NERSC.

Prefect is used to track the jobs and to run several of them asynchronously.

In [1]:
from sfapi_client import Client
from sfapi_client.compute import Machine
import prefect
from pathlib import Path

In [6]:
# Create Perlmutter job scripts

def create_warpx_job_script( TOD, z_pos, job_id=None ):
    """
    Creates a job script that runs a WarpX IP2 simulation

    The generated batch script requests 4 GPU nodes for 30 minutes in the
    debug queue, creates a dedicated run directory, calls
    `prepare_simulation.py` with the requested parameters and finally
    launches WarpX with `srun`.

    Parameters:
    -----------
    TOD: float
        Value of the TOD in fs^3
    z_pos: float
        Value of the position in microns
    job_id: int, optional
        Integer used to name the run directory (`run_<job_id>`, zero-padded
        to 8 digits). When omitted, a random integer between 0 and 1000000
        is drawn, so that repeated calls use different run directories.

    Returns:
    --------
    script: str
        The content of the batch script
    """
    import random
    if job_id is None:
        job_id = random.randint(0, 1000000)
    rundir = "run_%08d" %job_id

    # Note: the dollar sign of the arithmetic expansion must reach the batch
    # script preceded by a backslash (so that it is expanded by the inner
    # `bash -c`, not by the outer shell). The backslash is therefore doubled
    # in this Python string, which avoids the invalid-escape-sequence warning.
    script = """#!/bin/bash -l

#SBATCH -t 00:30:00
#SBATCH -N 4
#SBATCH -J WarpX
#SBATCH -A m3239_g
#SBATCH -C gpu
#SBATCH -q debug
#SBATCH --exclusive
#SBATCH --gpu-bind=none
#SBATCH --ntasks-per-node=4
#SBATCH --gpus-per-node=4

cd /pscratch/sd/r/rlehe/ip2/
mkdir %s
cd %s

python ../templates/prepare_simulation.py --TOD=%f --z_pos=%f

export SRUN_CPUS_PER_TASK=16
export EXE="../templates/warpx.2d.MPI.CUDA.DP.PDP.OPMD.EB.QED"
export INPUTS="../templates/template_inputs_2d"

# GPU-aware MPI optimizations
GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"
srun --cpu-bind=cores bash -c "
    export CUDA_VISIBLE_DEVICES=\\$((3-SLURM_LOCALID));
    ${EXE} ${INPUTS} ${GPU_AWARE_MPI}"
""" %(rundir, rundir, TOD, z_pos)
    return script

  script = """#!/bin/bash -l


In [7]:
# Preview the job script generated for TOD = 1000 fs^3 and z_pos = 0 microns
print(create_warpx_job_script(TOD=1000, z_pos=0))

#!/bin/bash -l

#SBATCH -t 00:30:00
#SBATCH -N 4
#SBATCH -J WarpX
#SBATCH -A m3239_g
#SBATCH -C gpu
#SBATCH -q debug
#SBATCH --exclusive
#SBATCH --gpu-bind=none
#SBATCH --ntasks-per-node=4
#SBATCH --gpus-per-node=4

cd /pscratch/sd/r/rlehe/ip2/
mkdir run_00476847
cd run_00476847

python ../templates/prepare_simulation.py --TOD=1000.000000 --z_pos=0.000000

export SRUN_CPUS_PER_TASK=16
export EXE="../templates/warpx.2d.MPI.CUDA.DP.PDP.OPMD.EB.QED"
export INPUTS="../templates/template_inputs_2d"

# GPU-aware MPI optimizations
GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"
srun --cpu-bind=cores bash -c "
    export CUDA_VISIBLE_DEVICES=\$((3-SLURM_LOCALID));
    ${EXE} ${INPUTS} ${GPU_AWARE_MPI}"



In [8]:
# Connect to Perlmutter with the SFAPI.
# `key` is the path of the private-key file handed to the SFAPI client
# (presumably the credential generated for this SFAPI client - TODO confirm).
# The path is relative, so it is resolved against the kernel's working directory.
key = Path("./priv_key.pem")
client = Client(key=key)
# Handle to the Perlmutter machine; the task defined below submits jobs through it
perlmutter = client.compute(Machine.perlmutter)

@prefect.task
def launch_script_on_perlmutter( script ):
    """
    Prefect task that submits a job script to Perlmutter through
    the SFAPI and blocks until that job has completed.

    Wrapping the submission in `prefect.task` is what lets Prefect
    track it.

    Parameters:
    -----------
    script: str
        Content of the batch script to submit
    """
    # Submit the job, then wait: `complete()` is a blocking call
    # that only returns once the submitted job is over
    perlmutter.submit_job( script ).complete()

@prefect.flow
def run_jobs_serial():
    """
    Prefect flow that runs a single WarpX simulation
    (TOD = 0 fs^3, z_pos = 0 microns) on Perlmutter
    and waits for it to finish.
    """
    warpx_script = create_warpx_job_script( TOD=0, z_pos=0 )
    launch_script_on_perlmutter( warpx_script )

@prefect.flow
def run_jobs_async():
    """
    Prefect flow that launches several WarpX simulations on Perlmutter
    without waiting for one to finish before submitting the next,
    and then waits until all of them are done.

    Each simulation is started with `launch_script_on_perlmutter.submit`,
    which hands back a future instead of blocking; calling `result()` on
    every future afterwards makes the flow wait for all submitted jobs.
    The job scripts are built with `create_warpx_job_script`, the only
    script generator defined in this notebook. No follow-up
    (post-processing) job is submitted once the simulations are over.
    """
    # Values of the TOD (in fs^3) to scan; z_pos is kept at 0 microns
    tod_values = [0, 3, 7]
    async_jobs = [
        launch_script_on_perlmutter.submit( create_warpx_job_script(tod, 0) )
        for tod in tod_values
    ]
    # Wait for asynchronous jobs to finish
    for r in async_jobs:
        r.result()

In [None]:
# Launch a single job and track it with Prefect.
# The flow calls the blocking task directly, so this cell only
# returns once the job submitted to Perlmutter has completed.
run_jobs_serial()

In [None]:
# Launch several jobs asynchronously (submitted through the task's `.submit`)
# and track them with Prefect; the flow waits for all of them to finish.
run_jobs_async()