diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml
index a74c124d9..fc14b7989 100644
--- a/.github/workflows/run.yml
+++ b/.github/workflows/run.yml
@@ -132,7 +132,14 @@ jobs:
           on \
             --executor '{execute_experiment}' \
             --where '{n_nodes} == 1'
-      - name: Upload coverage to Codecov
+      - name: Test aggregate command
+        run: |
+          ./bin/benchpark system init --dest=dane llnl-cluster cluster=dane
+          ./bin/benchpark experiment init --dest=saxpy-agg --system=dane saxpy n_repeats=5
+          ./bin/benchpark setup saxpy-agg workspace2/
+          ./workspace2/ramble/bin/ramble --workspace-dir /home/runner/work/benchpark/benchpark/workspace2/saxpy-agg/dane/workspace workspace setup --dry-run
+          BENCHPARK_RUN_COVERAGE=aggregate ./bin/benchpark aggregate --dest agg workspace2  # Aggregate multi-trial workspace
+      - name: Upload binary coverage to Codecov
         uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.BENCHPARK_CODECOV_TOKEN }}
@@ -140,6 +147,14 @@
           flags: dryrunexperiments-binary
           verbose: true
           fail_ci_if_error: true
+      - name: Upload aggregate coverage to Codecov
+        uses: codecov/codecov-action@v4
+        with:
+          token: ${{ secrets.BENCHPARK_CODECOV_TOKEN }}
+          directory: ./coverage-data-aggregate
+          flags: dryrunexperiments-aggregate
+          verbose: true
+          fail_ci_if_error: true
   dryrunexperiments:
     runs-on: ubuntu-24.04
     strategy:
diff --git a/codecov.yml b/codecov.yml
index ebf9e1603..246a2b4c8 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -7,4 +7,4 @@ comment:
   require_changes: false
   require_base: false
   require_head: true
-  after_n_builds: 10 # Number of SEPARATE codecov uploads expected, e.g. "Upload coverage to Codecov" jobs
+  after_n_builds: 11 # Number of SEPARATE codecov uploads expected, e.g. "Upload coverage to Codecov" jobs
diff --git a/docs/run-experiment.rst b/docs/run-experiment.rst
index 500c35eb2..997e39154 100644
--- a/docs/run-experiment.rst
+++ b/docs/run-experiment.rst
@@ -48,4 +48,26 @@
 the experiments benchpark would run in the second run. Generally, we would advise the
 user to remove the ``$workspace/experiments`` directory before re-running the experiments
 using ``ramble --workspace-dir . on``.
+
+Running Different Experiments in the Same Allocation
+====================================================
+
+The ``benchpark aggregate`` command batches experiments from the same workspace, or from
+multiple workspaces, into shared job scripts. To use ``benchpark aggregate``, provide a
+destination for the job scripts and the workspaces as positional arguments:
+
+::
+
+   $ benchpark aggregate --dest agg/ workspace1/ workspace2/ ...
+
+Then, submit the newly created job script to the scheduler:
+
+::
+
+   $ flux batch agg/0.sh
+
+Experiments are grouped by their scheduler options: each group with identical resource
+requests shares one numbered script (``0.sh``, ``1.sh``, ...). Output from the
+experiments will still be written to their respective experiment directories.
+
 Once you have run your experiment you can try :doc:`analyze-experiment`.
diff --git a/lib/benchpark/cmd/aggregate.py b/lib/benchpark/cmd/aggregate.py
new file mode 100644
index 000000000..fc60eff51
--- /dev/null
+++ b/lib/benchpark/cmd/aggregate.py
@@ -0,0 +1,121 @@
+# Copyright 2025 Lawrence Livermore National Security, LLC
+# and other Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import re
+from collections import defaultdict
+
+from benchpark.error import BenchparkError
+
+
+def _make_aggregate_script(script_path, batch_lines, to_aggregate):
+    with open(script_path, "w") as f:
+        for line in batch_lines:
+            f.write(line.rstrip("\n") + "\n")
+        for experiment in to_aggregate:
+            f.write(os.path.abspath(experiment) + "\n")
+
+
+def _collect_scheduler_options(experiment_script):
+    batch_patterns = [
+        r"\s*#\s*(flux:.*$)",  # flux-style header (kept verbatim)
+        r"\s*#SBATCH\s+(.*)$",  # SLURM
+    ]
+
+    batch_opts = []
+    batch_lines = []
+
+    with open(experiment_script, "r") as f:
+        for line in f:
+            for p in batch_patterns:
+                m = re.match(p, line)
+                if m:
+                    batch_opts.append(tuple(m.group(1).strip().split()))
+                    batch_lines.append(line.strip())
+
+    return tuple(sorted(batch_opts)), batch_lines
+
+
+def _collect_experiments(workspace_dir):
+    if not os.path.isdir(workspace_dir):
+        raise BenchparkError(
+            f"Workspace path does not exist or is not a directory: {workspace_dir}"
+        )
+
+    experiments = []
+    skip_roots = {"spack", "ramble", "spack-packages"}
+
+    for entry in os.listdir(workspace_dir):
+        entry_path = os.path.join(workspace_dir, entry)
+        if entry in skip_roots or not os.path.isdir(entry_path):
+            continue
+
+        for dirpath, _dirnames, filenames in os.walk(entry_path):
+            for fname in filenames:
+                if fname == "execute_experiment":
+                    experiments.append(os.path.join(dirpath, fname))
+
+    return experiments
+
+
+def _aggregate(args):
+    output_dir = args.dest
+    workspaces = args.workspaces
+
+    if not workspaces:
+        raise BenchparkError("No workspaces provided.")
+
+    if os.path.exists(output_dir):
+        raise BenchparkError(f"Directory must not already exist: {output_dir}")
+
+    experiments = []
+    for ws in workspaces:
+        experiments.extend(_collect_experiments(ws))
+
+    if not experiments:
+        raise BenchparkError(
+            "No 'execute_experiment' scripts found in the given workspaces."
+        )
+
+    opts_to_request = {}
+    opts_to_scripts = defaultdict(list)
+
+    for experiment_script in experiments:
+        batch_opts, batch_lines = _collect_scheduler_options(experiment_script)
+        if not batch_opts:
+            raise BenchparkError(f"Not expected: no batch opts in {experiment_script}")
+
+        opts_to_scripts[batch_opts].append(experiment_script)
+        if batch_opts not in opts_to_request:
+            opts_to_request[batch_opts] = batch_lines
+
+    os.mkdir(output_dir)
+    script_id = 0
+    for opts_group, scripts in opts_to_scripts.items():
+        script_path = os.path.join(output_dir, f"{script_id}.sh")
+        _make_aggregate_script(script_path, opts_to_request[opts_group], scripts)
+        script_id += 1
+
+
+def setup_parser(root_parser):
+    """
+    Register arguments for `benchpark aggregate` directly (no subcommands).
+    Usage:
+        benchpark aggregate --dest OUTDIR WS1 [WS2 ...]
+    """
+    root_parser.add_argument(
+        "--dest",
+        required=True,
+        help="Directory to generate aggregate scripts in",
+    )
+    root_parser.add_argument(
+        "workspaces",
+        nargs="+",
+        help="One or more Benchpark workspace directories",
+    )
+
+
+def command(args):
+    _aggregate(args)
diff --git a/lib/main.py b/lib/main.py
index fc98b8322..a9bcb57fb 100755
--- a/lib/main.py
+++ b/lib/main.py
@@ -17,7 +17,7 @@
 if "-V" in sys.argv or "--version" in sys.argv:
     print(__version__)
     exit()
 
-helpstr = """usage: main.py [-h] [-V] {tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,configure} ...
+helpstr = """usage: main.py [-h] [-V] {tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,aggregate,configure} ... Benchpark @@ -26,7 +26,7 @@ -V, --version show version number and exit Subcommands: - {tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,configure} + {tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,aggregate,configure} tags Tags in Benchpark experiments system Initialize a system config experiment Interact with experiments @@ -39,6 +39,7 @@ list List experiments, systems, benchmarks, and modifiers bootstrap Bootstrap benchpark or update an existing bootstrap analyze Perform pre-defined analysis on the performance data (caliper files) after 'ramble on' + aggregate Aggregate multiple experiments (even across workspaces) into the same submission script configure Configure options relating to the Benchpark environment """ if len(sys.argv) == 1 or "-h" == sys.argv[1] or "--help" == sys.argv[1]: @@ -64,6 +65,7 @@ bootstrapper = RuntimeResources(benchpark.paths.benchpark_home) # noqa bootstrapper.bootstrap() # noqa +import benchpark.cmd.aggregate # noqa: E402 import benchpark.cmd.audit # noqa: E402 import benchpark.cmd.bootstrap # noqa: E402 import benchpark.cmd.experiment # noqa: E402 @@ -240,6 +242,12 @@ def init_commands(subparsers, actions_dict): help="Perform pre-defined analysis on the performance data (caliper files) after 'ramble on'", ) + aggregate_parser = subparsers.add_parser( + "aggregate", + help="Aggregate multiple experiments (even across workspaces) into the same submission script", + ) + benchpark.cmd.aggregate.setup_parser(aggregate_parser) + actions_dict["system"] = benchpark.cmd.system.command actions_dict["experiment"] = benchpark.cmd.experiment.command actions_dict["setup"] = benchpark.cmd.setup.command @@ -250,6 +258,7 @@ def init_commands(subparsers, actions_dict): actions_dict["show-build"] = benchpark.cmd.show_build.command actions_dict["list"] = benchpark.cmd.list.command actions_dict["bootstrap"] = benchpark.cmd.bootstrap.command + actions_dict["aggregate"] = benchpark.cmd.aggregate.command if analyze_installed: benchpark.cmd.analyze.setup_parser(analyze_parser) actions_dict["analyze"] = benchpark.cmd.analyze.command