17 changes: 16 additions & 1 deletion .github/workflows/run.yml
@@ -132,14 +132,29 @@ jobs:
on \
--executor '{execute_experiment}' \
--where '{n_nodes} == 1'
- name: Upload coverage to Codecov
- name: Test aggregate command
run: |
./bin/benchpark system init --dest=dane llnl-cluster cluster=dane
./bin/benchpark experiment init --dest=saxpy-agg --system=dane saxpy n_repeats=5
./bin/benchpark setup saxpy-agg workspace2/
./workspace2/ramble/bin/ramble --workspace-dir /home/runner/work/benchpark/benchpark/workspace2/saxpy-agg/dane/workspace workspace setup --dry-run
BENCHPARK_RUN_COVERAGE=aggregate ./bin/benchpark aggregate --dest agg workspace2 # Aggregate multi-trial workspace
- name: Upload binary coverage to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.BENCHPARK_CODECOV_TOKEN }}
directory: ./coverage-data-binary
flags: dryrunexperiments-binary
verbose: true
fail_ci_if_error: true
- name: Upload aggregate coverage to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.BENCHPARK_CODECOV_TOKEN }}
directory: ./coverage-data-aggregate
flags: dryrunexperiments-aggregate
verbose: true
fail_ci_if_error: true
dryrunexperiments:
runs-on: ubuntu-24.04
strategy:
2 changes: 1 addition & 1 deletion codecov.yml
@@ -7,4 +7,4 @@ comment:
require_changes: false
require_base: false
require_head: true
after_n_builds: 10 # Number of SEPARATE codecov uploads expected, e.g. "Upload coverage to Codecov" jobs
after_n_builds: 11 # Number of SEPARATE codecov uploads expected, e.g. "Upload coverage to Codecov" jobs
19 changes: 19 additions & 0 deletions docs/run-experiment.rst
@@ -48,4 +48,23 @@
the experiments benchpark would run in the second run. Generally, we would advise the
user to remove the ``$workspace/experiments`` directory before re-running the
experiments using ``ramble --workspace-dir . on``.

Running Different Experiments in the Same Allocation
====================================================

The ``benchpark aggregate`` command batches experiments from one or more workspaces
into shared job scripts. To use ``benchpark aggregate``, provide a destination
directory for the job scripts and the workspaces as positional arguments:

::

$ benchpark aggregate --dest agg/ workspace1/ workspace2/ ...

Experiments whose scheduler directives match are grouped into a single script, named
``0.sh``, ``1.sh``, and so on. Then, submit each newly created job script to the scheduler:

::

   $ flux batch agg/0.sh

Output from the experiments will still be written to the respective experiment
directory.
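
Each aggregate script consists of the scheduler directives shared by its group of
experiments, followed by the absolute path of each experiment's
``execute_experiment`` script. A minimal sketch of a generated ``agg/0.sh``, assuming
a Flux-scheduled system and hypothetical workspace paths:

::

   # flux: -N 1
   # flux: -n 1
   /path/to/workspace1/experiments/saxpy/problem/dane/execute_experiment
   /path/to/workspace2/experiments/saxpy/problem/dane/execute_experiment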

Once you have run your experiment you can try :doc:`analyze-experiment`.
121 changes: 121 additions & 0 deletions lib/benchpark/cmd/aggregate.py
@@ -0,0 +1,121 @@
# Copyright 2025 Lawrence Livermore National Security, LLC
# and other Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import os
import re
from collections import defaultdict

from benchpark.error import BenchparkError


def _make_aggregate_script(script_path, batch_lines, to_aggregate):
    """Write a job script: the scheduler directive lines, followed by the
    absolute path of each experiment script to run."""
with open(script_path, "w") as f:
for line in batch_lines:
f.write(line.rstrip("\n") + "\n")
for experiment in to_aggregate:
f.write(os.path.abspath(experiment) + "\n")


def _collect_scheduler_options(experiment_script):
    """Extract scheduler directives (Flux or Slurm headers) from an experiment
    script. Returns an order-independent key of the parsed options, plus the
    original directive lines for reuse in the aggregate script."""
batch_patterns = [
r"\s*#\s*(flux:.*$)", # flux-style header (kept verbatim)
r"\s*#SBATCH\s+(.*)$", # SLURM
]

batch_opts = []
batch_lines = []

with open(experiment_script, "r") as f:
for line in f:
for p in batch_patterns:
m = re.match(p, line)
if m:
batch_opts.append(tuple(m.group(1).strip().split()))
batch_lines.append(line.strip())

return tuple(sorted(batch_opts)), batch_lines


def _collect_experiments(workspace_dir):
    """Recursively collect 'execute_experiment' scripts under a workspace,
    skipping the spack/ramble tooling directories."""
if not os.path.isdir(workspace_dir):
raise BenchparkError(
f"Workspace path does not exist or is not a directory: {workspace_dir}"
)

experiments = []
skip_roots = {"spack", "ramble", "spack-packages"}

for entry in os.listdir(workspace_dir):
entry_path = os.path.join(workspace_dir, entry)
if entry in skip_roots or not os.path.isdir(entry_path):
continue

for dirpath, _dirnames, filenames in os.walk(entry_path):
for fname in filenames:
if fname == "execute_experiment":
experiments.append(os.path.join(dirpath, fname))

return experiments


def _aggregate(args):
output_dir = args.dest
workspaces = args.workspaces

if not workspaces:
raise BenchparkError("No workspaces provided.")

if os.path.exists(output_dir):
raise BenchparkError(f"Directory must not already exist: {output_dir}")

experiments = []
for ws in workspaces:
experiments.extend(_collect_experiments(ws))

if not experiments:
raise BenchparkError(
"No 'execute_experiment' scripts found in the given workspaces."
)

opts_to_request = {}
opts_to_scripts = defaultdict(list)

for experiment_script in experiments:
batch_opts, batch_lines = _collect_scheduler_options(experiment_script)
if not batch_opts:
raise BenchparkError(f"Not expected: no batch opts in {experiment_script}")

opts_to_scripts[batch_opts].append(experiment_script)
if batch_opts not in opts_to_request:
opts_to_request[batch_opts] = batch_lines

os.mkdir(output_dir)
script_id = 0
for opts_group, scripts in opts_to_scripts.items():
script_path = os.path.join(output_dir, f"{script_id}.sh")
_make_aggregate_script(script_path, opts_to_request[opts_group], scripts)
script_id += 1


def setup_parser(root_parser):
"""
Register arguments for `benchpark aggregate` directly (no subcommands).
Usage:
benchpark aggregate --dest OUTDIR WS1 [WS2 ...]
"""
root_parser.add_argument(
"--dest",
required=True,
help="Directory to generate aggregate scripts in",
)
root_parser.add_argument(
"workspaces",
nargs="+",
help="One or more Benchpark workspace directories",
)


def command(args):
_aggregate(args)
13 changes: 11 additions & 2 deletions lib/main.py
@@ -17,7 +17,7 @@
if "-V" in sys.argv or "--version" in sys.argv:
print(__version__)
exit()
helpstr = """usage: main.py [-h] [-V] {tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,configure} ...
helpstr = """usage: main.py [-h] [-V] {tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,aggregate,configure} ...

Benchpark

@@ -26,7 +26,7 @@
-V, --version show version number and exit

Subcommands:
{tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,configure}
{tags,system,experiment,setup,unit-test,audit,mirror,info,show-build,list,bootstrap,analyze,aggregate,configure}
tags Tags in Benchpark experiments
system Initialize a system config
experiment Interact with experiments
@@ -39,6 +39,7 @@
list List experiments, systems, benchmarks, and modifiers
bootstrap Bootstrap benchpark or update an existing bootstrap
analyze Perform pre-defined analysis on the performance data (caliper files) after 'ramble on'
aggregate Aggregate multiple experiments (even across workspaces) into the same submission script
configure Configure options relating to the Benchpark environment
"""
if len(sys.argv) == 1 or "-h" == sys.argv[1] or "--help" == sys.argv[1]:
@@ -64,6 +65,7 @@
bootstrapper = RuntimeResources(benchpark.paths.benchpark_home) # noqa
bootstrapper.bootstrap() # noqa

import benchpark.cmd.aggregate # noqa: E402
import benchpark.cmd.audit # noqa: E402
import benchpark.cmd.bootstrap # noqa: E402
import benchpark.cmd.experiment # noqa: E402
@@ -240,6 +242,12 @@ def init_commands(subparsers, actions_dict):
help="Perform pre-defined analysis on the performance data (caliper files) after 'ramble on'",
)

aggregate_parser = subparsers.add_parser(
"aggregate",
help="Aggregate multiple experiments (even across workspaces) into the same submission script",
)
benchpark.cmd.aggregate.setup_parser(aggregate_parser)

actions_dict["system"] = benchpark.cmd.system.command
actions_dict["experiment"] = benchpark.cmd.experiment.command
actions_dict["setup"] = benchpark.cmd.setup.command
@@ -250,6 +258,7 @@ def init_commands(subparsers, actions_dict):
actions_dict["show-build"] = benchpark.cmd.show_build.command
actions_dict["list"] = benchpark.cmd.list.command
actions_dict["bootstrap"] = benchpark.cmd.bootstrap.command
actions_dict["aggregate"] = benchpark.cmd.aggregate.command
if analyze_installed:
benchpark.cmd.analyze.setup_parser(analyze_parser)
actions_dict["analyze"] = benchpark.cmd.analyze.command