Benchmark (#364)
* add benchmark and benchmark workflow

* fix calculation of delay from ranges without files

* add init file

* update target times

* Update benchmark_sed.py

Update target values

* Update benchmark_sed.py

Fix targets

* update target

* remove benchmark branch

* move benchmark targets to yaml file, and add mechanism for update

* stricter update rules

* update pull request action, and trigger setting of new targets

* Update benchmark targets

* remove benchmark branch trigger
rettigl committed Mar 20, 2024
1 parent ee5499e commit 2c18343
Showing 5 changed files with 235 additions and 1 deletion.
56 changes: 56 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,56 @@
name: benchmark

# Triggers the workflow manually, or on push to main and the create-pull-request/patch branch
on:
  workflow_dispatch:
  push:
    branches: [ main, create-pull-request/patch ]

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      # Check out repo and set up Python
      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - uses: tibdex/github-app-token@v1
        id: generate-token
        with:
          app_id: ${{ secrets.APP_ID }}
          private_key: ${{ secrets.APP_PRIVATE_KEY }}

      # Use cached python and dependencies, install poetry
      - name: "Setup Python, Poetry and Dependencies"
        uses: packetcoders/action-setup-cache-python-poetry@main
        with:
          python-version: 3.8
          poetry-version: 1.2.2

      # Run benchmarks
      - name: Run benchmarks on python 3.8
        run: |
          poetry run pytest --full-trace --show-capture=no -sv benchmarks/benchmark_*.py
      - name: Obtain git status
        id: status
        run: |
          # duplicate stdout to fd 5 so the captured git status also appears in the job log
          exec 5>&1
          STATUS=$(git status|tee >(cat - >&5))
          # expose the multiline status as a step output using the GITHUB_OUTPUT heredoc syntax
          echo "STATUS<<EOF" >> $GITHUB_OUTPUT
          echo "$STATUS" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT
      # create pull request if necessary
      - name: "Create Pull Request"
        uses: peter-evans/create-pull-request@v6
        if: ${{ contains(steps.status.outputs.STATUS, 'benchmark_targets.yaml')}}
        with:
          token: ${{ steps.generate-token.outputs.token }}
          commit-message: Update benchmark targets
          title: "Update benchmark targets"
          branch: "update_benchmark_targets"
          body: |
            Generated new benchmark targets.
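
Taken together, these steps close the loop on target maintenance: the benchmark run may rewrite benchmarks/benchmark_targets.yaml (see the update rule in benchmark_sed.py below), the "Obtain git status" step records whether the working tree changed, and only if benchmark_targets.yaml appears in that status does the final step open an "Update benchmark targets" pull request on the update_benchmark_targets branch, authenticated with the app token generated earlier. Once such a pull request is merged, the push to main runs the benchmarks again against the committed targets.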
Empty file added benchmarks/__init__.py
173 changes: 173 additions & 0 deletions benchmarks/benchmark_sed.py
@@ -0,0 +1,173 @@
"""This file contains code that performs benchmarks for the processor workflows
"""
import os
import timeit
from importlib.util import find_spec

import dask
import numpy as np
import psutil

from sed import SedProcessor
from sed.binning.binning import bin_dataframe
from sed.core.config import load_config
from sed.core.config import save_config

package_dir = os.path.dirname(find_spec("sed").origin)


num_cores = min(20, psutil.cpu_count())
# use fixed random numbers for comparability
np.random.seed(42)
# 100 million events, ~ 3 GByte.
n_pts = 100000000
ranges = np.array([[0, 2048], [0, 2048], [60000, 120000], [2000, 20000]])
axes = ["X", "Y", "t", "ADC"]
array = (
    dask.array.random.random((n_pts, len(ranges))) * (ranges[:, 1] - ranges[:, 0]) + ranges[:, 0]
)
dataframe = dask.dataframe.from_dask_array(array, columns=axes)


targets = load_config(package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_binning_1d() -> None:
    """Run a benchmark for 1d binning of artificial data"""
    bins_ = [1000]
    axes_ = ["t"]
    ranges_ = [(60000, 120000)]
    bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
    command = (
        "bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
        "ranges=ranges_, n_cores=num_cores)"
    )
    timer = timeit.Timer(
        command,
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["binning_1d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["binning_1d"]:
        print(f"Updating targets for 'binning_1d' to {float(np.mean(result) * 1.2)}")
        targets["binning_1d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_binning_4d() -> None:
    """Run a benchmark for 4d binning of artificial data"""
    bins_ = [100, 100, 100, 100]
    axes_ = axes
    ranges_ = [(0, 2048), (0, 2048), (60000, 120000), (2000, 20000)]
    bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
    command = (
        "bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
        "ranges=ranges_, n_cores=num_cores)"
    )
    timer = timeit.Timer(
        command,
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["binning_4d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["binning_4d"]:
        print(f"Updating targets for 'binning_4d' to {float(np.mean(result) * 1.2)}")
        targets["binning_4d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_splinewarp() -> None:
    """Run a benchmark for the generation of the inverse dfield correction"""
    processor = SedProcessor(
        dataframe=dataframe.copy(),
        config=package_dir + "/config/mpes_example_config.yaml",
        folder_config={},
        user_config={},
        system_config={},
        verbose=True,
    )
    processor.apply_momentum_correction()
    timer = timeit.Timer(
        "processor.mc.dfield_updated=True; processor.apply_momentum_correction()",
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["inv_dfield"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["inv_dfield"]:
        print(f"Updating targets for 'inv_dfield' to {float(np.mean(result) * 1.2)}")
        targets["inv_dfield"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_workflow_1d() -> None:
    """Run a benchmark for 1d binning of converted data"""
    processor = SedProcessor(
        dataframe=dataframe.copy(),
        config=package_dir + "/config/mpes_example_config.yaml",
        folder_config={},
        user_config={},
        system_config={},
        verbose=True,
    )
    processor.add_jitter()
    processor.apply_momentum_correction()
    processor.apply_momentum_calibration()
    processor.apply_energy_correction()
    processor.append_energy_axis()
    processor.calibrate_delay_axis(delay_range=(-500, 1500))
    bins_ = [1000]
    axes_ = ["energy"]
    ranges_ = [(-10, 10)]
    processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
    timer = timeit.Timer(
        "processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["workflow_1d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["workflow_1d"]:
        print(f"Updating targets for 'workflow_1d' to {float(np.mean(result) * 1.2)}")
        targets["workflow_1d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_workflow_4d() -> None:
    """Run a benchmark for 4d binning of converted data"""
    processor = SedProcessor(
        dataframe=dataframe.copy(),
        config=package_dir + "/config/mpes_example_config.yaml",
        folder_config={},
        user_config={},
        system_config={},
        verbose=True,
    )
    processor.add_jitter()
    processor.apply_momentum_correction()
    processor.apply_momentum_calibration()
    processor.apply_energy_correction()
    processor.append_energy_axis()
    processor.calibrate_delay_axis(delay_range=(-500, 1500))
    bins_ = [100, 100, 100, 100]
    axes_ = ["kx", "ky", "energy", "delay"]
    ranges_ = [(-2, 2), (-2, 2), (-10, 10), (-1000, 1000)]
    processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
    timer = timeit.Timer(
        "processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["workflow_4d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["workflow_4d"]:
        print(f"Updating targets for 'workflow_4d' to {float(np.mean(result) * 1.2)}")
        targets["workflow_4d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")
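
Every benchmark above enforces the same self-adjusting rule: the fastest of five timed runs must stay below the stored target, and if the mean run time falls below 80% of the target, the target is tightened to 120% of the new mean and written back to benchmark_targets.yaml, which the workflow above then turns into an "Update benchmark targets" pull request. A minimal sketch of that shared rule, factored into a helper purely for illustration (the file itself inlines this logic in every test, using the module-level targets, np, save_config, and package_dir defined above):

def check_and_update_target(result: list, name: str) -> None:
    """Illustrative only: enforce a benchmark target and tighten it on large improvements."""
    assert min(result) < targets[name]  # fastest repeat must beat the stored target
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets[name]:
        print(f"Updating targets for '{name}' to {float(np.mean(result) * 1.2)}")
        targets[name] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")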
5 changes: 5 additions & 0 deletions benchmarks/benchmark_targets.yaml
@@ -0,0 +1,5 @@
binning_1d: 3.1223518816799785
binning_4d: 9.514051519199997
inv_dfield: 7.265958606239991
workflow_1d: 18.886161206160004
workflow_4d: 22.608196924320012
2 changes: 1 addition & 1 deletion sed/core/processor.py
@@ -1705,7 +1705,7 @@ def calibrate_delay_axis(
         if verbose:
             print("Adding delay column to dataframe:")

-        if datafile is None:
+        if delay_range is None and datafile is None:
             if len(self.dc.calibration) == 0:
                 try:
                     datafile = self._files[0]
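
The added delay_range check is the "fix calculation of delay from ranges without files" from the commit message: when an explicit delay range is passed, calibrate_delay_axis no longer enters the datafile-lookup branch at all. That is what lets the benchmarks above run on a purely synthetic dataframe with no files attached, roughly as in this minimal illustration (setup copied from benchmark_sed.py; not part of the changed code):

processor = SedProcessor(
    dataframe=dataframe.copy(),
    config=package_dir + "/config/mpes_example_config.yaml",
    folder_config={},
    user_config={},
    system_config={},
    verbose=True,
)
# With the guard above, the explicit range is applied directly; previously this call
# would have entered the datafile branch even though an explicit range was given.
processor.calibrate_delay_axis(delay_range=(-500, 1500))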
