Benchmark (#364)
* add benchmark and benchmark workflow

* fix calculation of delay from ranges without files

* add init file

* update target times

* Update benchmark_sed.py

Update target values

* Update benchmark_sed.py

Fix targets

* update target

* remove benchmark branch

* move benchmark targets to yaml file, and add mechanism for update

* stricter update rules

* update pull request action, and trigger setting of new targets

* Update benchmark targets

* remove benchmark branch trigger
rettigl committed Mar 20, 2024
1 parent ee5499e commit 2c18343
Showing 5 changed files with 235 additions and 1 deletion.
56 changes: 56 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,56 @@
name: benchmark

# Triggers the workflow manually, or on push to main and the create-pull-request/patch branch
on:
  workflow_dispatch:
  push:
    branches: [ main, create-pull-request/patch ]

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      # Check out repo and set up Python
      - name: Check out the repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - uses: tibdex/github-app-token@v1
        id: generate-token
        with:
          app_id: ${{ secrets.APP_ID }}
          private_key: ${{ secrets.APP_PRIVATE_KEY }}

      # Use cached python and dependencies, install poetry
      - name: "Setup Python, Poetry and Dependencies"
        uses: packetcoders/action-setup-cache-python-poetry@main
        with:
          python-version: 3.8
          poetry-version: 1.2.2

      # Run benchmarks
      - name: Run benchmarks on python 3.8
        run: |
          poetry run pytest --full-trace --show-capture=no -sv benchmarks/benchmark_*.py
      - name: Obtain git status
        id: status
        run: |
          # duplicate stdout to fd 5 so the captured git status also appears in the job log
          exec 5>&1
          STATUS=$(git status|tee >(cat - >&5))
          # expose the multiline status as a step output using the GITHUB_OUTPUT heredoc syntax
          echo "STATUS<<EOF" >> $GITHUB_OUTPUT
          echo "$STATUS" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT
      # create pull request if necessary
      - name: "Create Pull Request"
        uses: peter-evans/create-pull-request@v6
        if: ${{ contains(steps.status.outputs.STATUS, 'benchmark_targets.yaml')}}
        with:
          token: ${{ steps.generate-token.outputs.token }}
          commit-message: Update benchmark targets
          title: "Update benchmark targets"
          branch: "update_benchmark_targets"
          body: |
            Generated new benchmark targets.
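
Taken together, these steps close the loop on target maintenance: the benchmark run may rewrite benchmarks/benchmark_targets.yaml (see the update rule in benchmark_sed.py below), the "Obtain git status" step records whether the working tree changed, and only if benchmark_targets.yaml appears in that status does the final step open an "Update benchmark targets" pull request on the update_benchmark_targets branch, authenticated with the app token generated earlier. Once such a pull request is merged, the push to main runs the benchmarks again against the committed targets.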
Empty file added benchmarks/__init__.py
173 changes: 173 additions & 0 deletions benchmarks/benchmark_sed.py
@@ -0,0 +1,173 @@
"""This file contains code that performs benchmarks for the processor workflows
"""
import os
import timeit
from importlib.util import find_spec

import dask
import numpy as np
import psutil

from sed import SedProcessor
from sed.binning.binning import bin_dataframe
from sed.core.config import load_config
from sed.core.config import save_config

package_dir = os.path.dirname(find_spec("sed").origin)


num_cores = min(20, psutil.cpu_count())
# use fixed random numbers for comparability
np.random.seed(42)
# 100 million events, ~ 3 GByte.
n_pts = 100000000
ranges = np.array([[0, 2048], [0, 2048], [60000, 120000], [2000, 20000]])
axes = ["X", "Y", "t", "ADC"]
array = (
    dask.array.random.random((n_pts, len(ranges))) * (ranges[:, 1] - ranges[:, 0]) + ranges[:, 0]
)
dataframe = dask.dataframe.from_dask_array(array, columns=axes)


targets = load_config(package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_binning_1d() -> None:
    """Run a benchmark for 1d binning of artificial data"""
    bins_ = [1000]
    axes_ = ["t"]
    ranges_ = [(60000, 120000)]
    bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
    command = (
        "bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
        "ranges=ranges_, n_cores=num_cores)"
    )
    timer = timeit.Timer(
        command,
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["binning_1d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["binning_1d"]:
        print(f"Updating targets for 'binning_1d' to {float(np.mean(result) * 1.2)}")
        targets["binning_1d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_binning_4d() -> None:
    """Run a benchmark for 4d binning of artificial data"""
    bins_ = [100, 100, 100, 100]
    axes_ = axes
    ranges_ = [(0, 2048), (0, 2048), (60000, 120000), (2000, 20000)]
    bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
    command = (
        "bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
        "ranges=ranges_, n_cores=num_cores)"
    )
    timer = timeit.Timer(
        command,
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["binning_4d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["binning_4d"]:
        print(f"Updating targets for 'binning_4d' to {float(np.mean(result) * 1.2)}")
        targets["binning_4d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_splinewarp() -> None:
    """Run a benchmark for the generation of the inverse dfield correction"""
    processor = SedProcessor(
        dataframe=dataframe.copy(),
        config=package_dir + "/config/mpes_example_config.yaml",
        folder_config={},
        user_config={},
        system_config={},
        verbose=True,
    )
    processor.apply_momentum_correction()
    timer = timeit.Timer(
        "processor.mc.dfield_updated=True; processor.apply_momentum_correction()",
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["inv_dfield"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["inv_dfield"]:
        print(f"Updating targets for 'inv_dfield' to {float(np.mean(result) * 1.2)}")
        targets["inv_dfield"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_workflow_1d() -> None:
    """Run a benchmark for 1d binning of converted data"""
    processor = SedProcessor(
        dataframe=dataframe.copy(),
        config=package_dir + "/config/mpes_example_config.yaml",
        folder_config={},
        user_config={},
        system_config={},
        verbose=True,
    )
    processor.add_jitter()
    processor.apply_momentum_correction()
    processor.apply_momentum_calibration()
    processor.apply_energy_correction()
    processor.append_energy_axis()
    processor.calibrate_delay_axis(delay_range=(-500, 1500))
    bins_ = [1000]
    axes_ = ["energy"]
    ranges_ = [(-10, 10)]
    processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
    timer = timeit.Timer(
        "processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["workflow_1d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["workflow_1d"]:
        print(f"Updating targets for 'workflow_1d' to {float(np.mean(result) * 1.2)}")
        targets["workflow_1d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_workflow_4d() -> None:
    """Run a benchmark for 4d binning of converted data"""
    processor = SedProcessor(
        dataframe=dataframe.copy(),
        config=package_dir + "/config/mpes_example_config.yaml",
        folder_config={},
        user_config={},
        system_config={},
        verbose=True,
    )
    processor.add_jitter()
    processor.apply_momentum_correction()
    processor.apply_momentum_calibration()
    processor.apply_energy_correction()
    processor.append_energy_axis()
    processor.calibrate_delay_axis(delay_range=(-500, 1500))
    bins_ = [100, 100, 100, 100]
    axes_ = ["kx", "ky", "energy", "delay"]
    ranges_ = [(-2, 2), (-2, 2), (-10, 10), (-1000, 1000)]
    processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
    timer = timeit.Timer(
        "processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
        globals={**globals(), **locals()},
    )
    result = timer.repeat(5, number=1)
    print(result)
    assert min(result) < targets["workflow_4d"]
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets["workflow_4d"]:
        print(f"Updating targets for 'workflow_4d' to {float(np.mean(result) * 1.2)}")
        targets["workflow_4d"] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")
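
Every benchmark above enforces the same self-adjusting rule: the fastest of five timed runs must stay below the stored target, and if the mean run time falls below 80% of the target, the target is tightened to 120% of the new mean and written back to benchmark_targets.yaml, which the workflow above then turns into an "Update benchmark targets" pull request. A minimal sketch of that shared rule, factored into a helper purely for illustration (the file itself inlines this logic in every test, using the module-level targets, np, save_config, and package_dir defined above):

def check_and_update_target(result: list, name: str) -> None:
    """Illustrative only: enforce a benchmark target and tighten it on large improvements."""
    assert min(result) < targets[name]  # fastest repeat must beat the stored target
    # update targets if substantial improvement occurs
    if np.mean(result) < 0.8 * targets[name]:
        print(f"Updating targets for '{name}' to {float(np.mean(result) * 1.2)}")
        targets[name] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")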
5 changes: 5 additions & 0 deletions benchmarks/benchmark_targets.yaml
@@ -0,0 +1,5 @@
binning_1d: 3.1223518816799785
binning_4d: 9.514051519199997
inv_dfield: 7.265958606239991
workflow_1d: 18.886161206160004
workflow_4d: 22.608196924320012
2 changes: 1 addition & 1 deletion sed/core/processor.py
@@ -1705,7 +1705,7 @@ def calibrate_delay_axis(
         if verbose:
             print("Adding delay column to dataframe:")

-        if datafile is None:
+        if delay_range is None and datafile is None:
             if len(self.dc.calibration) == 0:
                 try:
                     datafile = self._files[0]
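
The added delay_range check is the "fix calculation of delay from ranges without files" from the commit message: when an explicit delay range is passed, calibrate_delay_axis no longer enters the datafile-lookup branch at all. That is what lets the benchmarks above run on a purely synthetic dataframe with no files attached, roughly as in this minimal illustration (setup copied from benchmark_sed.py; not part of the changed code):

processor = SedProcessor(
    dataframe=dataframe.copy(),
    config=package_dir + "/config/mpes_example_config.yaml",
    folder_config={},
    user_config={},
    system_config={},
    verbose=True,
)
# With the guard above, the explicit range is applied directly; previously this call
# would have entered the datafile branch even though an explicit range was given.
processor.calibrate_delay_axis(delay_range=(-500, 1500))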
