Benchmark #364

Merged
merged 14 commits on Mar 20, 2024
56 changes: 56 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,56 @@
name: benchmark

# Triggers the workflow manually, and on pushes to main and the create-pull-request/patch branch
on:
workflow_dispatch:
push:
branches: [ main, create-pull-request/patch ]

jobs:
benchmark:
runs-on: ubuntu-latest
steps:
# Check out repo and set up Python
- name: Check out the repository
uses: actions/checkout@v4
with:
lfs: true

- uses: tibdex/github-app-token@v1
id: generate-token
with:
app_id: ${{ secrets.APP_ID }}
private_key: ${{ secrets.APP_PRIVATE_KEY }}

# Use cached python and dependencies, install poetry
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2

# Run benchmarks
- name: Run benchmarks on python 3.8
run: |
poetry run pytest --full-trace --show-capture=no -sv benchmarks/benchmark_*.py

- name: Obtain git status
id: status
run: |
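        # exec 5>&1 duplicates stdout on fd 5 so that `tee` can both capture the
        # `git status` output into STATUS and still echo it to the job log;
        # the <<EOF heredoc is the GitHub Actions syntax for multi-line step outputs.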
exec 5>&1
STATUS=$(git status|tee >(cat - >&5))
echo "STATUS<<EOF" >> $GITHUB_OUTPUT
echo "$STATUS" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

# create pull request if necessary
- name: "Create Pull Request"
uses: peter-evans/create-pull-request@v6
if: ${{ contains(steps.status.outputs.STATUS, 'benchmark_targets.yaml')}}
with:
token: ${{ steps.generate-token.outputs.token }}
commit-message: Update benchmark targets
title: "Update benchmark targets"
branch: "update_benchmark_targets"
body: |
Generated new benchmark targets.
Empty file added benchmarks/__init__.py
Empty file.
173 changes: 173 additions & 0 deletions benchmarks/benchmark_sed.py
@@ -0,0 +1,173 @@
"""This file contains code that performs benchmarks for the processor workflows
"""
import os
import timeit
from importlib.util import find_spec

import dask
import numpy as np
import psutil

from sed import SedProcessor
from sed.binning.binning import bin_dataframe
from sed.core.config import load_config
from sed.core.config import save_config

package_dir = os.path.dirname(find_spec("sed").origin)


num_cores = min(20, psutil.cpu_count())
# use fixed random numbers for comparability
np.random.seed(42)
# 100 million events, ~ 3 GByte.
n_pts = 100000000
ranges = np.array([[0, 2048], [0, 2048], [60000, 120000], [2000, 20000]])
axes = ["X", "Y", "t", "ADC"]
array = (
dask.array.random.random((n_pts, len(ranges))) * (ranges[:, 1] - ranges[:, 0]) + ranges[:, 0]
)
dataframe = dask.dataframe.from_dask_array(array, columns=axes)
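# (size check: 1e8 rows x 4 columns x 8 bytes per float64 = 3.2e9 bytes, i.e. the ~3 GByte above)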


targets = load_config(package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_binning_1d() -> None:
"""Run a benchmark for 1d binning of artificial data"""
bins_ = [1000]
axes_ = ["t"]
ranges_ = [(60000, 120000)]
bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
command = (
"bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
"ranges=ranges_, n_cores=num_cores)"
)
timer = timeit.Timer(
command,
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < targets["binning_1d"]
# update targets if substantial improvement occurs
if np.mean(result) < 0.8 * targets["binning_1d"]:
print(f"Updating targets for 'binning_1d' to {float(np.mean(result) * 1.2)}")
targets["binning_1d"] = float(np.mean(result) * 1.2)
save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")
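

# Note: the remaining benchmarks repeat the timing/target-update pattern used above.
# Purely as a reading aid (hypothetical helper, not part of this PR and not called by
# any test), the shared logic is equivalent to:
def _time_and_update_target(command: str, key: str, scope: dict) -> None:
    """Time `command` and tighten the stored target on substantial improvement."""
    result = timeit.Timer(command, globals=scope).repeat(5, number=1)
    print(result)
    # the fastest of five single runs must beat the stored target
    assert min(result) < targets[key]
    # if the mean run is more than 20% faster than the target, lower the target
    # again with a 20% safety margin and persist it
    if np.mean(result) < 0.8 * targets[key]:
        print(f"Updating targets for '{key}' to {float(np.mean(result) * 1.2)}")
        targets[key] = float(np.mean(result) * 1.2)
        save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")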


def test_binning_4d() -> None:
"""Run a benchmark for 4d binning of artificial data"""
bins_ = [100, 100, 100, 100]
axes_ = axes
ranges_ = [(0, 2048), (0, 2048), (60000, 120000), (2000, 20000)]
bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
command = (
"bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
"ranges=ranges_, n_cores=num_cores)"
)
timer = timeit.Timer(
command,
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < targets["binning_4d"]
# update targets if substantial improvement occurs
if np.mean(result) < 0.8 * targets["binning_4d"]:
print(f"Updating targets for 'binning_4d' to {float(np.mean(result) * 1.2)}")
targets["binning_4d"] = float(np.mean(result) * 1.2)
save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_splinewarp() -> None:
"""Run a benchmark for the generation of the inverse dfield correction"""
processor = SedProcessor(
dataframe=dataframe.copy(),
config=package_dir + "/config/mpes_example_config.yaml",
folder_config={},
user_config={},
system_config={},
verbose=True,
)
processor.apply_momentum_correction()
timer = timeit.Timer(
"processor.mc.dfield_updated=True; processor.apply_momentum_correction()",
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < targets["inv_dfield"]
# update targets if substantial improvement occurs
if np.mean(result) < 0.8 * targets["inv_dfield"]:
print(f"Updating targets for 'inv_dfield' to {float(np.mean(result) * 1.2)}")
targets["inv_dfield"] = float(np.mean(result) * 1.2)
save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_workflow_1d() -> None:
"""Run a benchmark for 1d binning of converted data"""
processor = SedProcessor(
dataframe=dataframe.copy(),
config=package_dir + "/config/mpes_example_config.yaml",
folder_config={},
user_config={},
system_config={},
verbose=True,
)
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
processor.apply_energy_correction()
processor.append_energy_axis()
processor.calibrate_delay_axis(delay_range=(-500, 1500))
bins_ = [1000]
axes_ = ["energy"]
ranges_ = [(-10, 10)]
processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
timer = timeit.Timer(
"processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < targets["workflow_1d"]
# update targets if substantial improvement occurs
if np.mean(result) < 0.8 * targets["workflow_1d"]:
print(f"Updating targets for 'workflow_1d' to {float(np.mean(result) * 1.2)}")
targets["workflow_1d"] = float(np.mean(result) * 1.2)
save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")


def test_workflow_4d() -> None:
"""Run a benchmark for 4d binning of converted data"""
processor = SedProcessor(
dataframe=dataframe.copy(),
config=package_dir + "/config/mpes_example_config.yaml",
folder_config={},
user_config={},
system_config={},
verbose=True,
)
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
processor.apply_energy_correction()
processor.append_energy_axis()
processor.calibrate_delay_axis(delay_range=(-500, 1500))
bins_ = [100, 100, 100, 100]
axes_ = ["kx", "ky", "energy", "delay"]
ranges_ = [(-2, 2), (-2, 2), (-10, 10), (-1000, 1000)]
processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
timer = timeit.Timer(
"processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < targets["workflow_4d"]
# update targets if substantial improvement occurs
if np.mean(result) < 0.8 * targets["workflow_4d"]:
print(f"Updating targets for 'workflow_4d' to {float(np.mean(result) * 1.2)}")
targets["workflow_4d"] = float(np.mean(result) * 1.2)
save_config(targets, package_dir + "/../benchmarks/benchmark_targets.yaml")
5 changes: 5 additions & 0 deletions benchmarks/benchmark_targets.yaml
@@ -0,0 +1,5 @@
binning_1d: 3.1223518816799785
binning_4d: 9.514051519199997
inv_dfield: 7.265958606239991
workflow_1d: 18.886161206160004
workflow_4d: 22.608196924320012
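The target values are wall-clock seconds per run (the quantity returned by timeit.Timer.repeat); benchmark_sed.py overwrites this file with tightened targets whenever the mean run time drops more than 20% below the stored value, which is what triggers the target-update pull request in the workflow above.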
2 changes: 1 addition & 1 deletion sed/core/processor.py
@@ -1705,7 +1705,7 @@ def calibrate_delay_axis(
if verbose:
print("Adding delay column to dataframe:")

if datafile is None:
if delay_range is None and datafile is None:
if len(self.dc.calibration) == 0:
try:
datafile = self._files[0]
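This is the only change outside the benchmark additions. With the added `delay_range is None` guard, passing an explicit delay range skips the datafile lookup entirely, which is what lets the dataframe-only processors in the benchmarks above call, for example:

processor.calibrate_delay_axis(delay_range=(-500, 1500))  # no datafile required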