Benchmark #364

Merged · 14 commits · Mar 20, 2024
29 changes: 29 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,29 @@
name: benchmark

# Triggers the workflow on manual dispatch, and on push to main and create-pull-request/patch
on:
workflow_dispatch:
push:
branches: [ main, create-pull-request/patch ]

jobs:
benchmark:
runs-on: ubuntu-latest
steps:
# Check out repo and set up Python
- name: Check out the repository
uses: actions/checkout@v4
with:
lfs: true

# Use cached python and dependencies, install poetry
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2

      # Run benchmarks
      - name: Run benchmarks on python 3.8
run: |
poetry run pytest --full-trace --show-capture=no -sv benchmarks/benchmark_*.py
Empty file added: benchmarks/__init__.py
150 changes: 150 additions & 0 deletions benchmarks/benchmark_sed.py
@@ -0,0 +1,150 @@
"""This file contains code that performs benchmarks for the processor workflows
"""
import os
import timeit
from importlib.util import find_spec

import dask
import numpy as np
import psutil

from sed import SedProcessor
from sed.binning.binning import bin_dataframe

# locate the installed sed package, used below to resolve the bundled example config
package_dir = os.path.dirname(find_spec("sed").origin)


num_cores = min(20, psutil.cpu_count())
# use fixed random numbers for comparability
np.random.seed(42)
# 100 million events, ~ 3 GByte.
n_pts = 100000000
ranges = np.array([[0, 2048], [0, 2048], [60000, 120000], [2000, 20000]])
axes = ["X", "Y", "t", "ADC"]
array = (
dask.array.random.random((n_pts, len(ranges))) * (ranges[:, 1] - ranges[:, 0]) + ranges[:, 0]
)
dataframe = dask.dataframe.from_dask_array(array, columns=axes)
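
For reference, the scaling expression above broadcasts uniform samples from [0, 1) into each axis range column-wise; a minimal NumPy illustration of the same idiom (standalone, for illustration only):

import numpy as np

r = np.array([[0, 2048], [60000, 120000]])
u = np.random.random((5, 2))                 # uniform samples in [0, 1)
scaled = u * (r[:, 1] - r[:, 0]) + r[:, 0]   # column i now spans the interval r[i]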


target_artificial_1d = 2.68
target_artificial_4d = 8.15
target_inv_dfield = 6.1
target_binning_1d = 16.6
target_binning_4d = 22.0
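
The targets above are wall-clock thresholds in seconds: each test asserts that the fastest of five runs stays below its target. As a hedged sketch of how a target could be refreshed after a hardware change (this helper and its 25% headroom factor are assumptions, not part of the PR):

import timeit

def suggest_target(stmt: str, env: dict, headroom: float = 1.25) -> float:
    """Suggest a threshold: fastest of five single-shot runs times a headroom factor."""
    best = min(timeit.Timer(stmt, globals=env).repeat(5, number=1))
    return best * headroom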


def test_artificial_1d() -> None:
"""Run a benchmark for 1d binning of artificial data"""
bins_ = [1000]
axes_ = ["t"]
ranges_ = [(60000, 120000)]
    # untimed warm-up run, so one-time setup cost is excluded from the timed repeats
    bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
command = (
"bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
"ranges=ranges_, n_cores=num_cores)"
)
timer = timeit.Timer(
command,
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < target_artificial_1d
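
Each test in this file repeats the same pattern: one untimed warm-up call, five timed single-shot repeats, and an assertion on the fastest run. Purely as an illustration (this helper is hypothetical and not part of the PR), the pattern could be factored out as:

import timeit

def run_benchmark(stmt: str, env: dict, target: float, repeats: int = 5) -> None:
    """Time `stmt` once per repeat and assert the fastest run beats `target` seconds."""
    results = timeit.Timer(stmt, globals=env).repeat(repeats, number=1)
    print(results)
    assert min(results) < target

# usage sketch: run_benchmark(command, {**globals(), **locals()}, target_artificial_1d)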


def test_artificial_4d() -> None:
"""Run a benchmark for 4d binning of artificial data"""
bins_ = [100, 100, 100, 100]
axes_ = axes
ranges_ = [(0, 2048), (0, 2048), (60000, 120000), (2000, 20000)]
bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, ranges=ranges_, n_cores=num_cores)
command = (
"bin_dataframe(df=dataframe.copy(), bins=bins_, axes=axes_, "
"ranges=ranges_, n_cores=num_cores)"
)
timer = timeit.Timer(
command,
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < target_artificial_4d


def test_splinewarp() -> None:
"""Run a benchmark for the generation of the inverse dfield correction"""
processor = SedProcessor(
dataframe=dataframe.copy(),
config=package_dir + "/config/mpes_example_config.yaml",
folder_config={},
user_config={},
system_config={},
verbose=True,
)
processor.apply_momentum_correction()
    # re-setting dfield_updated forces the inverse dfield to be regenerated on every repeat
    timer = timeit.Timer(
"processor.mc.dfield_updated=True; processor.apply_momentum_correction()",
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < target_inv_dfield


def test_workflow_1d() -> None:
"""Run a benchmark for 1d binning of converted data"""
processor = SedProcessor(
dataframe=dataframe.copy(),
config=package_dir + "/config/mpes_example_config.yaml",
folder_config={},
user_config={},
system_config={},
verbose=True,
)
    # run the full example workflow: jitter, momentum correction/calibration,
    # energy correction/calibration, and delay-axis calibration
    processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
processor.apply_energy_correction()
processor.append_energy_axis()
processor.calibrate_delay_axis(delay_range=(-500, 1500))
bins_ = [1000]
axes_ = ["energy"]
ranges_ = [(-10, 10)]
processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
timer = timeit.Timer(
"processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < target_binning_1d


def test_workflow_4d() -> None:
"""Run a benchmark for 4d binning of converted data"""
processor = SedProcessor(
dataframe=dataframe.copy(),
config=package_dir + "/config/mpes_example_config.yaml",
folder_config={},
user_config={},
system_config={},
verbose=True,
)
processor.add_jitter()
processor.apply_momentum_correction()
processor.apply_momentum_calibration()
processor.apply_energy_correction()
processor.append_energy_axis()
processor.calibrate_delay_axis(delay_range=(-500, 1500))
bins_ = [100, 100, 100, 100]
axes_ = ["kx", "ky", "energy", "delay"]
ranges_ = [(-2, 2), (-2, 2), (-10, 10), (-1000, 1000)]
processor.compute(bins=bins_, axes=axes_, ranges=ranges_)
timer = timeit.Timer(
"processor.compute(bins=bins_, axes=axes_, ranges=ranges_)",
globals={**globals(), **locals()},
)
result = timer.repeat(5, number=1)
print(result)
assert min(result) < target_binning_4d
2 changes: 1 addition & 1 deletion sed/core/processor.py
@@ -1705,7 +1705,7 @@ def calibrate_delay_axis(
if verbose:
print("Adding delay column to dataframe:")

-        if datafile is None:
+        if delay_range is None and datafile is None:
if len(self.dc.calibration) == 0:
try:
datafile = self._files[0]
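
For context, the one-line change above tightens the fallback logic in calibrate_delay_axis: the raw datafile fallback is now only attempted when the caller passes neither a delay_range nor a datafile. This is presumably what lets the benchmarks above call calibrate_delay_axis(delay_range=(-500, 1500)) on a synthetic dataframe without any raw files. A simplified schematic of the resulting control flow (a sketch, not the actual implementation):

def resolve_delay_source(delay_range=None, datafile=None, calibration=()):
    """Sketch: an explicit delay_range short-circuits the datafile fallback."""
    if delay_range is None and datafile is None:
        if len(calibration) == 0:
            datafile = "first raw file"  # stands in for self._files[0]
    return delay_range, datafile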