Skip to content

Commit

Permalink
libpython: Add helper library for benchmarking (#1670)
Browse files Browse the repository at this point in the history
An experimental collection of simple functions to help benchmarking
and reduce code duplication between benchmarks.

The design ideas are: be minimalist, pragmatic with no API promises,
but provide convenient functions for writing benchmarking scripts.
The functions are meant for developers tracking the latest development version.

The plotting functions can be imported with missing dependencies for convenience,
but running them requires Matplotlib.

Co-authored-by: Aaron Saw Min Sern <aaronsms@u.nus.edu>
  • Loading branch information
wenzeslaus and aaronsms committed Jul 6, 2021
1 parent ca15512 commit e334471
Show file tree
Hide file tree
Showing 6 changed files with 336 additions and 1 deletion.
2 changes: 1 addition & 1 deletion python/grass/Makefile
Expand Up @@ -5,7 +5,7 @@ include $(MODULE_TOPDIR)/include/Make/Python.make

PYDIR = $(ETC)/python/grass

SUBDIRS = app exceptions script ctypes grassdb temporal pygrass pydispatch imaging gunittest bandref jupyter
SUBDIRS = app benchmark exceptions script ctypes grassdb temporal pygrass pydispatch imaging gunittest bandref jupyter

default: $(PYDIR)/__init__.py
$(MAKE) subdirs
Expand Down
19 changes: 19 additions & 0 deletions python/grass/benchmark/Makefile
@@ -0,0 +1,19 @@
# Build/install rules for the grass.benchmark Python subpackage.
MODULE_TOPDIR = ../../..

include $(MODULE_TOPDIR)/include/Make/Other.make
include $(MODULE_TOPDIR)/include/Make/Python.make

# Destination directory where the package files are installed.
DSTDIR = $(ETC)/python/grass/benchmark

# Python modules (without the .py extension) that make up the package.
MODULES = runners plots

# Source .py files and the compiled .pyc files to produce.
PYFILES := $(patsubst %,$(DSTDIR)/%.py,$(MODULES) __init__)
PYCFILES := $(patsubst %,$(DSTDIR)/%.pyc,$(MODULES) __init__)

default: $(PYFILES) $(PYCFILES)

$(DSTDIR):
$(MKDIR) $@

# Order-only prerequisite on $(DSTDIR) ensures the directory exists first.
$(DSTDIR)/%: % | $(DSTDIR)
$(INSTALL_DATA) $< $@
9 changes: 9 additions & 0 deletions python/grass/benchmark/__init__.py
@@ -0,0 +1,9 @@
"""Benchmarking for GRASS GIS modules
This subpackage of the grass package is experimental and the API can change anytime.
The API of the package is defined by what is imported in the top-level ``__init__.py``
file of the subpackage.
"""

from .plots import nprocs_plot, num_cells_plot
from .runners import benchmark_nprocs, benchmark_resolutions
122 changes: 122 additions & 0 deletions python/grass/benchmark/plots.py
@@ -0,0 +1,122 @@
# MODULE: grass.benchmark
#
# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
#
# PURPOSE: Benchmarking for GRASS GIS modules
#
# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
# for details.


"""Plotting functionality for benchmark results"""


def get_pyplot(to_file):
    """Lazily import and return matplotlib's pyplot module.

    The import happens only when this function is called, so modules
    importing this function still work on installations without Matplotlib;
    only an actual call requires it.

    Pass ``to_file=True`` to select the non-interactive "agg" backend,
    which avoids the tkinter dependency when the interactive ``show()``
    method is not needed.
    """
    import matplotlib  # pylint: disable=import-outside-toplevel

    if to_file:
        # Non-interactive backend for writing to files only.
        matplotlib.use("agg")

    import matplotlib.pyplot as plt  # pylint: disable=import-outside-toplevel

    return plt


def nprocs_plot(results, filename=None):
    """Plot results from multiple nprocs (thread) benchmarks.

    *results* is a list of individual results from separate benchmarks.
    Each result must have *nprocs*, *times*, and *label* attributes:
    *nprocs* is a list of all processing-element counts (cores, threads,
    processes) used in the benchmark, *times* is a list of the
    corresponding run times, and *label* identifies the benchmark in the
    legend. A result may optionally have an *all_times* attribute — a list
    of lists, one sublist of all recorded times per nprocs value.

    Results may each use a different list of nprocs values, i.e.,
    benchmarks run with different nprocs can be combined in one plot.

    When *filename* is given, the plot is saved to that file;
    otherwise it is shown interactively.
    """
    plt = get_pyplot(to_file=bool(filename))
    axes = plt.gca()

    # Gather every x value used by any result for the tick positions.
    tick_values = set()
    for result in results:
        nprocs_values = result.nprocs
        tick_values.update(nprocs_values)
        plt.plot(nprocs_values, result.times, label=result.label)
        if hasattr(result, "all_times"):
            # Shade the band between the fastest and slowest recorded times.
            lower = [min(times) for times in result.all_times]
            upper = [max(times) for times in result.all_times]
            plt.fill_between(nprocs_values, lower, upper, color="gray", alpha=0.3)
    plt.legend()
    axes.set(xticks=sorted(tick_values))
    plt.xlabel("Number of cores (threads, processes)")
    plt.ylabel("Time [s]")
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def num_cells_plot(results, filename=None, show_resolution=False):
    """Plot results from multiple raster grid size benchmarks.

    *results* is a list of individual results from separate benchmarks,
    each structured as for the :func:`nprocs_plot` function.
    Each result must have *times* and *label* attributes and may have an
    *all_times* attribute. It must also have a *cells* attribute, or,
    when ``show_resolution=True``, a *resolutions* attribute.

    Results may each use different x values, so benchmarks run with
    different grid sizes can be combined in one plot.

    When *filename* is given, the plot is saved to that file;
    otherwise it is shown interactively.
    """
    plt = get_pyplot(to_file=bool(filename))
    axes = plt.gca()
    if show_resolution:
        # Finer resolution means more cells, so grow left-to-right.
        axes.invert_xaxis()

    # Gather every x value used by any result for the tick positions.
    tick_values = set()
    for result in results:
        x_values = result.resolutions if show_resolution else result.cells
        tick_values.update(x_values)
        plt.plot(x_values, result.times, label=result.label)
        if hasattr(result, "all_times"):
            # Shade the band between the fastest and slowest recorded times.
            lower = [min(times) for times in result.all_times]
            upper = [max(times) for times in result.all_times]
            plt.fill_between(x_values, lower, upper, color="gray", alpha=0.3)

    plt.legend()
    axes.set(xticks=sorted(tick_values))
    if show_resolution:
        plt.xlabel("Resolution [map units]")
    else:
        # Cell counts get large; use scientific notation on the x axis.
        axes.ticklabel_format(axis="x", style="scientific", scilimits=(0, 0))
        plt.xlabel("Number of cells")
    plt.ylabel("Time [s]")
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
129 changes: 129 additions & 0 deletions python/grass/benchmark/runners.py
@@ -0,0 +1,129 @@
# MODULE: grass.benchmark
#
# AUTHOR(S): Aaron Saw Min Sern <aaronsms u nus edu>
# Vaclav Petras <wenzeslaus gmail com>
#
# PURPOSE: Benchmarking for GRASS GIS modules
#
# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
# for details.


"""Basic functions for benchmarking modules"""

import shutil
from types import SimpleNamespace

import grass.script as gs


def benchmark_nprocs(module, label, max_nprocs, repeat=5):
    """Benchmark module using values of nprocs from 1 up to *max_nprocs*.

    *module* is an instance of PyGRASS Module class.
    The module is executed for each value of nprocs in the range from 1
    up to *max_nprocs* (inclusive), and each run is repeated *repeat*
    times (default 5), so the module runs ``max_nprocs * repeat`` times
    in total.
    *label* is a text to add to the result (for user-facing display).

    Returns an object with attributes *times* (list of average execution times),
    *all_times* (list of lists of measured execution times), *nprocs*
    (list of *nprocs* values used), and *label* (the provided parameter as is).
    """
    term_size = shutil.get_terminal_size()
    print(module.get_bash())

    min_avg = float("inf")
    min_nprocs = 1  # nprocs value that produced the best average time
    serial_avg = None  # average time of the serial (nprocs == 1) run
    avg_times = []
    all_times = []
    nprocs_list = list(range(1, max_nprocs + 1))
    for nprocs in nprocs_list:
        print("\u2500" * term_size.columns)
        print(f"Benchmark with {nprocs} thread(s)...\n")
        time_sum = 0
        measured_times = []
        for _ in range(repeat):
            module(nprocs=nprocs).run()
            print(f"{module.time}s")
            time_sum += module.time
            measured_times.append(module.time)

        avg = time_sum / repeat
        avg_times.append(avg)
        all_times.append(measured_times)
        if nprocs == 1:
            serial_avg = avg
        if avg < min_avg:
            min_avg = avg
            min_nprocs = nprocs
        print(f"\nResult - {avg}s")

    print("\u2500" * term_size.columns)
    # Guard against max_nprocs < 1, where no serial run happened
    # (previously this raised UnboundLocalError).
    if serial_avg is not None:
        print(f"\nSerial average time - {serial_avg}s")
    print(f"Best average time - {min_avg}s ({min_nprocs} threads)\n")

    return SimpleNamespace(
        all_times=all_times,
        times=avg_times,
        nprocs=nprocs_list,
        label=label,
    )


def benchmark_resolutions(module, resolutions, label, repeat=5, nprocs=None):
    """Benchmark module using different resolutions.

    *module* is an instance of PyGRASS Module class.
    *resolutions* is a list of resolutions to set (current region is currently
    used and changed but that may change in the future).
    *repeat* sets how many times each run is repeated.
    So, the module will run ``len(resolutions) * repeat`` times.
    *label* is a text to add to the result (for user-facing display).
    Optional *nprocs* is passed to the module if present.

    Returns an object with attributes *times* (list of average execution times),
    *all_times* (list of lists of measured execution times), *resolutions*
    (the provided parameter as is), *cells* (number of cells in the region),
    and *label* (the provided parameter as is).
    """
    term_size = shutil.get_terminal_size()
    print(module.get_bash())

    if nprocs:
        # Configure nprocs once up front; the parameter persists on the
        # module object across runs (previously this was redundantly
        # re-applied on every iteration of the repeat loop).
        module(nprocs=nprocs)

    avg_times = []
    all_times = []
    n_cells = []
    for resolution in resolutions:
        # Change the current region; the number of cells comes from it.
        gs.run_command("g.region", res=resolution)
        region = gs.region()
        n_cells.append(region["cells"])
        print("\u2500" * term_size.columns)
        print(f"Benchmark with {resolution} resolution...\n")
        time_sum = 0
        measured_times = []
        for _ in range(repeat):
            module.run()
            print(f"{module.time}s")
            time_sum += module.time
            measured_times.append(module.time)

        avg = time_sum / repeat
        avg_times.append(avg)
        all_times.append(measured_times)
        print(f"\nResult - {avg}s")

    return SimpleNamespace(
        all_times=all_times,
        times=avg_times,
        resolutions=resolutions,
        cells=n_cells,
        label=label,
    )
56 changes: 56 additions & 0 deletions python/grass/benchmark/testsuite/test_benchmark.py
@@ -0,0 +1,56 @@
# MODULE: Test of grass.benchmark
#
# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
#
# PURPOSE: Benchmarking for GRASS GIS modules
#
# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
# for details.

"""Basic tests of grass.benchmark"""

from pathlib import Path
from subprocess import DEVNULL

from grass.benchmark import benchmark_resolutions, num_cells_plot
from grass.gunittest.case import TestCase
from grass.gunittest.main import test
from grass.pygrass.modules import Module


class TestBenchmarksRun(TestCase):
    """Tests that functions for benchmarking can run"""

    def test_resolutions(self):
        """Test that resolution benchmark runs without nprocs and plots to file"""
        benchmarks = [
            dict(
                module=Module("r.univar", map="elevation", stdout_=DEVNULL, run_=False),
                label="Standard output",
            ),
            dict(
                module=Module(
                    "r.univar", map="elevation", flags="g", stdout_=DEVNULL, run_=False
                ),
                # Distinct label for the flags="g" variant; previously this
                # duplicated the first label, making the two series
                # indistinguishable in the plot legend.
                label="Shell script style output",
            ),
        ]
        resolutions = [300, 200, 100]
        results = []
        for benchmark in benchmarks:
            results.append(
                benchmark_resolutions(
                    **benchmark,
                    resolutions=resolutions,
                )
            )
        plot_file = "test_res_plot.png"
        num_cells_plot(results, filename=plot_file)
        self.assertTrue(Path(plot_file).is_file())


if __name__ == "__main__":
test()

0 comments on commit e334471

Please sign in to comment.