Address comments and speed up tests
segsell committed May 2, 2023
1 parent 63a7b5f commit 0f73abd
Showing 4 changed files with 164 additions and 80 deletions.
64 changes: 38 additions & 26 deletions src/estimagic/benchmarking/benchmark_reports.py
@@ -3,6 +3,8 @@
process_benchmark_results,
)

from estimagic.visualization.profile_plot import create_solution_times


def convergence_report(
problems, results, *, stopping_criterion="y", x_precision=1e-4, y_precision=1e-4
@@ -31,7 +33,7 @@ def convergence_report(
Returns:
pandas.DataFrame: columns are the algorithms and the dimensionality of the
benchmark problems, indexes are the problems. For the algorithms columns,
the values are strings that are either "success" "failed", or "error".
the values are strings that are either "success", "failed", or "error".
For the dimensionality column, the values denote the number of dimensions
of the problem.
@@ -44,11 +46,7 @@
y_precision=y_precision,
)

convergence_report = converged_info.replace({True: "success", False: "failed"})

for key, value in results.items():
if isinstance(value["solution"], str):
convergence_report.at[key] = "error"
convergence_report = _get_success_info(results, converged_info)

dim = {problem: len(problems[problem]["inputs"]["params"]) for problem in problems}
convergence_report["dimensionality"] = convergence_report.index.map(dim)
@@ -61,7 +59,6 @@ def rank_report(
results,
*,
runtime_measure="n_evaluations",
normalize_runtime=False,
stopping_criterion="y",
x_precision=1e-4,
y_precision=1e-4,
@@ -80,10 +77,6 @@
This is the runtime until the desired convergence was reached by an
algorithm. This is called performance measure by Moré and Wild (2009).
Default is "n_evaluations".
normalize_runtime (bool): If True the runtime each algorithm needed for each
problem is scaled by the time the fastest algorithm needed. If True, the
resulting plot is what Moré and Wild (2009) called data profiles.
Default is False.
stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
how convergence is determined from the two precisions.
x_precision (float or None): how close an algorithm must have gotten to the
@@ -111,22 +104,15 @@
x_precision=x_precision,
y_precision=y_precision,
)
scenarios = list({algo[1] for algo in results.keys()})

success_info = converged_info.replace({True: "success", False: "failed"})
for key, value in results.items():
if isinstance(value["solution"], str):
success_info.at[key] = "error"

solution_times = histories.groupby(["problem", "algorithm"])[runtime_measure].max()
solution_times = create_solution_times(histories, runtime_measure, converged_info)
solution_times = solution_times.stack().reset_index()
solution_times = solution_times.rename(
columns={solution_times.columns[2]: runtime_measure}
)

if normalize_runtime:
solution_times = solution_times.unstack()
solution_times = solution_times.divide(solution_times.min(axis=1), axis=0)
solution_times = solution_times.stack(dropna=False)
solution_times.name = runtime_measure
success_info = _get_success_info(results, converged_info)

solution_times = solution_times.reset_index()
solution_times["rank"] = (
solution_times.groupby("problem")[runtime_measure].rank(
method="dense", ascending=True
@@ -135,8 +121,8 @@
).astype("Int64")

df_wide = solution_times.pivot(index="problem", columns="algorithm", values="rank")
rank_report = df_wide.astype(str)[scenarios]
rank_report[~converged_info] = success_info[scenarios]
rank_report = df_wide.astype(str)
rank_report[~converged_info] = success_info

return rank_report

@@ -169,3 +155,29 @@ def traceback_report(results):
traceback_report = pd.DataFrame.from_dict(tracebacks, orient="columns")

return traceback_report


def _get_success_info(results, converged_info):
"""Create a DataFrame with information on whether an algorithm succeeded or not.
Args:
results (dict): estimagic benchmarking results dictionary. Keys are
tuples of the form (problem, algorithm), values are dictionaries of the
collected information on the benchmark run, including 'criterion_history'
and 'time_history'.
converged_info (pandas.DataFrame): columns are the algorithms, indexes are the
problems. The values are boolean and True when the algorithm arrived at
the solution with the desired precision.
Returns:
pandas.DataFrame: columns are the algorithms, indexes are the problems.
values are strings that are either "success", "failed", or "error".
"""
success_info = converged_info.replace({True: "success", False: "failed"})

for key, value in results.items():
if isinstance(value["solution"], str):
success_info.at[key] = "error"

return success_info
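
For context, a minimal sketch (not part of the diff) of what the new _get_success_info helper returns; the toy frames below and the use of object() as a stand-in for an OptimizeResult are assumptions for illustration:

import pandas as pd

from estimagic.benchmarking.benchmark_reports import _get_success_info

# Toy converged_info: problems on the index, algorithms in the columns.
converged_info = pd.DataFrame(
    {"lbfgsb": [True, False], "nm": [True, False]},
    index=["bard_good_start", "box_3d"],
)

# A string in the "solution" slot marks a run whose optimizer raised an error;
# any non-string object stands in for a successful result.
results = {
    ("bard_good_start", "lbfgsb"): {"solution": object()},
    ("box_3d", "lbfgsb"): {"solution": object()},
    ("bard_good_start", "nm"): {"solution": object()},
    ("box_3d", "nm"): {"solution": "some traceback"},
}

success_info = _get_success_info(results, converged_info)
# bard_good_start -> "success" for both algorithms;
# box_3d          -> "failed" for lbfgsb and "error" for nm.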
4 changes: 2 additions & 2 deletions src/estimagic/visualization/profile_plot.py
@@ -86,7 +86,7 @@ def profile_plot(
y_precision=y_precision,
)

solution_times = _create_solution_times(
solution_times = create_solution_times(
df,
runtime_measure=runtime_measure,
converged_info=converged_info,
@@ -139,7 +139,7 @@ def profile_plot(
return fig


def _create_solution_times(df, runtime_measure, converged_info):
def create_solution_times(df, runtime_measure, converged_info):
"""Find the solution time for each algorithm and problem.
Args:
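
As a usage reference, this is roughly how the now-public helper is called from rank_report above; the toy history frame is an assumption for illustration, and the exact expected outputs are pinned down in tests/visualization/test_profile_plot.py below:

import pandas as pd

from estimagic.visualization.profile_plot import create_solution_times

# Toy tidy history: one row per criterion evaluation of each
# (problem, algorithm) pair, with the runtime measure in its own column.
histories = pd.DataFrame(
    {
        "problem": ["prob1"] * 4,
        "algorithm": ["lbfgsb", "lbfgsb", "nm", "nm"],
        "n_evaluations": [1, 2, 1, 3],
    }
)
converged_info = pd.DataFrame({"lbfgsb": [True], "nm": [False]}, index=["prob1"])

solution_times = create_solution_times(
    histories, runtime_measure="n_evaluations", converged_info=converged_info
)
# Wide frame with the problem on the index and one column per algorithm;
# rank_report stacks this and ranks the runtimes within each problem.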
166 changes: 120 additions & 46 deletions tests/benchmarking/test_benchmark_reports.py
@@ -1,31 +1,131 @@
import pytest
from itertools import product
import numpy as np

from estimagic import get_benchmark_problems
from estimagic.benchmarking.run_benchmark import run_benchmark
from estimagic.benchmarking.benchmark_reports import (
convergence_report,
rank_report,
traceback_report,
)
from estimagic import OptimizeResult


@pytest.fixture
def benchmark_example():
problems = get_benchmark_problems("example")
stop_after_10 = {
_stop_after_10 = {
"stopping_max_criterion_evaluations": 10,
"stopping_max_iterations": 10,
}
optimizers = {
"lbfgsb": {"algorithm": "scipy_lbfgsb", "algo_options": stop_after_10},
"nm": {"algorithm": "scipy_neldermead", "algo_options": stop_after_10},
"lbfgsb": {"algorithm": "scipy_lbfgsb", "algo_options": _stop_after_10},
"nm": {"algorithm": "scipy_neldermead", "algo_options": _stop_after_10},
}

problems = {
"bard_good_start": {
"inputs": {"params": np.array([1, 1, 1])},
"solution": {
"params": np.array([0.08241056, 1.13303608, 2.34369519]),
"value": 0.00821487730657897,
},
"noisy": False,
"info": {},
"start_criterion": 41.6817,
},
"box_3d": {
"inputs": {"params": np.array([0, 10, 20])},
"solution": {"params": np.array([1, 10, 1]), "value": 0},
"noisy": False,
"info": {},
"start_criterion": 1031.154,
},
}

results = {
("bard_good_start", "lbfgsb"): {
"params_history": [
[1.0, 1.0, 1.0],
[0.48286315298120086, 1.6129119244711858, 1.5974181569859445],
[0.09754340799557773, 1.7558262514618663, 1.7403560082627973],
],
"criterion_history": np.array(
[
4.16816959e01,
3.20813118e00,
9.97263708e-03,
]
),
"time_history": [
0.0,
0.0003762839987757616,
0.0007037959985609632,
],
"batches_history": [0, 1, 2],
"solution": OptimizeResult, # success
},
("box_3d", "lbfgsb"): {
"params_history": [
[0.0, 10.0, 20.0],
[-0.6579976970071755, 10.014197643614924, 19.247113914560085],
[-3.2899884850358774, 10.070988218074623, 16.235569572800433],
],
"criterion_history": np.array(
[
1.03115381e03,
8.73640769e02,
9.35093416e02,
]
),
"time_history": [
0.0,
0.000555748996703187,
0.0009771709992492106,
],
"batches_history": [0, 1, 2],
"solution": OptimizeResult, # failed
},
("bard_good_start", "nm"): {
"params_history": [
[1.0, 1.0, 1.0],
[1.05, 1.0, 1.0],
[0.7999999999999998, 1.1999999999999993, 1.0499999999999994],
[0.08241056, 1.13303608, 2.34369519],
],
"criterion_history": np.array(
[
41.68169586,
43.90748158,
23.92563745,
0.00821487730657897,
]
),
"time_history": [
0.0,
3.603900040616281e-05,
0.0004506860022956971,
0.00015319500016630627,
],
"batches_history": [0, 1, 2, 4],
"solution": OptimizeResult, # success
},
("box_3d", "nm"): {
"params_history": [
[0.0, 10.0, 20.0],
[0.025, 10.0, 20.0],
[0.0, 10.5, 20.0],
],
"criterion_history": np.array(
[1031.15381061, 1031.17836473, 1030.15033678]
),
"time_history": [
0.0,
5.73799989069812e-05,
0.00010679600018193014,
],
"batches_history": [0, 1, 2],
"solution": "some traceback", # error
},
}
results = run_benchmark(
problems,
optimizers,
n_cores=1, # must be 1 for the test to work
)

return problems, optimizers, results

@@ -54,29 +154,18 @@ def test_convergence_report(options, benchmark_example):
assert df.shape == (len(problems), len(expected_columns))
assert set(df.columns) == set(expected_columns)


def test_convergence_report_with_failed_and_error(benchmark_example):
problems, _, results = benchmark_example
failed_problem = ("bard_good_start", "nm")
error_problem = ("box_3d", "nm")
results[error_problem]["solution"] = "some traceback"

df = convergence_report(problems=problems, results=results)

assert df[failed_problem[1]].loc[failed_problem[0]] == "failed"
assert df[error_problem[1]].loc[error_problem[0]] == "error"
assert df["lbfgsb"].loc["box_3d"] == "failed"
assert df["nm"].loc["box_3d"] == "error"


# ====================================================================================
# Rank report
# ====================================================================================

keys = ["runtime_measure", "normalize_runtime", "stopping_criterion"]
keys = ["runtime_measure", "stopping_criterion"]
runtime_measure = ["n_evaluations", "walltime", "n_batches"]
normalize_runtime = [True, False]
RANK_REPORT_OPTIONS = [
dict(zip(keys, value))
for value in product(runtime_measure, normalize_runtime, stopping_criterion)
dict(zip(keys, value)) for value in product(runtime_measure, stopping_criterion)
]


@@ -89,34 +178,19 @@ def test_rank_report(options, benchmark_example):
assert df.shape == (len(problems), len(optimizers))
assert set(df.columns) == set(optimizers.keys())


def test_rank_report_with_failed_and_error(benchmark_example):
problems, _, results = benchmark_example
failed_problem = ("bard_good_start", "nm")
error_problem = ("box_3d", "nm")
results[error_problem]["solution"] = "some traceback"

df = rank_report(problems=problems, results=results)

assert df[failed_problem[1]].loc[failed_problem[0]] == "failed"
assert df[error_problem[1]].loc[error_problem[0]] == "error"
assert df["lbfgsb"].loc["box_3d"] == "failed"
assert df["nm"].loc["box_3d"] == "error"


# ====================================================================================
# Traceback report
# ====================================================================================


def test_traceback_report_no_error(benchmark_example):
def test_traceback_report(benchmark_example):
_, optimizers, results = benchmark_example

df = traceback_report(results=results)
assert df.shape == (0, len(optimizers))


def test_traceback_report_with_error(benchmark_example):
*_, results = benchmark_example
results[("box_3d", "nm")]["solution"] = "some traceback"

df = traceback_report(results=results)
assert df.shape[0] > 0
assert df.shape == (1, len(optimizers))
assert np.isnan(df.at["box_3d", "lbfgsb"])
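
A note on the fixture design: because the reports only read the recorded histories and check whether "solution" is a string, the hand-written results dictionary above replaces a real run_benchmark call, which is what speeds the tests up. A hypothetical extra test (not part of the diff) sketching how the three reports consume the fixture if added to this module:

def test_reports_consume_fixture(benchmark_example):  # hypothetical, for illustration
    problems, optimizers, results = benchmark_example

    conv = convergence_report(problems=problems, results=results)
    ranks = rank_report(problems=problems, results=results)
    tracebacks = traceback_report(results=results)

    # The fixture marks box_3d/nm as an error and box_3d/lbfgsb as a failed run.
    assert conv.loc["box_3d", "nm"] == "error"
    assert ranks.loc["box_3d", "lbfgsb"] == "failed"
    assert tracebacks.shape == (1, len(optimizers))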
10 changes: 4 additions & 6 deletions tests/visualization/test_profile_plot.py
@@ -4,7 +4,7 @@
from estimagic import get_benchmark_problems
from estimagic.benchmarking.run_benchmark import run_benchmark
from estimagic.visualization.profile_plot import (
_create_solution_times,
create_solution_times,
_determine_alpha_grid,
_find_switch_points,
profile_plot,
@@ -64,7 +64,7 @@ def test_create_solution_times_n_evaluations():
)
expected.columns.name = "algorithm"

res = _create_solution_times(
res = create_solution_times(
df=df, runtime_measure="n_evaluations", converged_info=info
)
pd.testing.assert_frame_equal(res, expected)
@@ -102,9 +102,7 @@ def test_create_solution_times_n_batches():
)
expected.columns.name = "algorithm"

res = _create_solution_times(
df=df, runtime_measure="n_batches", converged_info=info
)
res = create_solution_times(df=df, runtime_measure="n_batches", converged_info=info)
pd.testing.assert_frame_equal(res, expected)


@@ -140,7 +138,7 @@ def test_create_solution_times_walltime():
)
expected.columns.name = "algorithm"

res = _create_solution_times(df=df, runtime_measure="walltime", converged_info=info)
res = create_solution_times(df=df, runtime_measure="walltime", converged_info=info)
pd.testing.assert_frame_equal(res, expected)


