Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow custom functions to be used for computing cache keys #284

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 58 additions & 49 deletions cgp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,45 @@ def __find_args_and_return_value_for_consistency_check(fn: str) -> Union[Dict[st
return None


def __compute_key_from_args(*args: Any, **kwargs: Any) -> str:
"""Compute a key from the arguments passed to the decorated
function.
def compute_key_from_sympy_expr_and_args(*args: Any, **kwargs: Any) -> str:
"""Compute a key from the sympy expression encoded in an individual
and the remaining arguments passed to the decorated function.

"""

s: str = str(args) + str(kwargs)
if not (
isinstance(args[0], IndividualSingleGenome) or isinstance(args[0], IndividualMultiGenome)
):
raise ValueError("first argument of decorated function must be an Individual instance")

s: str = str(args[0].to_sympy()) + str(args[1:]) + str(kwargs)
return hashlib.sha1(s.encode("utf-8")).hexdigest()


def __compute_key_from_evaluation_and_args(
seed: int, min_value: float, max_value: float, batch_size: int, *args: Any, **kwargs: Any
def compute_key_from_numpy_evaluation_and_args(
*args: Any,
_seed: int = 0,
_min_value: float = -100.0,
_max_value: float = 100.0,
_batch_size: int = 10,
**kwargs: Any,
) -> str:
"""Compute a key for the function encoded in an individual by
evaluating its NumPy expression on random input samples and
hashing the output values.
"""Compute a key from the function encoded in an individual
and the remaining arguments passed to the decorated function.

The function is evaluated on random inputs and a key is generated
by hashing the corresponding output values.

Parameters
----------
_seed : int, optional
Seed value for fec. Defaults to 0.
_min_value : float, optional
Minimal value for fec input samples. Defaults to -100.0.
_max_value : float, optional
Maximal value for fec input samples. Defaults to 100.0.
_batch_size : int, optional
Number of fec input samples. Defaults to 10.

"""

Expand All @@ -79,18 +102,18 @@ def __compute_key_from_evaluation_and_args(
):
raise ValueError("first argument of decorated function must be an Individual instance")

rng = np.random.RandomState(seed=seed)
rng = np.random.RandomState(seed=_seed)
ind = args[0]
if isinstance(ind, IndividualSingleGenome):
f_single = ind.to_numpy()
x = rng.uniform(min_value, max_value, (batch_size, ind.genome._n_inputs))
x = rng.uniform(_min_value, _max_value, (_batch_size, ind.genome._n_inputs))
y = f_single(x)
s = np.array_str(y, precision=15)
elif isinstance(ind, IndividualMultiGenome):
f_multi = ind.to_numpy()
s = ""
for i in range(len(ind.genome)):
x = rng.uniform(min_value, max_value, (batch_size, ind.genome[i]._n_inputs))
x = rng.uniform(_min_value, _max_value, (_batch_size, ind.genome[i]._n_inputs))
y = f_multi[i](x)
s += np.array_str(y, precision=15)
else:
Expand Down Expand Up @@ -147,39 +170,37 @@ def __store_new_cache_entry(
def disk_cache(
fn: str,
*,
use_fec: bool = False,
fec_seed: int = 0,
fec_min_value: float = -100.0,
fec_max_value: float = 100.0,
fec_batch_size: int = 10,
compute_key: Callable[..., str] = compute_key_from_numpy_evaluation_and_args,
file_lock: Union[None, "mp.synchronize.Lock"] = None,
) -> Callable[[Callable[..., float]], Callable[..., float]]:
"""Cache function return values on disk.

Decorator that caches a function's return values on disk. Next time the
decorated function is called with the same arguments it returns the stored
values from disk instead of executing the function.
Decorator that caches a function's return values on disk. Next
time the decorated function is called with the same arguments it
returns the stored values from disk instead of executing the
function. The first argument of the decorated function *must* be
an IndividualBase instance.

Consistency of the cache is checked upon decorating the function
by making sure that it returns the same value as the first
argument/keyword argument combination found in the cache.

If `use_fec` is `False` (default) the arguments of the decorated
function are used to compute a hash. If `use_fec` is `True` the
decorator uses functional equivalence checking [Real et al.,
2020]: It generates a NumPy-compatible expression from the
function's first argument (*must* be an `IndividualSingleGenome`
or `IndividualMultiGenome` instance) and evaluates it on randomly
generated values. The output values are then used to compute a
hash.
The `compute_key` parameter is a function receiving the arguments
and keyword arguments of the decorated function and must return a
unique key. By default, the decorator uses functional equivalence
checking [Real et al., 2020]: It generates a NumPy-compatible
expression from the function's first argument (*must* be an
`IndividualSingleGenome` or `IndividualMultiGenome` instance) and
evaluates it on randomly generated values. The output values are
then used to compute a hash.

WARNING: this implementation is neither optimized for speed nor storage
space and does not limit the size of the cache file.

WARNING: the consistency check may pass incorrectly if the
decorated function happens to return a consistent value for the
first argument from the cache although it returns different values
for other arguments.
arguments from the cache although it returns different values for
other arguments.

WARNING: avoid using the decorator on nested functions as the
consistency check will be applied on each decoration thus doubling
Expand All @@ -195,16 +216,10 @@ def disk_cache(
----------
fn : str
Name of the cache file.
use_fec : bool, optional
Whether to use functional equivalence checking. Defaults to False.
fec_seed : int, optional
Seed value for fec. Defaults to 0.
fec_min_value : float, optional
Minimal value for fec input samples. Defaults to -100.0.
fec_max_value : float, optional
Maximal value for fec input samples. Defaults to 100.0.
fec_batch_size : int, optional
Number of fec input samples. Defaults to 10.
compute_key : Callable[..., str], optional
Function to compute a unique key from an individual and the
remaining function arguments. Defaults to
`compute_key_from_numpy_evaluation_and_args`.
file_lock : multiprocessing.synchronize.Lock, optional
Lock to make sure only a single process reads from/writes to
cache file. Defaults to None.
Expand All @@ -220,15 +235,9 @@ def decorator(func: Callable[..., float]) -> Callable[..., float]:
__check_cache_consistency(fn, func)

@functools.wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> Union[float, None]:

key: str
if use_fec:
key = __compute_key_from_evaluation_and_args(
fec_seed, fec_min_value, fec_max_value, fec_batch_size, *args, **kwargs
)
else:
key = __compute_key_from_args(*args, **kwargs)
def wrapper(*args: Any, **kwargs: Any) -> float:

key: str = compute_key(*args, **kwargs)

if file_lock is not None:
file_lock.acquire()
Expand Down
20 changes: 11 additions & 9 deletions examples/example_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,20 @@ def f_target(x):
# function to compute (or retrieve from cache) the fitness of the individual.


@cgp.utils.disk_cache("example_caching_cache.pkl")
def inner_objective(expr):
@cgp.utils.disk_cache(
"example_caching_cache.pkl", compute_key=cgp.utils.compute_key_from_sympy_expr_and_args
)
def inner_objective(ind):
"""The caching decorator uses the function parameters to identify
identical function calls. Here, as many different genotypes
produce the same simplified SymPy expression we can use such
expressions as an argument to the decorated function to avoid
reevaluating functionally identical individuals.
Note that caching only makes sense for deterministic objective
functions, as it assumes that identical expressions will always
return the same fitness values.
produce the same simplified SymPy expression we can use these
to avoid reevaluating functionally identical individuals. Note that
caching only makes sense for deterministic objective functions, as
it assumes that identical expressions will always return the same
fitness values.

"""
expr = ind.to_sympy()
loss = []
for x0 in np.linspace(-2.0, 2.0, 100):
y = float(expr[0].subs({"x_0": x0}).evalf())
Expand All @@ -56,7 +58,7 @@ def objective(individual):
if not individual.fitness_is_None():
return individual

individual.fitness = -inner_objective(individual.to_sympy())
individual.fitness = -inner_objective(individual)

return individual

Expand Down
13 changes: 8 additions & 5 deletions examples/example_fec_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

"""

import functools
import multiprocessing as mp
import time

Expand Down Expand Up @@ -38,11 +39,13 @@ def f_target(x):

@cgp.utils.disk_cache(
"example_fec_caching_cache.pkl",
use_fec=True,
fec_seed=12345,
fec_min_value=-10.0,
fec_max_value=10.0,
fec_batch_size=5,
compute_key=functools.partial(
cgp.utils.compute_key_from_numpy_evaluation_and_args,
_seed=12345,
_min_value=-10.0,
_max_value=10.0,
_batch_size=5,
),
file_lock=mp.Lock(),
)
def inner_objective(ind):
Expand Down
7 changes: 7 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,10 @@ def n_offsprings():
@fixture
def tournament_size():
return 2


@fixture
def individual(genome_params, rng):
    """Return an ``IndividualSingleGenome`` wrapping a randomized genome.

    The genome is constructed from the ``genome_params`` fixture and
    randomized in place with the ``rng`` fixture before being wrapped.
    """
    genome = cgp.Genome(**genome_params)
    genome.randomize(rng)
    single_genome_individual = cgp.IndividualSingleGenome(genome)
    return single_genome_individual
Loading