Merge pull request #655 from ICB-DCM/develop
Release 0.2.6
yannikschaelte committed May 17, 2021
2 parents 90ed83c + f447d62 commit 72488fb
Showing 40 changed files with 1,973 additions and 336 deletions.
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -31,3 +31,11 @@ repos:
        description: Replace or check mixed line endings
      - id: trailing-whitespace
        description: Trim trailing whitespaces
  - repo: local
    hooks:
      - id: style
        name: Check style
        description: Check style
        entry: tox -e project,flake8
        language: python
        types: [python]
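
For reference, the new hook just shells out to tox, so the same checks can be run by hand. A minimal sketch, assuming the `project` and `flake8` tox environments named in the hook's `entry` are defined in the repository:

    import subprocess

    # run the same style checks as the new local pre-commit hook
    # (entry: tox -e project,flake8) and print tox's report
    completed = subprocess.run(
        ["tox", "-e", "project,flake8"],
        capture_output=True,
        text=True,
    )
    print(completed.stdout)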
31 changes: 31 additions & 0 deletions CHANGELOG.rst
@@ -6,6 +6,37 @@ Release notes
..........


0.2.6 (2021-05-17)
------------------

* Objective:
  * Basic finite differences (#666; a usage sketch follows below this list)
  * Fix factor 2 in res/fval values (#619)

* Optimization:
  * Sort optimization results when appending (#668)
  * Read optimizer result from HDF5 (previously only CSV) (#663)

* Storage:
  * Load ensemble from HDF5 (#640)

* CI:
  * Add flake8 checks as pre-commit hook (#662)
  * Add efficient biological conversion reaction test model (#619)

* General:
  * No automatic import of the predict module (#657)
  * Assert unique problem parameter names (#665)
  * Load ensemble from optimization result with and without history usage
    (#640)
  * Calculate validation profile significance (#658)
  * Set pypesto screen logger to "INFO" by default (#667)

* Minor fixes:
  * Fix axis variable overwriting in `visualize.sampling_parameter_traces`
    (#665)

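To make the headline objective change concrete, here is a minimal usage sketch for the new finite-difference wrapper from #666. This is an illustration, not part of the diff: it assumes that `FD` (exported in `pypesto/__init__.py`, see the diff below) wraps an existing objective, and the exact constructor options are not shown in this commit.

    import numpy as np
    import pypesto

    # an objective without analytic gradients ...
    objective = pypesto.Objective(fun=lambda x: float(np.sum(x ** 2)))

    # ... wrapped so that gradients are approximated by finite differences
    # (assumption: FD takes the wrapped objective as its first argument)
    fd_objective = pypesto.FD(objective)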

0.2.5 (2021-05-04)
------------------

2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ parameter estimation.
 [![Coverage](https://codecov.io/gh/ICB-DCM/pyPESTO/branch/master/graph/badge.svg)](https://codecov.io/gh/ICB-DCM/pyPESTO)
 [![Quality](https://api.codacy.com/project/badge/Grade/134432ddad0e464b8494587ff370f661)](https://www.codacy.com/app/dweindl/pyPESTO?utm_source=github.com&utm_medium=referral&utm_content=ICB-DCM/pyPESTO&utm_campaign=Badge_Grade)
 [![Documentation](https://readthedocs.org/projects/pypesto/badge/?version=latest)](https://pypesto.readthedocs.io)
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4614930.svg)](https://doi.org/10.5281/zenodo.4614930)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2553546.svg)](https://doi.org/10.5281/zenodo.2553546)

## Feature overview

116 changes: 83 additions & 33 deletions doc/example/amici_import.ipynb

Large diffs are not rendered by default.

8 changes: 3 additions & 5 deletions pypesto/__init__.py
@@ -21,11 +21,7 @@
     Objective,
     ObjectiveBase,
     OptimizerHistory,
-)
-from .predict import (
-    AmiciPredictor,
-    PredictionConditionResult,
-    PredictionResult,
+    FD,
 )
 from .problem import Problem
 from .result import (
@@ -41,3 +41,5 @@
 from . import startpoint
 from . import store
 from . import visualize
+
+logging.log()  # pypesto's own logging module; sets the screen logger to INFO (#667)
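
Since #657 drops the automatic import of the predict module, downstream code presumably needs to import it explicitly now. A sketch, using only the names visible in the removed lines above:

    # the predict module is no longer pulled in by `import pypesto` (#657);
    # import it explicitly instead
    from pypesto.predict import (
        AmiciPredictor,
        PredictionConditionResult,
        PredictionResult,
    )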
4 changes: 4 additions & 0 deletions pypesto/ensemble/constants.py
@@ -23,6 +23,10 @@
PREDICTION_ARRAYS = 'prediction_arrays'
PREDICTION_SUMMARY = 'prediction_summary'

HISTORY = 'history'
OPTIMIZE = 'optimize'
SAMPLE = 'sample'

MEAN = 'mean'
MEDIAN = 'median'
STANDARD_DEVIATION = 'std'
251 changes: 250 additions & 1 deletion pypesto/ensemble/ensemble.py
@@ -1,7 +1,8 @@
+import logging
 from functools import partial
 import numpy as np
 import pandas as pd
-from typing import Sequence, Tuple, Callable, Dict
+from typing import Sequence, Tuple, Callable, Dict, List
 
 from .. import Result
 from ..engine import (
@@ -24,6 +25,8 @@
STANDARD_DEVIATION, SUMMARY, LOWER_BOUND,
UPPER_BOUND, get_percentile_label)

logger = logging.getLogger(__name__)


class EnsemblePrediction:
    """
@@ -367,6 +370,158 @@ def from_sample(
        x_vectors = x_vectors.T
        return Ensemble(x_vectors, **kwargs)

    @staticmethod
    def from_optimization_endpoints(
            result: Result,
            cutoff: float = np.inf,
            max_size: int = np.inf,
            **kwargs,
    ):
        """Construct an ensemble from an optimization result.

        Parameters
        ----------
        result:
            A pyPESTO result that contains an optimization result.
        cutoff:
            Exclude parameters from the optimization if their
            nllh is higher than the `cutoff`.
        max_size:
            The maximum size the ensemble should have.

        Returns
        -------
        The ensemble.
        """
        x_vectors = []
        vector_tags = []

        for start in result.optimize_result.list:
            # add the parameters from the next start as long as we
            # did not reach maximum size and the next value is still
            # lower than the cutoff value
            if start['fval'] <= cutoff and len(x_vectors) < max_size:
                x_vectors.append(start['x'])

                # the vector tag contains a -1 to indicate that this is
                # the last step (i.e. the endpoint) of the start
                vector_tags.append((int(start['id']), -1))
            else:
                break

        # raise an error if there are no vectors within the ensemble
        if len(x_vectors) == 0:
            raise ValueError('The ensemble does not contain any vectors. '
                             'Either the cutoff value was too small or the '
                             'result.optimize_result object might be empty.')
        elif len(x_vectors) < max_size:
            logger.info(f'The ensemble contains {len(x_vectors)} parameter '
                        'vectors, which is less than the maximum size. If '
                        'you want to include more vectors, consider raising '
                        'the cutoff value or including parameters from the '
                        'history via `from_optimization_history`.')

        x_vectors = np.stack(x_vectors, axis=1)
        return Ensemble(x_vectors=x_vectors,
                        x_names=result.problem.x_names,
                        vector_tags=vector_tags,
                        lower_bound=result.problem.lb_full,
                        upper_bound=result.problem.ub_full,
                        **kwargs)

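    # Usage sketch (illustration, not part of the diff): given a
    # pypesto.Result `result` from an optimizer run, an ensemble of all
    # endpoints with nllh at most 10 could be built as
    #     ensemble = Ensemble.from_optimization_endpoints(result, cutoff=10.0)
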
    @staticmethod
    def from_optimization_history(
            result: Result,
            cutoff: float = np.inf,
            max_size: int = np.inf,
            max_per_start: int = np.inf,
            distribute: bool = True,
            **kwargs,
    ):
        """Construct an ensemble from the history of an optimization.

        Parameters
        ----------
        result:
            A pyPESTO result that contains an optimization result
            with history recorded.
        cutoff:
            Exclude parameters from the optimization if their nllh
            is higher than the `cutoff`.
        max_size:
            The maximum size the ensemble should have.
        max_per_start:
            The maximum number of vectors to be included from a
            single optimization start.
        distribute:
            Boolean flag, whether the indices should be more evenly
            distributed over the trace (True) or whether the best
            values of each start should be taken (False).

        Returns
        -------
        The ensemble.
        """
        if not result.optimize_result.list[0].history.options['trace_record']:
            logger.warning('The optimize result has no trace. The ensemble '
                           'will automatically be created through '
                           'from_optimization_endpoints().')
            return Ensemble.from_optimization_endpoints(result=result,
                                                        cutoff=cutoff,
                                                        max_size=max_size,
                                                        **kwargs)

        x_vectors = []
        vector_tags = []
        x_names = result.problem.x_names
        lb = result.problem.lb_full
        ub = result.problem.ub_full

        # calculate the number of starts whose final nllh is below cutoff
        n_starts = sum(start['fval'] <= cutoff
                       for start in result.optimize_result.list)

        fval_trace = [
            np.array(
                result.optimize_result.list[i_ms]['history'].get_fval_trace()
            )
            for i_ms in range(n_starts)
        ]
        x_trace = [
            result.optimize_result.list[i_ms]['history'].get_x_trace()
            for i_ms in range(n_starts)
        ]

        # calculate the number of iterations included from each start
        n_per_starts = entries_per_start(fval_traces=fval_trace,
                                         cutoff=cutoff,
                                         max_per_start=max_per_start,
                                         max_size=max_size)

        # determine x_vectors from each start
        for start in range(n_starts):
            indices = get_vector_indices(trace_start=fval_trace[start],
                                         cutoff=cutoff,
                                         n_vectors=n_per_starts[start],
                                         distribute=distribute)
            x_vectors.extend([x_trace[start][ind] for ind in indices])
            vector_tags.extend([
                (int(result.optimize_result.list[start]['id']), ind)
                for ind in indices
            ])

        # raise a `ValueError` if there are no vectors within the ensemble
        if len(x_vectors) == 0:
            raise ValueError('The ensemble does not contain any vectors. '
                             'Either the `cutoff` value was too small or '
                             'the `result.optimize_result` object might '
                             'be empty.')

        x_vectors = np.stack(x_vectors, axis=1)
        return Ensemble(x_vectors=x_vectors,
                        x_names=x_names,
                        vector_tags=vector_tags,
                        lower_bound=lb,
                        upper_bound=ub,
                        **kwargs)

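    # Usage sketch (illustration, not part of the diff): with history
    # recording enabled during optimization (trace_record=True in the
    # history options), at most five vectors per start could be collected via
    #     ensemble = Ensemble.from_optimization_history(
    #         result, cutoff=10.0, max_size=100, max_per_start=5)
    # Without a recorded trace, the method falls back to
    # from_optimization_endpoints(), as the warning above indicates.
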
    def __iter__(self):
        """
        __iter__ makes the instances of the class iterable objects, allowing to
@@ -594,3 +749,97 @@ def check_identifiability(self) -> pd.DataFrame:
            parameter_identifiability['parameterId']

        return parameter_identifiability


def entries_per_start(fval_traces: List[np.ndarray],
                      cutoff: float,
                      max_size: int,
                      max_per_start: int):
    """
    Determine how many entries of each start will be included
    in the ensemble.

    Parameters
    ----------
    fval_traces:
        The fval trace of each start.
    cutoff:
        Exclude parameters from the optimization if the nllh
        is higher than the `cutoff`.
    max_size:
        The maximum size the ensemble should have.
    max_per_start:
        The maximum number of vectors to be included from a
        single optimization start.

    Returns
    -------
    A list with the number of candidates per start that are to
    be included in the ensemble.
    """
    # choose possible candidates
    ens_ind = [np.flatnonzero(fval <= cutoff) for fval in fval_traces]

    # count the number of candidates per start
    n_per_start = np.array([len(start) for start in ens_ind])

    # if all possible candidates can be included, return their counts
    if (n_per_start < max_per_start).all() and sum(n_per_start) < max_size:
        return n_per_start

    # trim down starts that exceed the per-start limit
    n_per_start = [min(n, max_per_start) for n in n_per_start]

    # trim down further until the total fits the maximum ensemble size
    decr = 0
    while sum(n_per_start) > max_size:
        n_per_start = [min(n, max_per_start - decr)
                       for n in n_per_start]
        decr += 1
    # TODO: With this implementation we could, in a scenario with more
    # candidates than max_size, end up with an ensemble of size
    # `max_size - len(n_per_start)` in the worst case. We could introduce
    # a `force_max` flag that indicates whether those remaining free
    # slots should be filled with entries from certain starts. This would
    # bring up the question of which starts to choose; one obvious choice
    # would be the best starts based on their endpoints.

    return n_per_start
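
# Worked example (not part of the diff): for
#     fval_traces = [np.array([1.0, 2.0, 3.0]), np.array([1.0, 5.0])]
# and cutoff=4, the candidate counts are [3, 1]. With max_per_start=2 the
# first start is trimmed to 2 entries, and since 2 + 1 <= max_size=3 holds,
# the function returns [2, 1].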


def get_vector_indices(trace_start: np.ndarray,
                       cutoff: float,
                       n_vectors: int,
                       distribute: bool):
    """
    Return the indices of one start to be included in the ensemble.

    Parameters
    ----------
    trace_start:
        The fval trace of a single start.
    cutoff:
        Exclude parameters from the optimization if the nllh
        is higher than the `cutoff`.
    n_vectors:
        The number of indices to be included from one start.
    distribute:
        Boolean flag, whether the indices should be more evenly
        distributed over the trace (True) or whether the best
        values of the start should be taken (False).

    Returns
    -------
    The indices to include in the ensemble.
    """
    candidates = np.flatnonzero(trace_start <= cutoff)

    if distribute:
        indices = np.round(np.linspace(0, len(candidates) - 1, n_vectors))
        return candidates[indices.astype(int)]
    else:
        return candidates[:n_vectors]
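
To make the `distribute` flag concrete, a small self-contained check (a sketch mirroring the selection logic above):

    import numpy as np

    trace = np.array([3.0, 2.5, 2.0, 1.5, 1.0, 0.8])
    candidates = np.flatnonzero(trace <= 2.2)  # array([2, 3, 4, 5])

    # distribute=True: indices spread evenly across all candidates
    idx = np.round(np.linspace(0, len(candidates) - 1, 2)).astype(int)
    print(candidates[idx])  # [2 5]

    # distribute=False: simply the first n_vectors candidates
    print(candidates[:2])  # [2 3]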