Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save predictions to sacc #349

Merged
merged 34 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
d7f51ab
working prototype
tilmantroester Dec 6, 2023
acee539
Add test, rename computed to computed_theory_vector
tilmantroester Dec 6, 2023
08597ff
fix spelling
tilmantroester Dec 7, 2023
aa379cc
Merge branch 'master' into save_predictions
vitenti Jan 10, 2024
52c8222
Fixed linter issues.
vitenti Jan 10, 2024
4f77feb
Improve type usage, now UpdatableCollection allows the user to specif…
vitenti Jan 11, 2024
8acdb04
Moved type definition to __init__.
vitenti Jan 11, 2024
dd85f19
Reorganized theory_vector and data_vector set/get and computation.
vitenti Jan 13, 2024
043bcfb
Merge branch 'master' into save_predictions
vitenti Jan 13, 2024
d93848d
Combining new state machine and new methods.
vitenti Jan 13, 2024
5973790
Fixed reset in cosmosis connector, must be called after all possible …
vitenti Jan 13, 2024
fba2463
Cleaning all quantities computed after updated.
vitenti Jan 13, 2024
1207889
More tests for UpdatableCollection.
vitenti Jan 13, 2024
92b3916
Testing new methods of GaussFamily (and older not tested ones).
vitenti Jan 13, 2024
5d9b462
Adding noise to the realizations and testing it.
vitenti Jan 13, 2024
16904b8
* Reorganizing and renaming.
vitenti Jan 13, 2024
4bd3e4b
* More documentation updates.
vitenti Jan 13, 2024
af11932
* More documentation fix.
vitenti Jan 13, 2024
6962a58
Normalizing make_realization parameters.
vitenti Jan 13, 2024
0798278
Factoring make_realization_vector, which returns a new realization da…
vitenti Jan 13, 2024
123724e
Updated documentation.
vitenti Jan 13, 2024
2c6ca9e
Removed redundant checks.
vitenti Jan 13, 2024
86be99d
Removing more redundancies.
vitenti Jan 13, 2024
04825dd
Merge branch 'master' into save_predictions
marcpaterno Jan 25, 2024
55df315
Apply black
marcpaterno Jan 25, 2024
a155d99
Delete repeated test
marcpaterno Jan 25, 2024
f38f762
Simplify checking of RE match
marcpaterno Jan 25, 2024
3a0c3af
Add COMPUTED state to GaussFamily
marcpaterno Jan 25, 2024
bdc3575
Address failure to test line gauss_family:202
marcpaterno Jan 26, 2024
447c24d
Require 100% coverage on changed lines
marcpaterno Jan 26, 2024
af14c76
Remove needless call to super().__init__
marcpaterno Jan 26, 2024
a0f32d6
Support getting covariance for list of statistics
marcpaterno Jan 28, 2024
2a6134e
Merge branch 'master' into save_predictions
marcpaterno Jan 29, 2024
ef3b08e
Apply black
marcpaterno Jan 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions examples/des_y1_3x2pt/des_y1_3x2pt_PT.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class CclSetup:

@dataclass
class CElls:
"""A package of related C_ell values, to reduce the number of variables
used in the :meth:`run_likelihood` method."""

GG: np.ndarray
GI: np.ndarray
II: np.ndarray
Expand Down Expand Up @@ -234,6 +237,7 @@ def run_likelihood() -> None:
assert likelihood.cov is not None

stat0 = likelihood.statistics[0].statistic
assert isinstance(stat0, TwoPoint)

# x = likelihood.statistics[0].ell_or_theta_
# y_data = likelihood.statistics[0].measured_statistic_
Expand All @@ -243,11 +247,12 @@ def run_likelihood() -> None:

print(list(stat0.cells.keys()))

stat2 = likelihood.statistics[2].statistic
stat2 = likelihood.statistics[2].statistic # pylint: disable=no-member
assert isinstance(stat2, TwoPoint)
print(list(stat2.cells.keys()))

stat3 = likelihood.statistics[3].statistic
stat3 = likelihood.statistics[3].statistic # pylint: disable=no-member
assert isinstance(stat3, TwoPoint)
print(list(stat3.cells.keys()))

plot_predicted_and_measured_statistics(
Expand Down
1 change: 1 addition & 0 deletions examples/des_y1_3x2pt/des_y1_cosmic_shear_TATT.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def run_likelihood() -> None:
print(f"Log-like = {log_like:.1f}")

# Plot the predicted and measured statistic
assert isinstance(likelihood, ConstGaussian)
two_point_0 = likelihood.statistics[0].statistic
assert isinstance(two_point_0, TwoPoint)

Expand Down
3 changes: 1 addition & 2 deletions examples/des_y1_3x2pt/des_y1_cosmic_shear_pk_modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,9 @@ def run_likelihood() -> None:
print(f"Log-like = {log_like:.1f}")

# Plot the predicted and measured statistic
assert isinstance(likelihood, ConstGaussian)
two_point_0 = likelihood.statistics[0].statistic
assert isinstance(two_point_0, TwoPoint)

assert isinstance(likelihood, ConstGaussian)
assert likelihood.cov is not None

# Predict CCL Cl
Expand Down
46 changes: 27 additions & 19 deletions firecrown/connector/cosmosis/likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ class FirecrownLikelihood:
:param config: current CosmoSIS datablock
"""

likelihood: Likelihood
map: MappingCosmoSIS

def __init__(self, config: cosmosis.datablock):
"""Create the FirecrownLikelihood object from the given configuration."""
likelihood_source = config.get_string(option_section, "likelihood_source", "")
Expand All @@ -60,6 +57,7 @@ def __init__(self, config: cosmosis.datablock):

self.firecrown_module_name = option_section
self.sampling_sections = sections
self.likelihood: Likelihood
try:
self.likelihood, self.tools = load_likelihood(
likelihood_source, build_parameters
Expand All @@ -69,7 +67,7 @@ def __init__(self, config: cosmosis.datablock):
print(f"The Firecrown likelihood needs a required parameter: {err}")
print("*" * 30)
raise
self.map = mapping_builder(
self.map: MappingCosmoSIS = mapping_builder(
input_style="CosmoSIS", require_nonlinear_pk=require_nonlinear_pk
)

Expand Down Expand Up @@ -126,23 +124,31 @@ def execute(self, sample: cosmosis.datablock) -> int:
for section, name, val in derived_params_collection:
sample.put(section, name, val)

self.likelihood.reset()
self.tools.reset()
if not isinstance(self.likelihood, GaussFamily):
self.likelihood.reset()
self.tools.reset()
return 0

# Save concatenated data vector and inverse covariance to enable support
# If we get here, we have a GaussFamily likelihood, and we need to
# save concatenated data vector and inverse covariance to enable support
# for the CosmoSIS Fisher sampler. This can only work for likelihoods
# that have these quantities. Currently, this is only GaussFamily.

if isinstance(self.likelihood, GaussFamily):
sample.put(
"data_vector", "firecrown_theory", self.likelihood.predicted_data_vector
)
sample.put(
"data_vector", "firecrown_data", self.likelihood.measured_data_vector
)
sample.put(
"data_vector", "firecrown_inverse_covariance", self.likelihood.inv_cov
)
sample.put(
"data_vector",
"firecrown_theory",
self.likelihood.get_theory_vector(),
)
sample.put(
"data_vector",
"firecrown_data",
self.likelihood.get_data_vector(),
)
sample.put(
"data_vector",
"firecrown_inverse_covariance",
self.likelihood.inv_cov,
)

# Write out theory and data vectors to the data block to ease
# debugging.
Expand All @@ -163,14 +169,16 @@ def execute(self, sample: cosmosis.datablock) -> int:
sample.put(
"data_vector",
f"theory_{stat.sacc_data_type}_{tracer}",
stat.predicted_statistic_,
stat.get_theory_vector(),
)
sample.put(
"data_vector",
f"data_{stat.sacc_data_type}_{tracer}",
stat.measured_statistic_,
stat.get_data_vector(),
)

self.likelihood.reset()
self.tools.reset()
return 0

def form_error_message(self, exc: MissingSamplerParameterError) -> str:
Expand Down
117 changes: 102 additions & 15 deletions firecrown/likelihood/gauss_family/gauss_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
"""

from __future__ import annotations

from enum import Enum
from typing import List, Optional, Tuple, Sequence
from typing import List, Optional, Tuple, Sequence, Dict
from typing import final
import warnings

Expand Down Expand Up @@ -61,12 +62,24 @@ def __init__(
self.state: State = State.INITIALIZED
if len(statistics) == 0:
raise ValueError("GaussFamily requires at least one statistic")
self.statistics: UpdatableCollection = UpdatableCollection(

for i, s in enumerate(statistics):
if not isinstance(s, Statistic):
raise ValueError(
f"statistics[{i}] is not an instance of Statistic: {s}"
f"it is a {type(s)} instead."
)

self.statistics: UpdatableCollection[GuardedStatistic] = UpdatableCollection(
GuardedStatistic(s) for s in statistics
)
self.cov: Optional[npt.NDArray[np.float64]] = None
self.cholesky: Optional[npt.NDArray[np.float64]] = None
self.inv_cov: Optional[npt.NDArray[np.float64]] = None
self.cov_index_map: Optional[Dict[int, int]] = None
self.computed_theory_vector = False
self.theory_vector: Optional[npt.NDArray[np.double]] = None
self.data_vector: Optional[npt.NDArray[np.double]] = None

def _update(self, _: ParamsMap) -> None:
"""Handle the state resetting required by :class:`GaussFamily`
Expand All @@ -84,6 +97,10 @@ def _reset(self) -> None:
at the start of the method, and change the state at the end of the
method."""
assert self.state == State.UPDATED, "update() must be called before reset()"

self.computed_theory_vector = False
self.theory_vector = None

self.state = State.READY

def read(self, sacc_data: sacc.Sacc) -> None:
Expand All @@ -98,28 +115,50 @@ def read(self, sacc_data: sacc.Sacc) -> None:
raise RuntimeError(msg)

covariance = sacc_data.covariance.dense

indices_list = []
data_vector_list = []
for stat in self.statistics:
stat.read(sacc_data)
if stat.statistic.sacc_indices is None:
raise RuntimeError(
f"The statistic {stat.statistic} has no sacc_indices."
)
indices_list.append(stat.statistic.sacc_indices.copy())
data_vector_list.append(stat.statistic.get_data_vector())

indices_list = [s.statistic.sacc_indices.copy() for s in self.statistics]
indices = np.concatenate(indices_list)
data_vector = np.concatenate(data_vector_list)
cov = np.zeros((len(indices), len(indices)))

for new_i, old_i in enumerate(indices):
for new_j, old_j in enumerate(indices):
cov[new_i, new_j] = covariance[old_i, old_j]

self.data_vector = data_vector
self.cov_index_map = {old_i: new_i for new_i, old_i in enumerate(indices)}
self.cov = cov
self.cholesky = scipy.linalg.cholesky(self.cov, lower=True)
self.inv_cov = np.linalg.inv(cov)

self.state = State.READY

@final
def get_cov(self) -> npt.NDArray[np.float64]:
"""Gets the current covariance matrix."""
def get_cov(self, statistic: Optional[Statistic] = None) -> npt.NDArray[np.float64]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would prefer to pass the numpy array of indices that corresponds to the sub-matrix desired.
This would allow the caller to obtain the sub-matrix for two or more statistics, when that is desired.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The application I wrote this for was to get error bars when plotting the data vector. The idea was specifically to abstract away the indices and instead use the statistics, since that's what the user interacts with. I see the use of passing a list of statistics though, to get their corresponding sub-matrix.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will expand the interface to have a single method get_cov that can accept:

  1. a single Statistic
  2. a list of Statistic
  3. a single np.ndarray (the indices)
  4. a list of np.ndarray (a list of indices)

In the case when a stat or list of stats is passed in, we need also to make sure that the stat (or all the stats) that have been passed are in the likelihood object on which we've called the get_cov method. We will make the code verify this.

We also have to specify the order in which entries appear in the returned matrix. We propose to respect the order of the entries of the list of statistics (or of numpy arrays), so that the user-specified order of the list controls the ordering of the elements in the returned matrix result, rather than the order of the entries in the SACC data object controlling the ordering of the entries in the returned matrix.

For example, if we pass a list of stats of length 3 (note the order of the entries in this passed to get_cov:
stats1 -> 0:9, stats2 -> 10:19, stats3 -> 20:29 => get_cov([stats1,stats3,stats2]) -> 0:9 + 20:29 + 10:19

"""Gets the current covariance matrix.

:param statistic: The statistic for which the sub-covariance matrix
should be return. If not specified, return the covariance of all
statistics.
"""
assert self._is_ready(), "read() must be called before get_cov()"
assert self.cov is not None
if statistic is not None:
assert statistic.sacc_indices is not None
assert self.cov_index_map is not None
idx = [self.cov_index_map[idx] for idx in statistic.sacc_indices]
# We do not change the state.
return self.cov[np.ix_(idx, idx)]
# We do not change the state.
return self.cov

Expand All @@ -129,11 +168,8 @@ def get_data_vector(self) -> npt.NDArray[np.float64]:
order."""
assert self._is_ready(), "read() must be called before get_data_vector()"

data_vector_list: List[npt.NDArray[np.float64]] = [
stat.get_data_vector() for stat in self.statistics
]
# We do not change the state.
return np.concatenate(data_vector_list)
assert self.data_vector is not None
return self.data_vector

@final
def compute_theory_vector(self, tools: ModelingTools) -> npt.NDArray[np.float64]:
Expand All @@ -148,8 +184,30 @@ def compute_theory_vector(self, tools: ModelingTools) -> npt.NDArray[np.float64]
theory_vector_list: List[npt.NDArray[np.float64]] = [
stat.compute_theory_vector(tools) for stat in self.statistics
]
# We do not change the state
return np.concatenate(theory_vector_list)
self.computed_theory_vector = True
self.theory_vector = np.concatenate(theory_vector_list)

return self.theory_vector

@final
def get_theory_vector(self) -> npt.NDArray[np.float64]:
    """Get the theory vector from all statistics and concatenate in the right
    order.

    :return: the cached concatenated theory vector produced by the most
        recent call to ``compute_theory_vector``.
    :raises RuntimeError: if ``compute_theory_vector`` has not been called
        since the last update.
    """
    # Reading the theory vector is only meaningful while the likelihood is
    # in the UPDATED state; in any other state the cached value would be
    # stale or absent.
    assert (
        self.state == State.UPDATED
    ), "update() must be called before get_theory_vector()"

    # compute_theory_vector() fills in self.theory_vector and sets this
    # flag together; calling this getter before it is a caller error.
    if not self.computed_theory_vector:
        raise RuntimeError(
            "The theory vector has not been computed yet. "
            "Call compute_theory_vector first."
        )
    # The flag and the cached vector are always set together, so a None
    # here indicates an internal inconsistency, not a usage error.
    assert self.theory_vector is not None, (
        "Implementation error, "
        "computed_theory_vector is True but theory_vector is None"
    )
    return self.theory_vector

@final
def compute(
Expand Down Expand Up @@ -186,9 +244,6 @@ def compute_chisq(self, tools: ModelingTools) -> float:
assert len(data_vector) == len(theory_vector)
residuals = data_vector - theory_vector

self.predicted_data_vector: npt.NDArray[np.float64] = theory_vector
self.measured_data_vector: npt.NDArray[np.float64] = data_vector

x = scipy.linalg.solve_triangular(self.cholesky, residuals, lower=True)
chisq = np.dot(x, x)

Expand All @@ -198,3 +253,35 @@ def compute_chisq(self, tools: ModelingTools) -> float:
def _is_ready(self) -> bool:
"""Return True if the state is either READY or UPDATED."""
return self.state in (State.READY, State.UPDATED)

def make_realization(
self, sacc_data: sacc.Sacc, add_noise: bool = True, strict: bool = True
) -> sacc.Sacc:
new_sacc = sacc_data.copy()

sacc_indices_list = []
for stat in self.statistics:
assert stat.statistic.sacc_indices is not None
sacc_indices_list.append(stat.statistic.sacc_indices.copy())

sacc_indices = np.concatenate(sacc_indices_list)

if add_noise:
new_data_vector = self.make_realization_vector()
else:
new_data_vector = self.get_theory_vector()

assert len(sacc_indices) == len(new_data_vector)

if strict:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider collapsing the nested ifs into a single if with multiple conditions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll deal with this in a latter issue.

if set(sacc_indices.tolist()) != set(sacc_data.indices()):
raise RuntimeError(
"The predicted data does not cover all the data in the "
"sacc object. To write only the calculated predictions, "
"set strict=False."
)

for prediction_idx, sacc_idx in enumerate(sacc_indices):
new_sacc.data[sacc_idx].value = new_data_vector[prediction_idx]

return new_sacc
10 changes: 10 additions & 0 deletions firecrown/likelihood/gauss_family/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

from __future__ import annotations
import numpy as np

from .gauss_family import GaussFamily
from ...modeling_tools import ModelingTools
Expand All @@ -15,3 +16,12 @@ def compute_loglike(self, tools: ModelingTools):
"""Compute the log-likelihood."""

return -0.5 * self.compute_chisq(tools)

def make_realization_vector(self) -> np.ndarray:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be checking pre- and post-conditions on self.state in every method.
Consider introducing a decorator to solve any pylint complaints about duplicated code.

theory_vector = self.get_theory_vector()
assert self.cholesky is not None
new_data_vector = theory_vector + np.dot(
self.cholesky, np.random.randn(len(theory_vector))
)

return new_data_vector
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def get_data_vector(self) -> DataVector:
assert self.data_vector is not None
return self.data_vector

def compute_theory_vector(self, tools: ModelingTools) -> TheoryVector:
def _compute_theory_vector(self, tools: ModelingTools) -> TheoryVector:
assert tools.cluster_abundance is not None

theory_vector_list: List[float] = []
Expand Down
Loading
Loading