diff --git a/presamples/loader.py b/presamples/loader.py
index de8ccba..6303dba 100644
--- a/presamples/loader.py
+++ b/presamples/loader.py
@@ -2,13 +2,14 @@
 from .errors import IncompatibleIndices, ConflictingLabels
 from .indexer import Indexer
 from .package_interface import IndexedParametersMapping
-from .utils import validate_presamples_dirpath, md5
+from .utils import validate_presamples_dirpath
 from pathlib import Path
 import itertools
 import json
 import numpy as np
-import os
 import wrapt
+from collections.abc import Sequence
+from collections import defaultdict
 
 
 @wrapt.decorator
@@ -276,8 +277,76 @@ def reset_sequential_indices(self):
     @property
     def parameters(self):
         if not hasattr(self, "_parameters"):
-            self._parameters = [
-                IndexedParametersMapping(**metadata)
-                for metadata in self.parameter_metadata
-            ]
+            self._parameters = ConsolidatedIndexedParameterMapping(
+                [
+                    IndexedParametersMapping(**metadata)
+                    for metadata in self.parameter_metadata
+                ]
+            )
         return self._parameters
+
+
+class ConsolidatedIndexedParameterMapping(Sequence):
+    """Sequence of IndexedParametersMapping objects with consolidated names.
+
+    The wrapped mappings stay reachable by position through the Sequence
+    interface. When a parameter name occurs in more than one mapping, the
+    mapping that comes last in the list supplies its id and value, and the
+    sources it overrides are recorded in ``replaced``.
+    """
+
+    def __init__(self, list_IPM, warn_on_replacement=False):
+        # NOTE: ``warn_on_replacement`` is accepted but not used yet.
+        self.ipms = list_IPM
+        assert all([
+            isinstance(ipm, IndexedParametersMapping)
+            for ipm in self.ipms
+        ])
+
+        self.consolidate_ipms()
+
+    def __len__(self):
+        return len(self.ipms)
+
+    def __getitem__(self, i):
+        return self.ipms[i]
+
+    def consolidate_ipms(self):
+        """Map parameter names to source package and values.
+
+        Populates ``names`` (first-seen order), ``ids`` (id of the winning
+        source per name), ``ipm_mapper`` (index in ``ipms`` of the winning
+        source) and ``replaced`` (first two id fields of each overridden source).
+        """
+        self.names = []
+        self.ids = {}
+        self.ipm_mapper = {}
+        self.replaced = defaultdict(list)
+        for i, ipm in enumerate(self.ipms):
+            for n, name in enumerate(ipm.names):
+                # ``ipm_mapper`` holds exactly the names seen so far, so
+                # use its O(1) lookup instead of scanning ``names``.
+                if name not in self.ipm_mapper:
+                    self.names.append(name)
+                else:
+                    old_ipm = self.ipms[self.ipm_mapper[name]]
+                    ind_index = list(old_ipm.mapping.keys()).index(name)
+                    old_id = old_ipm.ids[ind_index]
+                    self.replaced[name].append((old_id[0], old_id[1]))
+                self.ids[name] = ipm.ids[n]
+                self.ipm_mapper[name] = i
+
+    @property
+    def consolidated_array(self):
+        """Array with one value per consolidated parameter name.
+
+        Each value is taken from the last IndexedParametersMapping object that
+        contains data on the named parameter.
+        The used IndexedParametersMapping contains information about the path
+        to the presamples array, the corresponding mapping for the named
+        parameter and the current Indexer value.
+        """
+        arr = np.empty(shape=(len(self.names)))
+        for i, name in enumerate(self.names):
+            arr[i] = self.ipms[self.ipm_mapper[name]][name]
+        return arr
diff --git a/presamples/package_interface.py b/presamples/package_interface.py
index 57e95ba..21f37b6 100644
--- a/presamples/package_interface.py
+++ b/presamples/package_interface.py
@@ -2,10 +2,9 @@
 from .indexer import Indexer
 from .utils import validate_presamples_dirpath, check_name_conflicts
 from collections.abc import Mapping
+from collections import OrderedDict
 from pathlib import Path
 import json
-import numpy as np
-import os
 
 
 class PresamplesPackage:
@@ -106,11 +105,11 @@ def __init__(self, path, resources, package_name, sample_index=0):
             if obj.get('names')
         ]
         check_name_conflicts(name_lists)
-        self.mapping = {
-            name: (i, j)
+        self.mapping = OrderedDict(
+            (name, (i, j))
             for i, lst in enumerate(name_lists)
             for j, name in enumerate(lst)
-        }
+        )
         self.ipa = RegularPresamplesArrays([
             path / obj['samples']['filepath']
             for obj in resources
@@ -126,6 +125,11 @@ def values(self):
         for i, j in self.mapping.values():
             yield self.ipa.data[i][j, :]
 
+    @property
+    def names(self):
+        """Return the keys of this mapping as a list."""
+        return list(self.keys())
+
     def __getitem__(self, key):
         i, j = self.mapping[key]
         return self.ipa.data[i][j, :]
diff --git a/tests/loader.py b/tests/loader.py
index 2d10076..f449d48 100644
--- a/tests/loader.py
+++ b/tests/loader.py
@@ -70,6 +70,37 @@ def parameters_fixture():
         )
         yield dirpath
 
+@pytest.fixture
+def parameters_fixture_2():
+    """Yield the dirpath of package 'nufoo' holding parameters A, B and E."""
+    with tempfile.TemporaryDirectory() as d:
+        dirpath = Path(d)
+        s1 = np.array([100, 200]).reshape(2, 1)
+        s2 = np.array([42]).reshape(1, 1)
+        n1 = list('AB')
+        n2 = list('E')
+        id_, dirpath = create_presamples_package(
+            parameter_data=[(s1, n1, 'spring'), (s2, n2, 'fall')],
+            name='nufoo', id_='nubar', dirpath=dirpath
+        )
+        yield dirpath
+
+@pytest.fixture
+def parameters_fixture_3():
+    """Yield the dirpath of package 'nunufoo' holding parameters C and E."""
+    with tempfile.TemporaryDirectory() as d:
+        dirpath = Path(d)
+        s1 = np.array([300]).reshape(1, 1)
+        s2 = np.array([123]).reshape(1, 1)
+        n1 = list('C')
+        n2 = list('E')
+        id_, dirpath = create_presamples_package(
+            parameter_data=[(s1, n1, 'equinox'), (s2, n2, 'solstice')],
+            name='nunufoo', id_='nunubar', dirpath=dirpath
+        )
+        yield dirpath
+
+
 def test_init(package):
     mp = PackagesDataLoader([package])
     assert not mp.empty
@@ -588,3 +619,123 @@ def __init__(self):
     first = ml.sample_indexers[0].index
     ml.update_sample_indices()
     assert ml.sample_indexers[0].index != first
+
+def test_consolidated_indexed_parameter_arrays(parameters_fixture, parameters_fixture_2, parameters_fixture_3):
+    """Check name consolidation across one, two and three parameter packages."""
+    mp_1 = PackagesDataLoader([parameters_fixture])
+    assert len(mp_1.parameters) == 1  # Only one IPM
+    assert mp_1.parameters.names == list("ABCDEFG")
+    # Consolidated array depends on index, but is sure to be one of following:
+    first_col_arr = np.array([0, 4, 8, 12, 0, 4, 8], dtype=float)
+    possible_consolidated_arrays = [first_col_arr+scalar for scalar in range(0, 4)]
+    assert any([np.array_equal(mp_1.parameters.consolidated_array, arr) for arr in possible_consolidated_arrays])
+    # Values still possible after updating index
+    mp_1.update_sample_indices()
+    assert any([np.array_equal(mp_1.parameters.consolidated_array, arr) for arr in possible_consolidated_arrays])
+    # All parameter values taken from IPM at index 0
+    assert all([mp_1.parameters.ipm_mapper[n] == 0 for n in mp_1.parameters.names])
+    assert len(mp_1.parameters.replaced) == 0
+    all_ids_paths = [mp_1.parameters.ids[name][0] for name in mp_1.parameters.names]
+    assert all([Path(p) == Path(parameters_fixture) for p in all_ids_paths]), "Got {}, expected {}".format(all_ids_paths, parameters_fixture)
+
+    mp_2 = PackagesDataLoader([parameters_fixture, parameters_fixture_2])
+    assert len(mp_2.parameters) == 2
+    assert mp_2.parameters.names == list("ABCDEFG")
+    # Consolidated array depends on index, but is sure to be one of following:
+    not_replaced_first_col = np.array([8, 12, 4, 8], dtype=float)
+    possible_not_replaced_sample = [not_replaced_first_col+scalar for scalar in range(0, 4)]
+    not_replaced_indices = [2, 3, 5, 6]
+    replaced_indices = [0, 1, 4]
+    # Unreplaced named parameters still in possible values
+    assert any([np.array_equal(mp_2.parameters.consolidated_array[not_replaced_indices], arr) for arr in possible_not_replaced_sample]), "got this: {}".format(mp_2.parameters.consolidated_array)
+    # Replaced named parameters have new values
+    assert np.array_equal(mp_2.parameters.consolidated_array[replaced_indices], np.array([100, 200, 42]))
+    # Value tests still correct after updating index
+    mp_2.update_sample_indices()
+    assert any([np.array_equal(mp_2.parameters.consolidated_array[not_replaced_indices], arr) for arr in possible_not_replaced_sample])
+    assert np.array_equal(mp_2.parameters.consolidated_array[replaced_indices], np.array([100, 200, 42]))
+
+    # Non-replaced names map to the IPM at index 0, replaced names to index 1
+    assert all(
+        [mp_2.parameters.ipm_mapper[n] == 0
+         for n in mp_2.parameters.names
+         if n not in mp_2.parameters.replaced.keys()
+         ]
+    )
+    assert all(
+        [mp_2.parameters.ipm_mapper[n] == 1
+         for n in mp_2.parameters.names
+         if n in mp_2.parameters.replaced.keys()
+         ]
+    )
+
+    assert len(mp_2.parameters.replaced) == 3
+    for replaced_name, replaced_paths in mp_2.parameters.replaced.items():
+        assert len(replaced_paths) == 1
+        assert replaced_paths == [(parameters_fixture, 'foo')]
+
+    all_ids_paths_not_replaced = [
+        mp_2.parameters.ids[name][0]
+        for name in mp_2.parameters.names
+        if name not in mp_2.parameters.replaced
+    ]
+    all_ids_paths_replaced = [
+        mp_2.parameters.ids[name][0]
+        for name in mp_2.parameters.names
+        if name in mp_2.parameters.replaced
+    ]
+    assert all([
+        Path(p) == Path(parameters_fixture) for p in all_ids_paths_not_replaced
+    ]
+    )
+    assert all([
+        Path(p) == Path(parameters_fixture_2) for p in all_ids_paths_replaced
+    ]
+    )
+
+    mp_3 = PackagesDataLoader([parameters_fixture, parameters_fixture_2, parameters_fixture_3])
+    assert len(mp_3.parameters) == 3
+    assert mp_3.parameters.names == list("ABCDEFG")
+    # Consolidated array depends on index, but is sure to be one of following:
+    not_replaced_first_col = np.array([12, 4, 8], dtype=float)
+    possible_not_replaced_sample = [not_replaced_first_col + scalar for scalar in range(0, 4)]
+    not_replaced_indices = [3, 5, 6]
+    replaced_indices = [0, 1, 2, 4]
+    # Unreplaced named parameters still in possible values
+    assert any([
+        np.array_equal(mp_3.parameters.consolidated_array[not_replaced_indices], arr)
+        for arr in possible_not_replaced_sample
+    ])
+    # Replaced named parameters have new values
+    assert np.array_equal(mp_3.parameters.consolidated_array[replaced_indices], np.array([100, 200, 300, 123]))
+    # Value tests still correct after updating index
+    mp_3.update_sample_indices()
+    assert any([np.array_equal(mp_3.parameters.consolidated_array[not_replaced_indices], arr) for arr in
+                possible_not_replaced_sample])
+    assert np.array_equal(mp_3.parameters.consolidated_array[replaced_indices], np.array([100, 200, 300, 123]))
+
+    # Non-replaced names map to IPM 0, A and B to IPM 1, C and E to IPM 2
+    assert all(
+        [mp_3.parameters.ipm_mapper[n] == 0
+         for n in mp_3.parameters.names
+         if n not in mp_3.parameters.replaced.keys()
+         ]
+    )
+    assert all(
+        [mp_3.parameters.ipm_mapper[n] == 1
+         for n in mp_3.parameters.names
+         if n in list('AB')
+         ]
+    )
+    assert all(
+        [mp_3.parameters.ipm_mapper[n] == 2
+         for n in mp_3.parameters.names
+         if n in list('CE')
+         ]
+    )
+
+    assert len(mp_3.parameters.replaced) == 4
+    assert mp_3.parameters.replaced['A'] == [(parameters_fixture, 'foo')]
+    assert mp_3.parameters.replaced['B'] == [(parameters_fixture, 'foo')]
+    assert mp_3.parameters.replaced['C'] == [(parameters_fixture, 'foo')]
+    assert mp_3.parameters.replaced['E'] == [(parameters_fixture, 'foo'), (parameters_fixture_2, 'nufoo')]