diff --git a/experimental_env/analysis/analyze_summarizers/error_summarizer.py b/experimental_env/analysis/analyze_summarizers/error_summarizer.py index 9a035752..4d512f7c 100644 --- a/experimental_env/analysis/analyze_summarizers/error_summarizer.py +++ b/experimental_env/analysis/analyze_summarizers/error_summarizer.py @@ -38,13 +38,14 @@ def calculate(self, results: list[ExperimentDescription]) -> tuple: errors.append(error) - mean = np.sum(errors) / len(errors) - standart_deviation = np.sqrt(np.sum([(x - mean) ** 2 for x in errors]) / len(errors)) + if not errors: + return 0, 0, 0 - errors.sort() - median = errors[len(errors) // 2] + mean = np.mean(errors) + std = np.std(errors) + median = np.median(errors) - return float(mean), float(standart_deviation), float(median) + return float(mean), float(std), float(median) def analyze_method(self, results: list[ExperimentDescription], method: str): mean, deviation, median = self.calculate(results) diff --git a/experimental_env/experiment/estimators.py b/experimental_env/experiment/estimators.py index 4aa8dc0c..2c608458 100644 --- a/experimental_env/experiment/estimators.py +++ b/experimental_env/experiment/estimators.py @@ -90,7 +90,7 @@ def __init__(self, brkpointer, dst_checker): @property def name(self): - return "LM-EM" + return "ELM" def _helper(self, problem: OrderedProblem): """ diff --git a/experimental_env/experiment/experiment_executors/abstract_executor.py b/experimental_env/experiment/experiment_executors/abstract_executor.py index 0f998704..27575811 100644 --- a/experimental_env/experiment/experiment_executors/abstract_executor.py +++ b/experimental_env/experiment/experiment_executors/abstract_executor.py @@ -1,5 +1,6 @@ """A module that provides an abstract class for performing the 2nd stage of the experiment""" +import random import warnings from abc import ABC, abstractmethod from pathlib import Path @@ -20,7 +21,7 @@ class AExecutor(ABC): as well as the implementation of the execute method, to implement the 2nd stage of the experiment. 
""" - def __init__(self, path: Path, cpu_count: int, seed): + def __init__(self, path: Path, cpu_count: int, seed: int): """ Class constructor @@ -31,6 +32,8 @@ def __init__(self, path: Path, cpu_count: int, seed): self._out_dir = path self._cpu_count = cpu_count self._seed = seed + + random.seed(self._seed) np.random.seed(self._seed) @abstractmethod diff --git a/experimental_env/experiment/experiment_executors/random_executor.py b/experimental_env/experiment/experiment_executors/random_executor.py index c520e804..9e6d5774 100644 --- a/experimental_env/experiment/experiment_executors/random_executor.py +++ b/experimental_env/experiment/experiment_executors/random_executor.py @@ -19,7 +19,7 @@ def init_problems(self, ds_descriptions, models): return [ Problem( descr.samples, - RandomMixtureGenerator(self._seed).create_mixture(models), + RandomMixtureGenerator().create_mixture(models), ) for i, descr in enumerate(ds_descriptions) ] diff --git a/experimental_env/experiment/experiment_executors/standart_executor.py b/experimental_env/experiment/experiment_executors/standart_executor.py index b8cdfdab..91e5a00b 100644 --- a/experimental_env/experiment/experiment_executors/standart_executor.py +++ b/experimental_env/experiment/experiment_executors/standart_executor.py @@ -19,7 +19,7 @@ def init_problems(self, ds_descriptions, models): return [ Problem( descr.samples, - StandartMixtureGenerator(self._seed).create_mixture(models), + StandartMixtureGenerator().create_mixture(models), ) for i, descr in enumerate(ds_descriptions) ] diff --git a/experimental_env/mixture_generators/abstract_generator.py b/experimental_env/mixture_generators/abstract_generator.py index fa1f6029..4a3f904f 100644 --- a/experimental_env/mixture_generators/abstract_generator.py +++ b/experimental_env/mixture_generators/abstract_generator.py @@ -1,6 +1,5 @@ """A module that provides an abstract class for generating a mixture.""" -import random from abc import ABC, abstractmethod from mpest import Distribution, MixtureDistribution @@ -12,9 +11,6 @@ class AMixtureGenerator(ABC): An abstract class for generating mixtures. """ - def __init__(self, seed: int = 42): - random.seed(seed) - @abstractmethod def generate_priors(self, models: list[type[AModel]]) -> list[float | None]: """ diff --git a/experimental_env/mixture_generators/utils.py b/experimental_env/mixture_generators/utils.py index a8d7090b..664dab7a 100644 --- a/experimental_env/mixture_generators/utils.py +++ b/experimental_env/mixture_generators/utils.py @@ -16,8 +16,8 @@ def generate_standart_params(models: list[type[AModel]]) -> list[Distribution]: params = [1.0] elif m == GaussianModel: params = [0.0, 1.0] - else: - params = [1.0, 1.5] + else: # Weibull + params = [1.0, 1.0] dists.append(Distribution.from_params(m, params)) @@ -34,7 +34,7 @@ def generate_uniform_params(models: list[type[AModel]]) -> list[Distribution]: params = [uniform(0.1, 5.0)] elif m == GaussianModel: params = [uniform(-5.0, 5.0), uniform(0.1, 5.0)] - else: + else: # Weibull params = [uniform(0.1, 5.0), uniform(0.1, 5.0)] dists.append(Distribution.from_params(m, params)) diff --git a/experimental_env/preparation/dataset_generator.py b/experimental_env/preparation/dataset_generator.py index b171987d..e60e522f 100644 --- a/experimental_env/preparation/dataset_generator.py +++ b/experimental_env/preparation/dataset_generator.py @@ -26,9 +26,11 @@ def __init__(self, seed: int = 42): """ Setting seed for determined result. 
""" - random.seed(seed) self._seed = seed + random.seed(self._seed) + np.random.seed(self._seed) + def generate( self, samples_size: int, @@ -59,7 +61,6 @@ class ConcreteDatasetGenerator: """ def __init__(self, seed: int = 42): - np.random.seed(seed) self._dists: list[Distribution] = [] self._priors: list[float | None] = [] diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/core/test_distribution.py b/tests/core/test_distribution.py new file mode 100644 index 00000000..73617b43 --- /dev/null +++ b/tests/core/test_distribution.py @@ -0,0 +1,210 @@ +from unittest.mock import MagicMock, Mock + +import numpy as np +import pytest +from hypothesis import given +from hypothesis import strategies as st + +from mpest.core.distribution import Distribution +from mpest.models import AModel, AModelWithGenerator + + +@st.composite +def valid_params(draw, min_size=1, max_size=5): + size = draw(st.integers(min_value=min_size, max_value=max_size)) + params_list = draw( + st.lists( + st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False), + min_size=size, + max_size=size, + ) + ) + return np.array(params_list) + + +@st.composite +def valid_x(draw): + return draw(st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False)) + + +@st.composite +def valid_size(draw): + return draw(st.integers(min_value=1, max_value=100)) + + +class MockModel(AModel): + @property + def name(self): + return "MockModel" + + def pdf(self, x, params): + return 0.1 * x * sum(params) + + def lpdf(self, x, params): + return np.log(self.pdf(x, params)) + + def params_convert_to_model(self, params): + return params + + def params_convert_from_model(self, params): + return params + + +class MockModelWithGenerator(AModelWithGenerator): + @property + def name(self): + return "MockModelWithGenerator" + + def pdf(self, x, params): + return 0.1 * x * sum(params) + + def lpdf(self, x, params): + return np.log(self.pdf(x, params)) + + def params_convert_to_model(self, params): + return params + + def params_convert_from_model(self, params): + return params + + def generate(self, params, size=1, **kwargs): + return np.random.uniform(0, 1, size=size) + + +class TestModuleDistribution: + def test_init(self): + model = Mock() + params = np.array([1.0, 2.0]) + + dist = Distribution(model=model, params=params) + + assert dist._model is model + assert np.array_equal(dist._params, params) + + def test_from_params(self): + MockModelClass = Mock() + mock_instance = Mock() + MockModelClass.return_value = mock_instance + params = [1.0, 2.0] + + dist = Distribution.from_params(MockModelClass, params) + + MockModelClass.assert_called_once() + assert dist._model is mock_instance + assert np.array_equal(dist._params, np.array(params)) + + def test_model_property(self): + model = Mock() + params = np.array([1.0, 2.0]) + + dist = Distribution(model=model, params=params) + + assert dist.model is model + + def test_params_property(self): + model = Mock() + params = np.array([1.0, 2.0]) + + dist = Distribution(model=model, params=params) + + assert dist.params is params + assert np.array_equal(dist.params, params) + + def test_has_generator_property_true(self): + model = MagicMock(spec=AModelWithGenerator) + params = np.array([1.0, 2.0]) + + dist = Distribution(model=model, params=params) + + assert dist.has_generator is True + + def test_has_generator_property_false(self): + model = MagicMock(spec=AModel) + params = np.array([1.0, 2.0]) + + dist = 
Distribution(model=model, params=params) + + assert dist.has_generator is False + + @given(valid_x(), valid_params()) + def test_pdf_calls_model_pdf_correctly(self, x, params): + model = Mock() + return_value = 0.1 + converted_params = np.array([3.0, 4.0]) + model.params_convert_to_model.return_value = converted_params + model.pdf.return_value = return_value + + dist = Distribution(model=model, params=params) + result = dist.pdf(x) + + model.params_convert_to_model.assert_called_once_with(params) + model.pdf.assert_called_once_with(x, converted_params) + assert result == return_value + + @given(valid_size(), valid_params()) + def test_generate_with_generator_model(self, size, params): + model = MagicMock(spec=AModelWithGenerator) + converted_params = np.array([3.0, 4.0]) + model.params_convert_to_model.return_value = converted_params + generated_samples = np.random.uniform(0, 1, size=size) + model.generate.return_value = generated_samples + + dist = Distribution(model=model, params=params) + result = dist.generate(size=size) + + model.params_convert_to_model.assert_called_once_with(params) + model.generate.assert_called_once_with(converted_params, size=size) + assert np.array_equal(result, generated_samples) + + def test_generate_without_generator_raises_typeerror(self): + model = MagicMock(spec=AModel) + params = np.array([1.0, 2.0]) + + dist = Distribution(model=model, params=params) + + with pytest.raises(TypeError): + dist.generate(size=3) + + +class TestIntegrationDistribution: + @given(valid_x(), valid_params()) + def test_pdf_integration(self, x, params): + model = MockModel() + dist = Distribution(model=model, params=params) + + converted_params = model.params_convert_to_model(params) + expected = model.pdf(x, converted_params) + actual = dist.pdf(x) + + assert actual == pytest.approx(expected) + + @given(valid_size(), valid_params()) + def test_generate_integration(self, size, params): + model = MockModelWithGenerator() + + dist = Distribution(model=model, params=params) + result = dist.generate(size=size) + + assert result.shape == (size,) + assert result.dtype == np.float64 + assert np.all(result >= 0) + assert np.all(result < 1) + + def test_generate_without_generator_raises_typeerror_integration(self): + model = MockModel() + params = np.array([1.0, 2.0]) + + dist = Distribution(model=model, params=params) + + with pytest.raises(TypeError): + dist.generate(size=3) + + @given(valid_x(), valid_params()) + def test_pdf_consistent_results(self, x, params): + model = MockModel() + dist = Distribution(model=model, params=params) + + result1 = dist.pdf(x) + result2 = dist.pdf(x) + + assert result1 == pytest.approx(result2) diff --git a/tests/core/test_mixture_distribution.py b/tests/core/test_mixture_distribution.py new file mode 100644 index 00000000..f74e5370 --- /dev/null +++ b/tests/core/test_mixture_distribution.py @@ -0,0 +1,408 @@ +from unittest.mock import MagicMock, call, patch + +import numpy as np +import pytest +from hypothesis import given +from hypothesis import strategies as st + +from mpest import Distribution +from mpest.core.mixture_distribution import DistributionInMixture, MixtureDistribution +from mpest.models import AModel, AModelWithGenerator + + +def valid_size(): + return st.integers(min_value=1, max_value=100) + + +def valid_x(): + return st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False) + + +def valid_params(): + return st.lists( + st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False), min_size=1, 
max_size=5 + ).map(np.array) + + +def valid_prior_probability(): + return st.one_of(st.none(), st.floats(min_value=0.0, max_value=1.0, exclude_min=False, exclude_max=False)) + + +def valid_distributions(): + return st.lists(st.builds(Distribution, model=st.just(MockModel()), params=valid_params()), min_size=1, max_size=5) + + +class MockModel(AModel): + @property + def name(self): + return "MockModel" + + def pdf(self, x, params): + return x * params[0] + + def lpdf(self, x, params): + return np.log(self.pdf(x, params)) + + def params_convert_from_model(self, params): + return params + + def params_convert_to_model(self, params): + return params + + +class MockModelWithGenerator(AModelWithGenerator): + @property + def name(self): + return "MockModelWithGenerator" + + def pdf(self, x, params): + return x * params[0] + + def lpdf(self, x, params): + return np.log(self.pdf(x, params)) + + def params_convert_from_model(self, params): + return params + + def params_convert_to_model(self, params): + return params + + def generate(self, params, normalized=True, size=1): + return np.random.uniform(0, 1, size=size) + + +class TestDistributionInMixture: + @given(valid_params(), valid_prior_probability()) + def test_init(self, params, prior_probability): + model = MagicMock(spec=AModel) + dist = DistributionInMixture(model=model, params=params, prior_probability=prior_probability) + + assert dist.params is params + assert dist.model is model + assert dist.prior_probability == prior_probability + + @given(valid_params(), valid_prior_probability()) + def test_prior_probability_property(self, params, prior_probability): + model = MagicMock(spec=AModel) + dist = DistributionInMixture(model=model, params=params, prior_probability=prior_probability) + + assert dist.prior_probability == prior_probability + + @given(valid_x(), valid_params()) + def test_pdf_with_none_prior_probability(self, x, params): + model = MagicMock(spec=AModel) + dist = DistributionInMixture(model=model, params=params, prior_probability=None) + + result = dist.pdf(x) + + model.params_convert_to_model.assert_not_called() + model.pdf.assert_not_called() + assert result == 0.0 + + @given(valid_x(), valid_params(), st.floats(min_value=0.01, max_value=1.0)) + def test_pdf_with_float_prior_probability(self, x, params, prior_probability): + model = MagicMock(spec=AModel) + converted_params = np.array([3.0, 4.0]) + model.params_convert_to_model.return_value = converted_params + return_value = 0.5 + model.pdf.return_value = return_value + + dist = DistributionInMixture(model=model, params=params, prior_probability=prior_probability) + result = dist.pdf(x) + + model.params_convert_to_model.assert_called_once_with(params) + model.pdf.assert_called_once_with(x, converted_params) + assert result == pytest.approx(prior_probability * return_value) + + +class TestIntegrationDistributionInMixture: + @given(valid_x(), valid_params(), st.floats(min_value=0.01, max_value=1.0)) + def test_pdf_integration(self, x, params, prior_probability): + model = MockModel() + dist = DistributionInMixture(model=model, params=params, prior_probability=prior_probability) + + converted_params = model.params_convert_to_model(params) + expected = prior_probability * model.pdf(x, converted_params) + actual = dist.pdf(x) + + assert actual == pytest.approx(expected) + + @given(valid_x(), valid_params()) + def test_pdf_with_none_prior_probability_integration(self, x, params): + model = MockModel() + dist = DistributionInMixture(model=model, params=params, 
prior_probability=None) + + actual = dist.pdf(x) + + assert actual == 0.0 + + +class TestMixtureDistribution: + def test_init(self): + mock_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + with patch.object(MixtureDistribution, "_normalize") as mock_normalize: + mixture = MixtureDistribution(distributions=mock_distributions) # noqa: F841 + mock_normalize.assert_called_once() + + def test_iter(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + iterated_distributions = list(mixture) + assert iterated_distributions == new_distributions + + def test_getitem(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + for i in range(len(new_distributions)): + assert mixture[i] == new_distributions[i] + + def test_len(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + assert len(mixture) == len(new_distributions) + + @given(valid_x()) + def test_pdf(self, x): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + pdf_values = [0.1, 0.2, 0.3] + + for dist, val in zip(new_distributions, pdf_values): + dist.pdf.return_value = val + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + result = mixture.pdf(x) + + for dist in new_distributions: + dist.pdf.assert_called_once_with(x) + + assert result == pytest.approx(sum(pdf_values)) + + def test_has_generator_all_true(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + for dist in new_distributions: + dist.prior_probability = 0.3 + dist.model = MagicMock(spec=AModelWithGenerator) + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + assert mixture.has_generator is True + + def test_has_generator_one_false(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + for i, dist in enumerate(new_distributions): + dist.prior_probability = 0.3 + if i == 1: + dist.model = MagicMock(spec=AModel) + else: + dist.model = MagicMock(spec=AModelWithGenerator) + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + assert mixture.has_generator is False + + def 
test_has_generator_with_none_probability(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + + new_distributions[0].prior_probability = None + new_distributions[0].model = MagicMock(spec=AModel) + + for dist in new_distributions[1:]: + dist.prior_probability = 0.5 + dist.model = MagicMock(spec=AModelWithGenerator) + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + assert mixture.has_generator is True + + @given(valid_size()) + def test_generate(self, size): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + probabilities = [0.3, 0.5, 0.2] + + for dist, prob in zip(new_distributions, probabilities): + dist.prior_probability = prob + dist.has_generator = True + dist.generate.return_value = np.array([0.1]) + + with ( + patch.object(MixtureDistribution, "_normalize"), + patch("numpy.random.choice") as mock_choice, + patch("numpy.random.shuffle") as mock_shuffle, + ): + mock_choice.return_value = np.array([0, 1, 1, 2]) + + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + mixture.generate(size=size) + + mock_choice.assert_called_once() + np.testing.assert_array_equal(mock_choice.call_args[0][0], [0, 1, 2]) + np.testing.assert_array_almost_equal(mock_choice.call_args[1]["p"], probabilities) + assert mock_choice.call_args[1]["size"] == size + + counts = [1, 2, 1] + for i, (dist, count) in enumerate(zip(new_distributions, counts)): + if count > 0: + dist.generate.assert_called_once_with(count) + + mock_shuffle.assert_called_once() + + def test_generate_without_generator_raises_typeerror(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(2)] + new_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(2)] + + new_distributions[0].prior_probability = 0.5 + new_distributions[0].has_generator = True + + new_distributions[1].prior_probability = 0.5 + new_distributions[1].has_generator = False + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + with pytest.raises(TypeError): + mixture.generate(size=3) + + def test_normalize_creates_new_distributions(self): + dists_count = 2 + mock_dist1 = MagicMock(spec=DistributionInMixture) + mock_dist1.prior_probability = 2.0 + mock_dist1.model = MagicMock(name="model1") + mock_dist1.params = np.array([1.0]) + + mock_dist2 = MagicMock(spec=DistributionInMixture) + mock_dist2.prior_probability = 3.0 + mock_dist2.model = MagicMock(name="model2") + mock_dist2.params = np.array([2.0]) + + with patch("mpest.core.mixture_distribution.DistributionInMixture") as mock_constructor: + mixture = MixtureDistribution([mock_dist1, mock_dist2]) # noqa: F841 + + assert mock_constructor.call_count == dists_count + assert mock_constructor.call_args_list[0] == call(mock_dist1.model, mock_dist1.params, 2.0 / 5.0) + assert mock_constructor.call_args_list[1] == call(mock_dist2.model, mock_dist2.params, 3.0 / 5.0) + + def test_distributions_property(self): + original_distributions = [MagicMock(spec=DistributionInMixture) for _ in range(3)] + new_distributions = 
[MagicMock(spec=DistributionInMixture) for _ in range(3)] + + with patch.object(MixtureDistribution, "_normalize"): + mixture = MixtureDistribution(distributions=original_distributions) + mixture._distributions = new_distributions + + assert mixture.distributions == new_distributions + + +class TestIntegrationMixtureDistribution: + def test_init_and_normalize(self): + model = MockModel() + dist1 = DistributionInMixture(model=model, params=np.array([1.0]), prior_probability=2.0) + dist2 = DistributionInMixture(model=model, params=np.array([2.0]), prior_probability=3.0) + dist3 = DistributionInMixture(model=model, params=np.array([3.0]), prior_probability=None) + + mixture = MixtureDistribution([dist1, dist2, dist3]) + + assert mixture[0].prior_probability == pytest.approx(2.0 / 5.0) + assert mixture[1].prior_probability == pytest.approx(3.0 / 5.0) + assert mixture[2].prior_probability is None + + @given( + st.lists(valid_params(), min_size=2, max_size=5), + st.lists(st.floats(min_value=0.01, max_value=1.0), min_size=2, max_size=5), + ) + def test_from_distributions_integration(self, params_list, probabilities): + if len(params_list) != len(probabilities): + probabilities = ( + probabilities[: len(params_list)] + if len(probabilities) > len(params_list) + else probabilities + [0.1] * (len(params_list) - len(probabilities)) + ) + + model = MockModel() + distributions = [Distribution(model=model, params=p) for p in params_list] + + mixture = MixtureDistribution.from_distributions(distributions, probabilities) + + assert len(mixture) == len(distributions) + + total_prob = sum(probabilities) + for i, (dist, prob) in enumerate(zip(mixture, probabilities)): + assert isinstance(dist, DistributionInMixture) + assert dist.model is model + np.testing.assert_array_equal(dist.params, params_list[i]) + assert dist.prior_probability == pytest.approx(prob / total_prob) + + @given(valid_x(), st.integers(min_value=2, max_value=5)) + def test_pdf_integration(self, x, n): + model = MockModel() + params = [np.array([float(i)]) for i in range(1, n + 1)] + priors = [float(i) for i in range(1, n + 1)] + + distributions = [Distribution(model=model, params=p) for p in params] + mixture = MixtureDistribution.from_distributions(distributions, priors) + + total_prior = sum(priors) + expected = sum((prior / total_prior) * model.pdf(x, param) for prior, param in zip(priors, params)) + + assert mixture.pdf(x) == pytest.approx(expected) + + @given(valid_size()) + def test_generate_integration(self, size): + model_with_gen = MockModelWithGenerator() + + dist1 = Distribution(model=model_with_gen, params=np.array([1.0])) + dist2 = Distribution(model=model_with_gen, params=np.array([2.0])) + + mixture = MixtureDistribution.from_distributions([dist1, dist2], [0.3, 0.7]) + result = mixture.generate(size=size) + + assert isinstance(result, np.ndarray) + assert result.shape == (size,) + assert result.dtype == np.float64 + assert np.all(result >= 0) + assert np.all(result < 1) + + def test_generate_without_generator_integration(self): + model = MockModel() + model_with_gen = MockModelWithGenerator() + + dist1 = Distribution(model=model, params=np.array([1.0])) + dist2 = Distribution(model=model_with_gen, params=np.array([2.0])) + + mixture = MixtureDistribution.from_distributions([dist1, dist2], [0.3, 0.7]) + + with pytest.raises(TypeError): + mixture.generate(size=3) diff --git a/tests/core/test_problem.py b/tests/core/test_problem.py new file mode 100644 index 00000000..a21f0aea --- /dev/null +++ b/tests/core/test_problem.py @@ 
-0,0 +1,68 @@
+from unittest.mock import Mock
+
+import numpy as np
+from hypothesis import given
+from hypothesis import strategies as st
+
+from mpest.core.problem import Problem
+
+
+@st.composite
+def valid_samples(draw, min_size=1, max_size=100):
+    size = draw(st.integers(min_value=min_size, max_value=max_size))
+    samples_list = draw(
+        st.lists(
+            st.floats(min_value=-100, max_value=100, allow_nan=False, allow_infinity=False),
+            min_size=size,
+            max_size=size,
+        )
+    )
+
+    return np.array(samples_list)
+
+
+@st.composite
+def valid_distributions(draw, min_count=1, max_count=5):
+    count = draw(st.integers(min_value=min_count, max_value=max_count))
+    distributions = []
+    for _ in range(count):
+        mock_dist = Mock()
+        mock_dist.name = draw(
+            st.text(min_size=1, max_size=10, alphabet=st.characters(whitelist_categories=("Lu", "Ll")))
+        )
+        distributions.append(mock_dist)
+    return distributions
+
+
+class TestProblem:
+    def test_get_instance(self):
+        samples = np.array([1.0, 2.0, 3.0])
+        distributions = [Mock(), Mock()]
+
+        problem = Problem(samples=samples, distributions=distributions)
+
+        assert isinstance(problem, Problem)
+
+    @given(valid_samples(), valid_distributions())
+    def test_init(self, samples, distributions):
+        """Test Problem initialization."""
+        problem = Problem(samples=samples, distributions=distributions)
+
+        assert problem.samples is samples
+        assert problem.distributions is distributions
+
+    @given(valid_samples(), valid_distributions())
+    def test_samples_property(self, samples, distributions):
+        problem = Problem(samples=samples, distributions=distributions)
+
+        assert problem.samples is samples
+        assert np.array_equal(problem.samples, samples)
+
+    @given(valid_samples(), valid_distributions())
+    def test_distributions_property(self, samples, distributions):
+        problem = Problem(samples=samples, distributions=distributions)
+
+        assert problem.distributions is distributions
+        assert len(problem.distributions) == len(distributions)
+        for i, dist in enumerate(distributions):
+            assert problem.distributions[i] is dist
diff --git a/tests/experimental_env/__init__.py b/tests/experimental_env/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/experimental_env/test_reproducibility_expenv.py b/tests/experimental_env/test_reproducibility_expenv.py
new file mode 100644
index 00000000..70557844
--- /dev/null
+++ b/tests/experimental_env/test_reproducibility_expenv.py
@@ -0,0 +1,146 @@
+import tempfile
+from pathlib import Path
+
+import numpy as np
+
+from experimental_env.experiment.estimators import LikelihoodEstimator, LMomentsEstimator
+from experimental_env.experiment.experiment_description import StepDescription
+from experimental_env.experiment.experiment_executors.random_executor import RandomExperimentExecutor
+from experimental_env.experiment.experiment_parser import ExperimentParser
+from experimental_env.preparation.dataset_generator import RandomDatasetGenerator
+from experimental_env.preparation.dataset_parser import SamplesDatasetParser
+from mpest import MixtureDistribution
+from mpest.em.breakpointers import StepCountBreakpointer
+from mpest.em.distribution_checkers import FiniteChecker, PriorProbabilityThresholdChecker
+from mpest.models import ExponentialModel, GaussianModel, WeibullModelExp
+
+
+def compare_mixtures(mxt_1: MixtureDistribution, mxt_2: MixtureDistribution):
+    return all(
+        (np.array_equal(d1.prior_probability, d2.prior_probability, equal_nan=True))
+        and np.array_equal(d1.params, d2.params, equal_nan=True)
+        and (type(d1.model) is type(d2.model))
+        for d1, d2 in zip(mxt_1, mxt_2)
+    )
+
+
+def compare_mixtures_without_priors(mxt_1: MixtureDistribution, mxt_2: MixtureDistribution):
+    return all(
+        np.array_equal(d1.params, d2.params, equal_nan=True) and (type(d1.model) is type(d2.model))
+        for d1, d2 in zip(mxt_1, mxt_2)
+    )
+
+
+def compare_steps(step_descr_1: list[StepDescription], step_descr_2: list[StepDescription]):
+    return all(
+        compare_mixtures(step_1.result_mixture, step_2.result_mixture)
+        for step_1, step_2 in zip(step_descr_1, step_descr_2)
+    )
+
+
+def stage_1(working_dir):
+    WORKING_DIR = Path(working_dir) / "stage_1"
+    SAMPLES_SIZE = 200
+
+    r_generator = RandomDatasetGenerator(42)
+    mixtures = [
+        [GaussianModel, WeibullModelExp],
+        [ExponentialModel, GaussianModel],
+        [ExponentialModel, ExponentialModel],
+    ]
+    for models in mixtures:
+        r_generator.generate(SAMPLES_SIZE, models, WORKING_DIR, exp_count=5)
+
+
+def stage_2(working_dir):
+    WORKING_DIR = Path(working_dir) / "stage_2"
+    SOURCE_DIR = Path(working_dir) / "stage_1"
+    parser = SamplesDatasetParser()
+    datasets = parser.parse(SOURCE_DIR)
+
+    executor = RandomExperimentExecutor(WORKING_DIR, 5, 43)
+    executor.execute(
+        datasets,
+        LikelihoodEstimator(
+            StepCountBreakpointer(max_step=16),
+            FiniteChecker() + PriorProbabilityThresholdChecker(),
+        ),
+    )
+
+    executor = RandomExperimentExecutor(WORKING_DIR, 5, 43)
+    executor.execute(
+        datasets,
+        LMomentsEstimator(
+            StepCountBreakpointer(max_step=16),
+            FiniteChecker() + PriorProbabilityThresholdChecker(),
+        ),
+    )
+
+
+def test_expenv_scenario():
+    ds_1 = None
+    ds_2 = None
+
+    ELM_exp_1 = None
+    EM_exp_1 = None
+    ELM_exp_2 = None
+    EM_exp_2 = None
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        stage_1(tmpdir)
+        stage_1_dir = Path(tmpdir) / "stage_1"
+        parser = SamplesDatasetParser()
+        ds_1 = parser.parse(stage_1_dir)
+
+        stage_2(tmpdir)
+        ELM_stage_2_dir = Path(tmpdir) / "stage_2" / "ELM"
+        EM_stage_2_dir = Path(tmpdir) / "stage_2" / "MLE-EM"
+        ELM_exp_1 = ExperimentParser().parse(ELM_stage_2_dir)
+        EM_exp_1 = ExperimentParser().parse(EM_stage_2_dir)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        stage_1(tmpdir)
+        stage_1_dir = Path(tmpdir) / "stage_1"
+        parser = SamplesDatasetParser()
+        ds_2 = parser.parse(stage_1_dir)
+
+        stage_2(tmpdir)
+        ELM_stage_2_dir = Path(tmpdir) / "stage_2" / "ELM"
+        EM_stage_2_dir = Path(tmpdir) / "stage_2" / "MLE-EM"
+        ELM_exp_2 = ExperimentParser().parse(ELM_stage_2_dir)
+        EM_exp_2 = ExperimentParser().parse(EM_stage_2_dir)
+
+    for mxt in ds_1:
+        res_1, res_2 = ds_1[mxt], ds_2[mxt]
+        for descr_1, descr_2 in zip(res_1, res_2):
+            assert np.array_equal(descr_1.samples, descr_2.samples)
+            assert descr_1.exp_num == descr_2.exp_num
+            assert compare_mixtures(descr_1.base_mixture, descr_2.base_mixture)
+
+    for mxt in ELM_exp_1:
+        res_1, res_2 = ELM_exp_1[mxt], EM_exp_1[mxt]
+        for exp_1, exp_2 in zip(res_1, res_2):
+            assert compare_mixtures(exp_1.base_mixture, exp_2.base_mixture)
+            assert compare_mixtures(exp_1.init_mixture, exp_2.init_mixture)
+            assert not compare_mixtures_without_priors(exp_1.base_mixture, exp_1.init_mixture)
+            assert not compare_mixtures_without_priors(exp_2.base_mixture, exp_2.init_mixture)
+            assert exp_1.samples_size == exp_2.samples_size
+            assert exp_1.exp_num == exp_2.exp_num
+
+    for mxt in ELM_exp_1:
+        res_1, res_2 = ELM_exp_1[mxt], ELM_exp_2[mxt]
+        for exp_1, exp_2 in zip(res_1, res_2):
+            assert compare_mixtures(exp_1.base_mixture, exp_2.base_mixture)
+            assert compare_mixtures(exp_1.init_mixture, exp_2.init_mixture)
+            assert compare_steps(exp_1.steps, exp_2.steps)
+            assert exp_1.samples_size == exp_2.samples_size
+            assert exp_1.exp_num == exp_2.exp_num
+
+    for mxt in ELM_exp_1:
+        res_1, res_2 = EM_exp_1[mxt], EM_exp_2[mxt]
+        for exp_1, exp_2 in zip(res_1, res_2):
+            assert compare_mixtures(exp_1.base_mixture, exp_2.base_mixture)
+            assert compare_mixtures(exp_1.init_mixture, exp_2.init_mixture)
+            assert compare_steps(exp_1.steps, exp_2.steps)
+            assert exp_1.samples_size == exp_2.samples_size
+            assert exp_1.exp_num == exp_2.exp_num