From 08328f836977974124e6afe2938768e4099c8223 Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 14:48:06 +0100 Subject: [PATCH 01/10] allow to compute chi2 values --- petab/calculate.py | 23 +++++++++ tests/test_calculate.py | 100 +++++++++++++++++++++++++++------------- 2 files changed, 90 insertions(+), 33 deletions(-) diff --git a/petab/calculate.py b/petab/calculate.py index de95dab8..1f5a49c4 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -1,5 +1,6 @@ """Functions performing various calculations.""" +import numpy as np import pandas as pd from functools import reduce from typing import List, Union @@ -185,3 +186,25 @@ def evaluate_noise_formula( f"Cannot replace all parameters in noise formula {noise_value} " f"for observable {observable_id}.") return noise_value + + +def calculate_chi2( + measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], + simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], + observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], + parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], + normalize: bool = True, + scale: bool = True +) -> float: + residual_dfs = calculate_residuals( + measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs, + normalize, scale) + chi2s = [calculate_chi2_for_table_from_residuals(df) + for df in residual_dfs] + chi2 = sum(chi2s) + return chi2 + + +def calculate_chi2_for_table_from_residuals( + residual_df: pd.DataFrame) -> float: + return (np.array(residual_df[RESIDUAL])**2).sum() diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 1e8cd7f8..1633cf37 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -1,14 +1,14 @@ """Tests related to petab.calculate.""" -from petab import calculate_residuals +from petab import calculate_residuals, calculate_chi2 from petab.C import * import pandas as pd import numpy as np import pytest -def test_calculate_residuals(): - """Test calculate.calculate_residuals.""" +def model_simple(): + "Simple model.""" measurement_df = pd.DataFrame(data={ OBSERVABLE_ID: ['obs_a', 'obs_a', 'obs_b', 'obs_b'], SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], @@ -31,23 +31,15 @@ def test_calculate_residuals(): columns={MEASUREMENT: SIMULATION}) simulation_df[SIMULATION] = [2, 2, 19, 20] - residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, parameter_df) - - assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( - {(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3}) + expected_residuals = {(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3} + expected_residuals_nonorm = {2-0, 2-1, 19-20, 20-22} - # don't apply normalization - residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, - parameter_df, normalize=False) + return (measurement_df, observable_df, parameter_df, + simulation_df, expected_residuals, expected_residuals_nonorm) - assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( - {(2-0), (2-1), (19-20), (20-22)}) - -def test_calculate_residuals_replicates(): - """Test calculate.calculate_residuals with replicates.""" +def model_replicates(): + """Model with replicates.""" measurement_df = pd.DataFrame(data={ OBSERVABLE_ID: ['obs_a', 'obs_a'], SIMULATION_CONDITION_ID: ['c0', 'c0'], @@ -70,14 +62,15 @@ def test_calculate_residuals_replicates(): columns={MEASUREMENT: SIMULATION}) simulation_df[SIMULATION] = [2, 2] - residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, parameter_df) + expected_residuals = {(2-0)/2, (2-1)/2} + expected_residuals_nonorm = {2-0, 2-1} - assert set(residual_dfs[0][RESIDUAL]) == pytest.approx({(2-0)/2, (2-1)/2}) + return (measurement_df, observable_df, parameter_df, + simulation_df, expected_residuals, expected_residuals_nonorm) -def test_calculate_residuals_scaling(): - """Test calculate.calculate_residuals with scaling.""" +def model_scalings(): + """Model with scalings.""" measurement_df = pd.DataFrame(data={ OBSERVABLE_ID: ['obs_a', 'obs_a'], SIMULATION_CONDITION_ID: ['c0', 'c0'], @@ -101,16 +94,15 @@ def test_calculate_residuals_scaling(): columns={MEASUREMENT: SIMULATION}) simulation_df[SIMULATION] = [2, 3] - residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, parameter_df) + expected_residuals = {(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2} + expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} - assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( - {(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2}) + return (measurement_df, observable_df, parameter_df, + simulation_df, expected_residuals, expected_residuals_nonorm) -def test_calculate_residuals_non_numeric_overrides(): - """Test calculate.calculate_residuals with non-numeric noise formula - overrides.""" +def model_non_numeric_overrides(): + """Model with non-numeric overrides.""" measurement_df = pd.DataFrame(data={ OBSERVABLE_ID: ['obs_a', 'obs_a'], SIMULATION_CONDITION_ID: ['c0', 'c0'], @@ -136,8 +128,50 @@ def test_calculate_residuals_non_numeric_overrides(): columns={MEASUREMENT: SIMULATION}) simulation_df[SIMULATION] = [2, 3] - residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, parameter_df) + expected_residuals = {(np.log(2)-np.log(0.5))/(2*7+8+4), + (np.log(3)-np.log(1))/(2*2+3+4)} + expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} + + return (measurement_df, observable_df, parameter_df, + simulation_df, expected_residuals, expected_residuals_nonorm) - assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( - {(np.log(2)-np.log(0.5))/(2*7+8+4), (np.log(3)-np.log(1))/(2*2+3+4)}) + +@pytest.fixture +def models(): + return [model_simple(), model_replicates(), + model_scalings(), model_non_numeric_overrides()] + + +def test_calculate_residuals(models): + """Test calculate.calculate_residuals.""" + for model in models: + (measurement_df, observable_df, parameter_df, simulation_df, + expected_residuals, _) = model + residual_dfs = calculate_residuals( + measurement_df, simulation_df, observable_df, parameter_df) + assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( + expected_residuals) + + +def test_calculate_non_normalized_residuals(models): + """Test calculate.calculate_residuals without normalization.""" + for model in models: + (measurement_df, observable_df, parameter_df, simulation_df, + _, expected_residuals_nonorm) = model + residual_dfs = calculate_residuals( + measurement_df, simulation_df, observable_df, parameter_df, + normalize=False) + assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( + expected_residuals_nonorm) + + +def test_calculate_chi2(models): + """Test calculate.calculate_chi2.""" + for model in models: + (measurement_df, observable_df, parameter_df, simulation_df, + expected_residuals, _) = model + chi2 = calculate_chi2( + measurement_df, simulation_df, observable_df, parameter_df) + + expected = sum(np.array(list(expected_residuals))**2) + assert chi2 == pytest.approx(expected) From bac290dd101d7f79ec8c2be38bf550217a92f5f1 Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 14:50:45 +0100 Subject: [PATCH 02/10] add docs --- petab/calculate.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/petab/calculate.py b/petab/calculate.py index 1f5a49c4..30fd3bdc 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -196,6 +196,26 @@ def calculate_chi2( normalize: bool = True, scale: bool = True ) -> float: + """Calculate the chi2 value. + + Arguments: + measurement_dfs: + The problem measurement tables. + simulation_dfs: + Simulation tables corresponding to the measurement tables. + observable_dfs: + The problem observable tables. + parameter_dfs: + The problem parameter tables. + normalize: + Whether to normalize residuals by the noise standard deviation + terms. + scale: + Whether to calculate residuals of scaled values. + + Returns: + chi2: The aggregated chi2 value. + """ residual_dfs = calculate_residuals( measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs, normalize, scale) @@ -207,4 +227,5 @@ def calculate_chi2( def calculate_chi2_for_table_from_residuals( residual_df: pd.DataFrame) -> float: + """Compute chi2 value for a single residual table.""" return (np.array(residual_df[RESIDUAL])**2).sum() From 020a343b855026807f8f59737b0358c7a430b987 Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 14:56:14 +0100 Subject: [PATCH 03/10] init llh --- petab/calculate.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/petab/calculate.py b/petab/calculate.py index 30fd3bdc..ea26e32c 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -229,3 +229,17 @@ def calculate_chi2_for_table_from_residuals( residual_df: pd.DataFrame) -> float: """Compute chi2 value for a single residual table.""" return (np.array(residual_df[RESIDUAL])**2).sum() + + +def calculate_llh( + measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], + simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], + observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], + parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], + normalize: bool = True, + scale: bool = True +) -> float: + residual_dfs = calculate_residuals( + measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs, + normalize, scale) + llhs = [calculate_llh_for_table_from_residuals(residual_df, From 28ba529c1cf01fcad00975ae2c370ef74db01129 Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 14:57:13 +0100 Subject: [PATCH 04/10] fix w602 errors --- tests/test_calculate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 1633cf37..f92167e1 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -142,7 +142,7 @@ def models(): model_scalings(), model_non_numeric_overrides()] -def test_calculate_residuals(models): +def test_calculate_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals.""" for model in models: (measurement_df, observable_df, parameter_df, simulation_df, @@ -153,7 +153,7 @@ def test_calculate_residuals(models): expected_residuals) -def test_calculate_non_normalized_residuals(models): +def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals without normalization.""" for model in models: (measurement_df, observable_df, parameter_df, simulation_df, @@ -165,7 +165,7 @@ def test_calculate_non_normalized_residuals(models): expected_residuals_nonorm) -def test_calculate_chi2(models): +def test_calculate_chi2(models): # pylint: disable=W0621 """Test calculate.calculate_chi2.""" for model in models: (measurement_df, observable_df, parameter_df, simulation_df, From e38cf461fd94dc8705793c99997c4e9d6da31520 Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 14:58:02 +0100 Subject: [PATCH 05/10] fix docstring coverage --- tests/test_calculate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_calculate.py b/tests/test_calculate.py index f92167e1..63d59a65 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -138,6 +138,7 @@ def model_non_numeric_overrides(): @pytest.fixture def models(): + """Test model collection covering different features.""" return [model_simple(), model_replicates(), model_scalings(), model_non_numeric_overrides()] From 1272adc206c6be368b1c98260a1b2446f88165bf Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 14:59:24 +0100 Subject: [PATCH 06/10] fix codacy warnings --- tests/test_calculate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 63d59a65..9ffdd14a 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -147,7 +147,7 @@ def test_calculate_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals.""" for model in models: (measurement_df, observable_df, parameter_df, simulation_df, - expected_residuals, _) = model + expected_residuals, _) = model residual_dfs = calculate_residuals( measurement_df, simulation_df, observable_df, parameter_df) assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( @@ -158,7 +158,7 @@ def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals without normalization.""" for model in models: (measurement_df, observable_df, parameter_df, simulation_df, - _, expected_residuals_nonorm) = model + _, expected_residuals_nonorm) = model residual_dfs = calculate_residuals( measurement_df, simulation_df, observable_df, parameter_df, normalize=False) @@ -170,7 +170,7 @@ def test_calculate_chi2(models): # pylint: disable=W0621 """Test calculate.calculate_chi2.""" for model in models: (measurement_df, observable_df, parameter_df, simulation_df, - expected_residuals, _) = model + expected_residuals, _) = model chi2 = calculate_chi2( measurement_df, simulation_df, observable_df, parameter_df) From b5932f12bb55b9442e20114f54debe8d7c7f8ffc Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 17:09:25 +0100 Subject: [PATCH 07/10] allow calculating likelihoods --- petab/C.py | 1 + petab/calculate.py | 104 ++++++++++++++++++++++++++++++--- tests/test_calculate.py | 126 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 211 insertions(+), 20 deletions(-) diff --git a/petab/C.py b/petab/C.py index aecf42e2..49081774 100644 --- a/petab/C.py +++ b/petab/C.py @@ -153,3 +153,4 @@ SIMULATION = 'simulation' RESIDUAL = 'residual' +NOISE_VALUE = 'noiseValue' diff --git a/petab/calculate.py b/petab/calculate.py index ea26e32c..88690fc0 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -77,6 +77,8 @@ def calculate_residuals_for_table( # create residual df as copy of measurement df, change column residual_df = measurement_df.copy(deep=True).rename( columns={MEASUREMENT: RESIDUAL}) + # record noise + residual_df[NOISE_VALUE] = None # matching columns compared_cols = set(MEASUREMENT_DF_COLS) @@ -106,13 +108,15 @@ def calculate_residuals_for_table( # non-normalized residual is just the difference residual = simulation - measurement + noise_value = 1 if normalize: # look up noise standard deviation noise_value = evaluate_noise_formula( row, parameter_df, noise_formulas) - residual /= noise_value + residual /= noise_value - # fill in value + # fill in values + residual_df.loc[irow, NOISE_VALUE] = noise_value residual_df.loc[irow, RESIDUAL] = residual return residual_df @@ -236,10 +240,94 @@ def calculate_llh( simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], - normalize: bool = True, - scale: bool = True ) -> float: - residual_dfs = calculate_residuals( - measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs, - normalize, scale) - llhs = [calculate_llh_for_table_from_residuals(residual_df, + # convenience + if isinstance(measurement_dfs, pd.DataFrame): + measurement_dfs = [measurement_dfs] + if isinstance(simulation_dfs, pd.DataFrame): + simulation_dfs = [simulation_dfs] + if isinstance(observable_dfs, pd.DataFrame): + observable_dfs = [observable_dfs] + if isinstance(parameter_dfs, pd.DataFrame): + parameter_dfs = [parameter_dfs] + + # iterate over data frames + llhs = [] + for (measurement_df, simulation_df, observable_df, parameter_df) in zip( + measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs): + _llh = calculate_llh_for_table( + measurement_df, simulation_df, observable_df, parameter_df) + llhs.append(_llh) + llh = sum(llhs) + return llh + + +def calculate_llh_for_table( + measurement_df: pd.DataFrame, + simulation_df: pd.DataFrame, + observable_df: pd.DataFrame, + parameter_df: pd.DataFrame) -> float: + llhs = [] + + # matching columns + compared_cols = set(MEASUREMENT_DF_COLS) + compared_cols -= {MEASUREMENT} + compared_cols &= set(measurement_df.columns) + + # compute noise formulas for observables + noise_formulas = get_symbolic_noise_formulas(observable_df) + + # iterate over measurements, find corresponding simulations + for irow, row in measurement_df.iterrows(): + measurement = row[MEASUREMENT] + + # look up in simulation df + masks = [simulation_df[col] == row[col] for col in compared_cols] + mask = reduce(lambda x, y: x & y, masks) + + simulation = simulation_df.loc[mask][SIMULATION].iloc[0] + + observable = observable_df.loc[row[OBSERVABLE_ID]] + + # get scale + scale = observable.get(OBSERVABLE_TRANSFORMATION, LIN) + + # get noise standard deviation + noise_value = evaluate_noise_formula( + row, parameter_df, noise_formulas) + + # get noise distribution + noise_distribution = observable.get(NOISE_DISTRIBUTION, NORMAL) + + llh = calculate_single_llh( + measurement, simulation, scale, noise_distribution, noise_value) + llhs.append(llh) + llh = sum(llhs) + return llh + + +def calculate_single_llh( + measurement: float, + simulation: float, + scale: str, + noise_distribution: str, + noise_value: float) -> float: + """Calculate a single log likelihood.""" + # short-hand + m, s, sigma = measurement, simulation, noise_value + pi, log, log10 = np.pi, np.log, np.log10 + + # go over the possible cases + if noise_distribution == NORMAL and scale == LIN: + llh = 0.5*log(2*pi*sigma**2) + 0.5*((s-m)/sigma)**2 + elif noise_distribution == NORMAL and scale == LOG: + llh = 0.5*log(2*pi*sigma**2*m**2) + 0.5*((log(s)-log(m))/sigma)**2 + elif noise_distribution == NORMAL and scale == LOG10: + llh = 0.5*log(2*pi*sigma**2*m**2) + 0.5*((log10(s)-log10(m))/sigma)**2 + elif noise_distribution == LAPLACE and scale == LIN: + llh = log(2*sigma) + abs((s-m)/sigma) + elif noise_distribution == LAPLACE and scale == LOG: + llh = log(2*sigma*m) + abs((log(s)-log(m))/sigma) + elif noise_distribution == LAPLACE and scale == LOG10: + llh = log(2*sigma*m) + abs((log10(s)-log10(m))/sigma) + return llh diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 9ffdd14a..3f2d5a01 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -1,6 +1,7 @@ """Tests related to petab.calculate.""" -from petab import calculate_residuals, calculate_chi2 +from petab import (calculate_residuals, calculate_chi2, calculate_llh, + calculate_single_llh) from petab.C import * import pandas as pd import numpy as np @@ -33,9 +34,12 @@ def model_simple(): expected_residuals = {(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3} expected_residuals_nonorm = {2-0, 2-1, 19-20, 20-22} + expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ + 0.5*np.log(2*np.pi*np.array([2, 2, 3, 3])**2).sum() return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) + simulation_df, expected_residuals, expected_residuals_nonorm, + expected_llh) def model_replicates(): @@ -64,9 +68,12 @@ def model_replicates(): expected_residuals = {(2-0)/2, (2-1)/2} expected_residuals_nonorm = {2-0, 2-1} + expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ + 0.5*np.log(2*np.pi*np.array([2, 2])**2).sum() return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) + simulation_df, expected_residuals, expected_residuals_nonorm, + expected_llh) def model_scalings(): @@ -96,9 +103,12 @@ def model_scalings(): expected_residuals = {(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2} expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} + expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ + 0.5*np.log(2*np.pi*np.array([2, 2])**2*np.array([0.5, 1])**2).sum() return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) + simulation_df, expected_residuals, expected_residuals_nonorm, + expected_llh) def model_non_numeric_overrides(): @@ -131,23 +141,65 @@ def model_non_numeric_overrides(): expected_residuals = {(np.log(2)-np.log(0.5))/(2*7+8+4), (np.log(3)-np.log(1))/(2*2+3+4)} expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} + expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ + 0.5*np.log(2*np.pi*np.array([2*7+8+4, 2*2+3+4])**2 + * np.array([0.5, 1])**2).sum() return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) + simulation_df, expected_residuals, expected_residuals_nonorm, + expected_llh) + + +def model_custom_likelihood(): + """Model with customized likelihoods.""" + measurement_df = pd.DataFrame(data={ + OBSERVABLE_ID: ['obs_a', 'obs_b'], + SIMULATION_CONDITION_ID: ['c0', 'c0'], + TIME: [5, 10], + MEASUREMENT: [0.5, 2] + }) + + observable_df = pd.DataFrame(data={ + OBSERVABLE_ID: ['obs_a', 'obs_b'], + OBSERVABLE_FORMULA: ['A', 'B'], + OBSERVABLE_TRANSFORMATION: [LOG, LIN], + NOISE_FORMULA: [2, 1.5], + NOISE_DISTRIBUTION: [LAPLACE, LAPLACE] + }).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame(data={ + PARAMETER_ID: ['par1', 'par2'], + NOMINAL_VALUE: [3, 4] + }).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION}) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = {(np.log(2)-np.log(0.5))/2, (3-2)/1.5} + expected_residuals_nonorm = {np.log(2)-np.log(0.5), 3-2} + expected_llh = np.abs(list(expected_residuals)).sum() + \ + np.log(2*np.array([2, 1.5])*np.array([0.5, 1])).sum() + + return (measurement_df, observable_df, parameter_df, + simulation_df, expected_residuals, expected_residuals_nonorm, + expected_llh) @pytest.fixture def models(): """Test model collection covering different features.""" return [model_simple(), model_replicates(), - model_scalings(), model_non_numeric_overrides()] + model_scalings(), model_non_numeric_overrides(), + model_custom_likelihood()] def test_calculate_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals.""" - for model in models: + for i_model, model in enumerate(models): + print(f"Model {i_model}") (measurement_df, observable_df, parameter_df, simulation_df, - expected_residuals, _) = model + expected_residuals, _, _) = model residual_dfs = calculate_residuals( measurement_df, simulation_df, observable_df, parameter_df) assert set(residual_dfs[0][RESIDUAL]) == pytest.approx( @@ -156,9 +208,10 @@ def test_calculate_residuals(models): # pylint: disable=W0621 def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals without normalization.""" - for model in models: + for i_model, model in enumerate(models): + print(f"Model {i_model}") (measurement_df, observable_df, parameter_df, simulation_df, - _, expected_residuals_nonorm) = model + _, expected_residuals_nonorm, _) = model residual_dfs = calculate_residuals( measurement_df, simulation_df, observable_df, parameter_df, normalize=False) @@ -168,11 +221,60 @@ def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 def test_calculate_chi2(models): # pylint: disable=W0621 """Test calculate.calculate_chi2.""" - for model in models: + for i_model, model in enumerate(models): + print(f"Model {i_model}") (measurement_df, observable_df, parameter_df, simulation_df, - expected_residuals, _) = model + expected_residuals, _, _) = model chi2 = calculate_chi2( measurement_df, simulation_df, observable_df, parameter_df) expected = sum(np.array(list(expected_residuals))**2) assert chi2 == pytest.approx(expected) + + +def test_calculate_llh(models): # pylint: disable=W0621 + """Test calculate.calculate_llh.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + (measurement_df, observable_df, parameter_df, simulation_df, + _, _, expected_llh) = model + llh = calculate_llh( + measurement_df, simulation_df, observable_df, parameter_df) + assert llh == pytest.approx(expected_llh) or expected_llh is None + + +def test_calculate_single_llh(): + m, s, sigma = 5.3, 4.5, 1.6 + pi, log, log10 = np.pi, np.log, np.log10 + + llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, + noise_distribution=NORMAL, scale=LIN) + expected_llh = 0.5 * (((s-m)/sigma)**2 + log(2*pi*sigma**2)) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, + noise_distribution=NORMAL, scale=LOG) + expected_llh = 0.5 * (((log(s)-log(m))/sigma)**2 + + log(2*pi*sigma**2*m**2)) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, + noise_distribution=NORMAL, scale=LOG10) + expected_llh = 0.5 * (((log10(s)-log10(m))/sigma)**2 + + log(2*pi*sigma**2*m**2)) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, + noise_distribution=LAPLACE, scale=LIN) + expected_llh = abs((s-m)/sigma) + log(2*sigma) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, + noise_distribution=LAPLACE, scale=LOG) + expected_llh = abs((log(s)-log(m))/sigma) + log(2*sigma*m) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, + noise_distribution=LAPLACE, scale=LOG10) + expected_llh = abs((log10(s)-log10(m))/sigma) + log(2*sigma*m) + assert llh == pytest.approx(expected_llh) From ebde39fddbfa9063529c0139a7ef0647ece4035e Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 17:13:35 +0100 Subject: [PATCH 08/10] fixup --- tests/test_calculate.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/tests/test_calculate.py b/tests/test_calculate.py index e397ed06..3f2d5a01 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -1,11 +1,7 @@ """Tests related to petab.calculate.""" -<<<<<<< HEAD from petab import (calculate_residuals, calculate_chi2, calculate_llh, calculate_single_llh) -======= -from petab import calculate_residuals, calculate_chi2 ->>>>>>> origin/develop from petab.C import * import pandas as pd import numpy as np @@ -38,7 +34,6 @@ def model_simple(): expected_residuals = {(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3} expected_residuals_nonorm = {2-0, 2-1, 19-20, 20-22} -<<<<<<< HEAD expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ 0.5*np.log(2*np.pi*np.array([2, 2, 3, 3])**2).sum() @@ -47,13 +42,6 @@ def model_simple(): expected_llh) -======= - - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) - - ->>>>>>> origin/develop def model_replicates(): """Model with replicates.""" measurement_df = pd.DataFrame(data={ @@ -80,18 +68,12 @@ def model_replicates(): expected_residuals = {(2-0)/2, (2-1)/2} expected_residuals_nonorm = {2-0, 2-1} -<<<<<<< HEAD expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ 0.5*np.log(2*np.pi*np.array([2, 2])**2).sum() return (measurement_df, observable_df, parameter_df, simulation_df, expected_residuals, expected_residuals_nonorm, expected_llh) -======= - - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) ->>>>>>> origin/develop def model_scalings(): @@ -121,18 +103,12 @@ def model_scalings(): expected_residuals = {(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2} expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} -<<<<<<< HEAD expected_llh = 0.5*(np.array(list(expected_residuals))**2).sum() + \ 0.5*np.log(2*np.pi*np.array([2, 2])**2*np.array([0.5, 1])**2).sum() return (measurement_df, observable_df, parameter_df, simulation_df, expected_residuals, expected_residuals_nonorm, expected_llh) -======= - - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm) ->>>>>>> origin/develop def model_non_numeric_overrides(): From 146ccf060c050cd51e13e1d331c10ea587ab207c Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 17:18:26 +0100 Subject: [PATCH 09/10] add documentation --- petab/calculate.py | 31 ++++++++++++++++++++++++++++++- tests/test_calculate.py | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/petab/calculate.py b/petab/calculate.py index fe025053..11058455 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -238,6 +238,21 @@ def calculate_llh( observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], ) -> float: + """Calculate total log likelihood. + + Arguments: + measurement_dfs: + The problem measurement tables. + simulation_dfs: + Simulation tables corresponding to the measurement tables. + observable_dfs: + The problem observable tables. + parameter_dfs: + The problem parameter tables. + + Returns: + llh: The log-likelihood. + """ # convenience if isinstance(measurement_dfs, pd.DataFrame): measurement_dfs = [measurement_dfs] @@ -264,6 +279,8 @@ def calculate_llh_for_table( simulation_df: pd.DataFrame, observable_df: pd.DataFrame, parameter_df: pd.DataFrame) -> float: + """Calculate log-likelihood for one set of tables. For the arguments, see + `calculate_llh`.""" llhs = [] # matching columns @@ -309,7 +326,19 @@ def calculate_single_llh( scale: str, noise_distribution: str, noise_value: float) -> float: - """Calculate a single log likelihood.""" + """Calculate a single log likelihood. + + Arguments: + measurement: The measurement value. + simulation: The simulated value. + scale: The scale on which the noise model is to be applied. + noise_distribution: The noise distribution. + noise_value: The considered noise models possess a single noise + parameter, e.g. the normal standard deviation. + + Returns: + llh: The computed likelihood for the given values. + """ # short-hand m, s, sigma = measurement, simulation, noise_value pi, log, log10 = np.pi, np.log, np.log10 diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 3f2d5a01..d132feba 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -244,6 +244,7 @@ def test_calculate_llh(models): # pylint: disable=W0621 def test_calculate_single_llh(): + """Test calculate.calculate_single_llh.""" m, s, sigma = 5.3, 4.5, 1.6 pi, log, log10 = np.pi, np.log, np.log10 From 43b356e3aea251d42b0407f3ded517cf116a49c0 Mon Sep 17 00:00:00 2001 From: yannikschaelte Date: Thu, 27 Feb 2020 17:19:05 +0100 Subject: [PATCH 10/10] fix codacy --- petab/calculate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/petab/calculate.py b/petab/calculate.py index 11058455..e03955f4 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -233,10 +233,10 @@ def calculate_chi2_for_table_from_residuals( def calculate_llh( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], + measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], + simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], + observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], + parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], ) -> float: """Calculate total log likelihood.