# Infer Pooled Model Parameters from Individuals in Lung Cancer Control Group

## Show control group data

In [1]:
import os

import pandas as pd

import erlotinib as erlo


# Load the lung cancer control group data from the erlotinib data library
library = erlo.DataLibrary()
data = library.lung_cancer_control_group()

# Create a scatter plot of the tumour volume measurements over time,
# grouped by the individual IDs in the '#ID' column
fig = erlo.plots.PDTimeSeriesPlot()
fig.add_data(
    data,
    id_key='#ID',
    time_key='TIME in day',
    biom_key='TUMOUR VOLUME in cm^3')
fig.set_axis_labels(
    xlabel=r'$\text{Time in day}$',
    ylabel=r'$\text{Tumour volume in cm}^3$')

# Render the figure
fig.show()

**Figure 1:** Tumour volume in cm^3 over time in days for each individual in the lung cancer control group.

## Find MAP estimates for pooled model parameters

In [2]:
import numpy as np
import pints


# Seed NumPy's global random number generator so that re-running this cell
# starts from the same random state.
# NOTE(review): assumes the optimiser and the prior sampling draw their
# randomness from NumPy's global generator -- confirm against the pints /
# erlotinib documentation.
np.random.seed(1)

# Define model
path = erlo.ModelLibrary().tumour_growth_inhibition_model_koch()
model = erlo.PharmacodynamicModel(path)

# Find maximum a posteriori probability estimates (MAP)
log_likelihoods = []
ids = data['#ID'].unique()
for individual in ids:
    # Get data for individual (single .loc lookup instead of chained indexing)
    mask = data['#ID'] == individual
    times = data.loc[mask, 'TIME in day'].to_numpy()
    observed_volumes = data.loc[mask, 'TUMOUR VOLUME in cm^3'].to_numpy()

    # Create one likelihood per individual
    problem = erlo.InverseProblem(model, times, observed_volumes)
    log_likelihoods.append(
        pints.ConstantAndMultiplicativeGaussianLogLikelihood(problem))

# Create pooled posterior. All 8 parameters are pooled across individuals;
# the 8 priors below are composed in the same order as the entries of the
# fix mask further down.
log_likelihood = pints.PooledLogPDF(log_pdfs=log_likelihoods, pooled=[True]*8)
log_prior_tumour_volume = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_drug_conc = pints.UniformLogPrior(-1E-3, 1E-3)  # Fixed to zero below
log_prior_kappa = pints.UniformLogPrior(-1E-3, 1E-3)  # Fixed to zero below
log_prior_lambda_0 = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_lambda_1 = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_sigma_base = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_eta = pints.UniformLogPrior(1E-3, 1E3)  # Fixed to 1 below
log_prior_sigma_rel = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior = pints.ComposedLogPrior(
    log_prior_tumour_volume,
    log_prior_drug_conc,
    log_prior_kappa,
    log_prior_lambda_0,
    log_prior_lambda_1,
    log_prior_sigma_base,
    log_prior_eta,
    log_prior_sigma_rel)
log_posterior = pints.LogPosterior(log_likelihood, log_prior)

# Set up optimisation controller
optimiser = erlo.OptimisationController(log_posterior)

# Fix potency kappa, and drug concentration to 0, as well as eta to 1
# (This is the control group, and thus there is no drug).
# The True entries mark, in prior order, drug concentration, kappa and eta;
# `values` lists their fixed values in that same order.
optimiser.fix_parameters(
    mask=[False, True, True, False, False, False, True, False],
    values=[0, 0, 1])

# Run optimisation
result = optimiser.run(log_to_screen=True)

# Label ID as pooled
result['ID'] = 'Pooled'

HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))


Maximising LogPDF
Using Covariance Matrix Adaptation Evolution Strategy (CMA-ES)
Running in parallel with 8 worker processes.
Population size: 8
Iter. Eval. Best      Time m:s
0     8     -635.4527   0:00.1
1     16    -630.515    0:00.1
2     24    -624.1829   0:00.1
3     32    -615.4756   0:00.1
20    168   -304.3325   0:00.4
40    328   -244.9372   0:00.6
60    488   -162.9111   0:00.9
80    648   -157.8646   0:01.1
100   808   -154.9664   0:01.4
120   968   -116.4934   0:01.6
140   1128  -88.52092   0:01.9
160   1288  -87.64113   0:02.2
180   1448  -86.4995    0:02.4
200   1608  -85.77796   0:02.7
220   1768  -85.15311   0:03.0
240   1928  -74.5623    0:03.3
260   2088  -44.01553   0:03.5
280   2248  -27.70515   0:03.8
300   2408  -25.22678   0:04.1
320   2568  -25.03119   0:04.4
340   2728  -24.97868   0:04.6
360   2888  -24.97616   0:04.9
380   3048  -24.97539   0:05.2
400   3208  -24.97321   0:05.6
420   3368  -24.97245   0:05.9
440   3528  -24.97238   0:06.3
460   3688  -24.9

### Visualise optimisation results

In [5]:
# Plot the parameter estimates stored in `result`.
# NOTE(review): `result` is reassigned by the sampling cell further down, so
# this cell presumably expects the optimisation output -- run it before the
# sampler.
fig = erlo.plots.ParameterEstimatePlot()
fig.add_data(result)

# Render the figure
fig.show()

**Figure 2:** Parameter estimates of the pooled model found by the optimisation.

## Find posterior probability distribution

In [7]:
import numpy as np
import pints
from tqdm.notebook import tqdm


# Seed NumPy's global random number generator so that re-running this cell
# starts from the same random state.
# NOTE(review): assumes the MCMC sampler and the prior sampling draw their
# randomness from NumPy's global generator -- confirm against the pints /
# erlotinib documentation.
np.random.seed(1)

# Define model
path = erlo.ModelLibrary().tumour_growth_inhibition_model_koch()
model = erlo.PharmacodynamicModel(path)

# Find posterior probability distributions
log_likelihoods = []
ids = data['#ID'].unique()
for individual in ids:
    # Get data for individual (single .loc lookup instead of chained indexing)
    mask = data['#ID'] == individual
    times = data.loc[mask, 'TIME in day'].to_numpy()
    observed_volumes = data.loc[mask, 'TUMOUR VOLUME in cm^3'].to_numpy()

    # Create one likelihood per individual
    problem = erlo.InverseProblem(model, times, observed_volumes)
    log_likelihoods.append(
        pints.ConstantAndMultiplicativeGaussianLogLikelihood(problem))

# Create pooled posterior. All 8 parameters are pooled across individuals;
# the 8 priors below are composed in the same order as the entries of the
# fix mask further down.
log_likelihood = pints.PooledLogPDF(log_pdfs=log_likelihoods, pooled=[True]*8)
log_prior_tumour_volume = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_drug_conc = pints.UniformLogPrior(-1E-3, 1E-3)  # Fixed to zero below
log_prior_kappa = pints.UniformLogPrior(-1E-3, 1E-3)  # Fixed to zero below
log_prior_lambda_0 = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_lambda_1 = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_sigma_base = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior_eta = pints.UniformLogPrior(1E-3, 1E3)  # Fixed to 1 below
log_prior_sigma_rel = pints.HalfCauchyLogPrior(location=0, scale=3)
log_prior = pints.ComposedLogPrior(
    log_prior_tumour_volume,
    log_prior_drug_conc,
    log_prior_kappa,
    log_prior_lambda_0,
    log_prior_lambda_1,
    log_prior_sigma_base,
    log_prior_eta,
    log_prior_sigma_rel)
log_posterior = pints.LogPosterior(log_likelihood, log_prior)

# Set up sampling controller
sampler = erlo.SamplingController(log_posterior)

# Fix potency kappa, and drug concentration to 0, as well as eta to 1
# (This is the control group, and thus there is no drug).
# The True entries mark, in prior order, drug concentration, kappa and eta;
# `values` lists their fixed values in that same order.
sampler.fix_parameters(
    mask=[False, True, True, False, False, False, True, False],
    values=[0, 0, 1])

# Run sampler
result = sampler.run(n_iterations=10000)

# Label samples as pooled
result['ID'] = 'Pooled'

Using Haario adaptive covariance MCMC
Generating 10 chains.
Running in parallel with 8 worker processess.
Iter. Eval. Accept.   Accept.   Accept.   Accept.   Accept.   Accept.   Accept.   Accept.   Accept.   Accept.   Time m:s
0     10     0         0         0         0         0         0         0         0         0         0          0:00.1
1     20     0.5       0         0         0         0         0         0         0         0.5       0          0:00.1
2     30     0.667     0.333     0         0.333     0         0         0         0.333     0.667     0.333      0:00.1
3     40     0.75      0.25      0.25      0.25      0         0         0.25      0.5       0.75      0.25       0:00.1
Initial phase completed.
500   5010   0.247505  0.0958    0.311     0.18      0.305     0.325     0.369     0.313     0.329     0.22       0:08.9
1000  10010  0.22      0.234     0.226     0.198     0.23      0.249     0.23      0.272     0.286     0.139      0:16.6
1500  15010  0.211    

In [9]:
# Overwrite the 'ID' entry of the sampling result with 1 (the sampling cell
# set it to 'Pooled').
# NOTE(review): presumably required by the plot below -- confirm, and
# consider setting the label once in the sampling cell so the notebook does
# not depend on running this extra cell.
result['ID'] = 1

In [10]:
# Number of initial iterations handed to the plot as warm-up
warm_up_iterations = 8000

# Plot the samples stored in `result`
fig = erlo.plots.MarginalPosteriorPlot()
fig.add_data(
    data=result,
    warm_up_iter=warm_up_iterations)

# Render the figure
fig.show()