# Infer Model Parameters for Individuals in Lung Cancer Treatment Group with Medium Erlotinib Dose

In [1]:
import erlotinib as erlo


# Load the dataset of the lung cancer medium erlotinib dose group
library = erlo.DataLibrary()
data = library.lung_cancer_medium_erlotinib_dose_group()

# Axis labels, written as LaTeX text strings
axis_labels = {
    'time_label': r'$\text{Time in day}$',
    'biom_label': r'$\text{Plasma concentration in ng/mL}$',
    'dose_label': r'$\text{Dose in mg}$',
}

# Plot the plasma concentration measurements as a PK time series
fig = erlo.plots.PKTimeSeriesPlot()
fig.add_data(data, biomarker='Plasma concentration')
fig.set_axis_labels(**axis_labels)

# Render the figure
fig.show()

**Figure 1:** Visualisation of the measured erlotinib pharmacokinetic data in 8 mice with patient-derived lung cancer implants. The bottom subplot shows the measured plasma concentration time series. The top subplot displays the administered oral doses of erlotinib for each mouse.

## Build prior predictive model

In [2]:
import numpy as np
import pints

# Mechanistic model: one-compartment PK model taken from the model library.
# The dose is administered to the central compartment with direct=False
# (presumably via the separate 'dose' compartment named below).
path = erlo.ModelLibrary().one_compartment_pk_model()
mechanistic_model = erlo.PharmacokineticModel(path)
mechanistic_model.set_administration(compartment='central', direct=False)

# Map the model's internal parameter names to readable labels
parameter_names = {
    'central.drug_amount': 'Initial plasma drug amount in mg',
    'dose.drug_amount': 'Initial dose comp. drug amount in mg',
    'central.size': 'Volume of distribution in L',
    'dose.absorption_rate': 'Absorption rate in 1/d',
    'myokit.elimination_rate': 'Elimination rate in 1/d',
}
mechanistic_model.set_parameter_names(names=parameter_names)

# Predictive model = mechanistic model + constant-and-multiplicative
# Gaussian error model
error_models = [erlo.ConstantAndMultiplicativeGaussianErrorModel()]
predictive_model = erlo.PredictiveModel(mechanistic_model, error_models)

# Both initial drug amounts are fixed to zero and therefore not inferred
initial_amounts = {
    'Initial plasma drug amount in mg': 0,
    'Initial dose comp. drug amount in mg': 0,
}
predictive_model.fix_parameters(name_value_dict=initial_amounts)

# Dosing regimen: 14 doses of 0.7, the first at time 3, repeated with
# period 1 (presumably mg and days, matching the labels used in this file)
predictive_model.set_dosing_regimen(dose=0.7, start=3, period=1, num=14)


def _positive_gaussian_prior(mean, sd):
    """Return a Gaussian log-prior truncated to the interval [0, inf)."""
    return pints.TruncatedGaussianLogPrior(mean=mean, sd=sd, a=0, b=np.inf)


# Priors for the three remaining mechanistic parameters and the two noise
# parameters
log_prior_volume = _positive_gaussian_prior(mean=1, sd=1)
log_prior_absorption_rate = _positive_gaussian_prior(mean=55, sd=30)
log_prior_elimination_rate = _positive_gaussian_prior(mean=10, sd=10)
log_prior_sigma_base = _positive_gaussian_prior(mean=0.1, sd=0.1)
log_prior_sigma_rel = _positive_gaussian_prior(mean=0.1, sd=0.3)

# Joint prior over all five parameters
log_prior = pints.ComposedLogPrior(
    log_prior_volume,
    log_prior_absorption_rate,
    log_prior_elimination_rate,
    log_prior_sigma_base,
    log_prior_sigma_rel)

# Define the prior predictive model (sampling from it is a separate step)
model = erlo.PriorPredictiveModel(predictive_model, log_prior)

### Visualise prior predictive model

In [3]:
import numpy as np

# Sampling settings: fixed seed, 1000 samples, and 1000 equidistant time
# points between 0 and 30
seed = 42
n_samples = 1000
times = np.linspace(start=0, stop=30, num=1000)

# Draw samples from the prior predictive model
samples = model.sample(times, n_samples, seed)

# Plot the samples with the 30%, 60% and 90% bulk probabilities
axis_labels = {
    'xlabel': r'$\text{Time in day}$',
    'ylabel': r'$\text{Plasma conc. in ng/mL}$',
}
fig = erlo.plots.PDPredictivePlot()
fig.add_prediction(data=samples, bulk_probs=[0.3, 0.6, 0.9])
fig.set_axis_labels(**axis_labels)
fig.show()

**Figure 2:** Prior predictive model for the plasma concentration over time.

## Find MAP estimates for model parameters

In [4]:
# TODO: Temporary manual masking of the data.
# Remove the rows of the biomarkers that are not used for the inference,
# one biomarker after the other.
for excluded_biomarker in ('Body weight', 'Tumour volume'):
    mask = data['Biomarker'] != excluded_biomarker
    data = data[mask]

# Five parameters remain free once the three parameters below are fixed;
# this matches the five log-priors that are set further down.
n_free_parameters = 5

# Set up the inference problem: data, mechanistic model and error model
problem = erlo.ProblemModellingController(data, biom_keys=['Measurement'])
problem.set_mechanistic_model(mechanistic_model)
likelihoods = [pints.ConstantAndMultiplicativeGaussianLogLikelihood]
problem.set_error_model(error_models=likelihoods)

# Fix the initial drug amounts and the second noise parameter
fixed_parameters = {
    'Initial plasma drug amount in mg': 0,
    'Initial dose comp. drug amount in mg': 0,
    'Noise param 2': 1,
}
problem.fix_parameters(name_value_dict=fixed_parameters)

# Use a pooled population model for every free parameter
population_models = [erlo.PooledModel for _ in range(n_free_parameters)]
problem.set_population_model(population_models)

# Attach the priors and build the log-posteriors
log_priors = [
    log_prior_volume,
    log_prior_absorption_rate,
    log_prior_elimination_rate,
    log_prior_sigma_base,
    log_prior_sigma_rel,
]
problem.set_log_prior(log_priors=log_priors)
log_posteriors = problem.get_log_posteriors()

# Find the maximum a posteriori probability (MAP) estimates; the search is
# performed on log-transformed parameters
log_transform = pints.LogTransformation(n_parameters=n_free_parameters)
opt = erlo.OptimisationController(log_posteriors)
opt.set_transform(transform=log_transform)
map_estimates = opt.run(show_run_progress_bar=True)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




### Visualise optimisation results

In [5]:
# Plot the MAP estimates returned by the optimisation run
fig = erlo.plots.ParameterEstimatePlot()
fig.add_data(map_estimates)

# Render the figure
fig.show()

**Figure 3:** Maximum a posteriori (MAP) estimates of the model parameters. The y axis displays the estimated parameter value, and the x axis the corresponding individual.

## Find posterior probability distribution

In [6]:
# Sampling settings: the sampler works on log-transformed parameters
log_transform = pints.LogTransformation(n_parameters=5)
n_iterations = 4000

# Configure the sampling controller; the runs start from the MAP estimates
sampler = erlo.SamplingController(log_posteriors)
sampler.set_initial_parameters(data=map_estimates)
sampler.set_transform(transform=log_transform)

# Draw the posterior samples
posterior_samples = sampler.run(
    n_iterations=n_iterations, show_progress_bar=True)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




### Visualise marginal posteriors

In [7]:
# Number of iterations passed to the plot as warm-up
warm_up_iterations = 2000

# Plot the marginal posterior distributions of the sampled parameters
fig = erlo.plots.MarginalPosteriorPlot()
fig.add_data(data=posterior_samples, warm_up_iter=warm_up_iterations)

# Render the figure
fig.show()

**Figure 4:** Marginal posterior distributions of model parameters. The y axis displays the sampled parameter value, and the x axis the binned number of samples for each individual.

## Posterior predictive check

In [8]:
# Sampling settings: 1000 samples at 1000 equidistant time points between
# 0 and 30
n_samples = 1000
times = np.linspace(start=0, stop=30, num=1000)

# Name mapping between the posterior samples' noise parameters and the
# predictive model's noise parameters
param_map = {'Sigma base': 'Noise param 1', 'Sigma rel.': 'Noise param 3'}

# Build the posterior predictive model and sample from it
model = erlo.PosteriorPredictiveModel(
    predictive_model, posterior_samples, param_map=param_map)
samples = model.sample(times, n_samples)

# Keep only the plasma concentration measurements for the overlay
mask = data['Biomarker'] == 'Plasma concentration'
data = data[mask]

# Plot the posterior predictive samples together with the measurements
axis_labels = {
    'xlabel': r'$\text{Time in day}$',
    'ylabel': r'$\text{Plasma concentration in ng/mL}$',
}
fig = erlo.plots.PDPredictivePlot()
fig.add_prediction(data=samples)
fig.add_data(data)
fig.set_axis_labels(**axis_labels)
fig.show()

**Figure 5:** Posterior predictive model of future measurements of a virtual mouse with a daily dose of 0.7 mg of erlotinib. The shaded area illustrates the approximate 90% bulk probability of the posterior predictive model constructed from 1000 samples from the posterior distribution and subsequent virtual "measurements" for each of 1000 equidistant time points. The circles represent the measurements of the mice that were used to infer the posterior distribution.