# Infer Population Model Parameters from Individuals in Lung Cancer Control Group

In [15]:
import os

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pints
from scipy.optimize import minimize, basinhopping
import xarray as xr

import erlotinib as erlo

## Show control group data

In [2]:
# Get data
data = erlo.DataLibrary().lung_cancer_control_group()

# Create scatter plot
fig = erlo.plots.PDTimeSeriesPlot()
fig.add_data(data, biomarker='Tumour volume')
fig.set_axis_labels(xlabel=r'$\text{Time in day}$', ylabel=r'$\text{Tumour volume in cm}^3$')

# Show figure
fig.show()

**Figure 1:** Visualisation of the measured tumour growth in 8 mice with patient-derived lung cancer implants.

## Build model

In [3]:
# Define mechanistic model
path = erlo.ModelLibrary().tumour_growth_inhibition_model_koch_reparametrised()
mechanistic_model = erlo.PharmacodynamicModel(path)
mechanistic_model.set_parameter_names(names={
    'myokit.tumour_volume': 'Tumour volume in cm^3',
    'myokit.critical_volume': 'Critical volume in cm^3',
    'myokit.drug_concentration': 'Drug concentration in mg/L',
    'myokit.kappa': 'Potency in L/mg/day',
    'myokit.lambda': 'Exponential growth rate in 1/day'})
mechanistic_model.set_output_names({
    'myokit.tumour_volume': 'Tumour volume'})

# Define error model
error_model = erlo.ConstantAndMultiplicativeGaussianErrorModel()

# Define population model
population_model = [
    erlo.LogNormalModel(),  # Initial tumour volume
    erlo.LogNormalModel(),  # Critical tumour volume
    erlo.LogNormalModel(),  # Tumour growth rate
    erlo.PooledModel(),     # Base noise
    erlo.PooledModel()]     # Relative noise

# Build model
problem = erlo.ProblemModellingController(
    mechanistic_model, error_model)
problem.fix_parameters({
    'Drug concentration in mg/L': 0,
    'Potency in L/mg/day': 0})
problem.set_population_model(population_model)

## Prior predictive checks

### Population model

In [4]:
# Define prior distribution
log_priors = [
    pints.TruncatedGaussianLogPrior(mean=0.1, sd=1, a=0, b=np.inf),      # Mean Initial tumour volume
    pints.TruncatedGaussianLogPrior(mean=1, sd=1, a=0, b=np.inf),        # Std. Initial tumour volume
    pints.TruncatedGaussianLogPrior(mean=1, sd=1, a=0, b=np.inf),        # Mean Critical tumour volume
    pints.TruncatedGaussianLogPrior(mean=1, sd=1, a=0, b=np.inf),        # Std. Critical tumour volume
    pints.TruncatedGaussianLogPrior(mean=0.1, sd=1, a=0, b=np.inf),      # Mean Growth rate
    pints.TruncatedGaussianLogPrior(mean=1, sd=1, a=0, b=np.inf),        # Std. Growth rate
    pints.TruncatedGaussianLogPrior(mean=0.1, sd=1, a=0, b=np.inf),      # Pooled Sigma base
    pints.TruncatedGaussianLogPrior(mean=0.1, sd=0.1, a=0, b=np.inf)]    # Pooled Sigma rel.
log_prior = pints.ComposedLogPrior(*log_priors)

# Define prior predictive model
predictive_model = problem.get_predictive_model()
model = erlo.PriorPredictiveModel(predictive_model, log_prior)

# Sample from prior predictive model
seed = 42
n_samples = 100
times = np.linspace(0, 30)
samples = model.sample(times, n_samples, seed)

# Visualise prior predictive model
fig = erlo.plots.PDPredictivePlot()
fig.add_prediction(data=samples, bulk_probs=[0.3, 0.6, 0.9])
fig.set_axis_labels(xlabel=r'$\text{Time in day}$', ylabel=r'$\text{Tumour volume in cm}^3$')
fig.show()

**Figure 3:** Approximate prior predictive model for the tumour growth in a population over time. The shaded areas indicate the 30%, 60% and 90% bulk of the prior predictive model (from dark to light). The prior predictive model was approximated by sampling 1000 parameters from the prior distribution, and subsequent sampling of 50 equidistant time points from the predictive model for each parameter set.

## Find maximum a posteriori estimates

In [14]:
# # Define log-posterior
# problem.set_data(data)
# problem.set_log_prior(log_priors)
# log_posterior = problem.get_log_posterior()

def fun(log_parameters):
    score, sens = log_posterior.evaluateS1(np.exp(log_parameters))
    return (-score, -sens)

# Run optimisation
initial_parameters = np.log(erlo.InferenceController(log_posterior)._initial_params[0, 0])
print(fun(initial_parameters))
result = minimize(fun=fun, x0=initial_parameters, method='L-BFGS-B', jac=True)
result

(887.5806872798586, array([ 1.10779537e+01,  1.75824844e+01,  8.02920549e+00,  2.81290227e+00,
        1.15025146e+01,  1.79616668e+01,  3.42980396e+00,  1.19900097e+01,
        7.46772257e-01, -4.12736381e-01,  1.21100021e+01,  1.20170349e+01,
        2.08297885e+01,  1.80512361e+01,  2.13357956e+01,  1.97133238e+01,
        2.15448767e+01,  2.79472840e+01,  3.51394593e+00, -1.41680989e+01,
        9.45216450e+01,  1.86476172e+02,  4.28948687e+01,  7.05305411e+00,
        1.24961655e+02,  1.22000710e+02,  1.14210884e+01,  9.52245172e+01,
        3.90117604e+00, -4.26422454e-01, -1.91524464e+02, -5.16237998e+03]))


      fun: 163.89696944574575
 hess_inv: <32x32 LbfgsInvHessProduct with dtype=float64>
      jac: array([-5.25444300e+01, -5.80549950e+01, -5.85590456e+00,  3.24996353e+00,
       -2.62607122e+00,  1.30637098e+01,  5.21588886e+00,  1.05863355e+01,
        4.32115758e+00, -1.25332831e+00,  2.08808022e+00,  2.24018404e+00,
       -1.93948127e+00,  3.31989939e+01,  2.03819134e+00,  2.34373700e+00,
        3.16616258e+01,  1.26029078e+00,  3.73479070e+00, -6.62961275e+00,
        6.03173639e+02, -1.26040987e+01, -6.36286173e+01,  5.08148421e+00,
        1.01991950e+03,  2.63453428e+01,  1.01768921e+01,  1.33274962e+03,
        2.94539377e+01,  6.27392797e-01, -5.41082443e+01, -8.75550765e+01])
  message: 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 75
      nit: 15
     njev: 75
   status: 0
  success: True
        x: array([-0.48919098, -1.80529658, -0.55062115,  0.63018652, -0.28573835,
       -0.62918872,  0.34731744, -1.35056125,  0.13015478,  0.43236743,
       -0.032

In [72]:
np.exp(result.x) # 408.5831950704941

array([2.36046564, 0.14658533, 1.42182502, 0.94663521, 1.23525966,
       0.77720641, 1.5563648 , 0.04347341, 1.14594007, 2.19940199,
       0.1919755 , 0.73862421, 0.81298792, 0.3476019 , 1.09445962,
       0.40894549, 6.52755776, 0.09462294, 1.1653343 , 1.52386334,
       2.05931476, 0.14609553, 0.60407322, 1.96516177, 0.5932555 ,
       5.46441817, 0.52333384, 1.47920264, 1.97812413, 2.59163708,
       0.1003342 , 1.24500182])

In [74]:
np.exp(result.x) # 406.1479936996002

array([0.09446702, 0.18655014, 1.26181401, 0.2417204 , 1.74485398,
       0.25178457, 0.89041414, 1.77852053, 0.99345989, 1.94978347,
       0.23178191, 0.16075266, 0.73520212, 0.589075  , 0.4448781 ,
       1.05686917, 1.11843366, 0.20917684, 0.77909957, 0.93673714,
       0.87651453, 0.29139583, 0.35724763, 0.87188396, 2.07263218,
       3.77028637, 0.79882506, 1.40863071, 1.57216136, 1.74930404,
       1.24712023, 1.21970178])

In [76]:
np.exp(result.x)  # 219.54113709105013

array([0.28662575, 0.55805704, 0.44599993, 3.88162178, 1.45028662,
       0.96866915, 0.45767183, 0.34859964, 1.30822732, 1.6135591 ,
       3.47809987, 2.51005898, 2.29505125, 3.64998648, 1.81412804,
       1.27459161, 2.47101995, 1.759051  , 2.07950533, 1.11994698,
       0.06311923, 0.40863893, 0.04879505, 0.04811052, 0.01846206,
       0.0397989 , 0.00982949, 0.0149758 , 0.1655798 , 0.39161306,
       0.60035052, 0.26296883])

In [11]:
np.exp(result.x)  # 36.90877472832281

array([4.52380309e-02, 5.62316819e-02, 2.78592268e-02, 1.16755830e-01,
       2.87420884e-02, 3.92802527e-02, 3.76033488e-01, 1.93734719e-02,
       2.81535038e-01, 1.53735261e+00, 2.25032257e-01, 2.24560726e-01,
       1.27968825e-01, 2.84840127e-03, 5.23956953e-02, 4.01977985e-01,
       6.34591303e-01, 7.37799571e-03, 5.66239153e-01, 2.50295820e+00,
       8.72779075e-01, 2.82457289e-01, 9.24643985e-01, 5.51886601e+00,
       7.60759740e-01, 3.55138960e-01, 2.73160152e-01, 1.34323039e+00,
       1.05834342e+00, 1.18811488e+00, 4.86214413e-02, 5.52587918e-01])

Running 3 times produces three vastly different results!

In [29]:
# Run optimisation
initial_parameters = np.log(erlo.InferenceController(log_posterior)._initial_params[0, 0])
minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
result = basinhopping(
    func=fun, x0=initial_parameters, minimizer_kwargs=minimizer_kwargs, niter=10000)
result


An error occured while solving the mechanistic model: 
A numerical error occurred during simulation at t = 0.
Last reached state: 
  myokit.tumour_volume =  7.85715458083556190e+43
Inputs for binding:
  time        = 0.0
  pace        = 0.0
  realtime    = 0.0
  evaluations = 227.0
Function CVode() failed with flag -4 CV_CONV_FAILURE: Convergence test failures occurred too many times during one internal time step or minimum step size was reached..
 A score of -infinity is returned.


An error occured while solving the mechanistic model: 
A numerical error occurred during simulation at t = 0.
Last reached state: 
  myokit.tumour_volume =  1.90727848515339273e+32
Inputs for binding:
  time        = 0.0
  pace        = 0.0
  realtime    = 0.0
  evaluations = 227.0
Function CVode() failed with flag -4 CV_CONV_FAILURE: Convergence test failures occurred too many times during one internal time step or minimum step size was reached..
 A score of -infinity is returned.


overflow encountered 

Exception: Maximum number of zero-length steps taken at t=0.420697

In [24]:
np.exp(result.x)  # -98.49277693232358

array([0.12355877, 0.09482676, 0.20195665, 0.13704376, 0.16121391,
       0.14450629, 0.11772658, 0.08777106, 0.48000457, 1.69061861,
       1.13684717, 0.84009621, 0.44005131, 0.77529497, 1.27877059,
       0.505204  , 0.52488828, 0.36184197, 1.12412159, 0.9707243 ,
       0.24691003, 0.08649179, 0.3480514 , 0.13673611, 0.1075047 ,
       0.27478369, 0.10179892, 0.11468437, 0.21403395, 1.46571222,
       0.0019662 , 0.23576104])

In [22]:
log_posterior.get_parameter_names(include_ids=True)

['ID 40 Tumour volume in cm^3',
 'ID 94 Tumour volume in cm^3',
 'ID 95 Tumour volume in cm^3',
 'ID 136 Tumour volume in cm^3',
 'ID 140 Tumour volume in cm^3',
 'ID 155 Tumour volume in cm^3',
 'ID 169 Tumour volume in cm^3',
 'ID 170 Tumour volume in cm^3',
 'Mean Tumour volume in cm^3',
 'Std. Tumour volume in cm^3',
 'ID 40 Critical volume in cm^3',
 'ID 94 Critical volume in cm^3',
 'ID 95 Critical volume in cm^3',
 'ID 136 Critical volume in cm^3',
 'ID 140 Critical volume in cm^3',
 'ID 155 Critical volume in cm^3',
 'ID 169 Critical volume in cm^3',
 'ID 170 Critical volume in cm^3',
 'Mean Critical volume in cm^3',
 'Std. Critical volume in cm^3',
 'ID 40 Exponential growth rate in 1/day',
 'ID 94 Exponential growth rate in 1/day',
 'ID 95 Exponential growth rate in 1/day',
 'ID 136 Exponential growth rate in 1/day',
 'ID 140 Exponential growth rate in 1/day',
 'ID 155 Exponential growth rate in 1/day',
 'ID 169 Exponential growth rate in 1/day',
 'ID 170 Exponential growth r