# Parameter Identifiability

If you wish to infer the parameters of a model from data, it would be useful to ensure the parameters can be reliably inferred. If not then the results and any further analysis could be significantly affected and not reproducible. In this notebook, I will teach you how to determine parameter identifiability by going through some examples.

In [1]:
import pandas as pd
import numpy as np
import pints
import plotly.express.colors as pxclrs
import plotly.colors as pclrs
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import figure_factory as ff
from scipy import integrate, optimize
from Code.Profile_loglikelihood import ProfileLogLikelihood


image_file = "../Images/parameter_identifiability/"

base_colour = "rebeccapurple"
heat_col_scale = "dense"
cont_col_scale = "viridis"
disc_col_scale = pxclrs.qualitative.Safe

# To provide consistent results when rerunning the notebook
seed = 321
rng = np.random.default_rng(seed=seed)

## The logistic growth models

The first example I will use, will be comparing three different logistic growth models. The parameter identifiability of these models have already been analysed [1] and thus we can assess the accuracy of our methods in trying to reproduce the results. 

They all are defined in the form
$$        \dot{C} = rCf(C), $$
where $C(t)$ is the population density at time, $t$, $r>0$ is the growth rate and $f(C)$ is a capacity limiting function.

The three different models have different forms for $f(C)$:
- The standard logistic growth model:  $f(C)=(1-C/K)$, for a carrying capacity $K>0$.
- The gompertz growth model: $f(C)=\log(C/K)$
- The Richards growth model: $f(C)=(1-C/K)^{\beta}$ for some $\beta>0$

[1] M.J. Simpson, A.P. Browning et. al. Parameter identifiability and model selection for sigmoid population growth models, Journal of Theoretical Biology, Volume 535, 2022,
https://doi.org/10.1016/j.jtbi.2021.110998.

In [2]:
# Create a class to simulate the growth models
class LogisticGrowth():
    def __init__(self, model="Standard", IC=1) -> None:
        self.model = model
        if self.model == "Richards":
            self.n_parameters = 3  # r, K and beta
        else:
            self.n_parameters = 2  # r and K
        self.params = None
        self.IC = IC

    def set_params(self, parameter):
        if len(parameter) != self.n_parameters:
            raise TypeError(
                "Length of parameter does not match the number of " +
                "parameters, " + str(self.n_parameters) + ", required by" + 
                self.model + " growth model."
            )
        self.params = {
            "r": parameter[0],
            "K": parameter[1],
        }
        if self.model == "Richards":
            self.params["beta"] = parameter[2]
    
    def set_initial_condition(self, IC):
        self.IC = IC

    def ODE(self, C, t):
        """
            Takes the current population density, $C$, and time, $t$, and returns $\dot{C}$
        """
        if self.model == "Standard":
            fC = 1-C/self.params['K']
        elif self.model == "Gompertz":
            fC = np.log(C/self.params['K'])
        elif self.model == "Richards":
            fC = np.power(1-C/self.params['K'], self.params['beta'])

        dC_dt = self.params['r']*C*fC
        return dC_dt
    
    def analytic_solution(self, times):
        """
            Solves the Logistic Growth equation
        """
        C_0 = self.IC
        K = self.params['K']
        beta = 1
        if self.model == "Richards":
            beta = self.params['beta']
        exp_growth = np.exp(-self.params['r']*beta*times)
        if self.model == "Gompertz":
            C_t = K * np.exp(np.log(C_0/K) * exp_growth)
        else:
            Q =  np.power(K, beta)-np.power(C_0, beta)
            C_t = (K*C_0) /np.power((C_0**beta + Q*exp_growth), 1/beta)
        return C_t


    def numerical_solution(self, times):
        """
            Solves the Logistic Growth equation
        """

        # Get the solution to the ODEs
        results = integrate.odeint(
            self.ODE,
            self.IC,
            times
            )
        return results

    def simulate(self, parameter, times):
        """
            Return the population density simulated by the Logistic Growth
            equation for a specific parameter vector
        """
        
        self.set_params(parameter)
        if times[0] == 0:
            return self.analytic_solution(times)
        elif times[0] > 0:  
            # If the times do not start at 0 then odeint will begin simulation at the first timepoint.
            # However the IC is for time t=0 and so we have to solve from that point
            times = np.concatenate((np.array([0]), times))
            return self.analytic_solution(times)[1:]
        else:
            raise ValueError(
                "Times must be greater than or equal to zero"
            )

Now that the model has been built, we can determine the resulting population-time profiles of this model.

In [3]:
r3 = 20
K = 10
betas = [0.1, 0.3, 0.5, 1]
models = ["Standard", "Gompertz", "Richards"]

fig = make_subplots(
    rows=1, cols=len(betas), subplot_titles=np.char.add("beta = ", np.array(betas, dtype=str))
)

for i_m, m in enumerate(models):
    model = LogisticGrowth(m, IC=0.1)
    for col, b in enumerate(betas):
        r_times = np.linspace(0, 10, 100)
        if m == "Standard":
            param = [r3, K]
            times = r_times/param[0]
        elif m == "Gompertz":
            param = [r3*b, K]
            times = r_times/param[0]
        elif m == "Richards":
            param = [r3, K, b]
            times = r_times/(r3*b)
        solution = model.simulate(param, times)
    
        fig.add_trace(
            go.Scatter(
                name=m+" growth model",
                x=r_times,
                y=solution/K,
                mode='lines',
                line=dict(color=disc_col_scale[i_m]),
                showlegend=col==0,
                legendgroup=m
            ),
            row=1,
            col=col+1
        )

fig.update_layout(
    template='plotly_white',
    width=1000,
    height=400,
)
fig.show()

### Simulated data

To determine the correct model and parameters to use for a particular situation, the model needs to be compared to some data. For this we will use datasets simulated from each of the models, using the maximum likelihood estimates accquired from the paper. We will also add constant gaussian noise with standard deviation, $\sigma$, to the solution.


In [4]:
def create_data(m, parameters):
    mech_params = parameters[:-2]
    C_0 = parameters[-2]
    sigma = parameters[-1]
    sim_times = np.linspace(0, 4000, 11)
    model = LogisticGrowth(m, IC=C_0)
    solution = model.simulate(mech_params, sim_times)
    noise = rng.normal(loc=0, scale = sigma, size=len(sim_times))
    noisy_data = solution+noise

    # Any negative data points should be positive
    noisy_data = np.abs(noisy_data)
    data = {
        "model": m,
        "t": sim_times,
        "C": noisy_data,
        "Parameters": parameters,
    }
    return data

In [5]:
r_exp = 0

# Standard Logistic growth model parameters:
r = 2.5*10**(r_exp-3)
K = 80
C_0 = 0.7
sigma = 2.3

standard_data = create_data("Standard", [r, K, C_0, sigma])

# Gompertz logistic growth model parameters:
r = 1.6*10**(r_exp-3)
K = 83
C_0 = 1.1e-4
sigma = 2.7

gompertz_data = create_data("Gompertz", [r, K, C_0, sigma])

# Richards' logistic growth model parameters:
r = 5.5*10**(r_exp-3)
K = 82
C_0 = 9.4e-2
beta = 0.3
sigma = 2.1

richards_data = create_data("Richards", [r, K, beta, C_0, sigma])


In [6]:
fig = go.Figure()
datas = [standard_data, gompertz_data, richards_data]
for i_m, data in enumerate(datas):
    fig.add_trace(
        go.Scatter(
            name= "data",
            x=data["t"],
            y=data["C"],
            mode='markers',
            line=dict(color=disc_col_scale[i_m]),
            showlegend=True,
            legendgroup=data["model"],
            legendgrouptitle=dict(
                text=data["model"]+" growth model"
            )
        )
    )
    parameters = data["Parameters"]
    model = LogisticGrowth(data["model"], IC=parameters[-2])
    more_times = np.linspace(0, 4000, 1000)
    solution = model.simulate(parameters[:-2], more_times)
    fig.add_trace(
        go.Scatter(
            name= "Simulated curve",
            x=more_times,
            y=solution,
            mode='lines',
            line=dict(color=disc_col_scale[i_m]),
            showlegend=True,
            legendgroup=data["model"],
            legendgrouptitle=dict(
                text=data["model"]+" growth model"
            )
        )
    )
fig.update_layout(
    template='plotly_white',
    width=600,
    height=400,
)
fig.write_image(image_file+"/Log_Growth_Data.svg")
fig.show()

### Maximum likelihood

A common approach to parameter inference is to maximise the probability the data, $X^{obs}$, was generated by the model with parameters, $\theta \in \Theta$, over the parameter space $\Theta$. This probability, $P\left(X^{obs}|\theta\right)$, is called the likelihood. We can test whether the parameters of our models, $\theta = (r, K, \beta, C_0, \sigma)$, can be reliably recovered from the data. 

First we need to define the likelihood. For this example, I will use a constant gaussian likelihood as additive noise was used to generate the data. In most caes we will not know the exact type of noise present in a data set and so some assumptions will need to be made. The Likelihood can be calculated as the product of point-wise likelihoods,
\begin{equation*}
    P\left(X^\textrm{obs}|\theta\right) = \prod_{x^{obs}_i \in X^\textrm{obs}} P\left(x^{obs}_i|\theta\right).
\end{equation*}
Alternatively, a computationally easier but equivalent optimisation is the log-likelihood,
\begin{equation*}
    \log\left(P\left(X^\textrm{obs}|\theta\right)\right) = \sum_{x^{obs}_i \in X^\textrm{obs}} \log\left(P\left(x^{obs}_i|\theta\right)\right),
\end{equation*}
As we are using a Gaussian noise, each of these pointwise observations are expected to be normally distributed around the true value, $\mu_i$, i.e.  $x^{obs}_i \sim N\left(\mu_i, \sigma^2\right)$ where $\sigma$ is the standard deviation of the observations. Thus the pointwise likelihood can be formulated as a Gaussian distribution,
\begin{align*}
    P\left(x^{obs}_i|\theta\right) &= P\left(x^{obs}_i|\mu_i = x^{\theta}\left(t_i\right), \sigma\right) \\
    &= \frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left(x^{obs}_i - x^\theta\left(t_i\right)\right)^2\left(\sigma \right)^{-2} },
\end{align*}
where $x^{\theta}$ is the simulated dynamical model under parameters $\theta$. Thus the log-likelihood is 
\begin{align*}
    \log \left(P\left(X^{obs}|\theta\right)\right) &= \sum_{x^{obs}_i \in X^\textrm{obs}} \left(-\frac{1}{2}\log\left(2\pi\right) -\log\left(\sigma \right) -\frac{1}{2}\left(\frac{x^{obs}_i - x^\theta\left(t_i\right)}{\sigma }\right)^2\right).
\end{align*}

So now let's build a function that returns this likelihood.

In [7]:
class GrowthLogLikelihood():
    def __init__(self, times, observations, model) -> None:
        self.model = LogisticGrowth(model=model)
        self.times=times
        self.obs=observations
    
    def log_likelihood(self, parameters):
        # Parameters must be positive
        parameters = np.array(parameters)
        if any(parameters<0):
            return -np.inf
        sigma = parameters[-1]
        C_0 = parameters[-2]
        n_obs = len(self.times)

        self.model.set_initial_condition(C_0)
        x_theta = self.model.simulate(parameters[:-2], self.times)
        # Numerical errors can turn up negative results which affects likelihood calculation
        if any(x_theta<=0):
            x_theta[x_theta<=0] = 1e-20 
        
        term_1 = -0.5*n_obs*np.log(2*np.pi)
        term_2 = -n_obs*np.log(sigma)
        term_3 = -(0.5/sigma**2) * np.sum((self.obs - x_theta)**2)

        log_likelihood = term_1 + term_2 + term_3
        return log_likelihood


And maximise this function for each of the models, to find the MLE. I will use the ProfileLikelihood class to do this, which is found in the code section if you wish to investigate further.

In [8]:
approx_params = np.array([3e-3*10**r_exp, 100, 0.5, 0.001, 1])
n_runs = 10
# acqire 4 random start points for optimisation
start_params = np.abs(1+rng.normal(size=(n_runs, 5)))*approx_params
param_names = ["r", "K", "beta", "C_0", "sigma"]

for data in datas:
    data["log_like"] = GrowthLogLikelihood(data['t'], data['C'], data['model'])
    if data['model'] != 'Richards':
        approx_m = np.delete(approx_params, 2)
        start_m = np.delete(start_params, 2, axis = 1)
        names_m = np.delete(param_names, 2)
    else:
        approx_m = approx_params
        start_m = start_params
        names_m = param_names

    data["PLL"] = ProfileLogLikelihood(data["log_like"].log_likelihood, approx_m)
    MLEs, LL_scores = data["PLL"].max_likelihood_estimate(
        n_runs=n_runs, param_start = start_m,
        opts={
            'optimiser': 'CMAES', 'opt_package': 'pints',
            'random_seed': seed, 'lower_bounds': 0, 'maxiter': 1e5
        }
    )

    print(data['model'] + " Growth Model")
    table_df = pd.DataFrame(
        MLEs,
        columns=names_m
    )
    table_df["Log-likelihood"] = LL_scores
    table_df = table_df.reindex(list(range(0, n_runs))+["True"])
    table_df.loc["True"] = data['Parameters'] + [
        data["log_like"].log_likelihood(data['Parameters'])
    ]
    table_df = table_df.round(4)
    fig = ff.create_table(table_df, index=True)
    fig.update_layout(
        width=600,
        height=300,
    )
    fig.show()


Standard Growth Model


Gompertz Growth Model


Richards Growth Model


From the above results, There may be some identifiability problems with the Standard and Richards' growth model. However, as it is a random process, there is always a chance that the optimiser does not find the global maximum. Five runs of the maximum likelihood estimation is not enough to quantify the identifiability. To achieve this, we need to view the Profile likelihood.

### Profile Likelihood

To determine Parameter Identifiability we can examine the profile likelihoods of the parameters. To do this, each parameter is varied around the optimised value, and the log-likelihood for the other parameters re-optimised, i.e.:
\begin{equation}
    PL\left(\theta_i\right) = \max_{\theta_{-i}}\left(\log\left(P\left(X^\textrm{obs} | \theta_i, \theta_{-i}\right)\right)\right)
\end{equation}
for each parameter $\theta_i$, where $\theta_{-i}$ are all parameters excluding $\theta_i$. This is then compared with a vertical confidence line where the intersection of this line to the profile likelihood curve indicates the confidence interval. For a confidence interval of 95\% a confidence line at 1.92 below the maximum is used. If the curve acquired from $PL\left(\theta_i\right)$ does not cross the confidence line on one or both sides of the maximum then the parameters are unidentifiable. We have already performed a global optimisation, we can use this as the starting point for subsequent optimisations.

In [9]:
# As we have run the MLE function, this has been set for the start of profile likelihood
opts = {
    'method': 'sequential calc.',
    'max_N_opts': 50,
    'interp':0,
    'lower_bounds': 0,
    'random_seed': seed
}
timer = pints.Timer()
slow_times = {}
row_names = list(np.array(param_names)[[0, 1, 3, 2]])
col_names = ["Standard", "Gompertz", "Richards'"]
fig1 = make_subplots(rows=4, cols=3, column_titles=col_names , row_titles=row_names)
for i_m, data in enumerate(datas):
    print(data['model'] )
    data["PLL"].set_options(opts, reset=True)

    bounds = [[0, 0.01*10**r_exp], [60, 100]]
    if data['model'] == 'Standard':
        data['bounds'] = bounds + [[0.01, 2]]
        names_m = zip(range(0, 3), row_names[:-1])
    elif data['model'] == 'Gompertz':
        data['bounds'] = bounds + [[0, 0.015]]
        names_m = zip(range(0, 3), row_names[:-1])
    else:
        data['bounds'] =  bounds + [[0, 2], [0, 2]]
        # data['bounds'][0] = []
        names_m = zip([0, 1, 3, 2], row_names)
    out_LB = data["PLL"].MLE[:-1] < np.array(data['bounds'])[:, 0]
    if np.any(out_LB):
        for i_param, out in enumerate(out_LB):
            if out:
                bounds = data['bounds'][i_param]
                bounds[0] = data["PLL"].MLE[i_param] - abs(data["PLL"].MLE[i_param]-bounds[1])
                data['bounds'][i_param] = bounds
    out_UB = data["PLL"].MLE[:-1] > np.array(data['bounds'])[:, 1]
    if np.any(out_UB):
        for i_param, out in enumerate(out_UB):
            if out:
                bounds = data['bounds'][i_param]
                bounds[1] = data["PLL"].MLE[i_param] + abs(data["PLL"].MLE[i_param]-bounds[0])
                data['bounds'][i_param] = bounds
    data["PLL"].set_param_range(range(0, len(data['bounds'])), bounds=data['bounds'], adapt=False)
    row = 0
    for i_param, param in names_m:
        print("\t", param)
        row += 1
        timer.reset()
        result = data["PLL"].run(i_param, n_points = 200)
        slow_times[(data['model'], param)] = timer.time()
        fig1.add_trace(
            go.Scatter(
                name= "Profile Likelihood",
                x=result[0],
                y=result[1],
                mode='lines',
                line=dict(color=disc_col_scale[i_m], dash='dash'),
                showlegend=i_param==0,
                legendgroup=data["model"],
                legendgrouptitle=dict(
                    text=data["model"]+" growth model"
                )
            ), row=row, col=i_m+1
        )

fig1.update_layout(
    template='plotly_white',
    width=900,
    height=800,
)
fig1.show()

Standard
	 r



invalid value encountered in scalar multiply



Used global optimisation on shape points [0.00040816326530612246]
	 K
	 C_0
Gompertz
	 r
Used global optimisation on shape points [0.0012244897959183673]
	 K
	 C_0
Used global optimisation on shape points [0.0]
Richards
	 r



divide by zero encountered in log


invalid value encountered in multiply



Used global optimisation on shape points [0.0006122448979591836]
	 K
	 C_0



invalid value encountered in divide



Used global optimisation on shape points [0.0]
	 beta



divide by zero encountered in scalar divide



Used global optimisation on shape points [0.0]


In [10]:
opts = {
    'method': 'quadratic approx.',
    'max_N_opts': 40,
    'approx shape N': 10,
    'interp': 0,
    'lower_bounds': 0,
    'seed': seed
}
first_L = True
first_MLE = True
first_U = True
fig2 = make_subplots(rows=4, cols=3)
df_ident = pd.DataFrame(columns=["Model", "Parameter", "Lower", "Upper", "Time Change"])
data["PLL"].result = {}
for i_m, data in enumerate(datas):
    print(data['model'])
    data["PLL"].set_options(opts, reset=True)

    if data['model'] == 'Standard':
        names_m = zip(range(0, 3), row_names[:-1])
    elif data['model'] == 'Gompertz':
        names_m = zip(range(0, 3), row_names[:-1])
    else:
        names_m = zip([0, 1, 3, 2], row_names[:])
    data["PLL"].set_param_range(range(0, len(data['bounds'])), bounds=data['bounds'], adapt=False)
    row = 0
    for i_param, param in names_m:
        print("\t", i_param, param)
        row += 1
        timer.reset()
        result = data["PLL"].run(i_param, n_points = 200)
        quick_time = timer.time()
        time_change = (
            quick_time - slow_times[(data['model'], param)]
        )/slow_times[(data['model'], param)]
        time_change = str(round(time_change*100, 2)) + "%"
        identifiability = data["PLL"].result[i_param]['identifiabilty']
        df_ident.loc[len(df_ident.index)] = [
            data['model'], param, identifiability[0], identifiability[1],
            time_change
        ]
        fig2.add_trace(
            go.Scatter(
                name= "Quick Profile Likelihood",
                x=result[0],
                y=result[1],
                mode='lines',
                line=dict(color=disc_col_scale[i_m]),
                showlegend=i_param==0,
                legendgroup=data["model"],
                legendgrouptitle=dict(
                    text=data["model"]+" growth model"
                )
            ), row=row, col=i_m+1
        )
        x_CI, LL_CI, _ = data["PLL"].result[i_param]['CI']
        if identifiability[0]=='Ident':
            fig2.add_trace(
                go.Scatter(
                    name= "Lower confidence interval",
                    x=[x_CI[0], x_CI[0]],
                    y=[LL_CI[0], -5],  # min(result[1, np.isfinite(result[1])])],
                    mode='lines',
                    line=dict(color='darkgrey', dash='dash'),
                    showlegend=first_L,
                    legendgroup="CI",
                    legendgrouptitle=dict(
                        text="Confidence Interval"
                    )
                ), row=row, col=i_m+1
            )
            first_L = False
        fig2.add_trace(
            go.Scatter(
                name= "MLE",
                x=[x_CI[1], x_CI[1]],
                y=[LL_CI[1], -5],   # min(result[1, np.isfinite(result[1])])],
                mode='lines',
                line=dict(color='darkgrey'),
                showlegend=first_MLE,
                legendgroup="CI",
                legendgrouptitle=dict(
                    text="Confidence Interval"
                )
            ), row=row, col=i_m+1
        )
        first_MLE = False
        if identifiability[1]=='Ident':
            fig2.add_trace(
                go.Scatter(
                    name= "Upper confidence interval",
                    x=[x_CI[2], x_CI[2]],
                    y=[LL_CI[2], -5],   # min(result[1, np.isfinite(result[1])])],
                    mode='lines',
                    line=dict(color='darkgrey', dash='dash'),
                    showlegend=first_U,
                    legendgroup="CI",
                    legendgrouptitle=dict(
                    # k=2, w=weights, s=5e-2*len(weights)
                        text="Confidence Interval"
                    )
                ), row=row, col=i_m+1
            )
            first_U = False
        optimiser_points = data["PLL"].result[i_param]['opt points']
        fig2.add_trace(
            go.Scatter(
                name='Optimiser Evaluations',
                x=optimiser_points[0],
                y=optimiser_points[1],
                mode='markers',
                marker=dict(
                    color=disc_col_scale[i_m],
                    symbol="star-diamond",
                    opacity=0.5
                ),
                showlegend=i_param==0,
                legendgroup=data["model"],
            ), row=row, col=i_m+1
        )
fig = go.Figure(data = fig1.data + fig2.data).set_subplots(
    4, 3, column_titles=col_names , row_titles=row_names
)
fig.update_yaxes(range=[-5, 2.1], title = "Log-likelihood - L^*")
fig.update_layout(
    template='plotly_white',
    width=900,
    height=800,
)
fig.write_image(image_file+"/Log_Growth_PLL_QPLL_r"+str(r_exp)+".svg")
fig.show()

df_ident.replace(['Ident', 'Unident', None], ['Identifiable', 'Unidentible', 'Unknown'])
fig3 = ff.create_table(df_ident)
fig3.update_layout(
    width=600,
    height=300,
)
fig3.write_image(image_file+"/Log_Growth_QPLL_Table_r"+str(r_exp)+".svg")
fig3.show()

Standard
	 0 r



invalid value encountered in scalar multiply


invalid value encountered in scalar subtract


invalid value encountered in scalar subtract



	 1 K
	 2 C_0



invalid value encountered in scalar multiply



Used global optimisation on shape points [0.8944444444444445]
Gompertz
	 0 r
	 1 K



invalid value encountered in scalar multiply


invalid value encountered in scalar multiply



	 2 C_0
Richards
	 0 r



divide by zero encountered in log


invalid value encountered in scalar multiply


Convergence is slow, approximation method may not be appropriate


Convergence is slow, approximation method may not be appropriate


invalid value encountered in multiply


invalid value encountered in scalar multiply


Convergence is slow, approximation method may not be appropriate



Used global optimisation for CI finding on points [0.0001709  0.01730009]
	 1 K



invalid value encountered in scalar subtract


invalid value encountered in scalar subtract


invalid value encountered in scalar multiply



	 3 C_0



invalid value encountered in scalar multiply


Convergence is slow, approximation method may not be appropriate


Convergence is slow, approximation method may not be appropriate


invalid value encountered in divide



Used global optimisation on shape points [0.8888888888888888]
	 2 beta



divide by zero encountered in scalar divide


invalid value encountered in scalar multiply


Convergence is slow, approximation method may not be appropriate



Used global optimisation on shape points [0.0, 0.2222222222222222]


In [11]:
opts = {
    'method': 'quad adaptive',
    'max_N_opts': 40,
    'approx shape N': 10,
    'interp': 0,
    'lower_bounds': 0,
    'seed': seed
}
first_L = True
first_MLE = True
first_U = True
fig2 = make_subplots(rows=4, cols=3)
df_ident = pd.DataFrame(columns=["Model", "Parameter", "Lower", "Upper", "Time Change"])
data["PLL"].result = {}
for i_m, data in enumerate(datas):
    print(data['model'])
    data["PLL"].set_options(opts, reset=True)

    if data['model'] == 'Standard':
        names_m = zip(range(0, 3), row_names[:-1])
    elif data['model'] == 'Gompertz':
        names_m = zip(range(0, 3), row_names[:-1])
    else:
        names_m = zip([0, 1, 3, 2], row_names[:])
    data["PLL"].set_param_range(range(0, len(data['bounds'])), bounds=data['bounds'], adapt=False)
    row = 0
    for i_param, param in names_m:
        print("\t", i_param, param)
        row += 1
        timer.reset()
        result = data["PLL"].run(i_param, n_points = 200)
        quick_time = timer.time()
        time_change = (
            quick_time - slow_times[(data['model'], param)]
        )/slow_times[(data['model'], param)]
        time_change = str(round(time_change*100, 2)) + "%"
        identifiability = data["PLL"].result[i_param]['identifiabilty']
        df_ident.loc[len(df_ident.index)] = [
            data['model'], param, identifiability[0], identifiability[1],
            time_change
        ]
        fig2.add_trace(
            go.Scatter(
                name= "Quick Profile Likelihood",
                x=result[0],
                y=result[1],
                mode='lines',
                line=dict(color=disc_col_scale[i_m]),
                showlegend=i_param==0,
                legendgroup=data["model"],
                legendgrouptitle=dict(
                    text=data["model"]+" growth model"
                )
            ), row=row, col=i_m+1
        )
        x_CI, LL_CI, _ = data["PLL"].result[i_param]['CI']
        if identifiability[0]=='Ident':
            fig2.add_trace(
                go.Scatter(
                    name= "Lower confidence interval",
                    x=[x_CI[0], x_CI[0]],
                    y=[LL_CI[0], -5],  # min(result[1, np.isfinite(result[1])])],
                    mode='lines',
                    line=dict(color='darkgrey', dash='dash'),
                    showlegend=first_L,
                    legendgroup="CI",
                    legendgrouptitle=dict(
                        text="Confidence Interval"
                    )
                ), row=row, col=i_m+1
            )
            first_L = False
        fig2.add_trace(
            go.Scatter(
                name= "MLE",
                x=[x_CI[1], x_CI[1]],
                y=[LL_CI[1], -5],   # min(result[1, np.isfinite(result[1])])],
                mode='lines',
                line=dict(color='darkgrey'),
                showlegend=first_MLE,
                legendgroup="CI",
                legendgrouptitle=dict(
                    text="Confidence Interval"
                )
            ), row=row, col=i_m+1
        )
        first_MLE = False
        if identifiability[1]=='Ident':
            fig2.add_trace(
                go.Scatter(
                    name= "Upper confidence interval",
                    x=[x_CI[2], x_CI[2]],
                    y=[LL_CI[2], -5],   # min(result[1, np.isfinite(result[1])])],
                    mode='lines',
                    line=dict(color='darkgrey', dash='dash'),
                    showlegend=first_U,
                    legendgroup="CI",
                    legendgrouptitle=dict(
                    # k=2, w=weights, s=5e-2*len(weights)
                        text="Confidence Interval"
                    )
                ), row=row, col=i_m+1
            )
            first_U = False
        optimiser_points = data["PLL"].result[i_param]['opt points']
        fig2.add_trace(
            go.Scatter(
                name='Optimiser Evaluations',
                x=optimiser_points[0],
                y=optimiser_points[1],
                mode='markers',
                marker=dict(
                    color=disc_col_scale[i_m],
                    symbol="star-diamond",
                    opacity=1
                ),
                showlegend=i_param==0,
                legendgroup=data["model"],
            ), row=row, col=i_m+1
        )
        approx_params = data["PLL"].result[i_param]['projected optimum']
        approx_ll = [
            data["log_like"].log_likelihood(param)- data["PLL"].l_star
            for param in approx_params
        ]
        # fig2.add_trace(
        #     go.Scatter(
        #         name='Optimiser Start point',
        #         x=optimiser_points[0],
        #         y=approx_ll,
        #         mode='markers',
        #         marker=dict(
        #             color='darkgrey',
        #             symbol='circle',
        #             opacity=1
        #         ),
        #         showlegend=i_param==0,
        #         legendgroup=data["model"],
        #     ), row=row, col=i_m+1
        # )
fig = go.Figure(data = fig1.data + fig2.data).set_subplots(
    4, 3, column_titles=col_names , row_titles=row_names
)
fig.update_yaxes(range=[-5, 2.1], title = "Log-likelihood - L^*")
fig.update_layout(
    template='plotly_white',
    width=900,
    height=800,
)
fig.write_image(image_file+"/Log_Growth_PLL_QPLL_adapt_r"+str(r_exp)+".svg")
fig.show()

df_ident.replace(['Ident', 'Unident', None], ['Identifiable', 'Unidentible', 'Unknown'])
fig3 = ff.create_table(df_ident)
fig3.update_layout(
    width=600,
    height=300,
)
fig3.write_image(image_file+"/Log_Growth_QPLL_Table_adapt_r"+str(r_exp)+".svg")
fig3.show()

Standard
	 0 r
	 1 K



invalid value encountered in scalar multiply


invalid value encountered in scalar subtract


invalid value encountered in scalar subtract



	 2 C_0



invalid value encountered in scalar multiply



Used global optimisation on shape points [0.8944444444444445]
Gompertz
	 0 r
	 1 K



invalid value encountered in scalar multiply


invalid value encountered in scalar multiply



	 2 C_0



divide by zero encountered in log


invalid value encountered in scalar multiply


Convergence is slow, changing approximation method


Convergence is slow, changing approximation method


invalid value encountered in scalar subtract


invalid value encountered in scalar subtract



Used global optimisation for CI finding on points [0.02591808]
Richards
	 0 r



invalid value encountered in multiply


invalid value encountered in scalar multiply


Convergence is slow, changing approximation method


overflow encountered in power


overflow encountered in scalar power


invalid value encountered in multiply


invalid value encountered in add


invalid value encountered in scalar subtract


invalid value encountered in power


divide by zero encountered in divide



Used global optimisation for CI finding on points [1.70898267e-04 1.73000922e-02 1.59016151e-01 3.54013272e-01
 4.07554519e-01 1.06932222e+00]
	 1 K



invalid value encountered in scalar subtract


invalid value encountered in scalar subtract


invalid value encountered in power


invalid value encountered in scalar multiply



	 3 C_0



invalid value encountered in scalar multiply


Convergence is slow, changing approximation method


Convergence is slow, changing approximation method


invalid value encountered in divide


divide by zero encountered in scalar divide


invalid value encountered in scalar multiply



	 2 beta



Convergence is slow, changing approximation method



Used global optimisation on shape points [0.0, 0.2222222222222222, 1.1111111111111112]


## The PK model

The second example I will go through in this notebook is a one-compartment Pharmacokinetic (PK) model. PK models are mathematical models of the concentration of the drug throughout the body over time and the one-compartment model with linear rates is the simplest of these. In the one-compartment model the body is abstracted into a central compartment. At time $t=0$, the drug is injected straight into the central compartment. Over time, the drug is then cleared from the compartment. This model is given by the following ordinary differential equation (ODE),
$$
        \dot{A_c} = - K_\textrm{cl}\frac{A_c}{V_c},
$$
where $A_c$ is the mass of drug in the central compartment, $V_c$ is the volume of the central compartment, and $K_\textrm{cl} >0$ is the rate of clearance from the central compartment. The initial condition of this model is $A_c(0) = d$, the drug dose amount. We will utilise the CHI package to build this model.

In [12]:
import chi.library
import chi

# Define 1-compartment pharmacokinetic model
PK_model = chi.library.ModelLibrary().one_compartment_pk_model()

INFO:root:running build_ext
INFO:root:building 'myokit_sim_1_827657314385058099' extension
INFO:root:creating build
INFO:root:creating build/temp.linux-x86_64-cpython-310
INFO:root:creating build/temp.linux-x86_64-cpython-310/tmp
INFO:root:creating build/temp.linux-x86_64-cpython-310/tmp/tmp2btaz6wnmyokit
INFO:root:x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -I/usr/local/include -I/opt/local/include -I/home/rumney/Documents/Myleotoxicity/Myleotoxicity-PKPD/venv/lib/python3.10/site-packages/myokit/_sim -I/home/rumney/Documents/Myleotoxicity/Myleotoxicity-PKPD/venv/lib/python3.10/site-packages/myokit/_sim -I/home/rumney/Documents/Myleotoxicity/Myleotoxicity-PKPD/venv/include -I/usr/include/python3.10 -c /tmp/tmp2btaz6wnmyokit/source.c -o build/temp.linux-x86_64-cpython-310/tmp/tmp2btaz6wnmyokit/source.o
INFO:root:creating build/lib.linux-x86_64-cpython-310
INFO:root:

In [13]:
# The above logging can be annoying with the frequency that it happens at, so lets turn it off
import logging
logger = logging.getLogger()
logger.handlers = []

In [14]:
# Plot the model simulation under various doses

V_c = 5.58
K_cl = 2.8
dose_amts = [1, 2, 3]
times = np.linspace(0, 5, 100)

fig = go.Figure()

for i_dose, d in enumerate(dose_amts):
        param = [d, V_c, K_cl]
        solution = PK_model.simulate(param, times)
    
        fig.add_trace(
            go.Scatter(
                name="Dose amount: "+str(d)+" mg",
                x=times,
                y=solution[0],
                mode='lines',
                line=dict(color=disc_col_scale[i_dose]),
                showlegend=True
            ),
        )

fig.update_layout(
    template='plotly_white',
    width=500,
    height=400,
)
fig.show()

### Mixed effects modelling

The above model may be a good general discription for a particular drug, however different people will have different specific responses to the drug. This inter-individual variation can be represented using mixed-effects models. These models assume that each individual has an individual set of parameters determined from a population distribution of parameters. This population distribution is further described by hyper-parameters. When parameter inference is performed on a mixed effects model, the hyper-parameters are inferred alongside the individual parameters.

Taking the above PK model, I will initially set just the parameter $V_c$ to vary between individuals, and assume that it best represented by a log-normal distribution, i.e. for indivisual i
$$
    V_{c, i} = V_{c, typ}e^{\eta_i}
$$
where $V_{c, typ} > 0$ is the typical population parameter and $\eta_i \sim N\left(0, \omega_{V_c}^2\right)$ is the random effect for the individual i. The hyper parameters of this model are 
$\{V_{c, typ}, \omega_{V_c}^2, K_{cl}, \sigma\}$ and the individual parameters $\{V_{c, i}| 0\leq i \leq N_{ind}\}$ meaning there is an additional $N_{ind}+1$ parameters to infer in this problem when compared to the fixed-effects problem (where all individuals are described by the same parameters).

Now lets generate some individual parameters.

In [15]:
# Set the population model
population_model = chi.ComposedPopulationModel([
    chi.LogNormalModel(n_dim=1),    # V_c
    chi.PooledModel(n_dim=1),       # K_cl
    chi.PooledModel(n_dim=1)        # sigma
])

# "Sample" individuals 
omega_V_c = 0.3
true_typ_params = [0, V_c, K_cl, 0.1]
param_names = ["Log(V_c)", "omega_{V_c}", "K_{cl}", "sigma"]
true_pop_params = [np.log(V_c), omega_V_c, K_cl, 0.1]
individual_parameters = population_model.sample(
    parameters=true_pop_params,
    n_samples=1000,
    seed=seed
)

# Plot distribution
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        name='Samples',
        x=individual_parameters[:, 0],
        histnorm='probability',
        showlegend=False,
        marker_color=base_colour
    )
)
fig.update_layout(
    template='plotly_white',
    width=400,
    height=400,
    title="V_{c, i} sampled from log-normal distribution"
)
fig.show()


Next we can wrap these two models together in the CHI package and include a multiplicative gaussian noise with standand deviation $\sigma$

In [16]:
PK_model.parameters()

['central.drug_amount', 'central.size', 'myokit.elimination_rate']

In [17]:
# Set up a dosing regime which easily allows us to compare multiple doses
PK_model.set_administration(
    compartment='central', amount_var='drug_amount'
)
# Also need to fix A_c(0) to 0 as this will add to our dosing regime
fixed = {'central.drug_amount': 0}

# Set the noise 
noise_model = chi.MultiplicativeGaussianErrorModel()

# Set up the inference problem
problem = chi.ProblemModellingController(PK_model, noise_model)
problem.fix_parameters(fixed)
problem.set_population_model(population_model)

### The Data

Again I will need data to do further analyses. For this example, I will generate some data from the model to use in place of experimental data with 15 "pataients" per dose.

In [18]:
data_times = np.linspace(0.1, 5, 20)
n_ids_per_dose = 15

n_ids_data = n_ids_per_dose*len(dose_amts)
# Set up Dataframe
df = pd.DataFrame(columns=[
    "ID", "Time", "Observable", "Value", "Dose group", "Duration", "Dose"
])
# Generate data
for i_dose, dose in enumerate(dose_amts):
    PK_model.set_dosing_regimen(dose=dose, start=0, period=0)
    for i_ind in range(0, n_ids_per_dose):
        # Simulate model
        pat_param = np.insert(individual_parameters[i_ind+i_dose, :], 0, 0)
        patient_result = PK_model.simulate(pat_param[:-1], data_times)
        patient_result = noise_model.sample(
            pat_param[-1:], patient_result[0]
        )[:, 0]

        # Format patient data
        patient_data= pd.DataFrame(columns=[
            "ID", "Time", "Observable", "Value", "Dose group", "Duration", "Dose"
        ])
        patient_id = i_ind+(i_dose*n_ids_per_dose)+1
        patient_data["ID"] = [patient_id]*(len(data_times)+1)
        patient_data["Time"] = np.concatenate((data_times, [0]))
        patient_data["Observable"] = (
            ['central.drug_concentration']*len(data_times)+[None]
        )
        patient_data["Value"] = np.concatenate((patient_result, [None]))
        patient_data["Dose group"] = [dose]*(len(data_times)+1)
        patient_data["Duration"] = [None]*len(data_times)+[0.01]
        patient_data["Dose"] = [None]*len(data_times)+[dose]

        # Join to main dataframe
        df = pd.concat([df, patient_data])
    df = df.reset_index(drop=True)

problem.set_data(df)
df

Unnamed: 0,ID,Time,Observable,Value,Dose group,Duration,Dose
0,1,0.100000,central.drug_concentration,0.162326,1,,
1,1,0.357895,central.drug_concentration,0.073835,1,,
2,1,0.615789,central.drug_concentration,0.039239,1,,
3,1,0.873684,central.drug_concentration,0.020636,1,,
4,1,1.131579,central.drug_concentration,0.01046,1,,
...,...,...,...,...,...,...,...
940,45,4.226316,central.drug_concentration,0.000003,3,,
941,45,4.484211,central.drug_concentration,0.000002,3,,
942,45,4.742105,central.drug_concentration,0.000001,3,,
943,45,5.000000,central.drug_concentration,0.0,3,,


We can also visualise the data on a graph:

In [19]:
fig = go.Figure()
colours = []
for i, dose in enumerate(dose_amts):
    col_low = 1.5*i/(len(dose_amts)*1.5-0.5)
    col_high = (1.5*i+1)/(len(dose_amts)*1.5-0.5)
    colours += pxclrs.sample_colorscale(
        pxclrs.get_colorscale(cont_col_scale),
        n_ids_per_dose,
        low=col_low,
        high=col_high
    )
ind_colours = np.asarray(colours)

more_times = np.linspace(0, 5, 1000)
for i_dose, dose in enumerate(dose_amts):
    PK_model.set_dosing_regimen(dose=dose, start=0, period=0)
    typ_result = PK_model.simulate(true_typ_params[:-1], more_times)
    fig.add_trace(
        go.Scatter(
            name= "Typical Individual Simulation",
            x=more_times,
            y=typ_result[0],
            mode='lines',
            line=dict(color=ind_colours[int(i_dose*n_ids_per_dose+n_ids_per_dose/2)]),
            showlegend=True,
            legendgroup=dose
        )
    )
for i_ind in range(0, 45):
    data = df.loc[df['ID']==i_ind+1]
    dose = np.array(data["Dose group"])[0]
    fig.add_trace(
        go.Scatter(
            name= "ID "+str(i_ind+1),
            x=data["Time"],
            y=data["Value"],
            mode='markers',
            line=dict(color=ind_colours[i_ind]),
            showlegend=True,
            legendgroup=dose,
            legendgrouptitle=dict(
                text="Dose " +str(dose) +"mg"
            )
        )
    )
fig.update_layout(
    template='plotly_white',
    width=600,
    height=400,
)
# fig.update_yaxes(type="log")
fig.write_image(image_file+"/Log_Growth_Data.svg")
fig.show()

### Maximum likelihood

Mixed effects modelling has a hierarchiacal likelihood and posterior which is defined slightly differently. 
The mixed effects modelling defines the posterior of the model as,
\begin{equation*}
P\left(\theta_1, \ldots, \theta_n, \theta_\textrm{typ}, \Omega | X^\textrm{obs}_\textrm{1}, \ldots, X^\textrm{obs}_\textrm{n}\right) \propto P\left(\theta_\textrm{typ}, \Omega\right) \cdot P\left(\theta_1, \ldots, \theta_n,| \theta_\textrm{typ}, \Omega\right) \cdot \prod_{\textrm{i} = 1}^{n_{ind}} P\left(X^\textrm{obs}_\textrm{i}|\theta_\textrm{i}\right),
\end{equation*}
where $\prod_{\textrm{i} = 1}^{n_{ind}} P\left(X^\textrm{obs}_\textrm{i}|\theta_\textrm{i}\right)$ is defined as the individual likelihood, $P\left(\theta_1, \ldots, \theta_n| \theta_\textrm{typ}, \Omega\right)$ as the individual prior, and $P\left(\theta_\textrm{typ}, \Omega\right)$ as the hyperprior. <!-- \cite{Jakaite2022BayesianApplications}. --> In our scenario, the population likelihood of all the parametrs is 
$$
P\left( X^\textrm{obs}_\textrm{1}, \ldots, X^\textrm{obs}_\textrm{n}|\theta_1, \ldots, \theta_n, \theta_\textrm{typ}, \Omega \right) = P\left(\theta_1, \ldots, \theta_n,| \theta_\textrm{typ}, \Omega\right) \cdot \prod_{\textrm{i} = 1}^{n_{ind}} P\left(X^\textrm{obs}_\textrm{i}|\theta_\textrm{i}\right)
$$
and this (or equivalently the log of this) is what we wish to maximise. 

As a Gaussian multiplicative noise is assumed in the PK case, the individual Log-Likelihood is
\begin{align*}
    LL_i\left(\theta_\textrm{i}\right) = LL_i\left(V_{c, i}, \theta_{-V_c}\right)
        = \sum_j{PLL_{i, j}\left(V_{c, i}, \theta_{-V_c}\right)}
\end{align*}
with Point-wise log-likelihoods
\begin{align*}
    PLL_{i, j}\left(V_{c, i}, \theta_{-V_c}\right) &= \log\left(P\left(x^\textrm{obs}_{i,j}|V_{c, i}, \theta_{-V_c}\right)\right) \\
    &= -\frac{1}{2}\log\left(2\pi\right) -\log\left(\sigma_{PK} x^{\theta_i}\left(t_j\right)\right) -\frac{1}{2}\left(\frac{x^\textrm{obs}_{i, j} - x^{\theta_i}\left(t_j\right)}{\sigma_{PK} x^{\theta_i}\left(t_j\right)}\right)^2
\end{align*}
As the individual distribution of $V_c$ is Log-normal, the Log Prior for the individual parameters becomes
\begin{align*}
    \log{P\left(\theta_i,| \theta_\textrm{typ}, \Omega\right)} &= \log{P\left(V_{c, i},| V_\textrm{c, typ}, \omega^2\right)} \\
    &= \log{\left(\frac{1}{V_{c, i}} \frac{1}{\sqrt{2\pi}\omega}\exp{\left(-\frac{\left(\log{V_{c,i}}-\log{V_{c, typ}}\right)^2}{2\omega^2}\right)}\right)} \\
    &= -\log{V_{c,i}} - \log{\left(\sqrt{2\pi}\omega\right)} -\frac{\left(\log{V_{c,i}}-\log{V_{c, typ}}\right)^2}{2\omega^2}
\end{align*}

This is already implemented in the CHI package.

In [20]:
# Generate a log hyper-prior. Not used to calculate log-likelihood,
# but required for the CHI problem class and to sample initial points.
log_priors = [
    pints.GaussianLogPrior(np.log(5), 0.3),     # V_{c, typ}
    pints.LogNormalLogPrior(np.log(0.1), 0.2),  # omega_{V_c}
    pints.LogNormalLogPrior(np.log(3), 0.2),    # K_cl
    pints.LogNormalLogPrior(np.log(0.01), 0.4)  # sigma
]
log_prior = pints.ComposedLogPrior(*log_priors)
problem.set_log_prior(log_prior)

# Aquire the log-likelihood from the CHI problem class
log_posterior = problem.get_log_posterior()
log_likelihood = log_posterior.get_log_likelihood()

We can use the log hyper-prior and individual log-prior to sample starting points for our MLE optimisation.

In [21]:
n_runs = 2
ini_params = log_posterior.sample_initial_parameters(n_samples=n_runs, seed=seed)

Now to optimise. We will use the global optimiser CMAES provided by PINTS.

In [22]:
lower_bounds = [0]*(n_ids_data+len(param_names))
lower_bounds[n_ids_data] = -np.inf  # log(V_c) is not bounded by zero

PLL_controller = ProfileLogLikelihood(log_likelihood, ini_params[0])
MLEs, LL_scores = PLL_controller.max_likelihood_estimate(
    n_runs=n_runs, param_start = ini_params,
    opts={
        'optimiser': 'CMAES', 'opt_package': 'pints',
        'random_seed': seed, 'lower_bounds': lower_bounds, 'maxiter': 1e5
    }
)
table_df = pd.DataFrame(
    MLEs[:, -len(param_names):],
    columns=param_names
)
table_df["Log-likelihood"] = LL_scores
table_df = table_df.reindex(list(range(0, n_runs))+["True"])
table_df.loc["True"] = true_pop_params + [
    log_likelihood(np.concatenate((individual_parameters[:n_ids_data, 0], true_pop_params)))
]
table_df = table_df.round(4)
fig = ff.create_table(table_df, index=True)
fig.update_layout(
    width=600,
    height=300,
)
fig.show()

Finding the MLE in this case takes much longer than for the Growth models as $N_{ind}$ more parameters are required. And thus it is even more important to find an efficient Profile Likelihood method that requires as few optimisations as possible.

## Profile likelihood

We will again use the two different methods to determine identifiability on the parameters and compare results and efficiency. In this case the identifiability of individual parameters is not important as long as we can identify the population mechanics and so we will not aquire the profile likelihoods for the individual parameters. In other cases it may be more important such as when developing individually adapted therapies.

In [23]:
opts = {
    'method': 'sequential calc.',
    'max_N_opts': 50,
    'interp':0,
    'lower_bounds': lower_bounds,
    'random_seed': seed
}
timer = pints.Timer()
slow_times = {}
fig1 = make_subplots(rows=4, cols=1, row_titles=param_names)
PLL_controller.set_options(opts, reset=True)
# bounds =  [[0, 0.01], [60, 100]]
# PLL_controller.set_param_range(range(0, len(data['bounds'])), bounds=data['bounds'], adapt=False)
row = 0
for i_pop_param, param in enumerate(param_names):
    i_param = i_pop_param + n_ids_data
    print("\t", param)
    row += 1
    timer.reset()
    result = PLL_controller.run(i_param, n_points = 200)
    slow_times[param] = timer.time()
    fig1.add_trace(
        go.Scatter(
            name= "Profile Likelihood",
            x=result[0],
            y=result[1],
            mode='lines',
            line=dict(color=disc_col_scale[i_m], dash='dash'),
            showlegend=i_pop_param==0,
            legendgroup="Slow",
        ), row=row, col=1
    )

fig1.update_layout(
    template='plotly_white',
    width=900,
    height=800,
)
fig1.show()

	 Log(V_c)



invalid value encountered in scalar multiply


invalid value encountered in log



Used global optimisation on shape points [1.6816505951579446, 1.6740067288163176, 1.6663628624746907, 1.6587189961330635, 1.6510751297914366, 1.6434312634498096, 1.6357873971081824, 1.6281435307665555, 1.6204996644249285, 1.6128557980833014, 1.6052119317416744, 1.5975680654000475, 1.5899241990584203, 1.5822803327167934, 1.5746364663751664, 1.5669926000335392, 1.5593487336919123, 1.5517048673502851, 1.5440610010086582, 1.5364171346670312, 1.528773268325404, 1.521129401983777, 1.5134855356421502, 1.505841669300523, 1.498197802958896, 1.6892944614995717, 1.6969383278411987, 1.7045821941828256, 1.7122260605244528, 1.7198699268660798, 1.7275137932077067, 1.7351576595493339, 1.7428015258909608, 1.7504453922325878, 1.758089258574215, 1.765733124915842, 1.7733769912574688, 1.781020857599096, 1.788664723940723, 1.79630859028235, 1.803952456623977, 1.811596322965604, 1.819240189307231, 1.8268840556488581, 1.834527921990485, 1.8421717883321123, 1.8498156546737392, 1.8574595210153662, 1.8651033873

ValueError: `x` must contain at least 2 elements.

In [None]:
opts = {
    'method': 'quadratic approx.',
    'max_N_opts': 40,
    'approx shape N': 10,
    'interp': 0,
    'lower_bounds': 0,
    'seed': seed
}
first_L = True
first_MLE = True
first_U = True
fig2 = make_subplots(rows=4, cols=1)
df_ident = pd.DataFrame(columns=["Parameter", "Lower", "Upper", "Time Change"])
PLL_controller.result = {}
PLL_controller.set_options(opts, reset=True)
# data["PLL"].set_param_range(range(0, len(data['bounds'])), bounds=data['bounds'], adapt=False)
row = 0
for i_pop_param, param in enumerate(param_names):
    i_param = i_pop_param + n_ids_data
    print("\t", i_pop_param, param)
    row += 1
    timer.reset()
    result = PLL_controller.run(i_param, n_points = 200)
    quick_time = timer.time()
    time_change = (quick_time - slow_times[param])/slow_times[param]
    time_change = str(round(time_change*100, 2)) + "%"
    identifiability = PLL_controller.result[i_param]['identifiabilty']
    df_ident.loc[len(df_ident.index)] = [
        param, identifiability[0], identifiability[1], time_change
    ]
    fig2.add_trace(
        go.Scatter(
            name= "Quick Profile Likelihood",
            x=result[0],
            y=result[1],
            mode='lines',
            line=dict(color=base_colour),
            showlegend=i_param==0,
            legendgroup="Quick",
        ), row=row, col=1
    )
    x_CI, LL_CI, _ = PLL_controller.result[i_param]['CI']
    if identifiability[0]=='Ident':
        fig2.add_trace(
            go.Scatter(
                name= "Lower confidence interval",
                x=[x_CI[0], x_CI[0]],
                y=[LL_CI[0], -5],  # min(result[1, np.isfinite(result[1])])],
                mode='lines',
                line=dict(color='darkgrey', dash='dash'),
                showlegend=first_L,
                legendgroup="CI",
                legendgrouptitle=dict(
                    text="Confidence Interval"
                )
            ), row=row, col=1
        )
        first_L = False
    fig2.add_trace(
        go.Scatter(
            name= "MLE",
            x=[x_CI[1], x_CI[1]],
            y=[LL_CI[1], -5],   # min(result[1, np.isfinite(result[1])])],
            mode='lines',
            line=dict(color='darkgrey'),
            showlegend=first_MLE,
            legendgroup="CI",
            legendgrouptitle=dict(
                text="Confidence Interval"
            )
        ), row=row, col=1
    )
    first_MLE = False
    if identifiability[1]=='Ident':
        fig2.add_trace(
            go.Scatter(
                name= "Upper confidence interval",
                x=[x_CI[2], x_CI[2]],
                y=[LL_CI[2], -5],   # min(result[1, np.isfinite(result[1])])],
                mode='lines',
                line=dict(color='darkgrey', dash='dash'),
                showlegend=first_U,
                legendgroup="CI",
                legendgrouptitle=dict(
                    text="Confidence Interval"
                )
            ), row=row, col=1
        )
        first_U = False
        optimiser_points = PLL_controller.result[i_param]['opt points']
        fig2.add_trace(
            go.Scatter(
                name='Optimiser Evaluations',
                x=optimiser_points[0],
                y=optimiser_points[1],
                mode='markers',
                marker=dict(
                    color=base_colour,
                    symbol="star-diamond",
                    opacity=0.5
                ),
                showlegend=i_param==0,
                legendgroup="Quick",
            ), row=row, col=1
        )
fig = go.Figure(data = fig1.data + fig2.data).set_subplots(
    4, 1, row_titles=param_names
)
fig.update_yaxes(range=[-5, 2.1], title = "Log-likelihood - L^*")
fig.update_layout(
    template='plotly_white',
    width=900,
    height=800,
)
fig.write_image(image_file+"/PK_PLL_QPLL.svg")
fig.show()

df_ident.replace(['Ident', 'Unident'], ['Identifiable', 'Unidentible'])
fig3 = ff.create_table(df_ident)
fig3.update_layout(
    width=600,
    height=300,
)
fig3.write_image(image_file+"/PK_QPLL_Table.svg")
fig3.show()