### Likelihood-Free Parameter Inference on the Vilar Model

This notebook illustrates likelihood-free parameter inference using artificial neural networks (henceforth referred to as ANN) and approximate Bayesian computation.
The ANN models learn the mapping ${\bf y} \rightarrow {\bf \theta}$, where ${\bf y}$ is a time series response and ${\bf \theta}$ are the parameters of the descriptive model.
Therefore, we require a training set of pairs $({\bf y}, {\bf \theta})$ to train the ANN models. We will first generate such a training set.

In [1]:
# import 
import dask
import numpy as np
import gillespy2
from gillespy2.solvers.cpp import SSACSolver
from sciope.utilities.priors import uniform_prior

In [2]:
# The following function sets specified values of model parameters
def set_model_parameters(model, params):
    """Assign new values to every parameter of ``model``, by position.

    NOTE(review): the next cell defines a function with the same name, which
    replaces this one when the notebook is run from top to bottom.

    Parameters
    ----------
    model : object
        Model exposing ``listOfParameters`` (a mapping keyed by parameter
        name) and ``get_parameter(name)``. It is modified in place.
    params : array-like
        New parameter values, in the same order as ``model.listOfParameters``.
        Any shape is accepted (e.g. ``(n,)`` or ``(1, n)``); the values are
        flattened before use.

    Returns
    -------
    object
        The same ``model`` instance that was passed in.
    """
    # Flatten first: the simulation loops pass each sample as a (1, n) row,
    # and indexing such a row directly would select whole rows, not values.
    values = np.ravel(params)
    model.tspan = np.linspace(1, 100, 100)
    for idx, pname in enumerate(model.listOfParameters):
        model.get_parameter(pname).set_expression(values[idx])
    return model

In [3]:
def set_model_parameters(model, params):
    """Set the 15 named parameters of the Vilar model from ``params``.

    Unlike the order-based variant defined in the previous cell, parameters
    are addressed by name, so the result does not depend on the iteration
    order of ``model.listOfParameters``.

    Parameters
    ----------
    model : object
        Model exposing ``get_parameter(name)`` for each of the names listed
        below. It is modified in place.
    params : array-like
        At least 15 values, in the order of ``names`` below. Any shape is
        accepted (e.g. ``(15,)`` or ``(1, 15)``); the values are flattened
        before use.

    Returns
    -------
    object
        The same ``model`` instance, matching the order-based variant.

    Raises
    ------
    IndexError
        If ``params`` holds fewer than 15 values.
    """
    # Parameter names in the positional order expected in ``params``.
    names = (
        'alpha_A', 'alpha_a_prime', 'alpha_r', 'alpha_r_prime',
        'beta_a', 'beta_r',
        'delta_ma', 'delta_mr', 'delta_a', 'delta_r',
        'gamma_a', 'gamma_r', 'gamma_c',
        'Theta_a', 'Theta_r',
    )
    # np.ravel (rather than the ndarray method) also accepts plain lists.
    values = np.ravel(params)
    model.tspan = np.linspace(1, 100, 100)
    for idx, pname in enumerate(names):
        model.get_parameter(pname).set_expression(values[idx])
    return model

We read the model definition from a file and instantiate the model.

In [4]:
# Read the model description from an XML file and build a model object from it.
# NOTE(review): the path is absolute and machine-specific; point it at the local
# copy of vilar_oscillator.xml before running.
model_doc = gillespy2.StochMLDocument.from_file("/home/ubuntu/code/sciope/examples/inference/vilar/StochSS_model/vilar_oscillator_AIYDNg/models/data/vilar_oscillator.xml")
# NOTE(review): sim_vilar builds its own model from model_doc on every call, so
# this instance does not appear to be used by the simulations below.
model = model_doc.to_model("Vilar")

Next, we define a search prior for the parameter inference problem, and our 'true' parameter point that corresponds to observed data.

In [5]:
# Lower (dmin) and upper (dmax) bounds of the uniform search prior, one entry
# per parameter, in the positional order used by set_model_parameters:
# alpha_A, alpha_a_prime, alpha_r, alpha_r_prime, beta_a, beta_r, delta_ma,
# delta_mr, delta_a, delta_r, gamma_a, gamma_r, gamma_c, Theta_a, Theta_r
dmin = [0,    100,    0,   20,   10,   1,    1,   0,   0,   0, 0.5,    0,   0,    0,   0]
dmax = [80,   600,    4,   60,   60,   7,   12,   2,   3, 0.7, 2.5,   4,   3,   70,   300]
v_prior = uniform_prior.UniformPrior(np.asarray(dmin), np.asarray(dmax))
# 'True' parameter point used later to simulate the observed data (same order as above)
fixed_point = np.asarray([50.0, 500.0, 0.01, 50.0, 50.0, 5.0, 10.0, 0.5, 1.0, 0.2, 1.0, 1.0, 2.0, 50.0, 100.0])

Now we are ready to generate some samples based on the prior above.

In [6]:
# Number of training points drawn from the prior
N = 10000
samples_delayed = v_prior.draw(N)
# Evaluate the draw through dask, then arrange the result as one row per
# sample and one column per parameter
(samples,) = dask.compute(samples_delayed)
samples = np.asarray(samples).reshape(N, len(dmin))

Define the simulator below. We will concentrate on the species C, A and R.

In [7]:
num_trajs = 50  # NOTE(review): not read by sim_vilar, which binds a local of the same name from its `trajs` argument
num_timestamps = 101  # time points per trajectory expected by the reshapes below
num_species = 3  # number of species kept from each trajectory

def sim_vilar(param, trajs=1):
    """Simulate the Vilar model at one parameter point.

    A fresh model is built from ``model_doc`` on every call, its parameters
    are set from ``param`` and it is run with the SSA C solver.

    Parameters
    ----------
    param : array
        parameter values, in the order expected by set_model_parameters
    trajs : int
        number of trajectories to simulate

    Returns
    -------
    array
        simulated responses of shape (trajs, num_species, num_timestamps)
    """
    vilar = model_doc.to_model("Vilar")
    set_model_parameters(vilar, param)
    raw_results = vilar.run(solver=SSACSolver, show_labels=False,
                            number_of_trajectories=trajs)

    # Transpose every trajectory so that the stack is laid out as (N, S, T)
    stacked = np.asarray([trajectory.T for trajectory in raw_results])

    # Row 0 is dropped (the time points), then rows 6..8 of the remainder are
    # kept; on the full stack that is rows 7..9. Presumably these are the
    # species C, A and R mentioned in the notebook text - confirm.
    selected = stacked[:, 7:10, :]
    return selected.reshape((trajs, num_species, num_timestamps))

Simulate the samples from the prior.

In [8]:
# Queue one lazy simulation per prior sample; every sample is handed to the
# simulator as a (1, n_params) row
responses = [
    dask.delayed(sim_vilar)(samples[i, :].reshape(1, len(samples[i, :])))
    for i in range(N)
]

# Run all queued simulations
(computed_responses,) = dask.compute(responses)

# Arrange the results in the layout sciope expects: N x S x T
ts = np.asarray(computed_responses).reshape(N, num_species, num_timestamps)

Now that we have data, we can train the ANN models to learn the mapping from ${\bf y}$ to ${\bf \theta}$.

In [9]:
# Set input and output shape for the CNN
# NOTE(review): the simulated data is arranged as (N, 3, 101) while the input
# shape given here is (101, 3); presumably CNNModel reorders the axes
# internally - confirm against the sciope implementation.
input_shape = (101,3)
# One output per model parameter (the prior bounds have 15 entries)
output_shape = 15

In [10]:
# Routines to normalize and denormalize data
# Makes training easier
def normalize_data(data, dmin, dmax):
    """Rescale ``data`` so that ``dmin`` maps to 0 and ``dmax`` maps to 1.

    Parameters
    ----------
    data : array
        values to rescale; ``dmin`` / ``dmax`` are broadcast against it
    dmin, dmax : array-like
        lower and upper bounds

    Returns
    -------
    array
        ``(data - dmin) / (dmax - dmin)``
    """
    lower = np.array(dmin)
    span = np.array(dmax) - lower
    shifted = data - lower
    return shifted / span

def denormalize_data(data, dmin, dmax):
    """Map values from the unit scale back to the range [dmin, dmax].

    Parameters
    ----------
    data : array
        values to rescale; ``dmin`` / ``dmax`` are broadcast against it
    dmin, dmax : array-like
        lower and upper bounds

    Returns
    -------
    array
        ``data * (dmax - dmin) + dmin``
    """
    lower = np.array(dmin)
    span = np.array(dmax) - lower
    return data * span + lower

In [11]:
# Repeats the conversion done right after the simulations (N x S x T layout);
# the last expression displays the resulting shape as the cell output
ts = np.asarray(computed_responses)
ts = ts.reshape(N, num_species, num_timestamps)
ts.shape

(10000, 3, 101)

In [12]:
# Rescale the sampled parameters with the prior bounds (dmin -> 0, dmax -> 1);
# these are used as the training targets
normed_thetas = normalize_data(samples, dmin, dmax)

In [13]:
# Import the CNN model
from sciope.models.cnn_regressor import CNNModel

In [14]:
# Instantiate the CNN regressor with the input and output shapes chosen above
model_cnn = CNNModel(input_shape, output_shape)

In [15]:
# Train the CNN with the simulated time series as inputs and the normalized
# parameters as targets; whatever train() returns is kept in history_cnn
history_cnn = model_cnn.train(ts, normed_thetas, batch_size=256, 
                      epochs=500, verbose=0, learning_rate=0.001, 
                      early_stopping_patience=5, validation_split=0.2)

We can validate the models to see how accurate they are.

In [16]:
# Draw a fresh set of N_test points from the prior to act as test data
N_test = 10000
samples_delayed = v_prior.draw(N_test)
(samples_test,) = dask.compute(samples_delayed)
samples_test = np.asarray(samples_test).reshape(N_test, len(dmin))

# Queue one lazy simulation per test sample; every sample is handed to the
# simulator as a (1, n_params) row
responses_test = [
    dask.delayed(sim_vilar)(samples_test[i, :].reshape(1, len(samples_test[i, :])))
    for i in range(N_test)
]

# Run all queued simulations
(computed_responses_test,) = dask.compute(responses_test)

# Arrange the results in the layout sciope expects: N x S x T
ts_test = np.asarray(computed_responses_test).reshape(N_test, num_species, num_timestamps)

In [17]:
# Generate 'observed' data: a single trajectory simulated at the 'true'
# parameter point
response_obs = sim_vilar(fixed_point, trajs=1)
# Same N x S x T layout as the training data, with N = 1
ts_obs = response_obs.reshape(1, num_species, num_timestamps)

In [18]:
# Routine to test each ANN architecture on MAE
from sklearn.metrics import mean_absolute_error
def test_model(model):
    """Score a trained model with the mean absolute error (MAE).

    Predictions are mapped back to the original parameter scale with
    denormalize_data before they are compared.

    Parameters
    ----------
    model : object
        trained model exposing ``predict``

    Returns
    -------
    tuple
        (MAE over the test set ``ts_test`` / ``samples_test``,
         MAE of the prediction for ``ts_obs`` against ``fixed_point``)
    """
    # Error over the whole test set
    test_estimates = denormalize_data(model.predict(ts_test), dmin, dmax)
    mae_test = mean_absolute_error(samples_test, test_estimates)

    # Error of the estimate obtained from the observed data
    obs_estimate = denormalize_data(model.predict(ts_obs), dmin, dmax)
    reference = np.asarray(fixed_point)
    mae_true = np.mean(np.abs(obs_estimate.ravel() - reference), axis=0)

    return mae_test, mae_true

In [19]:
# Calculate test metrics: MAE over the test set and MAE at the 'true' point
mae_test_cnn, mae_true_cnn = test_model(model_cnn)

In [20]:
# Report both error measures computed by test_model
print("CNN test MAE = {}, MAE at true point = {}".format(mae_test_cnn, mae_true_cnn))

CNN test MAE = 16.271204419741156, MAE at true point = 12.902529955863953


In [21]:
from sciope.utilities.summarystats.summary_base import SummaryBase
from sciope.utilities.housekeeping import sciope_logger as ml

class ANN_Statistics(SummaryBase):
    """
    Summary statistic backed by the trained CNN (``model_cnn``).

    The thetas predicted by ANN models act as summary statistics
    """

    def __init__(self, mean_trajectories=False, use_logger=False):
        """
        Set up the summary statistic

        Parameters
        ----------
        mean_trajectories : bool
            if True, compute() averages the predictions over the data points
        use_logger : bool
            if True, messages are written to the sciope logger
        """
        self.name = 'ANN_Statistics'
        super().__init__(self.name, mean_trajectories, use_logger)
        if self.use_logger:
            self.logger = ml.SciopeLogger().get_logger()
            self.logger.info("ANN_Statistics summary statistic initialized")

    def compute(self, data):
        """
        Calculate the value(s) of the summary statistic(s)

        Parameters
        ----------
        data : array-like
            simulated or data set in the form N x S X T - num data points x num species x num time steps

        Returns
        -------
        numpy.ndarray
            predicted parameter values on the original (denormalized) scale;
            averaged over the data points when mean_trajectories is set

        Raises
        ------
        AssertionError
            if data is not 3-dimensional
        """
        data_arr = np.array(data)
        assert len(data_arr.shape) == 3, "required input shape is (n_points, n_species, n_timepoints)"

        # Predictions are on the normalized scale used for training; map them
        # back to the parameter ranges given by dmin / dmax
        res = model_cnn.predict(data_arr)
        res = denormalize_data(res, dmin, dmax)

        if self.mean_trajectories:
            # Average over the data points (axis 0): one value per parameter
            res = np.asarray(np.mean(res, axis=0))

        if self.use_logger:
            # Report the shape of the converted array: `data` itself may be a
            # plain list, which has no `shape` attribute
            self.logger.info("ANN_Statistics summary statistic: processed data matrix of shape {0} and generated summaries"
                             " of shape {1}".format(data_arr.shape, res.shape))
        return res

In [22]:
# Test our new summary statistic
cnn_stat = ANN_Statistics()
predicted_stat = cnn_stat.compute(ts_obs)

In [23]:
# Show the predicted parameters and their mean absolute error against the
# 'true' parameter point
print(predicted_stat)
stat_mae = np.mean(np.abs(predicted_stat.ravel() - np.asarray(fixed_point)), axis=0)
print("MAE upon comparison as a statistic = {}".format(stat_mae))

[[3.46384788e+01 3.83292413e+02 1.97799933e+00 4.12151909e+01
  3.92695999e+01 4.63687801e+00 5.90746319e+00 6.15483046e-01
  1.13809007e+00 2.92096889e-01 1.37776500e+00 2.12254214e+00
  1.85050839e+00 4.89150506e+01 1.32449555e+02]]
MAE upon comparison as a statistic = 12.902529955863953
