In [1]:
import pints
import pints.toy
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras

import emupints
import emupints.plot as emuplt
import emupints.utils as emutils
import emupints.metrics as emumet


from sklearn.preprocessing import StandardScaler, MinMaxScaler

## Prediction time and MAE depending on NN model size and problem

In [2]:
def create_NN_emulator(log_likelihood, X, y, model_size,
                       checkpoint_path="models/nn_performance.hdf5"):
    """Train an ``emupints.NNEmulator`` and restore its best checkpointed weights.

    Parameters
    ----------
    log_likelihood
        Passed unchanged as the first argument of ``emupints.NNEmulator``.
    X, y
        Training inputs and targets, passed unchanged to ``emupints.NNEmulator``.
    model_size
        Forwarded as the ``model_size`` keyword of ``emupints.NNEmulator``.
    checkpoint_path : str, optional
        File used by the ``ModelCheckpoint`` callback to store the weights
        with the best ``val_mean_absolute_error``. Defaults to the path that
        was previously hard-coded.

    Returns
    -------
    The fitted emulator. If a checkpoint was written during training, its
    weights are loaded into ``emu._model`` before returning.
    """
    import os

    # The checkpoint callback cannot write into a missing directory, so make
    # sure the target folder exists before training starts.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # Remove any checkpoint left over from an earlier call so that the weights
    # restored below can only originate from this training run.
    if os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)

    # save best val model
    cp_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='val_mean_absolute_error',
        save_best_only=True,
        verbose=0,
        save_weights_only=True,
    )

    emu = emupints.NNEmulator(
        log_likelihood, X, y,
        model_size=model_size,
        input_scaler=MinMaxScaler(feature_range=(-1, 1)),
        output_scaler=StandardScaler(),
    )
    emu.set_parameters()
    emu.fit(verbose=0, callbacks=[cp_callback])

    # restore best
    # NOTE(review): if the monitored metric is never reported, presumably no
    # checkpoint is written; in that case the final training weights are kept.
    if os.path.exists(checkpoint_path):
        emu._model.load_weights(checkpoint_path)

    return emu

In [3]:
nn_models = ['small', 'average', 'large']

train_size = 2000
test_size = 400

problems = [
    (emupints.Problems.LogisticModel, 'Logistic'),
    (emupints.Problems.SIRModel, 'SIR'),
    (emupints.Problems.FitzhughNagumoModel, 'FitzhughNagumo'),
    (emupints.Problems.FitzhughNagumoModelDiscontinious, 'FitzhughNagumoDiscontinious'),
    (emupints.Problems.LotkaVolterraModel, 'LotkaVolterra'),
    (emupints.Problems.LotkaVolterraModelDiscontinious, 'LotkaVolterraDiscontinious'),
    (emupints.Problems.HodgkinHuxleyIKModel, 'HodgkinHuxleyIK'),
    (emupints.Problems.GoodwinOscillatorModel, 'GoodwinOscillator'),
]

results = {}

for problem, problem_name in problems:
    print(problem_name)
    # load problem
    problem = emupints.Problems.load_problem(problem)
    n_parameters = problem['n_parameters']
    log_likelihood = problem['log_likelihood']
    log_posterior = problem['log_posterior']
    bounds = problem['bounds']
    parameters = problem['parameters']

    # create data for training and testing
    # genera example using normal distribution around parametrs
    train_X = parameters + np.random.randn(train_size, n_parameters) * (0.05 * parameters)
    train_y = np.apply_along_axis(log_likelihood, 1, train_X)

    test_X = parameters + np.random.randn(test_size, n_parameters) * (0.05 * parameters)
    test_y = np.apply_along_axis(log_likelihood, 1, test_X)

    # metrics to store
    model_chain_mae = []
    model_chain_mape = []
    model_time = []

    for model_size in nn_models:
        # train emulator
        emu = create_NN_emulator(log_likelihood, train_X, train_y, model_size)

        # Perform single MCMC run to calcluate chain mape
        mcmc = pints.MCMCSampling(
            log_posterior, 
            1, # one chain
            [parameters], 
        )
        mcmc.set_max_iterations(20000)
        mcmc.set_log_to_screen(False)
        mcmc_chain = mcmc.run()[0]
    
        # accuracy via mae
        chain_mae = emumet.chain_mae(mcmc_chain, emu, log_likelihood)
        model_chain_mae.append(chain_mae)
        
        # accuracy via absolute error along a chain 
        chain_mape = emumet.chain_mape(mcmc_chain, emu, log_likelihood)
        model_chain_mape.append(chain_mape)

        # speed
        pred_time = %timeit -r 50 -o -q emu(parameters)
        pred_time = pred_time.average
        model_time.append(pred_time)
        
 
        # delete used emulators
        del emu
        
        print("    {}: {:.5f}, {:.5f}, {:.10f}".format(model_size, chain_mae, chain_mape, pred_time))
    
    # store results
    results[problem_name] = [model_chain_mae, model_chain_mape, model_time]

Logistic
    small: 0.37846, 0.00082, 0.0004236168
    average: 0.32411, 0.00070, 0.0004682717
    large: 0.80347, 0.00174, 0.0004837585
SIR
    small: 2.65331, 0.03591, 0.0005174434
    average: 1.45928, 0.02014, 0.0005032776
    large: 1.39011, 0.01926, 0.0005459108
FitzhughNagumo
    small: 5.16363, 0.02459, 0.0004996991
    average: 4.81941, 0.02701, 0.0006129419
    large: 6.24960, 0.03084, 0.0006761247
FitzhughNagumoDiscontinious
    small: 2.61550, 0.01205, 0.0006767949
    average: 4.54474, 0.02117, 0.0005324787
    large: 2.53155, 0.01158, 0.0006848132
LotkaVolterra
    small: 62.84968, 0.08794, 0.0005953986
    average: 35.98043, 0.04901, 0.0006190122
    large: 42.99322, 0.05853, 0.0005695984
LotkaVolterraDiscontinious
    small: 56.33616, 0.07832, 0.0005970111
    average: 81.46864, 0.11674, 0.0006596515
    large: 42.41698, 0.05819, 0.0007689969
HodgkinHuxleyIK
    small: 2398.79861, 0.02456, 0.0006510672
    average: 1762.80199, 0.01803, 0.0007076633
    large: 1345.35075

In [4]:
# Render the collected metrics as a fixed-width text table:
# one column per entry of nn_models, three rows (mae / cmape / time) per problem.
name_width = max(len(label) for _, label in problems)
horizontal_rule = "-" * 70

# Header: pad past the name column and the row tag, then the model sizes.
header = " " * (name_width + 8)
for size_label in nn_models:
    header += size_label.center(10)
print(header)
print(horizontal_rule)

blank_lead = "".rjust(name_width)
for _, problem_name in problems:
    stored = results[problem_name]
    table_rows = (
        # MAE is shown as stored (no conversion)
        (problem_name.center(name_width), "|mae  |", np.array(stored[0])),
        # MAPE is converted to percentages
        (blank_lead, "|cmape|", np.array(stored[1]) * 100),
        # prediction time is converted to micro-seconds
        (blank_lead, "|time |", np.array(stored[2]) * 1000000),
    )
    for lead, tag, values in table_rows:
        cells = "".join("{:.2f}".format(value).center(10) for value in values)
        print(lead, tag + cells)

    print(horizontal_rule)

                                     small    average    large   
----------------------------------------------------------------------
          Logistic          |mae  |   0.38      0.32      0.80   
                            |cmape|   0.08      0.07      0.17   
                            |time |  423.62    468.27    483.76  
----------------------------------------------------------------------
            SIR             |mae  |   2.65      1.46      1.39   
                            |cmape|   3.59      2.01      1.93   
                            |time |  517.44    503.28    545.91  
----------------------------------------------------------------------
       FitzhughNagumo       |mae  |   5.16      4.82      6.25   
                            |cmape|   2.46      2.70      3.08   
                            |time |  499.70    612.94    676.12  
----------------------------------------------------------------------
FitzhughNagumoDiscontinious |mae  |   2.62      4.54    

In [18]:
# Emit one LaTeX-style table row ("& mae & mape[%] & time[us]") per
# (problem, model size) pair for the selected problems.
latex_row = "& {:.2f} & {:.2f} & {:.2f}"
problem_names = ['Logistic', 'SIR', 'FitzhughNagumo', 'LotkaVolterra', 'HodgkinHuxleyIK', 'GoodwinOscillator']
for problem_name in problem_names:
    for idx, model_size in enumerate(nn_models):
        print(problem_name, model_size, end=": ")
        maes, mapes, timings = (results[problem_name][k] for k in range(3))
        print(latex_row.format(
            maes[idx],
            mapes[idx] * 100,        # fraction -> percent
            timings[idx] * 1000000,  # seconds -> micro-seconds
        ))

Logistic small: & 0.38 & 0.08 & 423.62
Logistic average: & 0.32 & 0.07 & 468.27
Logistic large: & 0.80 & 0.17 & 483.76
SIR small: & 2.65 & 3.59 & 517.44
SIR average: & 1.46 & 2.01 & 503.28
SIR large: & 1.39 & 1.93 & 545.91
FitzhughNagumo small: & 5.16 & 2.46 & 499.70
FitzhughNagumo average: & 4.82 & 2.70 & 612.94
FitzhughNagumo large: & 6.25 & 3.08 & 676.12
LotkaVolterra small: & 62.85 & 8.79 & 595.40
LotkaVolterra average: & 35.98 & 4.90 & 619.01
LotkaVolterra large: & 42.99 & 5.85 & 569.60
HodgkinHuxleyIK small: & 2398.80 & 2.46 & 651.07
HodgkinHuxleyIK average: & 1762.80 & 1.80 & 707.66
HodgkinHuxleyIK large: & 1345.35 & 1.33 & 692.13
GoodwinOscillator small: & 34.52 & 1.41 & 632.74
GoodwinOscillator average: & 51.20 & 2.11 & 684.57
GoodwinOscillator large: & 78.79 & 3.27 & 649.99


## MAE vs. number of training points

In [None]:
# Use the Hodgkin-Huxley IK model
model = emupints.Problems.HodgkinHuxleyIKModel
problem = emupints.Problems.load_problem(model)

# take required variables for visualisation
n_parameters = problem['n_parameters']
log_likelihood = problem['log_likelihood']
parameters = problem['parameters']
bounds = problem['bounds']


# Create data to test: training-set sizes 100, 200, ..., 4000
train_sizes = [100 * i for i in range(1, 41, 1)]
test_size = 1000

# create data for training and testing
# Points are sampled from the parameter bounds; the largest training set is
# generated once and every smaller size uses a prefix of it.
# (alternative: normal distribution around the parameters)
# train_X = parameters + np.random.randn(train_sizes[-1], n_parameters) * (0.05 * parameters)
train_X = bounds.sample(train_sizes[-1])
train_y = np.apply_along_axis(log_likelihood, 1, train_X)

# test_X = parameters + np.random.randn(test_size, n_parameters) * (0.05 * parameters)
test_X = bounds.sample(test_size)
test_y = np.apply_along_axis(log_likelihood, 1, test_X)

# Store results for each model size in this dict:
# model_test_mae[model_size] = list of test MAEs, ordered like train_sizes
model_test_mae = {}

for model_size in nn_models:
    print("Model size: " + model_size)

    # store result for each training-set size
    emu_mae = []
    for size in train_sizes:
        X = train_X[:size]
        y = train_y[:size]

        emu = create_NN_emulator(log_likelihood, X, y, model_size)

        pred_y = np.apply_along_axis(emu, 1, test_X)
        mae = emumet.mae(pred_y, test_y)
        # record the score; without this the stored lists stay empty
        emu_mae.append(mae)

        del emu

        print("    Size {}: {:.5f}".format(size, mae))

    model_test_mae[model_size] = emu_mae

Model size: small
    Size 100: 14873.21742
    Size 200: 15869.14623
    Size 300: 16203.82843
    Size 400: 15903.91234
    Size 500: 16397.33722
    Size 600: 16020.26178
    Size 700: 16237.64469
    Size 800: 15586.95076
    Size 900: 16205.82755
    Size 1000: 15931.97519
    Size 1100: 15902.31511
    Size 1200: 16102.08523
    Size 1300: 16077.25310
    Size 1400: 15724.83087
    Size 1500: 15988.29022
    Size 1600: 16016.54083
    Size 1700: 15884.37477
    Size 1800: 16186.09995
    Size 1900: 15972.02634
    Size 2000: 15890.03251
    Size 2100: 15887.69518
    Size 2200: 15803.56122
    Size 2300: 16030.44963


In [10]:
# Sanity check: dimensions of the first 100 training samples
np.shape(train_X[:100])

(100, 3)