In [1]:
import boxey as bx
import numpy as np
from numpy.typing import ArrayLike
from boxey import Process, Input, Model
import matplotlib.pyplot as plt
import yaml
import random
plt.rcParams.update({'font.size': 16})

In [2]:
from core import Problem, UniformBounded
from sampler import MCMCSampler

In [3]:
# List the names of the compartments
compartments = ['soil_pfsa', 'gw_pfsa']

def get_model(scenario, training_volume, fire_volume, R_soil_pfsa, R_gw_pfsa):
    
    with open(f'data_and_constraints/{scenario}.yaml', 'r') as stream:
        data = yaml.safe_load(stream)

    k_soil = data['k_soil']
    k_gw = data['k_gw']
    
    c_pfsa = data['c_pfsa']

    # List the processes to represent.
    # Process needs: (name, timescale, compartment of origin, destination compartment)
    processes = [
                Process('soil2gw_pfsa', R_soil_pfsa/k_soil, 'soil_pfsa', 'gw_pfsa'),
                Process('gw_pfsa', R_gw_pfsa/k_gw, 'gw_pfsa', None),
            ]

    # List the inputs to use. 
    # in the 0 inputs bookending these make it go from e.g. 0 at exactly 1970 to c_prec*V a tiny time later
    inputs = [
            Input('AFFF_training_pfsa', [0., c_pfsa * training_volume, c_pfsa * training_volume, 0.], [1970.0, 1970.001, 1985.999, 1986.0], 'soil_pfsa'),
            
            Input('AFFF_fire_pfsa', [0., c_pfsa * fire_volume, c_pfsa * fire_volume, 0.], [1997.0, 1997.001, 1997.999, 1998.0], 'soil_pfsa'),]

    model = bx.create_model(compartments, processes)
    model = bx.add_inputs(model, inputs)

    return model

In [4]:
def AFFF_use_history(scenario, yearly_training_volume, fire_volume, R_soil_pfsa, R_gw_pfsa,
                    time_points=None):
    """To make iterating easier for optimizing."""
    model = get_model(scenario, training_volume = yearly_training_volume, fire_volume = fire_volume,
                     
                      R_soil_pfsa = R_soil_pfsa, 
                     
                      R_gw_pfsa = R_gw_pfsa)
    if time_points is None:
        tstart, tend = 1970, 2100
        time_points = np.arange(tstart, tend, 1)
        time_points_in = sorted(time_points)
    else:
        time_points_in = sorted(np.unique([1970]+list(time_points)))
    reservoirs, times = model.run(time_points_in, initial_conditions=None)

    gw_pfsa_reservoir = np.array([reservoirs[times.index(t),1] for t in time_points[:-1]])
    

    soil_pfsa_reservoir = reservoirs[-1, 0]
    
    return (gw_pfsa_reservoir, soil_pfsa_reservoir)

def AFFF_use_history_log(scenario, yearly_training_volume, fire_volume,  R_soil_pfsa,  R_gw_pfsa,
                    time_points=None):
    """To make iterating easier for optimizing."""
    return AFFF_use_history(scenario, 10**yearly_training_volume, 10**fire_volume,  10**R_soil_pfsa,
                            10**R_gw_pfsa,
                            time_points)

In [5]:
# class MyPrior(UniformBounded):
#     pass # just a bounded prior for now. Change as you wish

class MyPrior():
    
    def __init__(self, prior_type, param_1, param_2):
        self.prior_type = prior_type
        self.param_1 = param_1
        self.param_2 = param_2
    
    def uniform(self, proposal, idx):
        if self.param_1[idx] <= proposal <= self.param_2[idx]:
            return(0.)
        else:
            return(-1E6)
    
    def bounded_normal(self, proposal, idx):
        if proposal >= 0.:
            return(-0.5 * ((10**proposal - 10**self.param_1[idx]) / 10**self.param_2[idx])**2)
        else:
            return(-1E6)
        
    def __call__(self, proposal : ArrayLike) -> float:
        
        prior = [self.uniform(val, idx) if self.prior_type[idx] == 'U' else self.bounded_normal(val, idx) for idx, val in enumerate(proposal)]
        return(np.sum(prior))

class MyLikelihood:
    def __init__(self, scenario):
        self.scenario = scenario
        with open(f'data_and_constraints/{scenario}.yaml', 'r') as stream:
            data = yaml.safe_load(stream)

        gw_year = np.array(data['gw_year'])
        self.year = np.append(gw_year, data['soil_year'])
        self.gw_pfsa_reservoir = data['gw_pfsa_reservoir']
        self.log_soil_pfsa_reservoir = np.log10(data['soil_pfsa_reservoir'])
        
    def __call__(self, params):
        modeled_gw_pfsa_reservoir, modeled_soil_pfsa_reservoir = AFFF_use_history_log(
            self.scenario,
            params[0], params[1], params[2], params[3], time_points=self.year)
        
        #minimize the sum of squared errors for groundwater pfsa concentrations        
        likelihood = -np.sum((np.log10(self.gw_pfsa_reservoir) - np.log10(modeled_gw_pfsa_reservoir))**2)
        
        #penalize likelihood if soil pfsa reservoir proposal is more or less than one order of magnitude
        #from measurement
        if self.log_soil_pfsa_reservoir - 0.1 < np.log10(modeled_soil_pfsa_reservoir) < self.log_soil_pfsa_reservoir + 1:
            likelihood += 0
        else:
            likelihood += -1E6
         
        return(likelihood)

class MyProblem(Problem):

    def __init__(self, n_dimensions, lower_bounds, upper_bounds, scenario):
        self.n_dimensions = n_dimensions
        self.lower_bounds = np.array(lower_bounds)
        self.upper_bounds = np.array(upper_bounds)
        self.prior = MyPrior(prior_type = ['U', 'U', 'U', 'U'],
            param_1=self.lower_bounds, param_2=self.upper_bounds)
        self.likelihood = MyLikelihood(scenario)

    def get_bounds(self):
        return self.lower_bounds, self.upper_bounds

In [6]:
def fta_pfc(scenario):
    with open(f'data_and_constraints/{scenario}.yaml', 'r') as stream:
        data = yaml.safe_load(stream)
        
    yearly_training_volume = np.log10(np.array(data['V_training']))
    fire_volume = np.log10(np.array(data['V_fire']))
    R_soil_pfsa = np.log10(np.array(data['R_soil_pfsa']))
    R_gw_pfsa = np.log10(np.array(data['R_gw_pfsa']))
    
    problem = MyProblem(4,
        [yearly_training_volume[0], fire_volume[0], R_soil_pfsa[0], R_gw_pfsa[0]],
        [yearly_training_volume[1], fire_volume[1], R_soil_pfsa[1], R_gw_pfsa[1]],
        scenario = scenario)

    return(problem)

In [7]:
sampler = MCMCSampler(max_steps=100000, Nwalkers=4, Nincrement=500, target_effective_steps=2500)

In [8]:
C7_pfca = fta_pfc('C7_pfca')
C7_pfca_posterior = sampler.sample(problem = C7_pfca, alpha = 0.95)
C7_pfca_posterior_samples = 10**C7_pfca_posterior.samples
np.savetxt('posterior_samples/C7_pfca_posterior.csv', C7_pfca_posterior_samples, delimiter = ',')

  if self.log_soil_pfsa_reservoir - 0.1 < np.log10(modeled_soil_pfsa_reservoir) < self.log_soil_pfsa_reservoir + 1:


acceptance rate is 0.63 when alpha is 0.95
Sampling posterior in 500-iteration increments.
After 500 iterations, autocorr time: unavailable
After 1000 iterations, autocorr time: unavailable
After 1500 iterations, autocorr time: unavailable
After 2000 iterations, autocorr time: unavailable
After 2500 iterations, autocorr time: unavailable
After 3000 iterations, autocorr time: unavailable
After 3500 iterations, autocorr time: unavailable
After 4000 iterations, autocorr time: 76.93601783322691
After 4500 iterations, effective number of samples:                    876
After 5000 iterations, effective number of samples:                    1050
After 5500 iterations, effective number of samples:                    1063
After 6000 iterations, effective number of samples:                    1048
After 6500 iterations, effective number of samples:                    1207
After 7000 iterations, effective number of samples:                    1306
After 7500 iterations, effective number of sample