In [2]:
from functools import lru_cache

import matplotlib.pyplot as plt
import pandas
import ticktack
from jax.numpy import arange, sin, pi, array, mean, greater, less, equal, where
from scipy.optimize import minimize

The basic question is: what is the probability of detecting consecutive events, given the distribution of the data? The first step is therefore to determine that distribution. This will be done by resampling the points after the event has been removed.

In [3]:
def sine(t, p):
    """
    Sinusoidal production rate oscillating about a baseline of 1.88 with a period of 11 (in the units of t).

    p[0] is the amplitude as a fraction of the 1.88 baseline and p[1] is the phase offset in radians.
    """
    scaled_amplitude = p[0] * 1.88
    angle = 2 * pi / 11 * t + p[1]
    return 1.88 + scaled_amplitude * sin(angle)

def rect(t, p):
    """
    Generates a rectangular miyake event.

    p holds three values: p[0] is the start time, p[1] is the end time and p[2] is the height of the production function.
    Returns p[2] where start < t < end and 0.0 everywhere else (including when end <= start).
    """
    # The height is the third entry, index 2; p has no index 3.
    # A true logical AND is used so a reversed interval gives no event at all.
    inside = greater(t, p[0]) & less(t, p[1])
    return where(inside, p[2], 0.0)

def prod(t, *args):
    """
    Total production at time t: the sinusoidal term plus the rectangular event.

    args must contain exactly five values, in order: amplitude, phase, start, end, height.
    """
    amplitude, phase, start, end, height = args
    background = sine(t, array([amplitude, phase]))
    event = rect(t, array([start, end, height]))
    return background + event

In [15]:
# Sampling resolution: 48 samples per year, i.e. 4 per month.
annual_samples = 48
# Times at which to collect samples (unit steps from -360.0 up to, not including, 790.0).
sample_times = arange(-360.0, 790.0)
# Growth-season mask with 12 entries: 3 off, 6 on, 3 off.
# Presumably one entry per calendar month - TODO confirm against ticktack's bin_data.
growth_bools = array([0.0] * 3 + [1.0] * 6 + [0.0] * 3)
# Initial parameters, in the order prod() unpacks them.
params = (
    1.0,    # amplitude
    1.25,   # phase
    774,    # start
    775,    # end
    20.0,   # height
)

In [5]:
# Load ticktack's presaved "Guttler14" carbon box model, with production rates
# given in atoms/cm^2/s, and compile it; `cbm` is the model run by log_likelihood.
cbm = ticktack.load_presaved_model("Guttler14", production_rate_units="atoms/cm^2/s")
cbm.compile()

In [6]:
@lru_cache(maxsize=None)
def _load_miyake(path="Miyake12.csv"):
    """
    Reads the Miyake 2012 measurements once and caches the result, so the sampler does not re-read the CSV on every likelihood evaluation.

    Returns a tuple (d14c, sig_d14c, offset): the measured values and their uncertainties as JAX arrays, and the mean of the first four measurements.
    """
    miyake = pandas.read_csv(path, sep=" ")
    d14c = array(miyake["d14c"].to_numpy())
    sig_d14c = array(miyake["sig_d14c"].to_numpy())
    return d14c, sig_d14c, mean(d14c[:4])


def log_likelihood(params):
    """
    Log likelihood (as -0.5 * chi-squared) of the Miyake 2012 data given the production parameters.

    params is passed unchanged to prod() through the carbon box model, so it must hold
    amplitude, phase, start, end and height, in that order.
    """
    DC14, SDC14, offset = _load_miyake()

    model, steady_state = cbm.run(sample_times, annual_samples,     # Running the carbon box model
        production=prod, steady_state_production=1.88, args=params)

    troposphere = cbm.bin_data( # Bins the data into annual values
        model[:, 2],    # Column 2 of the model output, used here as the troposphere - TODO confirm box index
        annual_samples, # Number of samples per year
        sample_times,   # Times to return the binned values for
        growth_bools    # Boolean mask of growth seasons
    )

    # Keeps 28 binned values near the end of the run, dropping the final one.
    # Presumably these line up with the rows of Miyake12.csv - TODO confirm.
    troposphere = troposphere[-29:-1]
    troposphere = 1000 * (troposphere - steady_state[2]) / steady_state[2]  # Normalising the data
    troposphere = troposphere + offset  # Offsetting the model by the mean of the first four measurements

    # Reduce with the array's own sum; the builtin sum() would add element by element.
    residuals = (DC14 - troposphere) / SDC14
    return -0.5 * (residuals ** 2).sum()   # Log likelihood as chi-squared

In [7]:
import emcee
from numpy import random

In [20]:
initial = params
ndim = len(initial)
nwalkers = 2 * ndim     # two walkers per free parameter
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_likelihood)

print("Running burn-in...")
# Scatter the walkers in a tiny ball (width 1e-5) around the initial guess.
p0 = initial + 1e-5 * random.rand(nwalkers, ndim)
burn_in = sampler.run_mcmc(p0, 1000, progress=True)
# Read the final positions and log-probabilities from the returned state.
# Unpacking into `_` would shadow IPython's last-output variable.
p0, lp = burn_in.coords, burn_in.log_prob

print("Running production...")
sampler.reset()     # discard the burn-in samples before the production run
# Restarting from the state reuses the log-probabilities already computed.
sampler.run_mcmc(burn_in, 500, progress=True)
# get_chain(flat=True) replaces the deprecated `flatchain` property.
sampler.get_chain(flat=True)

Running burn-in...


100%|██████████| 100/100 [00:15<00:00,  6.40it/s]


So I guess that I need to move a whole lot of this into the loss function, as I did in the previous implementation. This is OK: I'm really just rehashing what I have already done for the profiling, but this time I am relying on ticktack more, which I find frustrating to work with.