# Section 2.5

Fit of a geometric-exponential model

In [None]:
# Run the shared preamble inside this notebook's namespace (-i), so the names
# it defines are visible to the cells below.
# NOTE(review): np, pm, plt, sns, abcre, default_rng and tic are never imported
# in this notebook -- presumably they all come from preamble.py; confirm.
%run -i ./preamble.py
# Render inline figures in the 'retina' format.
%config InlineBackend.figure_format = 'retina'
# Load the nb_black notebook extension.
%load_ext nb_black

In [None]:
import sys

# Record the interpreter and library versions used for this run.
print("Python version:", sys.version)
print("Numpy version:", np.__version__)
print("PyMC3 version:", pm.__version__)

# NOTE(review): tic() is not defined in this notebook -- presumably a timer
# helper from preamble.py that starts timing the whole run; confirm.
tic()

In [None]:
# Switch between the full-size run (False) and a reduced, quicker one (True).
FAST = False

# Each right-hand side is (numIters, numItersData, popSize, timeout).
if FAST:
    numIters, numItersData, popSize, timeout = 4, 8, 100, 30
else:
    numIters, numItersData, popSize, timeout = 10, 15, 1000, 1000

# Keyword arguments forwarded to the abcre.smc calls via **smcArgs.
numProcs = 40
smcArgs = dict(numProcs=numProcs, timeout=timeout, verbose=True)

In [None]:
# Create a pseudorandom number generator with a fixed seed
# NOTE(review): default_rng is presumably numpy.random.default_rng, in which
# case the fixed seed makes the simulated data repeatable -- confirm.
rg = default_rng(1234)

# Parameters of the true model
freq = "geometric"
sev = "exponential"
# True parameter values, in the same order as params = ("p", "δ"); they are
# drawn as reference lines on the posterior plots.
thetaTrue = (0.8, 5)

# Setting the time horizon
# (T also enters the MCMC log-likelihood further down, where T - t0 is paired
# with the count of zero observations t0.)
T = 100

# Simulating the claim data
freqs, sevs = abcre.simulate_claim_data(rg, T, freq, sev, thetaTrue)

# Simulating the observed data by applying the "sum" transformation psi to the
# simulated claims.
# NOTE(review): presumably this gives one aggregate claim amount per period --
# confirm against abcre.compute_psi.
psi = abcre.Psi("sum")
xData = abcre.compute_psi(freqs, sevs, psi)

In [None]:
# Quick look at the simulated data: how many observations are exactly zero,
# and the average of the simulated severities.
print("Number of zeros:", np.sum(xData == 0))
print("Mean summands:", np.mean(sevs))

# ABC with Wasserstein posterior samples

In [None]:
# Parameter names, in the same order as thetaTrue.
params = ("p", "δ")
# Independent uniform prior on both parameters.
# NOTE(review): the pairs are presumably (lower, upper) bounds, i.e. p on (0, 1)
# and δ on (0, 100), matching the Uniform priors of the MCMC model -- confirm.
prior = abcre.IndependentUniformPrior([(0, 1), (0, 100)], params)
# Bundle the frequency/severity choices, psi and the prior into the model
# object that is handed to abcre.smc.
model = abcre.Model(freq, sev, psi, prior)

In [None]:
# ABC-SMC fit without custom summary statistics or distance (abcre defaults);
# the %time line magic reports how long the statement takes.
%time fit = abcre.smc(numIters, popSize, xData, model, **smcArgs)

# ABC with summary statistics

In [None]:
def sumstats_lp_adaptive(x):
    """Summarise a sample by its number of zeros and its total.

    Returns a 2-tuple ``(count of entries equal to 0, sum of all entries)``,
    which is the pair that ``lp_dist_adaptive`` unpacks.
    """
    zeroCount = np.sum(x == 0)
    total = np.sum(x)
    return zeroCount, total


def lp_dist_adaptive(ssData, ssFake, t=0, p=1.0):
    """Distance between two (zero count, total) summary-statistic pairs.

    Parameters
    ----------
    ssData, ssFake : pair
        ``(number of zeros, sum)`` for the observed and the simulated sample,
        as produced by ``sumstats_lp_adaptive``.
    t : int, optional
        Index of the current SMC iteration. It controls how strongly a
        mismatch in the zero counts is punished.
    p : float, optional
        Exponent applied to the absolute difference of the two sums.

    Returns
    -------
    float
        ``exp(t * |zeros mismatch|) * |sumData - sumFake| ** p`` while
        ``t < 5``; from ``t >= 5`` on, ``inf`` whenever the zero counts differ.
    """
    numZerosData, sumData = ssData
    numZerosFake, sumFake = ssFake
    zerosDiff = np.abs(numZerosData - numZerosFake)

    # Enforce the zero-matching after a few iterations of SMC
    if t >= 5 and zerosDiff != 0:
        return np.inf

    # For early iterations, just put a penalty on zeros not matching
    # (the factor is 1 when t == 0 or when the zero counts agree).
    zerosDiffPenalty = np.exp(t * zerosDiff)

    # Take the absolute value *before* raising to the power p: otherwise an
    # odd p gives a negative "distance" and a fractional p gives NaN whenever
    # the simulated sum exceeds the observed one.
    lpDist = np.abs(sumData - sumFake) ** p

    return zerosDiffPenalty * lpDist

In [None]:
%%time

# Same ABC-SMC fit as before, but comparing samples through the
# (zero count, sum) summary statistics and the adaptive distance defined above.
# (%%time has to remain the first line of the cell.)
fitSS = abcre.smc(
    numIters,
    popSize,
    xData,
    model,
    sumstats=sumstats_lp_adaptive,
    distance=lp_dist_adaptive,
    **smcArgs
)

# MCMC posterior

In [None]:
basic_model = pm.Model()

with basic_model:
    # Uniform priors on the two unknown parameters
    p = pm.Uniform("p", 0, 1)
    δ = pm.Uniform("δ", 0, 100)

    def logp(t0, sumData):
        """Log-likelihood of the compound-sum observations.

        t0 is the number of observations equal to zero and sumData is the
        total of all observations; both are supplied through ``observed``.
        """
        allObsTerm = T * np.log(1 - p)
        nonZeroTerm = (T - t0) * np.log(p / δ)
        expTerm = (1 - p) / δ * sumData
        return allObsTerm + nonZeroTerm - expTerm

    # The data enter the likelihood only through the zero count and the sum.
    exp_surv = pm.DensityDist(
        "X",
        logp,
        observed=dict(t0=np.sum(xData == 0), sumData=np.sum(xData)),
    )

In [None]:
with basic_model:
    # Single chain: 500 tuning steps, then 1000 draws, with a fixed seed.
    # The %time line magic reports how long sampling takes.
    %time trace = pm.sample(1000, tune=500, chains = 1, random_seed = 1)

In [None]:
# One panel per parameter, overlaying the three posterior estimates.
fig, axs = plt.subplots(1, len(params), tight_layout=True)
nMCMC = len(trace["p"])

for l, param in enumerate(params):
    # NOTE(review): pLims is computed but never used below -- presumably it was
    # meant as x-axis limits taken from the prior's support; confirm or remove.
    pLims = [prior.marginals[l].isf(1), prior.marginals[l].isf(0)]

    # Curves are drawn in this order: default ABC fit, summary-statistic ABC
    # fit, then the MCMC draws with equal weight 1 / nMCMC each.
    abcre.weighted_distplot(fit.samples[:, l], fit.weights, ax=axs[l], hist=False)
    abcre.weighted_distplot(fitSS.samples[:, l], fitSS.weights, ax=axs[l], hist=False)
    abcre.weighted_distplot(trace[param], np.ones(nMCMC) / nMCMC, ax=axs[l], hist=False)
    # Empty, frameless legend.
    axs[l].legend([], frameon=False)
    # Parameter name as a math-mode title.
    axs[l].set_title("$" + param + "$")
    axs[l].set_yticks([])
    # Dashed vertical line at the true parameter value.
    axs[l].axvline(thetaTrue[l], color="k", linestyle="dashed", alpha=0.8)

sns.despine(left=True)
# save_cropped("../Figures/geometric-exponential-posterior.pdf")