# Section 3

Model selection among the lognormal, gamma and Weibull models when the data are lognormal.

In [None]:
# Run the shared preamble inside this notebook's namespace (-i). The names used
# below without a local import (np, pd, pm, abcre, Generator, PCG64, tic) are
# presumably defined there -- TODO confirm against preamble.py.
%run -i ./preamble.py
# Render inline figures at high ("retina") resolution.
%config InlineBackend.figure_format = 'retina'
# Load the nb_black extension (formats code cells with black as they are run).
%load_ext nb_black

In [None]:
import sys

# Record the interpreter and library versions this notebook was run with.
for label, version in (
    ("Python", sys.version),
    ("Numpy", np.__version__),
    ("PyMC3", pm.__version__),
):
    print(f"{label} version:", version)

# tic() is not defined in this notebook; presumably it starts a wall-clock
# timer provided by the preamble -- TODO confirm.
tic()

In [None]:
# Set FAST = True for a quick smoke run; False gives the full-size settings.
FAST = False

# SMC calibration parameters.
#   numItersData, popSizeModels -> passed positionally to abcre.smc
#   popSize                     -> passed to pm.sample_smc
#   epsMin, timeout             -> forwarded to abcre.smc through smcArgs
if FAST:
    numItersData, popSize, popSizeModels = 3, 500, 1000
    epsMin, timeout = 1, 30
else:
    numItersData, popSize, popSizeModels = 25, 1000, 1000
    epsMin, timeout = 0, 1000

# Processor information, bundled with the other keyword arguments for abcre.smc.
numProcs = 4
smcArgs = dict(numProcs=numProcs, timeout=timeout, epsMin=epsMin, verbose=True)

Generate claim data from all three models, although only the lognormal data are used in this section.

In [None]:
from math import gamma

# Fixed seed so the simulated claims are reproducible.
rg = Generator(PCG64(123))

# T claim sizes are simulated per model; the fits below use the first `ss`
# of them for each ss in sample_sizes.
T = 200
sample_sizes = [25, 50, 75, 100, 150, 200]

# Severity parameters for each data-generating model. The dict order fixes the
# order in which draws are taken from `rg`, so it must not be changed.
# NOTE(review): the gamma/weibull values look chosen relative to exp(1/2), the
# lognormal(0, 1) mean -- confirm against abcre.simulate_claim_sizes'
# parametrisation.
severity_params = {
    "lognormal": (0, 1),
    "gamma": (np.exp(1 / 2), 1),
    "weibull": (1 / 2, np.exp(1 / 2) / gamma(3 / 2)),
}

claim_data = pd.DataFrame(
    {
        sev_name: abcre.simulate_claim_sizes(rg, T, sev_name, sev_params)
        for sev_name, sev_params in severity_params.items()
    }
)

## ABC model probabilities

In [None]:
# Data-generating models considered, and the candidate models fitted to them.
models_data = ["lognormal"]
models_fitted = ["gamma", "lognormal", "weibull"]

# Independent uniform priors over the two parameters of each severity model.
priorG = abcre.IndependentUniformPrior([(0, 5), (0, 100)], ("r", "m"))
priorL = abcre.IndependentUniformPrior([(-20, 20), (0, 5)], ("μ", "σ"))
priorW = abcre.IndependentUniformPrior([(1e-1, 5), (0, 100)], ("k", "δ"))

# One ABC model per candidate severity distribution.
modelG = abcre.Model(sev="gamma", prior=priorG)
modelL = abcre.Model(sev="lognormal", prior=priorL)
modelW = abcre.Model(sev="weibull", prior=priorW)

# Keep this in the same order as `models_fitted`: the ABC loop labels model
# index k with models_fitted[k].
models = [modelG, modelL, modelW]

In [None]:
model_proba_abc = pd.DataFrame(
    {"model_data": [], "model_fit": [], "ss": [], "model_probability_ABC": []}
)

# model_data = "lognormal"
for model_data in models_data:
    sevs = claim_data[model_data]
    for ss in sample_sizes:
        uData = sevs[:ss]
        %time fit = abcre.smc(numItersData, popSizeModels, uData, models, **smcArgs)
        for k in range(len(models)):
            weights = fit.weights[fit.models == k]
            res_mp = pd.DataFrame(
                {
                    "model_data": pd.Series(model_data),
                    "model_fit": pd.Series([models_fitted[k]]),
                    "ss": np.array([ss]),
                    "model_probability_ABC": pd.Series(
                        np.sum(fit.weights[fit.models == k])
                    ),
                }
            )
            model_proba_abc = pd.concat([model_proba_abc, res_mp])
            model_proba_abc


## SMC model probabilities

In [None]:
Bayesian_Summary = pd.DataFrame(
    {
        "model_data": [],
        "model_fit": [],
        "ss": [],
        "param_1": [],
        "param_2": [],
        "marginal_log_likelihood": [],
    }
)

for model_data in models_data:
    sevs = claim_data[model_data]
    for model_fitted in models_fitted:

        for ss in sample_sizes:
            uData = sevs[:ss]
            print(
                f"Fitting a {model_fitted} model to {len(uData)} data points generated from a {model_data} model"
            )

            if model_fitted == "gamma":
                with pm.Model() as model_sev:
                    r = pm.Uniform("param_1", lower=0, upper=5)
                    m = pm.Uniform("param_2", lower=0, upper=100)
                    U = pm.Gamma("U", alpha=r, beta=1 / m, observed=uData)
                    %time trace = pm.sample_smc(popSize, random_seed=1)

            elif model_fitted == "lognormal":
                with pm.Model() as model_sev:
                    μ = pm.Uniform("param_1", lower=-20, upper=20)
                    σ = pm.Uniform("param_2", lower=0, upper=5)
                    U = pm.Lognormal("U", mu=μ, sigma=σ, observed=uData)
                    %time trace = pm.sample_smc(popSize, random_seed=1)

            elif model_fitted == "weibull":
                with pm.Model() as model_sev:
                    k = pm.Uniform("param_1", lower=1e-1, upper=5)
                    δ = pm.Uniform("param_2", lower=0, upper=100)
                    U = pm.Weibull("U", alpha=k, beta=δ, observed=uData)
                    %time trace = pm.sample_smc(popSize, random_seed=1)

            # pm.plot_posterior(trace)

            ll = model_sev.marginal_log_likelihood

            res = pd.DataFrame(
                {
                    "model_data": [model_data],
                    "model_fit": [model_fitted],
                    "ss": [ss],
                    "param_1": [trace["param_1"].mean()],
                    "param_2": [trace["param_2"].mean()],
                    "marginal_log_likelihood": [ll],
                }
            )
            Bayesian_Summary = pd.concat([Bayesian_Summary, res])

In [None]:
# Turn marginal log-likelihoods into posterior model probabilities.
#
# Within each (model_data, ss) group:
#   BF                = exp(log-likelihood - max log-likelihood in the group)
#   model_probability = BF / sum of BF over the group
# Subtracting the group maximum before exponentiating keeps np.exp from
# underflowing; the constant cancels in the normalisation. Normalising the
# marginal likelihoods this way corresponds to equal prior model weights.
#
# The original `rename(...)` call discarded its result, so the code relied on
# the automatic `_x` / `_y` merge suffixes. The helper columns now carry
# explicit names (`max_marginal_log_likelihood`, `sum_BF`); the columns used
# downstream (model_data, model_fit, ss, model_probability) are unchanged.
group_keys = ["model_data", "ss"]

max_marginal_log_likelihood = (
    Bayesian_Summary[group_keys + ["marginal_log_likelihood"]]
    .groupby(group_keys)
    .max()
    .reset_index()
    .rename(columns={"marginal_log_likelihood": "max_marginal_log_likelihood"})
)

Bayesian_Summary_1 = pd.merge(
    Bayesian_Summary, max_marginal_log_likelihood, how="left", on=group_keys
)
Bayesian_Summary_1["BF"] = np.exp(
    Bayesian_Summary_1.marginal_log_likelihood
    - Bayesian_Summary_1.max_marginal_log_likelihood
)

sum_BF = (
    Bayesian_Summary_1[group_keys + ["BF"]]
    .groupby(group_keys)
    .sum()
    .reset_index()
    .rename(columns={"BF": "sum_BF"})
)

Bayesian_Summary_2 = pd.merge(Bayesian_Summary_1, sum_BF, how="left", on=group_keys)
Bayesian_Summary_2["model_probability"] = (
    Bayesian_Summary_2.BF / Bayesian_Summary_2.sum_BF
)
Bayesian_Summary_2

In [None]:
# Put the SMC and ABC model probabilities side by side, one row per
# (model_data, model_fit, ss), rounded to two decimals for reporting.
merge_keys = ["model_data", "model_fit", "ss"]
smc_probabilities = Bayesian_Summary_2[merge_keys + ["model_probability"]]

model_proba = smc_probabilities.merge(
    model_proba_abc, how="left", on=merge_keys
).round(2)
model_proba

In [None]:
# Export the comparison as a LaTeX table: one row per sample size, one column
# per (probability type, fitted model). print() is intentional here so the raw
# LaTeX source can be copied out of the cell output.
# The aggregation is given as the string "mean" rather than np.mean: the result
# is the same, and recent pandas versions warn when a NumPy callable is passed.
model_proba_table = pd.pivot_table(
    model_proba,
    values=["model_probability", "model_probability_ABC"],
    index=["ss"],
    columns=["model_fit"],
    aggfunc={"model_probability": "mean", "model_probability_ABC": "mean"},
)
print(model_proba_table.to_latex())