# Section 3

Model selection from lognormal, gamma and weibull when the data is lognormal

In [1]:
%run -i ./preamble.py
%config InlineBackend.figure_format = 'retina'
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import sys

print("Python version:", sys.version)
print("Numpy version:", np.__version__)
print("PyMC3 version:", pm.__version__)

tic()

Python version: 3.7.7 (default, May  6 2020, 11:45:54) [MSC v.1916 64 bit (AMD64)]
Numpy version: 1.18.5
PyMC3 version: 3.9.1


<IPython.core.display.Javascript object>

In [3]:
FAST = False

# Processor information and SMC calibration parameters
if not FAST:
    numItersData = 25
    popSize = 1000
    popSizeModels = 1000
    epsMin = 0
    timeout = 1000
else:
    numItersData = 3
    popSize = 500
    popSizeModels = 1000
    epsMin = 1
    timeout = 30

numProcs = 4
smcArgs = {"numProcs": numProcs, "timeout": timeout, "epsMin": epsMin, "verbose": True}

<IPython.core.display.Javascript object>

Generation of claim data for all three models, though we only consider the lognormal data.

In [4]:
from math import gamma

rg = Generator(PCG64(123))

T = 200
sample_sizes = [25, 50, 75, 100, 150, 200]

claim_data = pd.DataFrame(
    {
        "lognormal": abcre.simulate_claim_sizes(rg, T, "lognormal", (0, 1)),
        "gamma": abcre.simulate_claim_sizes(rg, T, "gamma", (np.exp(1 / 2), 1)),
        "weibull": abcre.simulate_claim_sizes(
            rg, T, "weibull", (1 / 2, np.exp(1 / 2) / gamma(3 / 2))
        ),
    }
)

<IPython.core.display.Javascript object>

## ABC model probabilities

In [5]:
models_data = ["lognormal"]
models_fitted = ["gamma", "lognormal", "weibull"]

priorG = abcre.IndependentUniformPrior([(0, 5), (0, 100)], ("r", "m"))
modelG = abcre.Model(sev="gamma", prior=priorG)

priorL = abcre.IndependentUniformPrior([(-20, 20), (0, 5)], ("μ", "σ"))
modelL = abcre.Model(sev="lognormal", prior=priorL)

priorW = abcre.IndependentUniformPrior([(1e-1, 5), (0, 100)], ("k", "δ"))
modelW = abcre.Model(sev="weibull", prior=priorW)

models = [modelG, modelL, modelW]

<IPython.core.display.Javascript object>

In [6]:
model_proba_abc = pd.DataFrame(
    {"model_data": [], "model_fit": [], "ss": [], "model_probability_ABC": []}
)

# model_data = "lognormal"
for model_data in models_data:
    sevs = claim_data[model_data]
    for ss in sample_sizes:
        uData = sevs[:ss]
        %time fit = abcre.smc(numItersData, popSizeModels, uData, models, **smcArgs)
        for k in range(len(models)):
            weights = fit.weights[fit.models == k]
            res_mp = pd.DataFrame(
                {
                    "model_data": pd.Series(model_data),
                    "model_fit": pd.Series([models_fitted[k]]),
                    "ss": np.array([ss]),
                    "model_probability_ABC": pd.Series(
                        np.sum(fit.weights[fit.models == k])
                    ),
                }
            )
            model_proba_abc = pd.concat([model_proba_abc, res_mp])
            model_proba_abc


Final population dists <= 1.70, ESS = [ 42. 696. 203.]
	model populations = [69, 720, 211], model weights = [0.1  0.83 0.07]
Wall time: 28.3 s


Final population dists <= 1.81, ESS = [ 61. 690. 226.]
	model populations = [63, 707, 230], model weights = [0.08 0.83 0.09]
Wall time: 10.9 s


Final population dists <= 1.68, ESS = [ 53. 693. 220.]
	model populations = [63, 710, 227], model weights = [0.1  0.82 0.08]
Wall time: 11.3 s


Final population dists <= 1.61, ESS = [ 64. 736. 163.]
	model populations = [71, 760, 169], model weights = [0.09 0.84 0.07]
Wall time: 11 s


Final population dists <= 1.71, ESS = [ 50. 733. 187.]
	model populations = [54, 754, 192], model weights = [0.07 0.85 0.08]
Wall time: 11.3 s


Final population dists <= 1.63, ESS = [ 44. 779. 135.]
	model populations = [57, 803, 140], model weights = [0.07 0.87 0.06]
Wall time: 11.1 s


<IPython.core.display.Javascript object>

## SMC model probabilities

In [7]:
Bayesian_Summary = pd.DataFrame(
    {
        "model_data": [],
        "model_fit": [],
        "ss": [],
        "param_1": [],
        "param_2": [],
        "marginal_log_likelihood": [],
    }
)

for model_data in models_data:
    sevs = claim_data[model_data]
    for model_fitted in models_fitted:

        for ss in sample_sizes:
            uData = sevs[:ss]
            print(
                f"Fitting a {model_fitted} model to {len(uData)} data points generated from a {model_data} model"
            )

            if model_fitted == "gamma":
                with pm.Model() as model_sev:
                    r = pm.Uniform("param_1", lower=0, upper=5)
                    m = pm.Uniform("param_2", lower=0, upper=100)
                    U = pm.Gamma("U", alpha=r, beta=1 / m, observed=uData)
                    %time trace = pm.sample_smc(popSize, random_seed=1)

            elif model_fitted == "lognormal":
                with pm.Model() as model_sev:
                    μ = pm.Uniform("param_1", lower=-20, upper=20)
                    σ = pm.Uniform("param_2", lower=0, upper=5)
                    U = pm.Lognormal("U", mu=μ, sigma=σ, observed=uData)
                    %time trace = pm.sample_smc(popSize, random_seed=1)

            elif model_fitted == "weibull":
                with pm.Model() as model_sev:
                    k = pm.Uniform("param_1", lower=1e-1, upper=5)
                    δ = pm.Uniform("param_2", lower=0, upper=100)
                    U = pm.Weibull("U", alpha=k, beta=δ, observed=uData)
                    %time trace = pm.sample_smc(popSize, random_seed=1)

            # pm.plot_posterior(trace)

            ll = model_sev.marginal_log_likelihood

            res = pd.DataFrame(
                {
                    "model_data": [model_data],
                    "model_fit": [model_fitted],
                    "ss": [ss],
                    "param_1": [trace["param_1"].mean()],
                    "param_2": [trace["param_2"].mean()],
                    "marginal_log_likelihood": [ll],
                }
            )
            Bayesian_Summary = pd.concat([Bayesian_Summary, res])

Fitting a gamma model to 25 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.010 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.049 Steps:  25 Acce: 0.496
Stage:   2 Beta: 0.129 Steps:   6 Acce: 0.406
Stage:   3 Beta: 0.285 Steps:   8 Acce: 0.298
Stage:   4 Beta: 0.818 Steps:  12 Acce: 0.288
Stage:   5 Beta: 1.000 Steps:  13 Acce: 0.353


Wall time: 34.7 s
Fitting a gamma model to 50 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.005 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.025 Steps:  25 Acce: 0.497
Stage:   2 Beta: 0.068 Steps:   6 Acce: 0.421
Stage:   3 Beta: 0.158 Steps:   8 Acce: 0.302
Stage:   4 Beta: 0.437 Steps:  12 Acce: 0.291
Stage:   5 Beta: 1.000 Steps:  13 Acce: 0.328


Wall time: 33.7 s
Fitting a gamma model to 75 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.003 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.016 Steps:  25 Acce: 0.500
Stage:   2 Beta: 0.043 Steps:   6 Acce: 0.403
Stage:   3 Beta: 0.098 Steps:   8 Acce: 0.320
Stage:   4 Beta: 0.283 Steps:  11 Acce: 0.314
Stage:   5 Beta: 0.934 Steps:  12 Acce: 0.325
Stage:   6 Beta: 1.000 Steps:  11 Acce: 0.333


Wall time: 37.5 s
Fitting a gamma model to 100 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.002 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.012 Steps:  25 Acce: 0.500
Stage:   2 Beta: 0.032 Steps:   6 Acce: 0.424
Stage:   3 Beta: 0.071 Steps:   8 Acce: 0.311
Stage:   4 Beta: 0.206 Steps:  12 Acce: 0.302
Stage:   5 Beta: 0.676 Steps:  12 Acce: 0.338
Stage:   6 Beta: 1.000 Steps:  11 Acce: 0.322


Wall time: 38.9 s
Fitting a gamma model to 150 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.002 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.008 Steps:  25 Acce: 0.498
Stage:   2 Beta: 0.022 Steps:   6 Acce: 0.425
Stage:   3 Beta: 0.051 Steps:   8 Acce: 0.290
Stage:   4 Beta: 0.160 Steps:  13 Acce: 0.310
Stage:   5 Beta: 0.505 Steps:  12 Acce: 0.317
Stage:   6 Beta: 1.000 Steps:  12 Acce: 0.280


Wall time: 41.8 s
Fitting a gamma model to 200 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.001 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.006 Steps:  25 Acce: 0.500
Stage:   2 Beta: 0.015 Steps:   6 Acce: 0.387
Stage:   3 Beta: 0.033 Steps:   9 Acce: 0.314
Stage:   4 Beta: 0.096 Steps:  12 Acce: 0.320
Stage:   5 Beta: 0.300 Steps:  11 Acce: 0.332
Stage:   6 Beta: 0.961 Steps:  11 Acce: 0.322
Stage:   7 Beta: 1.000 Steps:  11 Acce: 0.269


Wall time: 47.2 s
Fitting a lognormal model to 25 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.006 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.041 Steps:  25 Acce: 0.482
Stage:   2 Beta: 0.121 Steps:   7 Acce: 0.401
Stage:   3 Beta: 0.271 Steps:   8 Acce: 0.329
Stage:   4 Beta: 0.717 Steps:  11 Acce: 0.371
Stage:   5 Beta: 1.000 Steps:   9 Acce: 0.345


Wall time: 27.6 s
Fitting a lognormal model to 50 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.003 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.021 Steps:  25 Acce: 0.482
Stage:   2 Beta: 0.060 Steps:   6 Acce: 0.381
Stage:   3 Beta: 0.130 Steps:   9 Acce: 0.333
Stage:   4 Beta: 0.327 Steps:  11 Acce: 0.361
Stage:   5 Beta: 1.000 Steps:  10 Acce: 0.360


Wall time: 27.9 s
Fitting a lognormal model to 75 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.002 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.014 Steps:  25 Acce: 0.483
Stage:   2 Beta: 0.038 Steps:   6 Acce: 0.363
Stage:   3 Beta: 0.085 Steps:  10 Acce: 0.329
Stage:   4 Beta: 0.221 Steps:  11 Acce: 0.331
Stage:   5 Beta: 0.621 Steps:  11 Acce: 0.383
Stage:   6 Beta: 1.000 Steps:   9 Acce: 0.343


Wall time: 34.4 s
Fitting a lognormal model to 100 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.002 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.011 Steps:  25 Acce: 0.474
Stage:   2 Beta: 0.029 Steps:   7 Acce: 0.396
Stage:   3 Beta: 0.061 Steps:   9 Acce: 0.311
Stage:   4 Beta: 0.160 Steps:  12 Acce: 0.341
Stage:   5 Beta: 0.466 Steps:  11 Acce: 0.370
Stage:   6 Beta: 1.000 Steps:   9 Acce: 0.332
Sample initial stage: ...


Wall time: 35.2 s
Fitting a lognormal model to 150 data points generated from a lognormal model


Stage:   0 Beta: 0.001 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.007 Steps:  25 Acce: 0.484
Stage:   2 Beta: 0.019 Steps:   6 Acce: 0.378
Stage:   3 Beta: 0.042 Steps:   9 Acce: 0.334
Stage:   4 Beta: 0.105 Steps:  11 Acce: 0.368
Stage:   5 Beta: 0.327 Steps:  10 Acce: 0.347
Stage:   6 Beta: 1.000 Steps:  10 Acce: 0.337


Wall time: 32.9 s
Fitting a lognormal model to 200 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.001 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.005 Steps:  25 Acce: 0.484
Stage:   2 Beta: 0.014 Steps:   6 Acce: 0.379
Stage:   3 Beta: 0.031 Steps:   9 Acce: 0.314
Stage:   4 Beta: 0.088 Steps:  12 Acce: 0.371
Stage:   5 Beta: 0.249 Steps:   9 Acce: 0.378
Stage:   6 Beta: 0.820 Steps:   9 Acce: 0.341
Stage:   7 Beta: 1.000 Steps:  11 Acce: 0.307


Wall time: 37.3 s
Fitting a weibull model to 25 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.012 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.048 Steps:  25 Acce: 0.491
Stage:   2 Beta: 0.109 Steps:   6 Acce: 0.386
Stage:   3 Beta: 0.241 Steps:   9 Acce: 0.320
Stage:   4 Beta: 0.698 Steps:  11 Acce: 0.315
Stage:   5 Beta: 1.000 Steps:  12 Acce: 0.364


Wall time: 30 s
Fitting a weibull model to 50 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.006 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.025 Steps:  25 Acce: 0.505
Stage:   2 Beta: 0.057 Steps:   6 Acce: 0.388
Stage:   3 Beta: 0.116 Steps:   9 Acce: 0.297
Stage:   4 Beta: 0.322 Steps:  13 Acce: 0.298
Stage:   5 Beta: 1.000 Steps:  13 Acce: 0.383


Wall time: 30.4 s
Fitting a weibull model to 75 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.004 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.016 Steps:  25 Acce: 0.501
Stage:   2 Beta: 0.037 Steps:   6 Acce: 0.383
Stage:   3 Beta: 0.075 Steps:   9 Acce: 0.304
Stage:   4 Beta: 0.212 Steps:  12 Acce: 0.291
Stage:   5 Beta: 0.659 Steps:  13 Acce: 0.368
Stage:   6 Beta: 1.000 Steps:  10 Acce: 0.330


Wall time: 35.1 s
Fitting a weibull model to 100 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.003 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.012 Steps:  25 Acce: 0.499
Stage:   2 Beta: 0.027 Steps:   6 Acce: 0.392
Stage:   3 Beta: 0.057 Steps:   9 Acce: 0.314
Stage:   4 Beta: 0.173 Steps:  12 Acce: 0.300
Stage:   5 Beta: 0.580 Steps:  12 Acce: 0.359
Stage:   6 Beta: 1.000 Steps:  10 Acce: 0.337


Wall time: 36.3 s
Fitting a weibull model to 150 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.002 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.008 Steps:  25 Acce: 0.507
Stage:   2 Beta: 0.019 Steps:   6 Acce: 0.395
Stage:   3 Beta: 0.040 Steps:   9 Acce: 0.315
Stage:   4 Beta: 0.120 Steps:  12 Acce: 0.284
Stage:   5 Beta: 0.392 Steps:  13 Acce: 0.361
Stage:   6 Beta: 1.000 Steps:  10 Acce: 0.338


Wall time: 36.1 s
Fitting a weibull model to 200 data points generated from a lognormal model


Sample initial stage: ...
Stage:   0 Beta: 0.001 Steps:  25 Acce: 1.000
Stage:   1 Beta: 0.006 Steps:  25 Acce: 0.503
Stage:   2 Beta: 0.014 Steps:   6 Acce: 0.390
Stage:   3 Beta: 0.028 Steps:   9 Acce: 0.293
Stage:   4 Beta: 0.083 Steps:  13 Acce: 0.316
Stage:   5 Beta: 0.255 Steps:  12 Acce: 0.371
Stage:   6 Beta: 0.827 Steps:   9 Acce: 0.322
Stage:   7 Beta: 1.000 Steps:  11 Acce: 0.295


Wall time: 41 s


<IPython.core.display.Javascript object>

In [8]:
max_marginal_log_likelihood = (
    Bayesian_Summary[["model_data", "ss", "marginal_log_likelihood"]]
    .groupby(["model_data", "ss"])
    .max()
)
max_marginal_log_likelihood.reset_index(level=["model_data", "ss"], inplace=True)
max_marginal_log_likelihood.rename(
    columns={"marginal_log_likelihood": "max_marginal_log_likelihood"}
)

Bayesian_Summary_1 = pd.merge(
    Bayesian_Summary, max_marginal_log_likelihood, how="left", on=["model_data", "ss"]
)
Bayesian_Summary_1

Bayesian_Summary_1["BF"] = np.exp(
    Bayesian_Summary_1.marginal_log_likelihood_x
    - Bayesian_Summary_1.marginal_log_likelihood_y
)

Bayesian_Summary_1
sum_BF = (
    Bayesian_Summary_1[["ss", "model_data", "BF"]].groupby(["ss", "model_data"]).sum()
)
sum_BF.reset_index(level=["model_data", "ss"], inplace=True)

Bayesian_Summary_2 = pd.merge(
    Bayesian_Summary_1, sum_BF, how="left", on=["model_data", "ss"]
)
Bayesian_Summary_2
Bayesian_Summary_2["model_probability"] = (
    Bayesian_Summary_2.BF_x / Bayesian_Summary_2.BF_y
)
Bayesian_Summary_2

Unnamed: 0,model_data,model_fit,ss,param_1,param_2,marginal_log_likelihood_x,marginal_log_likelihood_y,BF_x,BF_y,model_probability
0,lognormal,gamma,25.0,1.477746,1.298752,-44.117143,-44.117143,1.0,2.287336,0.43719
1,lognormal,gamma,50.0,1.253679,1.570124,-86.526719,-85.730619,0.451085,1.663973,0.271089
2,lognormal,gamma,75.0,1.324136,1.330436,-120.283342,-117.492313,0.061358,1.07374,0.057144
3,lognormal,gamma,100.0,1.407956,1.17688,-152.50159,-147.753857,0.008671,1.010003,0.008585
4,lognormal,gamma,150.0,1.34864,1.300512,-235.390588,-228.349934,0.000876,1.000943,0.000875
5,lognormal,gamma,200.0,1.381733,1.201641,-301.249859,-291.294539,4.7e-05,1.000049,4.7e-05
6,lognormal,lognormal,25.0,0.185802,1.014138,-45.06267,-44.117143,0.388475,2.287336,0.169837
7,lognormal,lognormal,50.0,0.157304,1.024865,-85.730619,-85.730619,1.0,1.663973,0.600971
8,lognormal,lognormal,75.0,0.100519,0.955438,-117.492313,-117.492313,1.0,1.07374,0.931324
9,lognormal,lognormal,100.0,0.083314,0.910056,-147.753857,-147.753857,1.0,1.010003,0.990096


<IPython.core.display.Javascript object>

In [9]:
model_proba = pd.merge(
    Bayesian_Summary_2[["model_data", "model_fit", "ss", "model_probability"]],
    model_proba_abc,
    how="left",
    on=["model_data", "model_fit", "ss"],
).round(2)
model_proba

Unnamed: 0,model_data,model_fit,ss,model_probability,model_probability_ABC
0,lognormal,gamma,25.0,0.44,0.1
1,lognormal,gamma,50.0,0.27,0.08
2,lognormal,gamma,75.0,0.06,0.1
3,lognormal,gamma,100.0,0.01,0.09
4,lognormal,gamma,150.0,0.0,0.07
5,lognormal,gamma,200.0,0.0,0.07
6,lognormal,lognormal,25.0,0.17,0.83
7,lognormal,lognormal,50.0,0.6,0.83
8,lognormal,lognormal,75.0,0.93,0.82
9,lognormal,lognormal,100.0,0.99,0.84


<IPython.core.display.Javascript object>

In [10]:
print(
    pd.pivot_table(
        model_proba,
        values=["model_probability", "model_probability_ABC"],
        index=["ss"],
        columns=["model_fit"],
        aggfunc={"model_probability": np.mean, "model_probability_ABC": np.mean},
    ).to_latex()
)

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{3}{l}{model\_probability} & \multicolumn{3}{l}{model\_probability\_ABC} \\
model\_fit &             gamma & lognormal & weibull &                 gamma & lognormal & weibull \\
ss    &                   &           &         &                       &           &         \\
\midrule
25.0  &              0.44 &      0.17 &    0.39 &                  0.10 &      0.83 &    0.07 \\
50.0  &              0.27 &      0.60 &    0.13 &                  0.08 &      0.83 &    0.09 \\
75.0  &              0.06 &      0.93 &    0.01 &                  0.10 &      0.82 &    0.08 \\
100.0 &              0.01 &      0.99 &    0.00 &                  0.09 &      0.84 &    0.07 \\
150.0 &              0.00 &      1.00 &    0.00 &                  0.07 &      0.85 &    0.08 \\
200.0 &              0.00 &      1.00 &    0.00 &                  0.07 &      0.87 &    0.06 \\
\bottomrule
\end{tabular}



<IPython.core.display.Javascript object>