In [3]:
%run preamble_scripts.py

# Simulation study: the well-specified case with 250 observations

In [7]:
# Study design: number of simulated data sets, observations per data set,
# and the exposure handed to the PnL simulator.
n_sim = 100
nobs = 250
expo = 250

# Number of PnL draws used for the quantile estimates, and the population
# sizes handed to bs.smc for the screening and the capital steps.
err_pnl = 3e5
popSize_screening = 1000
popsize_capi = 5000

In [9]:
# Setting the true model
f1, f2 = bs.loss_model("Gamma", ["r1", "m1"]), bs.loss_model("Lomax", ["α2", "σ2"])
f_true = bs.spliced_loss_model(f1, f2, "continuous")
parms_true = np.array([1/2, 1, 2.5, 3, 1.5])

f_true.set_ppf(), f_true.set_pdf(),f_true.set_cdf()

# We set the priority to the 90% quantile and the limit to the 0.99% quantile
P, L = f_true.ppf(parms_true, 0.9), f_true.ppf(parms_true, 0.99)
premiums = f_true.PP(parms_true), f_true.XOLP(parms_true, P, L)

%time PnLs = np.array(f_true.PnL(parms_true, P, L, expo, premiums, safety_loadings = [0.05, 0.05],  n_sim = err_pnl))
true_VaRs = [f_true.ppf(parms_true, prob) for prob in [0.95, 0.99, 0.995]]
true_cap = np.quantile(PnLs, [0.005, 0.01, 0.05])
true_cap

CPU times: user 22min 33s, sys: 440 ms, total: 22min 34s
Wall time: 22min 33s


array([-151.6106319 , -112.36919931,  -47.38145119])

We are interested in estimating the extreme quantiles of the claim size distribution (of order 0.95, 0.99 and 0.995) and the quantiles of the aggregate losses over one year under an XOL (excess-of-loss) reinsurance agreement.

In [6]:
Xs = [f_true.sample(parms_true, nobs) for k in range(n_sim)]
ρ, c, n_step_max, err, paralell, n_proc, verbose = 1/2, 0.99, 25, 1e-6, False, 4, False
dfs = []
for k in range(n_sim):
    print("Simulation #"+str(k))
    def fit_spliced_models(i):
        trace, log_marg, DIC, WAIC = bs.smc(Xs[k], fs[i], popSize_screening, prior_spliced_model[i], ρ, c,n_step_max, err, paralell, 4, verbose)
        Wass_dist = bs.compute_Wasserstein(Xs[k], fs[i], trace.mean().values, 1)
        return(np.array([k, f_names[i], nobs, trace["γ"].mean(), log_marg, Wass_dist]))
    %time res = Parallel(n_jobs= 40)(delayed(fit_spliced_models)(i) for i in range(len(fs)))
    df = pd.DataFrame(res, columns = ["sim", "model_name", "nobs", "γ_map", "log_marg", "Wass_dist"])
    df[df.columns[2:]] = df[df.columns[2:]].astype(float)

    df["posterior_probability"] = np.exp(df["log_marg"] - np.max(df["log_marg"])) / np.sum(np.exp(df["log_marg"] - np.max(df["log_marg"]))) 
    dfs.append(df)


Simulation #0
CPU times: user 1min 56s, sys: 1.11 s, total: 1min 57s
Wall time: 2min 4s
Simulation #1
CPU times: user 1min 57s, sys: 700 ms, total: 1min 57s
Wall time: 2min 4s


In [7]:
# Stack the per-simulation screening tables and write them to a single CSV
screening_csv = "../../Data/Simulations/simu_postp_wellspec_"+str(nobs)+".csv"
pd.concat(dfs).to_csv(screening_csv, sep=',')

In [8]:
def compute_cap(k):
    """Estimate lower-tail PnL quantiles for simulated data set ``k``.

    The notebook-level variable ``method`` decides which models are used:
    ``"true"`` keeps the rows named "Gamma_Lomax_continuous", ``"best"`` the
    row with the highest posterior probability, and ``"BMA"`` the five rows
    with the highest posterior probabilities. Each selected model is refitted
    with ``bs.smc`` using ``popsize_capi``, model weights are recomputed from
    the new log marginal likelihoods, and ``int(err_pnl)`` PnL values are
    simulated, each from a model drawn according to those weights and
    evaluated at the mean of its trace.

    Parameters
    ----------
    k : int
        Index into ``dfs`` and ``Xs``.

    Returns
    -------
    list
        ``[k, method, nobs]`` followed by the 0.005, 0.01 and 0.05 quantiles
        of the simulated PnL values.

    Raises
    ------
    ValueError
        If ``method`` is not one of "true", "best" or "BMA".
    """
    df = dfs[k]
    # The "sim" column holds strings, hence the comparison with str(k)
    s = (df["sim"] == str(k)).values & (df["nobs"] == nobs).values
    sorted_df = df[s].sort_values("posterior_probability", ascending = False)
    if method  == "true":
        selected_models = sorted_df[sorted_df.model_name == "Gamma_Lomax_continuous"]
    elif method  == "best":
        selected_models = sorted_df[:1]
    elif method  == "BMA":
        selected_models = sorted_df[:5]
    else:
        # Fail with a clear message instead of a NameError further down
        raise ValueError("Unknown method: " + repr(method))
    model_names_s = selected_models.model_name

    traces, log_margs = [], []
    for model_name in model_names_s.values:
        f, prior, X = f_spliced_dic[model_name], prior_dic[model_name], Xs[k]
        trace, log_marg, DIC, WAIC = bs.smc(X, f, popsize_capi, prior, verbose = False)
        traces.append(trace)
        log_margs.append(log_marg)

    # Posterior means do not change inside the simulation loop: compute them
    # once per model rather than once per PnL draw.
    post_mean_dic = {name: fitted.mean().values
                     for name, fitted in zip(model_names_s.values, traces)}

    # Normalised model weights; subtracting the maximum avoids overflow in exp
    log_margs = np.asarray(log_margs)
    rel_evidence = np.exp(log_margs - np.max(log_margs))
    model_weights_s = rel_evidence / np.sum(rel_evidence)

    # NOTE(review): the reference PnL sample is simulated with
    # safety_loadings = [0.05, 0.05]; here PnL relies on its default —
    # confirm that the two agree.
    PNLS_post = []
    for i in range(int(err_pnl)):
        model_name = model_names_s.sample(1, weights = model_weights_s, replace = True).iloc[0]
        f = f_spliced_dic[model_name]
        PNLS_post.append( f.PnL(post_mean_dic[model_name], P, L,
                                expo, premiums, n_sim = 1)[0])
    return([k, method, nobs] + np.quantile(PNLS_post, [0.005, 0.01, 0.05]).tolist())


In [9]:
# Run the capital estimation once per model-selection strategy and keep one
# results table per strategy in dfs_cap.
# NOTE: compute_cap only receives the data-set index; it reads the current
# strategy from the notebook-level name `method`, so the loop variable below
# must keep that exact name.
methods = ["true", "best", "BMA"]
dfs_cap = []
for method in methods: 
    print(method)
    # One compute_cap call per simulated data set, dispatched through joblib
    %time res = Parallel(n_jobs= 40)(delayed(compute_cap)(i) for i in range(n_sim))
    df_cap = pd.DataFrame(res, columns = ["sim", "method", "nobs", "cap005", "cap01", "cap05"])
    dfs_cap.append(df_cap)

true
CPU times: user 2.14 s, sys: 49.8 ms, total: 2.19 s
Wall time: 13 s
best
CPU times: user 2.22 s, sys: 48 ms, total: 2.27 s
Wall time: 6.95 s
BMA
CPU times: user 2.16 s, sys: 12 ms, total: 2.17 s
Wall time: 23.4 s


In [10]:
# Attach the reference quantiles to the stacked per-method results and save.
# Every row of the right-hand table is the same true_cap vector; join aligns
# the two tables on their index labels.
true_cap_cols = ["cap005_true", "cap01_true", "cap05_true"]
true_cap_rows = pd.DataFrame([true_cap] * (n_sim * len(methods)), columns = true_cap_cols)
capi_csv = "../../Data/Simulations/simu_capi_wellspec_"+str(nobs)+".csv"
pd.concat(dfs_cap).join(true_cap_rows).to_csv(capi_csv, sep=',')

In [11]:
# Display the stacked per-method results next to the reference quantiles.
# Every row of the right-hand table is the same true_cap vector; join aligns
# the two tables on their index labels.
true_cap_cols = ["cap005_true", "cap01_true", "cap05_true"]
true_cap_rows = pd.DataFrame([true_cap] * (n_sim * len(methods)), columns = true_cap_cols)
pd.concat(dfs_cap).join(true_cap_rows)

Unnamed: 0,sim,method,nobs,cap005,cap01,cap05,cap005_true,cap01_true,cap05_true
0,0,true,250,-376.089863,-370.166371,-322.778429,-69.592662,-66.573243,-42.41789
0,0,best,250,-2738.308244,-2618.479075,-1659.845721,-69.592662,-66.573243,-42.41789
0,0,BMA,250,-201.439986,-193.694123,-131.727223,-69.592662,-66.573243,-42.41789
1,1,true,250,-22.063328,-21.841611,-20.06787,-69.592662,-66.573243,-42.41789
1,1,best,250,-1.975616,-1.491454,2.381842,-69.592662,-66.573243,-42.41789
1,1,BMA,250,-41.062683,-39.680097,-28.619414,-69.592662,-66.573243,-42.41789
