In [1]:
%run preamble_scripts.py

# Simulation study: the well-specified case with 250 observations

In [2]:
# Experiment size: number of simulated datasets, observations per dataset,
# and the `expo` argument handed to the PnL simulator
n_sim = 100
nobs = 250
expo = 250
# Number of PnL draws behind the capital quantiles. Stored as a float: it is
# passed as-is to f_true.PnL and cast with int() inside compute_cap.
err_pnl = 3e5
# Population-size arguments for bs.smc: the screening pass over every candidate
# model, then the refits performed in compute_cap
popSize_screening = 1000
popsize_capi = 5000

In [3]:
# Setting the true model
f1, f2 = bs.loss_model("Gamma", ["r1", "m1"]), bs.loss_model("Lomax", ["α2", "σ2"])
f_true = bs.spliced_loss_model(f1, f2, "continuous")
parms_true = np.array([1/2, 1, 2.5, 3, 1.5])

f_true.set_ppf(), f_true.set_pdf(),f_true.set_cdf()

# We set the priority to the 90% quantile and the limit to the 0.99% quantile
P, L = f_true.ppf(parms_true, 0.9), f_true.ppf(parms_true, 0.99)
premiums = f_true.PP(parms_true), f_true.XOLP(parms_true, P, L)

%time PnLs = np.array(f_true.PnL(parms_true, P, L, expo, premiums, safety_loadings = [0.05, 0.05],  n_sim = err_pnl))
true_VaRs = [f_true.ppf(parms_true, prob) for prob in [0.95, 0.99, 0.995]]
true_cap = np.quantile(PnLs, [0.005, 0.01, 0.05])
true_cap

CPU times: user 6min 51s, sys: 180 ms, total: 6min 51s
Wall time: 6min 51s


array([-151.23528817, -112.485274  ,  -47.50983931])

We are interested in estimating the extreme quantiles of the claim size distribution (of order 0.95, 0.99 and 0.995) and the quantiles of the aggregate losses over one year under an excess-of-loss (XOL) reinsurance agreement.

In [4]:
Xs = [f_true.sample(parms_true, nobs) for k in range(n_sim)]
ρ, c, n_step_max, err, paralell, n_proc, verbose = 1/2, 0.99, 25, 1e-6, False, 4, False
dfs = []
for k in range(n_sim):
    print("Simulation #"+str(k))
    def fit_spliced_models(i):
        trace, log_marg, DIC, WAIC = bs.smc(Xs[k], fs[i], popSize_screening, prior_spliced_model[i], ρ, c,n_step_max, err, paralell, 4, verbose)
        Wass_dist = bs.compute_Wasserstein(Xs[k], fs[i], trace.mean().values, 1)
        return(np.array([k, f_names[i], nobs, trace["γ"].mean(), log_marg, Wass_dist]))
    %time res = Parallel(n_jobs= 40)(delayed(fit_spliced_models)(i) for i in range(len(fs)))
    df = pd.DataFrame(res, columns = ["sim", "model_name", "nobs", "γ_map", "log_marg", "Wass_dist"])
    df[df.columns[2:]] = df[df.columns[2:]].astype(float)

    df["posterior_probability"] = np.exp(df["log_marg"] - np.max(df["log_marg"])) / np.sum(np.exp(df["log_marg"] - np.max(df["log_marg"]))) 
    dfs.append(df)


Simulation #0
CPU times: user 1min 57s, sys: 832 ms, total: 1min 57s
Wall time: 2min 8s
Simulation #1
CPU times: user 1min 57s, sys: 304 ms, total: 1min 57s
Wall time: 2min 8s
Simulation #2
CPU times: user 1min 57s, sys: 244 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #3
CPU times: user 1min 57s, sys: 196 ms, total: 1min 57s
Wall time: 2min 8s
Simulation #4
CPU times: user 1min 56s, sys: 236 ms, total: 1min 57s
Wall time: 2min 8s
Simulation #5
CPU times: user 1min 56s, sys: 216 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #6
CPU times: user 1min 57s, sys: 204 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #7
CPU times: user 1min 57s, sys: 164 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #8
CPU times: user 1min 56s, sys: 216 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #9
CPU times: user 1min 57s, sys: 268 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #10
CPU times: user 1min 56s, sys: 216 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #11
CPU times: user 



CPU times: user 1min 57s, sys: 1.02 s, total: 1min 58s
Wall time: 2min 10s
Simulation #15
CPU times: user 1min 56s, sys: 260 ms, total: 1min 57s
Wall time: 2min 8s
Simulation #16
CPU times: user 1min 57s, sys: 224 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #17
CPU times: user 1min 56s, sys: 184 ms, total: 1min 56s
Wall time: 2min 7s
Simulation #18
CPU times: user 1min 56s, sys: 204 ms, total: 1min 56s
Wall time: 2min 6s
Simulation #19
CPU times: user 1min 56s, sys: 196 ms, total: 1min 56s
Wall time: 2min 7s
Simulation #20
CPU times: user 1min 56s, sys: 148 ms, total: 1min 56s
Wall time: 2min 6s
Simulation #21
CPU times: user 1min 56s, sys: 204 ms, total: 1min 57s
Wall time: 2min 7s
Simulation #22
CPU times: user 1min 56s, sys: 316 ms, total: 1min 56s
Wall time: 2min 9s
Simulation #23
CPU times: user 1min 56s, sys: 204 ms, total: 1min 56s
Wall time: 2min 7s
Simulation #24
CPU times: user 1min 57s, sys: 184 ms, total: 1min 57s
Wall time: 2min 8s
Simulation #25
CPU times: user 1min

In [5]:
# Stack the per-simulation screening tables and write them to a single CSV file
pd.concat(dfs).to_csv(
    path_or_buf = "../../Data/Simulations/simu_postp_wellspec_"+str(nobs)+".csv",
    sep = ',',
)

In [6]:
def compute_cap(k):
    """Estimate lower-tail PnL quantiles for simulated dataset ``k``.

    The model-selection strategy is read from the module-level name ``method``
    (set by the calling loop), not from an argument:

    * ``"true"`` -- only the row named ``"Gamma_Lomax_continuous"``;
    * ``"best"`` -- the model with the highest ``posterior_probability``;
    * ``"BMA"``  -- the five models with the highest ``posterior_probability``.

    Each selected model is refitted on ``Xs[k]`` with ``bs.smc`` using
    ``popsize_capi``. Model weights are the normalised exponentials of the
    ``log_marg`` values returned by those refits. ``int(err_pnl)`` PnL values
    are then simulated, each from a model drawn according to these weights and
    evaluated at the column means of that model's trace.

    Parameters
    ----------
    k : int
        Index of the simulated dataset (position in ``dfs`` and ``Xs``).

    Returns
    -------
    list
        ``[k, method, nobs, q_0.005, q_0.01, q_0.05]`` where the last three
        entries are quantiles of the simulated PnL values.

    Raises
    ------
    ValueError
        If ``method`` is not one of the three strategies, or if no model is
        selected for this dataset.
    """
    df = dfs[k]
    s = (df["sim"] == str(k)).values & (df["nobs"] == nobs).values
    sorted_df = df[s].sort_values("posterior_probability", ascending = False)
    if method == "true":
        selected_models = sorted_df[sorted_df.model_name == "Gamma_Lomax_continuous"]
    elif method == "best":
        selected_models = sorted_df[:1]
    elif method == "BMA":
        selected_models = sorted_df[:5]
    else:
        # Previously an unknown method fell through and failed later on an unbound name
        raise ValueError("unknown method: " + repr(method))
    model_names_s = selected_models.model_name
    if model_names_s.empty:
        raise ValueError("no model selected for simulation " + str(k) + " with method " + repr(method))

    # Refit every selected model once; keep only what the simulation loop needs
    X = Xs[k]
    post_means, log_margs = {}, []
    for model_name in model_names_s.values:
        f, prior = f_spliced_dic[model_name], prior_dic[model_name]
        trace, log_marg, _dic, _waic = bs.smc(X, f, popsize_capi, prior, verbose = False)
        # Column means of the trace are loop-invariant, so compute them once per model
        post_means[model_name] = trace.mean().values
        log_margs.append(log_marg)
    log_margs = np.asarray(log_margs)
    model_weights_s = np.exp(log_margs - np.max(log_margs))
    model_weights_s = model_weights_s / np.sum(model_weights_s)

    # Draw the model behind every PnL replicate in one call instead of one
    # pandas .sample() per replicate (same sampling scheme: i.i.d. with replacement)
    drawn_models = model_names_s.sample(int(err_pnl), weights = model_weights_s, replace = True).values
    # safety_loadings is passed explicitly, with the same value as the reference
    # PnL simulation behind true_cap, so the comparison does not depend on PnL's default
    PNLS_post = [
        f_spliced_dic[model_name].PnL(post_means[model_name], P, L,
                                      expo, premiums, safety_loadings = [0.05, 0.05], n_sim = 1)[0]
        for model_name in drawn_models
    ]
    return([k, method, nobs] + np.quantile(PNLS_post, [0.005, 0.01, 0.05]).tolist())


In [7]:
methods = ["true", "best", "BMA"]
dfs_cap = []
for method in methods: 
    print(method)
    %time res = Parallel(n_jobs= 40)(delayed(compute_cap)(i) for i in range(n_sim))
    df_cap = pd.DataFrame(res, columns = ["sim", "method", "nobs", "cap005", "cap01", "cap05"])
    dfs_cap.append(df_cap)

true
CPU times: user 2min 42s, sys: 1.17 s, total: 2min 43s
Wall time: 44min 28s
best
CPU times: user 3min, sys: 1.42 s, total: 3min 2s
Wall time: 47min 21s
BMA
CPU times: user 3min 7s, sys: 1.71 s, total: 3min 8s
Wall time: 49min 37s


In [8]:
# Write the capital estimates together with the reference quantiles to CSV.
# The reference frame repeats true_cap on n_sim * len(methods) identical rows;
# .join() aligns it with the stacked estimates by index label.
pd.concat(dfs_cap).join(
    pd.DataFrame(
        np.tile(true_cap, (n_sim * len(methods), 1)),
        columns = ["cap005_true", "cap01_true", "cap05_true"],
    )
).to_csv(
    path_or_buf = "../../Data/Simulations/simu_capi_wellspec_"+str(nobs)+".csv",
    sep = ',',
)

In [9]:
# Display the capital estimates side by side with the reference quantiles.
# true_cap is repeated on identical rows and joined to the stacked estimates by index label.
pd.concat(dfs_cap).join(
    pd.DataFrame(
        np.tile(true_cap, (n_sim * len(methods), 1)),
        columns = ["cap005_true", "cap01_true", "cap05_true"],
    )
)

Unnamed: 0,sim,method,nobs,cap005,cap01,cap05,cap005_true,cap01_true,cap05_true
0,0,true,250,-1800.584176,-931.338074,-203.273082,-151.235288,-112.485274,-47.509839
0,0,best,250,-1820.158645,-987.264037,-215.133142,-151.235288,-112.485274,-47.509839
0,0,BMA,250,-2303.867644,-1209.716408,-253.320360,-151.235288,-112.485274,-47.509839
1,1,true,250,-4801.085524,-2565.916311,-595.686418,-151.235288,-112.485274,-47.509839
1,1,best,250,-4065.078395,-2101.573662,-543.872825,-151.235288,-112.485274,-47.509839
...,...,...,...,...,...,...,...,...,...
98,98,best,250,-51825.753266,-12132.206135,-657.485577,-151.235288,-112.485274,-47.509839
98,98,BMA,250,-2022.023520,-795.891316,-121.049583,-151.235288,-112.485274,-47.509839
99,99,true,250,-1882.051798,-996.024142,-222.904855,-151.235288,-112.485274,-47.509839
99,99,best,250,-1632.480198,-909.610471,-201.462383,-151.235288,-112.485274,-47.509839
