# Danish fire insurance data

In [1]:
%run preamble.py
danish = pd.read_csv("Data/Danish/danish.csv").x

# Fitting single loss models to the data

In [14]:
X = danish.values

# Smallest / largest observation, computed once: the builtin min/max walk the whole
# array, and several priors below need the same two values.
x_min, x_max = min(X), max(X)

# Single source of truth for the candidate loss models. One row per model:
#   (model name, [(prior family, parameter name, a, b), ...])
# with the parameters listed in the order expected by bsm.loss_model.
# a and b are forwarded unchanged to bsm.prior_model; what they mean for each
# family is defined by bsm (NOTE(review): for 'fixed' the first value is presumably
# the constant and the second unused — confirm against bsm.prior_model).
single_model_specs = [
    ("Exp",              [("gamma", "λ", 1, 1)]),
    ("Gamma",            [("gamma", "r", 1, 1), ("gamma", "m", 1, 1)]),
    ("Weibull",          [("gamma", "k", 1, 1), ("gamma", "β", 1, 1)]),
    ("Inverse-Gaussian", [("gamma", "μ", 1, 1), ("gamma", "λ", 1, 1)]),
    ("Lognormal",        [("normal", "μ", 0, 10), ("gamma", "σ", 1, 1)]),
    ("Log-Logistic",     [("gamma", "β", 1, 1), ("gamma", "σ", 1, 1)]),
    ("Lomax",            [("gamma", "α", 1, 1), ("gamma", "σ", 1, 1)]),
    ("Burr",             [("gamma", "α", 1, 1), ("gamma", "β", 1, 1), ("gamma", "σ", 1, 1)]),
    ("Pareto",           [("gamma", "α", 1, 1), ("fixed", "γ", x_min, 1)]),
    ("GPD",              [("gamma", "ξ", 1, 1), ("gamma", "σ", 1, 1), ("fixed", "γ", x_min, 1)]),
    ("Gamma-Pareto",     [("gamma", "r", 1, 1), ("gamma", "α", 1, 1), ("uniform", "γ", x_min, x_max)]),
    ("Lognormal-Pareto", [("gamma", "σ", 1, 1), ("gamma", "α", 1, 1), ("uniform", "γ", x_min, x_max)]),
    ("Weibull-Pareto",   [("gamma", "k", 1, 1), ("gamma", "α", 1, 1), ("uniform", "γ", x_min, x_max)]),
    ("Inverse-Gamma",    [("gamma", "r", 1, 1), ("gamma", "m", 1, 1)]),
    ("Inverse-Weibull",  [("gamma", "k", 1, 1), ("gamma", "β", 1, 1)]),
]

# Views derived from the table, kept under their original names and types because
# later cells index them positionally (f_names must stay a NumPy array).
f_names = np.array([model for model, param_priors in single_model_specs])
f_param_names = [[param for family, param, a, b in param_priors]
                 for model, param_priors in single_model_specs]
model_priors = [[bsm.prior_model(family, param, a, b) for family, param, a, b in param_priors]
                for model, param_priors in single_model_specs]

# Build, for every model, the loss-model object and the joint prior over its parameters.
fs, prior_single_model = [], []
for i in range(len(f_names)):
    fs.append(bsm.loss_model(f_names[i], f_param_names[i]))
    prior_single_model.append(bsm.independent_priors(model_priors[i]))
    fs[i].set_ppf()

# Name -> loss-model lookup used by the later cells.
f_single_dic = dict(zip(f_names, fs))

In [15]:
popSize, ρ, c, n_step_max, err, paralell, n_proc, verbose = 10000, 1/2, 0.99, 25, 1e-6, False, 4, False
def fit_single_models(i):     
    trace, log_marg, DIC, WAIC = bsm.smc_likelihood_annealing(X, fs[i], popSize, prior_single_model[i], ρ, c,n_step_max, err, paralell, 4, verbose)
    return([trace, log_marg, DIC, WAIC])

%time res = Parallel(n_jobs=15)(delayed(fit_single_models)(i) for i in range(len(f_names)))

CPU times: user 305 ms, sys: 178 ms, total: 483 ms
Wall time: 9min 59s


In [16]:
fit_single_models_dic = dict(zip(f_names, res))
γ_map = min(X) * np.ones(len(fit_single_models_dic))
selec = np.where(np.array([np.any(fit_single_models_dic[f_name][0].columns.values == "γ") for f_name in f_names]))
γ_map[selec] = np.array([fit_single_models_dic[f_name][0]['γ'].mean() for f_name in f_names[selec]])

single_models_df = pd.DataFrame({'model':f_names, 
                                     "d": np.array([f.d for f in fs]),
                                     "γ_map": γ_map,
                                     'log_marg':  np.array([fit_single_models_dic[f_names[k]][1] for k in range(len(fit_single_models_dic))]), 
                                     "DIC": np.array([fit_single_models_dic[f_names[k]][2] for k in range(len(fit_single_models_dic))]), 
                                     "WAIC":np.array([fit_single_models_dic[f_names[k]][3] for k in range(len(fit_single_models_dic))])})


%time single_models_df["Wass_dist"] = np.array([bsm.compute_Wasserstein(X, f_single_dic[model_name], fit_single_models_dic[model_name][0].mean().values, 1) for model_name in single_models_df["model"].values])
single_models_df.sort_values(by='log_marg', ascending=False)

CPU times: user 8.82 s, sys: 58.3 ms, total: 8.88 s
Wall time: 8.88 s


Unnamed: 0,model,d,γ_map,log_marg,DIC,WAIC,Wass_dist
7,Burr,3,0.313404,-3858.984147,7665.300382,7679.744349,655.299812
12,Weibull-Pareto,3,1.015928,-3866.237361,7675.154609,7690.166386,1301.96854
11,Lognormal-Pareto,3,1.211404,-3882.903629,7725.666586,7745.174803,672.451159
10,Gamma-Pareto,3,1.237495,-3900.764544,7721.174834,7735.565321,593.003278
14,Inverse-Weibull,2,0.313404,-3976.324518,7929.476212,7939.086594,1665.218599
13,Inverse-Gamma,2,0.313404,-4108.925241,8191.758751,8200.222627,1927.972005
5,Log-Logistic,2,0.313404,-4290.319408,8556.914764,8565.974035,2317.016747
4,Lognormal,2,0.313404,-4445.053548,8863.592348,8874.157993,2362.387009
3,Inverse-Gaussian,2,0.313404,-4526.179993,9028.52563,9046.535927,2674.278653
9,GPD,3,0.313404,-4682.179895,9345.112486,9354.034257,2105.613433


In [17]:
# Names of the five models with the largest log marginal likelihood.
model_names = single_models_df.sort_values(by='log_marg', ascending=False)["model"].head(5)
for model_name in model_names:
    f = f_single_dic[model_name]
    trace = fit_single_models_dic[model_name][0]
    # Per-model diagnostics are currently switched off; uncomment the ones needed.
    # print(trace.mean().values)
    # bsm.posterior_plots(f, trace)
    # bsm.trace_plots(f, trace)
    # bsm.qq_plot(X, f, trace.mean().values)


In [19]:
import pickle

# Persist the comparison table (index included) as CSV.
single_models_df.to_csv("Data/Danish/danish_single_model_fit.csv", sep=",")

# Persist the full fit results (traces and criteria) for later reuse.
# Only load this file back from a trusted location: unpickling executes arbitrary code.
with open("Data/Danish/trace_danish_single_models.obj", "wb") as trace_file:
    pickle.dump(fit_single_models_dic, trace_file)