# Danish fire insurance data

In [1]:
import sys
# sys.path.insert(0,'/home/pgoffard/BayesSplicedModels/')
%run ../../preamble.py
danish = pd.read_csv("../../Data/Danish/danish.csv").x

import random
random.seed(123)

In [2]:
# Observed losses, taken as the underlying array of `danish`
X = danish.values

# Candidate distributions for the body (bulk) of the spliced model and the
# names of their parameters, listed in matching order
body_model_names = ["Exp", "Weibull", "Burr"]
# body_model_names = ["Exp", "Gamma"]
body_model_param_names = [["λ1"], ["k1", "β1"], ["α1", "β1", "σ1"]]

# One 'gamma' prior with arguments (1, 1) per body parameter, grouped per model
body_model_priors = [
    [bs.prior_model('gamma', param_name, 1, 1) for param_name in param_names]
    for param_names in body_model_param_names
]


# Spliced model with free threshold

In [3]:
# 'uniform' prior for the threshold γ, parameterised by the smallest and
# largest observed loss
γ_prior = bs.prior_model(
    'uniform',
    "γ",
    min(X),
    max(X),
)

In [4]:
# Candidate distributions for the tail of the spliced model and the names of
# their parameters, listed in matching order
tail_model_names = ["Pareto-Tail", "GPD-Tail"]
# tail_model_names = ["Exp", "Gamma", "Weibull"]

tail_model_param_names = [["α2"], ["ξ2","σ2"]]

# One 'gamma' prior with arguments (1, 1) per tail parameter, grouped per model
tail_model_priors = [
    [bs.prior_model('gamma', param_name, 1, 1) for param_name in param_names]
    for param_names in tail_model_param_names
]
# 'uniform' prior with arguments (0, 1) for p; only used by the "disjoint" type below
p_prior = bs.prior_model('uniform', "p", 0, 1)

# Splicing model types
splicing_types = ["continuous", "simple", "disjoint"]
# splicing_types = ["continuous"]

# Build one spliced model, one name and one joint prior for every
# body x tail x splicing-type combination (body varies slowest)
fs, f_names, prior_spliced_model = [], [], []
for body_name, body_params, body_priors in zip(body_model_names, body_model_param_names, body_model_priors):
    for tail_name, tail_params, tail_priors in zip(tail_model_names, tail_model_param_names, tail_model_priors):
        for splicing_type in splicing_types:
            # Fresh body/tail objects for each spliced model, as nothing is shared between them
            body_dist = bs.loss_model(body_name, body_params)
            tail_dist = bs.loss_model(tail_name, tail_params)
            fs.append(bs.spliced_loss_model(body_dist, tail_dist, splicing_type))
            f_names.append(f"{body_name}_{tail_name}_{splicing_type}")
            # Every type gets the γ prior; "disjoint" additionally gets the p prior
            extra_priors = [γ_prior, p_prior] if splicing_type == "disjoint" else [γ_prior]
            prior_spliced_model.append(bs.independent_priors(body_priors + tail_priors + extra_priors))

for spliced_model in fs:
    spliced_model.set_ppf()

# Name -> model lookup
f_spliced_dic = {model_name: model for model_name, model in zip(f_names, fs)}
splits = 1
# Displayed as the cell output: number of models per split
len(f_names)/splits

18.0

In [5]:
import time

# Benchmark: wall-clock time of one `bs.smc` run for every spliced model in
# `fs`, for several population sizes, with the `paralell` flag off and on.

# SMC settings, passed positionally to `bs.smc`. They do not change between
# runs, so they are set once instead of inside the loops.
ρ, c, n_step_max, err, n_proc, verbose = 1/2, 0.99, 25, 1e-6, 4, False


def fit_spliced_models(i):
    """Run `bs.smc` on the i-th spliced model and return its outputs.

    Reads the notebook-level `popSize` and `paralell` at call time, so the
    benchmark loop below controls them.

    Parameters
    ----------
    i : int
        Position of the model in `fs`, `f_names` and `prior_spliced_model`.

    Returns
    -------
    list
        `[trace, log_marg, DIC, WAIC]` as returned by `bs.smc`.
    """
    print(f_names[i])
    # `n_proc` replaces the literal 4 that duplicated the setting above
    trace, log_marg, DIC, WAIC = bs.smc(
        X, fs[i], popSize, prior_spliced_model[i], ρ, c, n_step_max, err,
        paralell, n_proc, verbose,
    )
    return [trace, log_marg, DIC, WAIC]


computing_times = []
for paralell in [False, True]:
    print(paralell)
    for popSize in [5000, 10000, 20000]:
        print(popSize)
        for k in range(len(fs)):
            # perf_counter is the clock intended for measuring short durations;
            # unlike time.time() it is unaffected by system clock adjustments
            start = time.perf_counter()
            fit_spliced_models(k)
            end = time.perf_counter()
            # One single-row frame per run. The 'paralell' spelling is kept
            # because it is the column name written to the CSV.
            computing_times.append(
                pd.DataFrame({
                    'body': [fs[k].body.name],
                    'tail': [fs[k].tail.name],
                    'model_type': [fs[k].model_type],
                    'popSize': [popSize],
                    'paralell': [paralell],
                    'computing_time': [end - start],
                })
            )

# ignore_index avoids a result whose rows all carry the index label 0
res = pd.concat(computing_times, ignore_index=True)
# Written next to the input data: the directory casing matches the one the
# data was read from ("Data/Danish"), since "Data/danish" is a different
# directory on case-sensitive filesystems.
res.to_csv("../../Data/Danish/computing_time_free.csv", index=False)



False
5000
Exp_Pareto-Tail_continuous
Exp_Pareto-Tail_simple
Exp_Pareto-Tail_disjoint
Exp_GPD-Tail_continuous
Exp_GPD-Tail_simple
Exp_GPD-Tail_disjoint
Weibull_Pareto-Tail_continuous
Weibull_Pareto-Tail_simple
Weibull_Pareto-Tail_disjoint
Weibull_GPD-Tail_continuous
Weibull_GPD-Tail_simple
Weibull_GPD-Tail_disjoint
Burr_Pareto-Tail_continuous
Burr_Pareto-Tail_simple
Burr_Pareto-Tail_disjoint
Burr_GPD-Tail_continuous
Burr_GPD-Tail_simple
Burr_GPD-Tail_disjoint
10000
Exp_Pareto-Tail_continuous
Exp_Pareto-Tail_simple
Exp_Pareto-Tail_disjoint
Exp_GPD-Tail_continuous
Exp_GPD-Tail_simple
Exp_GPD-Tail_disjoint
Weibull_Pareto-Tail_continuous
Weibull_Pareto-Tail_simple
Weibull_Pareto-Tail_disjoint
Weibull_GPD-Tail_continuous
Weibull_GPD-Tail_simple
Weibull_GPD-Tail_disjoint
Burr_Pareto-Tail_continuous
Burr_Pareto-Tail_simple
Burr_Pareto-Tail_disjoint
Burr_GPD-Tail_continuous
Burr_GPD-Tail_simple
Burr_GPD-Tail_disjoint
20000
Exp_Pareto-Tail_continuous
Exp_Pareto-Tail_simple
Exp_Pareto-Tail_disjo

In [13]:
import time

# NOTE(review): this cell repeats the benchmark of the earlier `In [5]` cell
# and writes to the same CSV; consider keeping only one of the two cells.
#
# Benchmark: wall-clock time of one `bs.smc` run for every spliced model in
# `fs`, for several population sizes, with the `paralell` flag off and on.

# SMC settings, passed positionally to `bs.smc`. They do not change between
# runs, so they are set once instead of inside the loops.
ρ, c, n_step_max, err, n_proc, verbose = 1/2, 0.99, 25, 1e-6, 4, False


def fit_spliced_models(i):
    """Run `bs.smc` on the i-th spliced model and return its outputs.

    Reads the notebook-level `popSize` and `paralell` at call time, so the
    benchmark loop below controls them.

    Parameters
    ----------
    i : int
        Position of the model in `fs`, `f_names` and `prior_spliced_model`.

    Returns
    -------
    list
        `[trace, log_marg, DIC, WAIC]` as returned by `bs.smc`.
    """
    print(f_names[i])
    # `n_proc` replaces the literal 4 that duplicated the setting above
    trace, log_marg, DIC, WAIC = bs.smc(
        X, fs[i], popSize, prior_spliced_model[i], ρ, c, n_step_max, err,
        paralell, n_proc, verbose,
    )
    return [trace, log_marg, DIC, WAIC]


computing_times = []
for paralell in [False, True]:
    print(paralell)
    for popSize in [5000, 10000, 20000]:
        print(popSize)
        for k in range(len(fs)):
            # perf_counter is the clock intended for measuring short durations;
            # unlike time.time() it is unaffected by system clock adjustments
            start = time.perf_counter()
            fit_spliced_models(k)
            end = time.perf_counter()
            # The measured duration is recorded again: without the
            # 'computing_time' column the timings were thrown away and the
            # CSV held no timing data. The 'paralell' spelling is kept
            # because it is the column name written to the CSV.
            computing_times.append(
                pd.DataFrame({
                    'body': [fs[k].body.name],
                    'tail': [fs[k].tail.name],
                    'model_type': [fs[k].model_type],
                    'popSize': [popSize],
                    'paralell': [paralell],
                    'computing_time': [end - start],
                })
            )

# ignore_index avoids a result whose rows all carry the index label 0
res = pd.concat(computing_times, ignore_index=True)

# Written next to the input data: the directory casing matches the one the
# data was read from ("Data/Danish"), since "Data/danish" is a different
# directory on case-sensitive filesystems.
res.to_csv("../../Data/Danish/computing_time_free.csv", index=False)



False
5000
10000
20000
True
5000
10000
20000
