# Danish fire insurance data

In [9]:
import sys
# sys.path.insert(0,'/home/pgoffard/BayesSplicedModels/')
%run ../../preamble.py
danish = pd.read_csv("../../Data/Danish/danish.csv").x
import random
random.seed(123)

# Martin and Hansjoerg method

In [10]:
X = danish.values
n, k = len(X), 30
def compute_Tk(X, k):
    n = len(X)
    X_sorted = np.sort(X)
    Y_k = np.flip(X_sorted[n-k:] / X_sorted[n-1-k])
    num = np.cumsum(np.log(Y_k))/np.arange(1, k+1, 1)
    denom = 1 + np.flip(np.cumsum(np.flip(np.append(1/np.arange(2, k+1, 1), 0))))
    Tk = num / denom 
    return(Tk)

def fp(p):
    res = (1-np.exp(1-2*p)*(1-2*p)*sp.exp1(1-2*p)-np.exp(2-2*p)*sp.exp1(1-p)**2) / p**2/ (1-p)**2 + \
    2*(np.exp(2-p)*sp.exp1(1-p)*sp.exp1(1)-1+np.exp(1-p)*(1-p)*sp.exp1(1-p)) / p**2 / (1-p) + \
    (1-np.exp(1)*sp.exp1(1)-np.exp(2)*sp.exp1(1)**2) / p**2
    return(res)


def optimal_threshold(X, p = -1):
    n = len(X)
    X_sorted = np.sort(X)
    C = 0.502727
    emp_var = np.array([np.var(compute_Tk(X, k)) for k in np.arange(2, len(X) - int(len(X)/5), 1)])
    k_ast = np.where(emp_var == np.min(emp_var))[0][0]
    k0_ast = k_ast * (C / fp(p)/ (1-p)**2)**(-1/(1-2*p))
    return(X_sorted[n-1-int(k0_ast)]) 

optimal_threshold(X, p = -1)

2.781844802

# Fitting splicing models to the data with fixed threshold

In [13]:
# The data
X = danish.values
# Model for the bulk distribution
body_model_names = ["Burr", "Exp", "Weibull"]
body_model_param_names = [["α1","β1", "σ1"], ["λ1"], ["k1", "β1"]]

# Prior distributions over the parameters of the bulk distribution
body_model_priors= [
    [bs.prior_model('gamma',body_model_param_names[0][0], 1, 1), 
     bs.prior_model('gamma',body_model_param_names[0][1], 1, 1),
     bs.prior_model('gamma',body_model_param_names[0][2], 1, 1)], 
    [bs.prior_model('gamma',body_model_param_names[1][0], 1, 1)], 
     [bs.prior_model('gamma',body_model_param_names[2][0], 1, 1),
     bs.prior_model('gamma',body_model_param_names[2][1], 1, 1)]
]
# Model for the tail of the distribution
tail_model_names = ["Pareto-Tail", "GPD-Tail"]


tail_model_param_names = [ ["α2"], ["ξ2","σ2"]]

# Prior distributions over the parameters of the bulk distribution
tail_model_priors= [
                [bs.prior_model('gamma',tail_model_param_names[0][0], 1, 1)],
                [bs.prior_model('gamma',tail_model_param_names[1][0], 1, 1), bs.prior_model('gamma',tail_model_param_names[1][1], 1, 1)]
]
             
γ_prior, p_prior = bs.prior_model('fixed', "γ", optimal_threshold(X, p = -1), max(X)), bs.prior_model('uniform',"p", 0, 1)

#Splicing model type
splicing_types = ["disjoint", "simple", "continuous"]

# Setting the models
fs, f_names, prior_spliced_model = [], [], []
for i in range(len(body_model_names)):
    for j in range(len(tail_model_names)):
        for splicing_type in splicing_types:
            f1, f2 =  bs.loss_model(body_model_names[i], body_model_param_names[i]), bs.loss_model(tail_model_names[j], tail_model_param_names[j])
            fs.append(bs.spliced_loss_model(f1 , f2, splicing_type))
            f_names.append(body_model_names[i] +"_"+ tail_model_names[j]+"_"+splicing_type)
            if splicing_type == "disjoint": 
                prior_spliced_model.append(bs.independent_priors(body_model_priors[i] + tail_model_priors[j] + [γ_prior, p_prior]))
            else:
                prior_spliced_model.append(bs.independent_priors(body_model_priors[i] + tail_model_priors[j] + [γ_prior]))  
for f in fs:
    f.set_ppf()
f_spliced_dic = dict(zip(f_names, fs))
len(fs)

18

In [15]:
import time
computing_times = []
for paralell in [False, True]:
    print(paralell)
    for popSize in [5000, 10000, 20000] :
        print(popSize)
        ρ, c, n_step_max, err, n_proc, verbose =  1/2, 0.99, 25, 1e-6, 4, False
        def fit_spliced_models(i):
            print(f_names[i])
            trace, log_marg, DIC, WAIC = bs.smc(X, fs[i], popSize, prior_spliced_model[i], ρ, c,n_step_max, err, paralell, 4, verbose)
            return([trace, log_marg, DIC, WAIC])


        for k in range(len(fs)):
            start = time.time()
            fit_spliced_models(k)
            end = time.time()
            computing_times.append(
            pd.DataFrame({'body' : fs[k].body.name, 'tail' : fs[k].tail.name, 'model_type' : fs[k].model_type, 'popSize': popSize, 'paralell':paralell ,  'computing_time': [end - start]  
            })
            )
    
res = pd.concat(computing_times)
res.to_csv("../../Data/danish/computing_time_fixed.csv", index = False)



True
5000
Burr_Pareto-Tail_disjoint
Weibull_Pareto-Tail_continuous
Weibull_GPD-Tail_disjoint
20000
Burr_Pareto-Tail_disjoint
Weibull_Pareto-Tail_continuous
Weibull_GPD-Tail_disjoint


In [16]:
res

Unnamed: 0,body,tail,model_type,popSize,paralell,computing_time
0,Burr,Pareto-Tail,disjoint,5000,True,82.048763
0,Weibull,Pareto-Tail,continuous,5000,True,42.153293
0,Weibull,GPD-Tail,disjoint,5000,True,54.191193
0,Burr,Pareto-Tail,disjoint,20000,True,259.880702
0,Weibull,Pareto-Tail,continuous,20000,True,117.020868
0,Weibull,GPD-Tail,disjoint,20000,True,186.655932
