In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pystan
from pystan import StanModel 
from numpy import polyval, place, extract, any, asarray, nan, inf, pi
from numpy import (where, arange, putmask, ravel, sum, shape,
                   log, sqrt, exp, arctanh, tan, sin, arcsin, arctan,
                   tanh, cos, cosh, sinh, log1p, expm1)

from scipy.stats import rv_continuous
from scipy.stats import f


class frechet_gen(rv_continuous):
#     def _argcheck(self, c):
#         c = asarray(c)
#         self.b = where(c < 0, 1.0/abs(c), inf)
#         return where(c == 0, 0, 1)

#     def _pdf(self, x, alpha1, alpha2, beta):
#         Px = 1 / beta / ss.beta(alpha1, alpha2) * pow(x/beta, asarray(alpha1-1.0)) * pow(1 + x/beta, asarray(- alpha1 - alpha2))
#         return Px

#     def _logpdf(self, x, alpha1, alpha2, beta):
#         return (alpha1 - 1) * np.log(x) - alpha1 * np.log(beta) - np.log(ss.beta(alpha1, alpha2)) - (alpha1 + alpha2) * np.log(1 + x/beta)

    def _cdf(self, x, beta):
        return exp(-pow(x, -1/beta))
#     def _ppf(self, q, c):
#         vals = 1.0/c * (pow(1-q, -c)-1)
#         return vals

#     def _munp(self, n, c):
#         k = arange(0, n+1)
#         val = (-1.0/c)**n * sum(comb(n, k)*(-1)**k / (1.0-c*k), axis=0)
#         return where(c*n < 1, val, inf)

#     def _entropy(self, c):
#         if (c > 0):
#             return 1+c
#         else:
#             self.b = -1.0 / c
#             return rv_continuous._entropy(self, c)
frechet = frechet_gen(a=0.0, name='frechet') # we specify the support [a,b], no b means b = infinity

In [2]:
# we need a function to get a excesses
def k_greatest_values(a,k):
    """returns k greatest elements from the list and k-1 value starting from which we consider values to be extreme"""
    u = np.sort(a, axis=None)[-1-k]
    a = np.sort(a, axis=None)[-1-k+1:]
    a = asarray([a-u for x in a])
    return(a[1].tolist(), u) # u the starting value from which we consider others as excesses

In [3]:
GPD = """
functions {
  real myGPD_lpdf(real y, real alpha, real beta) {
      return -(alpha + 1)*( log(1+y/beta) )+(log(alpha) - log(beta));
  }
  
  real myBetaPrior(real x, real beta) {
      return -log(beta); // log(1/beta) = log(1) - log(beta) = - log(beta)
  }
  
  
}
data { 
  int N;
  real y[N]; // points sampled from gpd in python with some(known) parameters, by mcmc we recover true values of those params
}
parameters { 
  real alpha;
  real beta;
}
model {
  // Priors; no priors - we assume improper priors on params
  alpha ~ gamma(1,1);
  beta ~ gamma(1,1);

// Likelihood
  for(n in 1:N) {
    target += myGPD_lpdf( y[n] | alpha, beta );
  }

}
generated quantities{}
"""

In [4]:
Fisher = """
functions { 
 real myFisher_lpdf(real y, real alpha1, real alpha2, real beta) {
      return -lbeta(alpha1,alpha2)-log(beta)+(alpha1-1)*log(y/beta)-(alpha1+alpha2)*log(1+y/beta);
  }
  
// to recover more general distribution of Fisher parametrized by three parameters we need to multiply the above distribution 
// by: df1**df1/2
// we have alpha1,2 = df1,2/2, beta = df2/df1
}

data { 
  int N;
  real y[N]; // points sampled from fisher in python with some(known) parameters, by mcmc we recover true values of those params
}
parameters { 
  //parameters of the Fisher
  //real df1;
  //real df2;
  real<lower=0> alpha1;
  real<lower=0> alpha2;
  real<lower=0> beta;
  
}
model {
  // when we deliberately do not specify priors then Stan works with improper priors
  alpha1 ~ gamma(1,1);
  alpha2 ~ gamma(1,1);
  beta ~ gamma(1,1);
   // Likelihood
  for(n in 1:N) {
    target += myFisher_lpdf( y[n] |alpha1, alpha2, beta);
  }
}

generated quantities{}
"""

In [5]:
def quantiles_GPD(excesses, k, u): 
    """ 
    we forward to function array of the quantiles as excesses, the number of excesses, k, and the border value u
    """
    q = [0.9, 0.95, 0.975, 0.99, 0.999]
    quant_GPD = np.zeros(len(q)) 
    bayesian_quant_GPD = np.zeros(len(q))
    
#     k = 100 # number of excesses
#     frechet, u = k_greatest_values(r, k)
    
    # here we fit GPD to excesses via PyStan
    data = dict(N = k,  y = excesses) 
    fit = StanModel(model_code=GPD).sampling(data=data,iter=1000,warmup=200, chains=1) 
    
    # we save the params from the fit to calculate GPD quantiles and their traceplots to calculate Bayesian GPD quantiles
    traceplot_beta_GPD = list(fit.extract().values())[1].tolist() 
    traceplot_alpha = list(fit.extract().values())[0].tolist()
    traceplot_gamma = np.divide(np.ones(len(traceplot_alpha)), traceplot_alpha)
    beta_GPD = np.mean(list(fit.extract().values())[1].tolist())
    alpha = np.mean(list(fit.extract().values())[0].tolist())
    gamma = 1 / alpha 
    
    # we also want to keep track of parameters from each fit
#     values_of_beta_GPD = np.zeros(n)
    
    for i in range(len(q)):
        quant_GPD[i] = u + beta_GPD*( pow( N * (1-q[i]) / k, -gamma ) - 1 ) 
        for j in range(len(traceplot_gamma)):
                bayesian_quant_GPD[i] = bayesian_quant_GPD[i] + u + traceplot_beta_GPD[j] * (pow( N * (1 - q[i]) / k, - traceplot_gamma[j] ) - 1)
    bayesian_quant_GPD = bayesian_quant_GPD / len(traceplot_gamma)
    list_of_params = [alpha, beta_GPD]
    return(quant_GPD, bayesian_quant_GPD, list_of_params ) # it return arrays: quant_GPD, bayesian_quant_GPD and values alpha, beta_GPD

# now the same as above but for Fisher quantiles
def quantiles_Fisher(excesses, k, u):
    q = [0.9, 0.95, 0.975, 0.99, 0.999]
    quant_Fisher = np.zeros(len(q)) 
    bayesian_quant_Fisher = np.zeros(len(q))
    
#     k = 100 # number of excesses
#     frechet, u = k_greatest_values(r, k)
    
    # here we fit GPD to excesses via PyStan
    data = dict(N = k,  y = excesses) 
    fit = StanModel(model_code=Fisher).sampling(data=data,iter=1000,warmup=200, chains=1) 
    
    # we save the params from the fit to calculate Fisher quantiles and their traceplots to calculate Bayesian Fisher quantiles
    traceplot_beta = list(fit.extract().values())[2].tolist()
    traceplot_alpha1 = list(fit.extract().values())[1].tolist()
    traceplot_alpha2 = list(fit.extract().values())[0].tolist()
    beta = np.mean(list(fit.extract().values())[2].tolist())
    alpha2 = np.mean(list(fit.extract().values())[1].tolist())
    alpha1 = np.mean(list(fit.extract().values())[0].tolist())
    beta0 = alpha2/alpha1
    
    for i in range(len(q)):
        quant_Fisher[i] = u + beta0 / beta * f.isf(N / k * (1-q[i]), 2 * alpha1, 2 * alpha2, loc=0, scale=1)
        for j in range(len(traceplot_alpha1)):
                bayesian_quant_Fisher[i] = bayesian_quant_Fisher[i] + u + traceplot_alpha2[j] / traceplot_alpha1[j] / traceplot_beta[j] * f.isf(N / k *(1- q[i]), 2 * traceplot_alpha1[j], 2 * traceplot_alpha2[j], loc=0, scale=1)
    bayesian_quant_Fisher = bayesian_quant_Fisher / len(traceplot_alpha1)
    list_of_params = [alpha1, alpha2, beta]
    return(quant_Fisher, bayesian_quant_Fisher, list_of_params) # it return arrays: quant_Fisher, bayesian_quant_Fisher and values of params


In [6]:
def k_greatest_values_matrices(a,k):
    """returns k greatest elements from the list a and k - 1 value starting from which we consider greater values as extremes"""
    # we prepare the matrix for the excesses, the last value of 1 could be substituted by any value within the range
    mat = np.zeros( len(a[-1 - k + 1 : , 1] ) ) 
    # we prepare the vector in which we will return u values for each dataset in column of a
    u = np.zeros(len(r[0 , : ] )) 
    for i in range(len(a[0,:])): 
        # index i goes through the columns, instead of len(a[0,:]) there could be len(a[i,:]) for i in range of columns
        u[i] = a[-1 - k, i] 
        # u is a list of values s.t. bigger values are considered as excesses, for each set of data, i.e. for each column we save an u value
        mat = np.column_stack( (mat,a[ -1 - k + 1 : , i]) ) 
    # in mat matrix we return the values of excesses but not yet transformed (y_i = x_i - u)
    return(mat, u) # u is the starting value from which we consider others as excesses

In [7]:
n = 3 # number of sampled dataset over which we average the quantiles
N, beta, k = 1000,  1/2, 100
q = [0.9, 0.95, 0.975, 0.99, 0.999]

r = frechet.rvs(beta, size=N)
for i in range(n):
    r = np.column_stack( (r, frechet.rvs(beta, size=N) ) ) 

# we need to sort in increasing order gathered data
for i in range(len(r[0,:])):
    r[:,i] = np.sort(r[:,i], axis=None)# we sort data sampled from frechet, each dataset is in separate column
    
# in data frechet we keep the matrix of excesses, each sampled data set is in 
data_frechet,u = k_greatest_values_matrices(r,k) 

# delete first column of a matrix A, to match the sizes 
A = np.delete(data_frechet, 0, 1)
# form the array of u values we create matrix, in columns we have repeated u values   
B = [ [x] * k for x in u ] 

# here we subtract u_i from excesses in each dataset
C = np.array(A) - np.array(B).transpose() 

In [8]:
averaged_quant_GPD, averaged_bayesian_quant_GPD, averaged_params_GPD = np.zeros(len(q)), np.zeros(len(q)), np.zeros(2)
averaged_quant_Fisher, averaged_bayesian_quant_Fisher, averaged_params_Fisher = np.zeros(len(q)), np.zeros(len(q)), np.zeros(3)

# to compute standard deviation of the quantiles we need to store the values of quantiles after each iteration and use np.sd(array)
# sd_averaged_quant_GPD_q90 = np.zeros(n)
# sd_averaged_bayesian_quant_GPD_q90 = np.zeros(n)
# sd_averaged_quant_Fisher_q90 = np.zeros(n)
# sd_averaged_bayesian_quant_Fisher_q90 = np.zeros(n)
# sd_averaged_params_Fisher_q90 = np.zeros(n)

# sd_averaged_quant_GPD_q95 = np.zeros(n)
# sd_averaged_bayesian_quant_GPD_q95 = np.zeros(n)
# sd_averaged_quant_Fisher_q95 = np.zeros(n)
# sd_averaged_bayesian_quant_Fisher_q95 = np.zeros(n)
# sd_averaged_params_Fisher_q95 = np.zeros(n)

# sd_averaged_quant_GPD_q975 = np.zeros(n)
# sd_averaged_bayesian_quant_GPD_q975 = np.zeros(n)
# sd_averaged_quant_Fisher_q975 = np.zeros(n)
# sd_averaged_bayesian_quant_Fisher_q975 = np.zeros(n)
# sd_averaged_params_Fisher_q975 = np.zeros(n)

# sd_averaged_quant_GPD_q99 = np.zeros(n)
# sd_averaged_bayesian_quant_GPD_q99 = np.zeros(n)
# sd_averaged_quant_Fisher_q99 = np.zeros(n)
# sd_averaged_bayesian_quant_Fisher_q99 = np.zeros(n)
# sd_averaged_params_Fisher_q99 = np.zeros(n)

# sd_averaged_quant_GPD_q999 = np.zeros(n)
# sd_averaged_bayesian_quant_GPD_q999 = np.zeros(n)
# sd_averaged_quant_Fisher_q999 = np.zeros(n)
# sd_averaged_bayesian_quant_Fisher_q999 = np.zeros(n)
# sd_averaged_params_Fisher_q999 = np.zeros(n)




for i in range(n):   
    excesses_array = asarray(C.transpose().tolist()[i])
    quant_GPD, bayesian_quant_GPD, params_GPD = quantiles_GPD(excesses_array, k, u[i])
    quant_Fisher, bayesian_quant_Fisher, params_Fisher = quantiles_Fisher(excesses_array, k, u[i])
    
#     sd_averaged_quant_GPD_q90[i] = quant_GPD[0]
#     sd_averaged_bayesian_quant_GPD_q90[i] = bayesian_quant_GPD[1]
#     sd_averaged_quant_Fisher_q90[i] = quant_Fisher[2]
#     sd_averaged_bayesian_quant_Fisher_q90[i] = bayesian_quant_Fisher[3]
    
#     sd_averaged_quant_GPD_q95 = np.zeros(n)
#     sd_averaged_bayesian_quant_GPD_q95 = np.zeros(n)
#     sd_averaged_quant_Fisher_q95 = np.zeros(n)
#     sd_averaged_bayesian_quant_Fisher_q95 = np.zeros(n)
#     sd_averaged_params_Fisher_q95 = np.zeros(n)
    
    averaged_quant_GPD += quant_GPD
    averaged_bayesian_quant_GPD += bayesian_quant_GPD
    averaged_params_GPD += params_GPD 

    averaged_quant_Fisher += quant_Fisher
    averaged_bayesian_quant_Fisher += bayesian_quant_Fisher
    averaged_params_Fisher += params_Fisher 

averaged_quant_GPD = averaged_quant_GPD / ( n  ) # as we iterate from i = [ 0 to i = n ) we have n simulations in total
averaged_bayesian_quant_GPD = averaged_bayesian_quant_GPD / ( n  )
averaged_params_GPD = asarray(averaged_params_GPD) / ( n  )

averaged_quant_Fisher = averaged_quant_Fisher / ( n  ) # as we iterate from i = 0 to i = n we have n simulations in total
averaged_bayesian_quant_Fisher = averaged_bayesian_quant_Fisher / ( n  )
averaged_params_Fisher = asarray(averaged_params_Fisher) / ( n  )



INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1561b2fac3108872ed6bafb6d2492041 NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_04eca6d0a20a624d371a8c595ed628be NOW.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1561b2fac3108872ed6bafb6d2492041 NOW.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_04eca6d0a20a624d371a8c595ed628be NOW.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1561b2fac3108872ed6bafb6d2492041 NOW.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_04eca6d0a20a624d371a8c595ed628be NOW.


In [9]:
quant_th = np.zeros(len(q))
for i in range(len(q)):
    quant_th[i] = pow(-log(q[i]), -beta)

m = "3" # how much we round
rounding = "%." + m + "f"
m = int(m)

In [10]:
from prettytable import PrettyTable

t_quantiles = PrettyTable(['how obtained',  'mean', 'sd', '92.5%', '95%', '97.5%', '99%', '99.9%'])
# t_quantiles.add_row(['numpy', 
#            "%.3f" % round(np.mean(r),m),
#            "%.3f" % round(np.std(r),m),
#            "%.3f" % round(np.percentile(r, 92.5),m), 
#            "%.3f" % round(np.percentile(r, 95),m), 
#            "%.3f" % round(np.percentile(r, 97.5),m), 
#            "%.3f" % round(np.percentile(r, 99),m),
#            "%.3f" % round(np.percentile(r, 99.9),m)])
t_quantiles.add_row(['theoretically', '--', '--',
                     rounding % round(quant_th[0],m),  
                     rounding % round(quant_th[1], m), 
                     rounding % round(quant_th[2], m),
                     rounding % round(quant_th[3], m),
                     rounding % round(quant_th[4], m) ])
t_quantiles.add_row(['Bayes Fisher',  '--', '--',
                     rounding % round(averaged_bayesian_quant_Fisher[0],m),  
                     rounding % round(averaged_bayesian_quant_Fisher[1], m), 
                     rounding % round(averaged_bayesian_quant_Fisher[2], m),
                     rounding % round(averaged_bayesian_quant_Fisher[3], m),
                     rounding % round(averaged_bayesian_quant_Fisher[4], m) ])
t_quantiles.add_row(['Fisher',  '--', '--',
                     rounding % round(averaged_quant_Fisher[0],m),  
                     rounding % round(averaged_quant_Fisher[1], m), 
                     rounding % round(averaged_quant_Fisher[2], m),
                     rounding % round(averaged_quant_Fisher[3], m),
                     rounding % round(averaged_quant_Fisher[4], m) ])
t_quantiles.add_row(['Bayes GPD', '--', '--',
                     rounding % round(averaged_bayesian_quant_GPD[0],m),  
                     rounding % round(averaged_bayesian_quant_GPD[1], m), 
                     rounding % round(averaged_bayesian_quant_GPD[2], m),
                     rounding % round(averaged_bayesian_quant_GPD[3], m),
                     rounding % round(averaged_bayesian_quant_GPD[4], m) ])
t_quantiles.add_row(['GPD',  '--', '--',
                     rounding % round(averaged_quant_GPD[0],m),  
                     rounding % round(averaged_quant_GPD[1], m), 
                     rounding % round(averaged_quant_GPD[2], m),
                     rounding % round(averaged_quant_GPD[3], m),
                     rounding % round(averaged_quant_GPD[4], m) ])
print(t_quantiles)

+---------------+------+----+-------+-------+-------+-------+--------+
|  how obtained | mean | sd | 92.5% |  95%  | 97.5% |  99%  | 99.9%  |
+---------------+------+----+-------+-------+-------+-------+--------+
| theoretically |  --  | -- | 3.081 | 4.415 | 6.285 | 9.975 | 31.615 |
|  Bayes Fisher |  --  | -- | 3.122 | 3.776 | 4.788 | 7.354 | 39.160 |
|     Fisher    |  --  | -- | 3.122 | 3.766 | 4.690 | 6.700 | 21.555 |
|   Bayes GPD   |  --  | -- | 0.000 | 0.000 | 0.000 | 0.000 | 0.000  |
|      GPD      |  --  | -- | 0.000 | 0.000 | 0.000 | 0.000 | 0.000  |
+---------------+------+----+-------+-------+-------+-------+--------+


In [11]:
print(" averaged_params_GPD: ", averaged_params_GPD, "\n", "averaged_params_Fisher: ", averaged_params_Fisher)

 averaged_params_GPD:  [0. 0.] 
 averaged_params_Fisher:  [1.17343837 1.62755757 2.08602232]


In [12]:
# variability plots for n = 10 replications but for 50 different values of k 

In [15]:
jump_excesses = np.linspace(20, N/2, 50).astype(int) # astype function converts the values in the array to integers
jump_excesses[8]

98