In [None]:
# We simulate data from 3 distributions and study the excesses: the k largest order statistics for k \in {50, 100, 150, ...}.
# To each set of excesses we then fit the GPD and Fisher distributions, with priors as already written.

# 1) we need to sample from exactly specified distributions: a) either create a class and sample random variates from it, or
#                                                            b) use the transformation proposed by Julyan (easier)
# 2) make replications of the data sets; for each set obtain the excesses, and for each set of excesses estimate the parameters
# 3) try changing the value of k and keep refitting the parameters

In [None]:
# generating random numbers from the Frechet, Burr and log-gamma distributions
import pystan
import numpy as np
from pystan import StanModel 
from scipy.stats import frechet_r, loggamma, burr, invweibull
# from scipy.stats import burr
# from scipy.stats import loggamma
# Number of draws taken from each of the three distributions below.
N = 5000

# Frechet sample, drawn through scipy's invweibull:
#   invweibull.pdf(x, c) = c * x**(-c-1) * exp(-x**(-c))
# This is exactly the form used in the publication, where beta = 1/c.
c = 1
frechet = invweibull.rvs(c, size=N)

# Burr sample; the original note labels this case burr(1, 1/2, 2).
#   burr.pdf(x, c, d) = c * d * x**(-c-1) * (1 + x**(-c))**(-d-1)
c = 1
d = 0.5
burr = burr.rvs(c, d, size=N)  # from here on `burr` names the sample, not the scipy distribution

# Log-gamma sample:
#   loggamma.pdf(x, c) = exp(c*x - exp(x)) / gamma(c)
c = 2
loggamma = loggamma.rvs(c, size=N)  # likewise rebinds `loggamma` to the sample

# I assume the pdfs above are correct; now we need to keep the k greatest values from each sample,
# record the threshold u (the (k+1)-th greatest value, i.e. the largest value below those k),
# compute the excesses x_i - u, and fit both the GPD and the Fisher distribution to them

In [None]:
# Helper that keeps the k greatest values and subtracts the threshold value from each of them.
# a - list of values, k - number of greatest values to keep

def k_greatest_values(a, k):
    """Return the k greatest values of ``a`` as excesses over a threshold, plus the threshold.

    The threshold ``u`` is the (k+1)-th greatest value of ``a`` (the largest value
    that is *not* among the k greatest). Each returned excess is ``x - u`` for one of
    the k greatest values ``x``.

    Parameters
    ----------
    a : array-like
        Input values; flattened before sorting.
    k : int
        Number of greatest values to keep. Must satisfy ``1 <= k < number of elements``
        so that a threshold below the k greatest values exists.

    Returns
    -------
    (excesses, u) : tuple
        ``excesses`` is a plain Python list of k values in ascending order;
        ``u`` is the threshold value taken from the sorted array.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or not smaller than the number of elements.
    """
    ordered = np.sort(a, axis=None)  # ascending, flattened; sorted once and reused
    if not 1 <= k < ordered.size:
        raise ValueError(
            "k must satisfy 1 <= k < len(a); got k=%r for %d values" % (k, ordered.size)
        )
    u = ordered[-1 - k]            # largest value below the k greatest ones
    excesses = ordered[-k:] - u    # vectorised subtraction over the k greatest values
    return (excesses.tolist(), u)  # u is the starting value from which we consider others as excesses

In [None]:
# Example: keep the k = 100 greatest draws of the Frechet sample as excesses
# over the threshold u, then display them as the cell output.
k = 100
data_frechet, u = k_greatest_values(a=frechet, k=k)
data_frechet

In [None]:
# Stan program for fitting a two-parameter generalized Pareto distribution (GPD).
# alpha and beta are declared with lower bound 0 because the log-density takes
# log(alpha) and log(beta); without the bounds the sampler can propose negative
# values for which the density is undefined.
GPD = """
functions {
  // Log-density of the GPD with parameters alpha and beta:
  //   f(y) = (alpha / beta) * (1 + y / beta)^(-(alpha + 1))
  // The one-parameter form (1 + c*y)^(-(1 + 1/c)) is the special case
  // alpha = beta = 1 / c of the distribution in the paper.
  real myGPD_lpdf(real y2, real alpha, real beta) {
      return -(alpha + 1) * log1p(y2 / beta) + (log(alpha) - log(beta));
  }
}
data {
  int<lower=0> N; // number of observations
  real y2[N];     // points sampled from a GPD in python with known parameters; MCMC should recover those parameters
}
parameters {
  real<lower=0> alpha;
  real<lower=0> beta;
}
model {
  // No explicit priors are set, so Stan uses flat priors on the declared (positive) support.

  // Likelihood
  for (n in 1:N) {
    target += myGPD_lpdf( y2[n] | alpha, beta );
  }
}

generated quantities{}
"""

In [None]:
# Synthetic GPD sample with known parameters, used as input for the Stan model.
# genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
from scipy.stats import genpareto

c = 1     # shape parameter passed to scipy's genpareto
N = 100   # sample size (rebinds the N = 5000 used for the earlier simulations)
beta = 1  # factor applied, together with c, to rescale the draws

# Draw, rescale and convert to a plain Python list in one step.
r = (genpareto.rvs(c, size=N) * beta * c).tolist()

# Show the sample in ascending order as the cell output.
np.sort(r)

In [None]:
# Input for the Stan model; y2 is the sample as a plain Python list (converted from an array earlier).
data = {"N": N, "y2": r}

# Compile the Stan program held in GPD.
sm = StanModel(model_code=GPD)

# Sample with a single chain: 1000 iterations, the first 200 of them warmup.
fit = sm.sampling(data=data, iter=1000, warmup=200, chains=1)
print(fit)

In [None]:
# Save the posterior means of the fitted parameters.
# The draws are looked up by parameter name: extract() lists entries in the order the
# parameters are declared in the Stan program (alpha first, then beta), so positional
# indexing is easy to get the wrong way round.
posterior_draws = fit.extract()
alpha = np.mean(posterior_draws["alpha"])
beta = np.mean(posterior_draws["beta"])

In [None]:
# Plot a histogram (100 bins, normalised to unit area) of the Frechet excess data.
# `density=True` replaces the `normed=True` keyword, which newer matplotlib no longer accepts.
# NOTE(review): `plt` (matplotlib.pyplot) is not imported in the visible cells — confirm it is
# imported before this cell runs.
myHist = plt.hist(data_frechet, 100, density=True)
plt.show()


In [None]:
# estimation of quantiles 
def quantile_GPD(N, k, p, beta, gamma, u):
    """Quantile estimate from a GPD fitted to excesses over the threshold ``u``.

    Computes ``u + beta * ((N * p / k) ** (-gamma) - 1)``.

    Parameters
    ----------
    N, k : numbers
        Enter only through the ratio ``N * p / k`` (presumably the full sample size
        and the number of excesses — verify against the caller).
    p : float
        Probability level, e.g. 0.05.
    beta, gamma : float
        Multiplier and exponent of the formula (presumably the fitted GPD
        parameters — verify against the caller).
    u : float
        Threshold added back to the estimated excess.
    """
    ratio = N * p / k
    relative_excess = ratio ** (-gamma) - 1
    return u + beta * relative_excess

def quantile_Fisher(N, F_y, u=None):
    """Quantile estimate from the Fisher fit: the threshold plus the excess quantile.

    Parameters
    ----------
    N : any
        Unused; kept so existing calls keep working.
    F_y : float
        Value of the inverse survival function of the fitted Fisher distribution
        (presumably obtained from scipy's ``f`` distribution — verify against the caller).
    u : float, optional
        Threshold over which the excesses were computed. When omitted, the
        notebook-level variable ``u`` is used, which is what this function relied on
        before the parameter existed.

    Returns
    -------
    ``u + F_y``

    Raises
    ------
    NameError
        If ``u`` is omitted and no notebook-level ``u`` has been defined.
    """
    if u is None:
        # Backward-compatible fallback to the global threshold.
        if "u" not in globals():
            raise NameError("name 'u' is not defined")
        u = globals()["u"]
    return u + F_y

In [None]:
# Median of the Frechet excesses over the threshold u, shown as the cell output.
np.median(data_frechet)