In [1]:
print("In this document we check whether fitting GPD is good for different values of parameters sampled from it.")

In this document we check whether fitting GPD is good for different values of parameters sampled from it.


In [2]:
# from scipy.stats import genpareto
import pystan
import numpy as np
from pystan import StanModel

In [3]:
GPD = """
functions {
  real myGPD_lpdf(real y, real alpha, real beta) {
      //return -(1+1/c)*log(1+c*y2);
      return -(alpha + 1)*( log(1+y/beta) )+(log(alpha) - log(beta));
  }
  // above distribution is a special case of the distribution in the paper for alpha = beta = 1 / c
  // c = 1/beta
}
data { 
  int N;
  real y[N]; // points sampled from gpd in python with some(known) parameters, by mcmc we recover true values of those params
}
parameters { 
  //real c; 
  real alpha;
  real beta;
}
model {
  // Priors
  // no priors - we assume improper priors on params
  // c ~ uniform(0,1);

// Likelihood
  for(n in 1:N) {
    target += myGPD_lpdf( y[n] | alpha, beta );
  }

}
generated quantities{}
"""

In [4]:
print("We want to sample data exactly from reparametrized form of GPD distribution ")

We want to sample data exactly from reparametrized form of GPD distribution 


In [5]:
from numpy import polyval, place, extract, any, asarray, nan, inf, pi
from numpy import (where, arange, putmask, ravel, sum, shape,
                   log, sqrt, exp, arctanh, tan, sin, arcsin, arctan,
                   tanh, cos, cosh, sinh, log1p, expm1)

from scipy.stats import rv_continuous

class genpareto_gen(rv_continuous):
    """A generalized Pareto continuous random variable.
    %(before_notes)s
    Notes
    -----
    The probability density function for `genpareto` is::
        genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
    for ``c != 0``, and for ``x >= 0`` for all c,
    and ``x < 1/abs(c)`` for ``c < 0``.
    %(example)s
    """
#     def _argcheck(self, c):
#         c = asarray(c)
#         self.b = where(c < 0, 1.0/abs(c), inf)
#         return where(c == 0, 0, 1)

    def _pdf(self, x, alpha, beta):
        Px = alpha / beta * pow(1+x/beta, asarray(-1.0-alpha))
        return Px

    def _logpdf(self, x, alpha, beta):
        return (-1.0-alpha) * np.log(1 + x/beta) + np.log(alpha) - np.log(beta)

#     def _cdf(self, x, c):
#         return 1.0 - pow(1+c*x, asarray(-1.0/c))

#     def _ppf(self, q, c):
#         vals = 1.0/c * (pow(1-q, -c)-1)
#         return vals

#     def _munp(self, n, c):
#         k = arange(0, n+1)
#         val = (-1.0/c)**n * sum(comb(n, k)*(-1)**k / (1.0-c*k), axis=0)
#         return where(c*n < 1, val, inf)

#     def _entropy(self, c):
#         if (c > 0):
#             return 1+c
#         else:
#             self.b = -1.0 / c
#             return rv_continuous._entropy(self, c)
genpareto = genpareto_gen(a=0.0, name='genpareto')

In [6]:
N, alpha, beta = 1000, 1, 1
r = genpareto.rvs(alpha, beta, size=N)


In [7]:
r

array([1.74991590e-01, 7.87123350e-01, 2.44590712e+00, 2.35081766e+00,
       5.43069863e+00, 1.35684290e+00, 8.24227006e-01, 8.10707233e-02,
       8.18506668e+01, 2.94920801e+00, 7.41575014e-02, 2.78281313e-01,
       6.63943633e-01, 6.83667771e+00, 2.89722907e-01, 4.05456209e-01,
       3.50489727e-01, 2.25075744e-01, 3.10804957e-01, 4.31231356e+00,
       1.45174538e-01, 1.25178201e+00, 2.59273263e-01, 7.27394466e-01,
       4.44114463e-01, 1.84428137e+01, 1.09484790e+01, 8.14212209e-01,
       9.52040380e+00, 1.61926153e+00, 4.78149950e+00, 3.87868223e-02,
       2.93898384e+00, 3.89971292e+01, 6.20300050e-01, 5.44273103e+00,
       3.84030062e-01, 2.04631899e-01, 1.62055689e-01, 8.66016609e-02,
       1.52214838e+00, 8.05545301e+00, 3.09864916e-01, 2.32926136e+00,
       5.87253942e-01, 6.04764100e-02, 4.19324703e-01, 1.48122472e+00,
       4.71575208e+00, 6.72416242e+00, 2.16216146e-01, 1.01658553e+00,
       8.57363529e-01, 1.21324283e-01, 1.93901008e-01, 1.68417732e-01,
      

In [8]:
print("We expect to obtain \n alpha = ", alpha, "\n beta = ", beta)

We expect to obtain 
 alpha =  1 
 beta =  1


In [9]:
data = dict(N = N,  y = r) 
fit = StanModel(model_code=GPD).sampling(data=data,iter=1000,warmup=200, chains=1) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1f5534f12167b1e11be9c0c4dfd4126d NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_1f5534f12167b1e11be9c0c4dfd4126d.
1 chains, each with iter=1000; warmup=200; thin=1; 
post-warmup draws per chain=800, total post-warmup draws=800.

        mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha   1.01  5.9e-3   0.07   0.88   0.96    1.0   1.05   1.15    132   1.02
beta    0.95  9.9e-3   0.11   0.75   0.87   0.94   1.01   1.19    129   1.02
lp__   -1929    0.08    1.0  -1932  -1930  -1929  -1929  -1928    168    1.0

Samples were drawn using NUTS at Mon May 14 11:13:06 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
