In [None]:
# Notebook purpose statement (narrative text emitted through print).
print("In this document we check whether fitting the Fisher distribution is reasonable for different values of parameters sampled from it.")

In [None]:
import pystan
import numpy as np
from pystan import StanModel 
# from scipy.stats import f # fisher distribution to check whether it works well 

In [7]:
import numpy as np
import pystan
import scipy.special as ss
from numpy import polyval, place, extract, any, asarray, nan, inf, pi
from numpy import (where, arange, putmask, ravel, sum, shape,
                   log, sqrt, exp, arctanh, tan, sin, arcsin, arctan,
                   tanh, cos, cosh, sinh, log1p, expm1)
from pystan import StanModel
# `f` is the F (Fisher-Snedecor) distribution used by the data-simulation cells below.
from scipy.stats import f, rv_continuous

class Fischer_gen(rv_continuous):
    """Three-parameter Fisher (scaled beta-prime) continuous random variable.

    Notes
    -----
    Shape parameters are ``alpha1 > 0``, ``alpha2 > 0`` and ``beta > 0``.
    Writing ``z = x / beta``, the density on ``x >= 0`` is::

        pdf(x) = z**(alpha1 - 1) * (1 + z)**(-alpha1 - alpha2)
                 / (beta * B(alpha1, alpha2))

    where ``B`` is the beta function.  This is the same distribution as
    ``scipy.stats.betaprime(alpha1, alpha2, scale=beta)``; the classical
    F(df1, df2) distribution is the special case ``alpha1 = df1 / 2``,
    ``alpha2 = df2 / 2``, ``beta = df2 / df1``.

    The closed-form ``_cdf`` and ``_ppf`` below let ``rvs`` sample by inverse
    transform directly instead of falling back on numerical integration of
    the density followed by root finding.
    """

    def _logpdf(self, x, alpha1, alpha2, beta):
        # Work on the log scale: betaln does not under/overflow for large
        # shapes, and xlogy returns 0 for 0 * log(0) (the alpha1 == 1, x == 0 case).
        z = x / beta
        return (ss.xlogy(alpha1 - 1, z)
                - ss.xlog1py(alpha1 + alpha2, z)
                - ss.betaln(alpha1, alpha2)
                - np.log(beta))

    def _pdf(self, x, alpha1, alpha2, beta):
        return np.exp(self._logpdf(x, alpha1, alpha2, beta))

    def _cdf(self, x, alpha1, alpha2, beta):
        # With z = x / beta, z / (1 + z) = x / (x + beta) follows a
        # Beta(alpha1, alpha2) law, whose CDF is the regularized incomplete beta.
        return ss.betainc(alpha1, alpha2, x / (x + beta))

    def _ppf(self, q, alpha1, alpha2, beta):
        # Invert the Beta CDF, then map u in (0, 1) back to x = beta * u / (1 - u).
        u = ss.betaincinv(alpha1, alpha2, q)
        return beta * u / (1 - u)


# Frozen-shape-free instance; a=0.0 sets the lower end of the support.
Fischer = Fischer_gen(a=0.0, name='Fischer')

In [8]:
# Sample size and ground-truth parameters for the first recovery experiment.
N = 10000
alpha1 = 1
alpha2 = 2
beta = 3

# Draw N observations from the custom three-parameter distribution.
r = Fischer.rvs(alpha1, alpha2, beta, size=N)


In [9]:
from prettytable import PrettyTable

m = 3  # number of decimals shown in the table
quantiles = [2.5, 25, 50, 75, 97.5]

# Empirical summary of the simulated sample `r`: mean, standard deviation and
# the listed percentiles, one column each.
t = PrettyTable(['pr.distr.', 'mean', 'sd'] + ["%g%%" % q for q in quantiles])
summary = [np.mean(r), np.std(r)] + [np.percentile(r, q) for q in quantiles]

# The row label names the distribution `r` was sampled from; "%.*f" takes the
# number of decimals from `m` so the setting above is actually honoured.
t.add_row(['Fisher'] + ["%.*f" % (m, value) for value in summary])
print(t)

+-----------+-------+-------+-------+-------+-------+-------+--------+
| pr.distr. |  mean |   sd  |  2.5% |  25%  |  50%  |  75%  | 97.5%  |
+-----------+-------+-------+-------+-------+-------+-------+--------+
|  Fréchet  | 3.098 | 8.319 | 0.040 | 0.481 | 1.278 | 3.053 | 16.670 |
+-----------+-------+-------+-------+-------+-------+-------+--------+


In [10]:
# Stan program: three-parameter Fisher (scaled beta-prime) likelihood with
# gamma(2, 2) priors on every parameter. Passed to StanModel(model_code=...).
Fisher = """
functions {
  // Log-density of the three-parameter Fisher (scaled beta-prime) distribution:
  //   p(y) = (y/beta)^(alpha1-1) * (1 + y/beta)^(-(alpha1+alpha2))
  //          / (beta * B(alpha1, alpha2))
  // The classical F(df1, df2) distribution is the special case
  //   alpha1 = df1/2, alpha2 = df2/2, beta = df2/df1.
  real myFisher_lpdf(real y, real alpha1, real alpha2, real beta) {
    return -lbeta(alpha1, alpha2) - log(beta)
           + (alpha1 - 1) * log(y / beta)
           - (alpha1 + alpha2) * log1p(y / beta);
  }
}

data {
  int<lower=0> N;      // number of observations
  real<lower=0> y[N];  // points sampled in Python with known parameters; MCMC should recover them
}
parameters {
  // parameters of the Fisher distribution
  real<lower=0> alpha1;
  real<lower=0> alpha2;
  real<lower=0> beta;
}
model {
  // Proper gamma(shape=2, rate=2) priors (prior mean 1) on all parameters.
  // Removing these statements would leave Stan with improper flat priors.
  alpha1 ~ gamma(2, 2);
  alpha2 ~ gamma(2, 2);
  beta ~ gamma(2, 2);
  // Likelihood
  for (n in 1:N) {
    target += myFisher_lpdf(y[n] | alpha1, alpha2, beta);
  }
}

generated quantities {}
"""

In [11]:
# Bundle the simulated sample under the names the Stan `data` block declares.
data = {"N": N, "y": r}

# Compile the Stan program and draw from the posterior: one chain,
# 1000 iterations of which the first 200 are warm-up.
fit = StanModel(model_code=Fisher).sampling(
    data=data,
    iter=1000,
    warmup=200,
    chains=1,
)
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_d4729f2aae7e8246b7fdb2f0d877cfb5 NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_d4729f2aae7e8246b7fdb2f0d877cfb5.
1 chains, each with iter=1000; warmup=200; thin=1; 
post-warmup draws per chain=800, total post-warmup draws=800.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   1.03  1.4e-3   0.02   0.99   1.02   1.03   1.05   1.07    204   1.01
alpha2   1.92  4.2e-3   0.06    1.8   1.87   1.92   1.96   2.04    210   1.01
beta     2.81    0.01   0.17   2.49   2.69   2.81   2.93   3.15    178   1.01
lp__   -1.9e4    0.07   1.15 -1.9e4 -1.9e4 -1.9e4 -1.9e4 -1.9e4    310    1.0

Samples were drawn using NUTS at Sat May 26 15:56:36 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


In [12]:
# Extract the posterior draws once and address them by parameter name rather
# than by position in the returned mapping.
posterior = fit.extract()

# NOTE(review): PyStan 2's extract() permutes draws by default, so this series
# is presumably not in sampling order - confirm before reading it as a trace.
traceplot_alpha1 = posterior['alpha1'].tolist()

# Posterior means. These rebind alpha1/alpha2/beta, replacing the ground-truth
# values that were used to simulate the data.
alpha1 = np.mean(posterior['alpha1'])
alpha2 = np.mean(posterior['alpha2'])
beta = np.mean(posterior['beta'])

print(" alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

 alpha1 =  1.031160831658588 
 alpha2 =  1.9188920831802476 
 beta =  2.814958547795438


In [13]:
import matplotlib.pyplot as plt

# Two side-by-side panels sharing the y-axis: the alpha1 draws on the left and
# their histogram, rotated so the value axes line up, on the right.
fig, ax = plt.subplots(1, 2, sharey=True, figsize=[14,8])
plt.suptitle('traceplot with histogram of values of alpha1 parameter inferred by PyStan')

# Left panel: alpha1 values against draw index.
ax[0].plot(traceplot_alpha1)
ax[0].set_xlabel("number of iteration")
ax[0].set_ylabel("value of alpha1")

# Right panel: horizontal 50-bin histogram of the same draws, all bins grey.
Nb, bins, patches = ax[1].hist(
    traceplot_alpha1,
    bins=50,
    color="#777777",
    orientation="horizontal",
)
ax[1].set_xlabel("quantity of records")

# Display first, then write the same figure object to disk.
plt.show()
fig.savefig("PyStan_recovers_good_values_of_Fischer.pdf", bbox_inches='tight')

<Figure size 1400x800 with 2 Axes>

In [None]:
# Degrees of freedom of a standard F distribution and the sample size.
df1, df2, N = 10, 10, 1000 # for parameters not too large we recover good values of fitted distribution

# F(df1, df2) corresponds to alpha1 = df1/2, alpha2 = df2/2, beta = df2/df1
# in the three-parameter form fitted by the Stan program.
print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

# `f` is scipy.stats.f; it must be imported in the imports cell for this to run.
r = f.rvs(df1, df2, size=N) # we create data sampled from Fisher distribution

In [None]:
# reparametrizing the distribution
alpha1, alpha2, beta = 0.5, 4, 2
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# An F(2*alpha1, 2*alpha2) variate is a beta-prime(alpha1, alpha2) variate
# scaled by alpha2/alpha1, so multiplying by beta*alpha1/alpha2 gives data with
# scale exactly `beta`. (Dividing by beta**2 only matches when
# alpha2/alpha1 == beta**3, which happens to hold for these values.)
f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) * beta * alpha1 / alpha2
f_michal[:10]  # preview only, instead of dumping all N values

In [None]:
# Narrative separator: announces that the next experiment uses another parameter set.
print("Again, we try the different parameters:")

In [None]:
# reparametrizing the distribution
alpha1, alpha2, beta = 10, 3, 7
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# An F(2*alpha1, 2*alpha2) variate is a beta-prime(alpha1, alpha2) variate
# scaled by alpha2/alpha1. Rescaling by beta*alpha1/alpha2 therefore yields
# data whose true scale is `beta`; dividing by beta**2 would instead give
# scale alpha2/(alpha1*beta**2), and the printed `beta` could not be recovered.
f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) * beta * alpha1 / alpha2

In [None]:
# Fit the Stan program to the freshly simulated sample. StanModel(...)
# compiles the program again; sampling then runs one chain of 1200
# iterations with 200 warm-up draws.
data = {"N": N, "y": f_michal}
fit = StanModel(model_code=Fisher).sampling(
    data=data,
    iter=1200,
    warmup=200,
    chains=1,
)
print(fit)

In [None]:
# Scratch check of the parameter sampler: one draw from Uniform(0, 15).
# Asking for a scalar (no size argument) avoids calling float() on a
# 1-element array, which NumPy has deprecated.
s = float(np.random.uniform(0, 15))
s

In [None]:
# Draw the three true parameters independently from Uniform(0, 15).
# .tolist() yields plain Python floats without the deprecated float(array) conversion.
alpha1, alpha2, beta = np.random.uniform(0, 15, 3).tolist()
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# An F(2*alpha1, 2*alpha2) variate is a beta-prime(alpha1, alpha2) variate
# scaled by alpha2/alpha1. Rescaling by beta*alpha1/alpha2 therefore yields
# data whose true scale is `beta`; dividing by beta**2 would instead give
# scale alpha2/(alpha1*beta**2), and the printed `beta` could not be recovered.
f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) * beta * alpha1 / alpha2

In [None]:
# Same fit as before on the sample simulated with random parameters:
# recompile the Stan program, then one chain, 1200 iterations, 200 warm-up.
data = {"N": N, "y": f_michal}
fit = StanModel(model_code=Fisher).sampling(
    data=data,
    iter=1200,
    warmup=200,
    chains=1,
)
print(fit)

In [None]:
# Narrative conclusion for the Uniform(0, 15) experiment (text emitted through print).
print("For 1200 iterations, 200 warm-up, 1 chain and params values sampled from uniform (0,15) we are able to recover values not totally out of the blue but not really correct either. We try to increase number of iterations.")

In [None]:
# Draw the three true parameters independently from Uniform(0, 20).
# .tolist() yields plain Python floats without the deprecated float(array) conversion.
alpha1, alpha2, beta = np.random.uniform(0, 20, 3).tolist()
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# An F(2*alpha1, 2*alpha2) variate is a beta-prime(alpha1, alpha2) variate
# scaled by alpha2/alpha1. Rescaling by beta*alpha1/alpha2 therefore yields
# data whose true scale is `beta`; dividing by beta**2 would instead give
# scale alpha2/(alpha1*beta**2), and the printed `beta` could not be recovered.
f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) * beta * alpha1 / alpha2

In [None]:
# Longer run on the new sample: recompile the Stan program, then two chains
# of 2000 iterations each, 200 of them warm-up.
data = {"N": N, "y": f_michal}
fit = StanModel(model_code=Fisher).sampling(
    data=data,
    iter=2000,
    warmup=200,
    chains=2,
)
print(fit)

In [None]:
# Narrative remark on the fit printed by the preceding cell: the beta estimate looks off.
print("Some problems recovering true value of beta ? ")

In [None]:
# Narrative: the next fit reuses the current data and only raises the iteration count.
print("I try the same values of params, but increase the number of iterations.")

In [None]:
# Refit the unchanged sample with more draws: recompile the Stan program,
# then two chains of 4000 iterations each, 200 of them warm-up.
data = {"N": N, "y": f_michal}
fit = StanModel(model_code=Fisher).sampling(
    data=data,
    iter=4000,
    warmup=200,
    chains=2,
)
print(fit)

In [None]:
# Narrative: motivates the next experiment, which uses integer-valued parameters.
print("OK, still the fit is not perfect, maybe better for integer values ?")

In [None]:
# Draw three integer-valued true parameters from {1, ..., 19}. Starting at 1
# excludes 0, which truncating a Uniform(0, 20) draw could produce and which
# is not a valid shape/scale value. .tolist() yields plain Python ints.
alpha1, alpha2, beta = np.random.randint(1, 20, size=3).tolist()
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# An F(2*alpha1, 2*alpha2) variate is a beta-prime(alpha1, alpha2) variate
# scaled by alpha2/alpha1. Rescaling by beta*alpha1/alpha2 therefore yields
# data whose true scale is `beta`; dividing by beta**2 would instead give
# scale alpha2/(alpha1*beta**2), and the printed `beta` could not be recovered.
f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) * beta * alpha1 / alpha2

In [None]:
# Fit the sample simulated with integer parameters: recompile the Stan
# program, then two chains of 4000 iterations each, 200 of them warm-up.
data = {"N": N, "y": f_michal}
fit = StanModel(model_code=Fisher).sampling(
    data=data,
    iter=4000,
    warmup=200,
    chains=2,
)
print(fit)