In [1]:
print("In this document we check whether fitting Fisher distribution is reasable for different values of parameters sampled from it.")

In this document we check whether fitting Fisher distribution is reasable for different values of parameters sampled from it.


In [2]:
import pystan
import numpy as np
from pystan import StanModel 
import scipy.special as ss

In [3]:
Fisher = """
functions { 
 real myFisher_lpdf(real y, real alpha1, real alpha2, real beta) {
      return -lbeta(alpha1,alpha2)-log(beta)+(alpha1-1)*log(y/beta)-(alpha1+alpha2)*log(1+y/beta);
  }
  
// to recover more general distribution of Fisher parametrized by three parameters we need to multiply the above distribution 
// by: df1**df1/2
// we have alpha1,2 = df1,2/2, beta = df2/df1
}

data { 
  int N;
  real y[N]; // points sampled from fisher in python with some(known) parameters, by mcmc we recover true values of those params
}
parameters { 
  //parameters of the Fisher
  //real df1;
  //real df2;
  real<lower=0> alpha1;
  real<lower=0> alpha2;
  real<lower=0> beta;
  
}
model {
  // when we deliberately do not specify priors then Stan works with improper priors
  //alpha1 ~ uniform(0,2);
  //alpha2 ~ gamma(2,2);
  //beta ~ gamma(2,2);
   // Likelihood
  for(n in 1:N) {
    target += myFisher_lpdf( y[n] |alpha1, alpha2, beta);
  }
}

generated quantities{}
"""

In [31]:
from numpy import polyval, place, extract, any, asarray, nan, inf, pi
from numpy import (where, arange, putmask, ravel, sum, shape,
                   log, sqrt, exp, arctanh, tan, sin, arcsin, arctan,
                   tanh, cos, cosh, sinh, log1p, expm1)

from scipy.stats import rv_continuous

class f_gen(rv_continuous):
    """A generalized Pareto continuous random variable.
    %(before_notes)s
    Notes
    -----
    The probability density function for `genpareto` is::
        genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
    for ``c != 0``, and for ``x >= 0`` for all c,
    and ``x < 1/abs(c)`` for ``c < 0``.
    %(example)s
    """
#     def _argcheck(self, c):
#         c = asarray(c)
#         self.b = where(c < 0, 1.0/abs(c), inf)
#         return where(c == 0, 0, 1)

    def _pdf(self, x, alpha1, alpha2, beta):
        Px = 1 / beta / ss.beta(alpha1, alpha2) * pow(x/beta, asarray(alpha1-1.0)) * pow(1 + x/beta, asarray(- alpha1 - alpha2))
        return Px

    def _logpdf(self, x, alpha1, alpha2, beta):
        return (alpha1 - 1) * np.log(x) - alpha1 * np.log(beta) - np.log(ss.beta(alpha1, alpha2)) - (alpha1 + alpha2) * np.log(1 + x/beta)

    def _isf(self, q, c):
        return -ss.boxcox(q, -c) # inverse survival function
    
#     def _cdf(self, x, c):
#         return 1.0 - pow(1+c*x, asarray(-1.0/c))

#     def _ppf(self, q, c):
#         vals = 1.0/c * (pow(1-q, -c)-1)
#         return vals

#     def _munp(self, n, c):
#         k = arange(0, n+1)
#         val = (-1.0/c)**n * sum(comb(n, k)*(-1)**k / (1.0-c*k), axis=0)
#         return where(c*n < 1, val, inf)

#     def _entropy(self, c):
#         if (c > 0):
#             return 1+c
#         else:
#             self.b = -1.0 / c
#             return rv_continuous._entropy(self, c)
f = f_gen(a=0.0, name='f')

In [32]:
N, alpha1, alpha2, beta = 1000, 5, 2, 1
r = f.rvs(alpha1, alpha2, beta, size=N)
print("We expect to obtain \n alpha1 = ", alpha1,"\n alpha2 = ", alpha2, "\n beta = ", beta)

We expect to obtain 
 alpha1 =  5 
 alpha2 =  2 
 beta =  1


In [None]:
# now how to use inverse survival function in those data

In [39]:
q = 0.05
f.isf(alpha1 = 5, alpha2 = 2, beta = 1)

TypeError: isf() missing 1 required positional argument: 'q'

In [28]:
data = dict(N = N,  y = r) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=1000,warmup=200, chains=1) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
1 chains, each with iter=1000; warmup=200; thin=1; 
post-warmup draws per chain=800, total post-warmup draws=800.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   4.01    0.05   0.59   3.05   3.57   3.96   4.39   5.26    116   1.01
alpha2    2.3    0.02    0.2   1.94   2.17   2.29   2.43   2.72    168   1.01
beta     1.57    0.03   0.39   0.93   1.28   1.53   1.81   2.38    144   1.01
lp__    -2389    0.08   1.18  -2392  -2390  -2389  -2388  -2388    209    1.0

Samples were drawn using NUTS at Mon May 14 11:20:29 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


In [29]:
N, alpha1, alpha2, beta = 1000, 18, 3, 11
r = f.rvs(alpha1, alpha2, beta, size=N)
print("We expect to obtain \n alpha1 = ", alpha1,"\n alpha2 = ", alpha2, "\n beta = ", beta)

We expect to obtain 
 alpha1 =  18 
 alpha2 =  3 
 beta =  11


In [30]:
data = dict(N = N,  y = r) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=1000,warmup=200, chains=1) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
1 chains, each with iter=1000; warmup=200; thin=1; 
post-warmup draws per chain=800, total post-warmup draws=800.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1  10.35    0.24    2.9    6.5   8.34   9.77  11.87  17.39    149    1.0
alpha2   3.35    0.02   0.29   2.84   3.13   3.33   3.54   3.94    147    1.0
beta    25.02    0.74   8.64  11.38  18.36  24.03  30.35  44.09    136    1.0
lp__    -5375    0.06   1.09  -5377  -5375  -5374  -5374  -5373    291   1.01

Samples were drawn using NUTS at Mon May 14 11:23:30 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


  elif np.issubdtype(np.asarray(v).dtype, float):
