In [1]:
print("In this document we check whether fitting Fisher distribution is reasable for different values of parameters sampled from it.")

In this document we check whether fitting Fisher distribution is reasable for different values of parameters sampled from it.


In [2]:
import pystan
import numpy as np
from pystan import StanModel 
from scipy.stats import f # fisher distribution to check whether it works well 

In [3]:
df1, df2, N = 10, 10, 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

r = f.rvs(df1, df2, size=N) # we create data sampled from Fisher distribution

With this setup we should recover following values of the parameters: 
 alpha1 =  5.0 
 alpha2 =  5.0 
 beta =  1.0


In [4]:
# reparametrizing the distribution
alpha1, alpha2, beta = 0.5, 4, 2
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) / beta / beta #/ alpha2 * alpha1 # we create data sampled from Fisher distribution
f_michal

With this setup we should recover following values of the parameters: 
 alpha1 =  0.5 
 alpha2 =  4 
 beta =  2


array([2.19221467e-03, 6.83542014e-01, 3.56439995e-01, 2.72429841e-03,
       2.12339439e-01, 5.38654879e-02, 1.41445060e+00, 1.98004417e-01,
       3.79494781e-01, 1.15499004e-02, 3.40858145e-02, 3.55717229e-02,
       3.68690653e-01, 3.77619837e-02, 7.14516319e-01, 4.47060093e+00,
       5.18318898e-01, 1.38283516e-01, 1.02331452e-01, 8.45734150e-01,
       2.96940335e-01, 1.09120203e-01, 1.74057425e-01, 1.65837083e-01,
       3.23441802e-01, 1.14054321e+00, 7.89121610e-01, 1.48863323e+00,
       2.71182736e-01, 8.98091819e-02, 2.64298326e+00, 6.76422090e-01,
       9.98597103e-01, 3.78541627e-03, 2.16938716e-02, 1.02918335e-01,
       6.70271768e-02, 2.21417420e-01, 2.47706483e-01, 3.28300420e+00,
       1.59758773e-01, 1.19254444e-02, 6.48379547e-01, 3.71392766e-01,
       1.04381076e+00, 7.01615648e-02, 6.55900746e-01, 2.29986164e-02,
       2.13154934e-01, 3.04688362e-02, 3.46172600e-02, 2.29799393e-03,
       1.96724831e+00, 5.32839106e-02, 1.31260643e-04, 2.76952616e-01,
      

In [5]:
Fisher = """
functions { 
 real myFisher_lpdf(real y, real alpha1, real alpha2, real beta) {
      return -lbeta(alpha1,alpha2)-log(beta)+(alpha1-1)*log(y/beta)-(alpha1+alpha2)*log(1+y/beta);
  }
  
// to recover more general distribution of Fisher parametrized by three parameters we need to multiply the above distribution 
// by: df1**df1/2
// we have alpha1,2 = df1,2/2, beta = df2/df1
}

data { 
  int N;
  real y[N]; // points sampled from fisher in python with some(known) parameters, by mcmc we recover true values of those params
}
parameters { 
  //parameters of the Fisher
  //real df1;
  //real df2;
  real<lower=0> alpha1;
  real<lower=0> alpha2;
  real<lower=0> beta;
  
}
model {
  // when we deliberately do not specify priors then Stan works with improper priors
  //alpha1 ~ uniform(0,2);
  //alpha2 ~ gamma(2,2);
  //beta ~ gamma(2,2);
   // Likelihood
  for(n in 1:N) {
    target += myFisher_lpdf( y[n] |alpha1, alpha2, beta);
  }
}

generated quantities{}
"""

In [6]:
data = dict(N = N,  y = f_michal) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=1000,warmup=200, chains=1) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
1 chains, each with iter=1000; warmup=200; thin=1; 
post-warmup draws per chain=800, total post-warmup draws=800.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1    0.5  1.2e-3   0.02   0.46   0.48    0.5   0.51   0.54    317    1.0
alpha2   4.59    0.09   1.41   2.81   3.68   4.35   5.14   7.82    244    1.0
beta     2.56    0.07   1.06   1.26   1.89   2.38   2.94    5.0    243    1.0
lp__    351.8    0.06   1.14 348.89 351.14 352.14 352.69 353.15    372    1.0

Samples were drawn using NUTS at Sun May 13 09:37:43 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


In [7]:
print("Again, we try the different parameters:")

Again, we try the different parameters:


In [8]:
# reparametrizing the distribution
alpha1, alpha2, beta = 10, 3, 7
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) / beta / beta  # we create data sampled from Fisher distribution

With this setup we should recover following values of the parameters: 
 alpha1 =  10 
 alpha2 =  3 
 beta =  7


In [9]:
data = dict(N = N,  y = f_michal) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=1200,warmup=200, chains=1) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
1 chains, each with iter=1200; warmup=200; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=1000.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   8.97    0.17   2.59   5.88   7.43   8.42    9.8   15.6    236    1.0
alpha2   3.34    0.02    0.3   2.79   3.13   3.31   3.55   3.94    320    1.0
beta   8.2e-3  1.7e-4 2.6e-3 3.5e-3 6.6e-3 8.0e-3 9.6e-3   0.01    253    1.0
lp__   2751.0    0.07   1.31 2747.7 2750.5 2751.4 2751.9 2752.4    345   1.01

Samples were drawn using NUTS at Sun May 13 09:41:51 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


  elif np.issubdtype(np.asarray(v).dtype, float):


In [14]:
s = float(np.random.uniform(0,15,1))
s

7.731185691324624

In [15]:
alpha1, alpha2, beta = float(np.random.uniform(0,15,1)), float(np.random.uniform(0,15,1)), float(np.random.uniform(0,15,1))
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) / beta / beta  # we create data sampled from Fisher distribution

With this setup we should recover following values of the parameters: 
 alpha1 =  6.909347783501519 
 alpha2 =  5.227145372066175 
 beta =  3.489357835988672


In [16]:
data = dict(N = N,  y = f_michal) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=1200,warmup=200, chains=1) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
1 chains, each with iter=1200; warmup=200; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=1000.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   5.19    0.05   0.63   4.16   4.76   5.11   5.57   6.54    168    1.0
alpha2   7.19     0.1   1.23   5.24   6.33   7.09   7.84   10.1    160    1.0
beta     0.12  3.0e-3   0.04   0.07    0.1   0.12   0.14    0.2    153    1.0
lp__   1555.7    0.07   1.19 1552.6 1555.1 1556.0 1556.6 1557.0    333    1.0

Samples were drawn using NUTS at Sun May 13 09:47:25 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


  elif np.issubdtype(np.asarray(v).dtype, float):


In [17]:
print("For 1200 iterations, 200 warm-up, 1 chain and params values sampled from uniform (0,15) we are able to recover values not totally out of the blue but not really correct neither. We try to increase number of iterations.")

For 1200 iterations, 200 warm-up, 1 chain and params values sampled from uniform (0,15) we are able to recover values not totally out of the blue but not really correct neither. We try to increase number of iterations.


In [18]:
alpha1, alpha2, beta = float(np.random.uniform(0,20,1)), float(np.random.uniform(0,20,1)), float(np.random.uniform(0,20,1))
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) / beta / beta  # we create data sampled from Fisher distribution

With this setup we should recover following values of the parameters: 
 alpha1 =  4.262588613950948 
 alpha2 =  4.518947391956019 
 beta =  6.150954399432402


In [19]:
data = dict(N = N,  y = f_michal) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=2000,warmup=200, chains=2) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
2 chains, each with iter=2000; warmup=200; thin=1; 
post-warmup draws per chain=1800, total post-warmup draws=3600.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   4.53    0.02   0.65    3.5   4.08   4.45   4.89   6.02    686   1.01
alpha2   3.98    0.02   0.49   3.18   3.65   3.93   4.25    5.1    789   1.01
beta     0.02  2.5e-4 6.6e-3   0.01   0.02   0.02   0.03   0.04    708   1.01
lp__   2525.0    0.04   1.26 2521.8 2524.4 2525.3 2525.9 2526.4    835    1.0

Samples were drawn using NUTS at Sun May 13 09:52:02 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


In [20]:
print("Some problems recovering true value of beta ? ")

Some problems recovering true value of beta ? 


In [21]:
print("I try the same values of params, but increase the number of iterations.")

I try the same values of params, but increase the number of iterations.


In [22]:
data = dict(N = N,  y = f_michal) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=4000,warmup=200, chains=2) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
2 chains, each with iter=4000; warmup=200; thin=1; 
post-warmup draws per chain=3800, total post-warmup draws=7600.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   4.59    0.02   0.66   3.53   4.14   4.51   4.94   6.12   1148   1.01
alpha2   3.93    0.01   0.48   3.15   3.59   3.89    4.2    5.0   1160    1.0
beta     0.02  1.9e-4 6.4e-3   0.01   0.02   0.02   0.03   0.04   1093   1.01
lp__   2524.9    0.03    1.3 2521.4 2524.3 2525.3 2525.8 2526.4   1740    1.0

Samples were drawn using NUTS at Sun May 13 09:57:35 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


In [None]:
print("OK, still the fit is not perfect, maybe better for integer values ?")

In [23]:
alpha1, alpha2, beta = int(np.random.uniform(0,20,1)), int(np.random.uniform(0,20,1)), int(np.random.uniform(0,20,1))
N = 1000 # for parameters not too large we recover good values of fitted distribution

print("With this setup we should recover following values of the parameters: \n alpha1 = ", alpha1, "\n alpha2 = ", alpha2, "\n beta = ", beta)

# print("With this setup we should recover following values of the parameters: \n alpha1 = ", df1/2, "\n alpha2 = ", df2/2, "\n beta = ", df2 / df1)

f_michal = f.rvs(2 * alpha1, 2 * alpha2, size=N) / beta / beta  # we create data sampled from Fisher distribution

With this setup we should recover following values of the parameters: 
 alpha1 =  10 
 alpha2 =  19 
 beta =  16


In [24]:
data = dict(N = N,  y = f_michal) 
fit = StanModel(model_code=Fisher).sampling(data=data,iter=4000,warmup=200, chains=2) #we sample from the provided data ;
print(fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2593dbc6fd5eec19bf3b62623c410a5e NOW.
  elif np.issubdtype(np.asarray(v).dtype, float):


Inference for Stan model: anon_model_2593dbc6fd5eec19bf3b62623c410a5e.
2 chains, each with iter=4000; warmup=200; thin=1; 
post-warmup draws per chain=3800, total post-warmup draws=7600.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha1   7.41    0.42   0.42   6.99   6.99   7.41   7.83   7.83      1    nan
alpha2 2.6e12  1.2e12 1.2e12 1.5e12 1.5e12 2.6e12 3.8e12 3.8e12      1 4.8e10
beta    1.5e9   7.5e8  7.5e8  7.7e8  7.7e8  1.5e9  2.3e9  2.3e9      1 4.2e10
lp__   5170.6    13.4   13.4 5157.2 5157.2 5170.6 5183.9 5183.9      1  44.64

Samples were drawn using NUTS at Sun May 13 11:07:53 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
