In [None]:
using Distributions, Statistics, CSV, DataFrames, Gadfly, PMP, Cairo, Fontconfig, Optim, Random

This notebook uses multiple samples generated from a Pearson type I distribution with a = 0 fixed, b = 50, $\alpha$ = 2 and $\beta$ = 2, and fits models to those samples. The samples are read from CSV files (`sample-d1-a.csv` to `sample-d1-f.csv`). Six sample sizes are tested (100, 500, 1000, 2000, 5000, 8000), with 100 samples for each size.

This code is not optimized. The running time can be very long.

---
## Distribution A
### Parameters b=50, $\alpha$=2, $\beta$=2
### Size = 100

In [None]:
# Reference distribution for this section: b = 50, α = 2, β = 2 (see the heading above).
# NOTE(review): d1, taille and nb are not referenced again in the visible cells; the
# loops and summary tables below hard-code 100 instead.
d1 = PearsonType1b(50, 2, 2)
# Sample size for this section.
taille = 100;
# Number of samples per sample size.
nb = 100;

In [None]:
# Load the size-100 samples; the fitting cells below read columns 1 to 100, one per sample.
sample_a = CSV.read("sample-d1-a.csv", DataFrame);

#### Gibbs

In [None]:
# Gibbs
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_a).
params_a = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes_MH(PearsonType1b, sample_a[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_a, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_a.b, [.025, .975])
quantile_α = quantile(params_a.α, [.025, .975])
quantile_β = quantile(params_a.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df11 = DataFrame(
    taille = 100,
    b = mean(params_a.b), α = mean(params_a.α), β = mean(params_a.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df11

#### NUTS
NUTS does not converge for every sample.

In [None]:

# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_a).
params_an = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes(PearsonType1b, sample_a[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_an, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_an.b, [.025, .975])
quantile_α = quantile(params_an.α, [.025, .975])
quantile_β = quantile(params_an.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df11n = DataFrame(
    taille = 100,
    b = mean(params_an.b), α = mean(params_an.α), β = mean(params_an.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df11n


---
### Size = 500

In [None]:
# Sample size for this section (the summary tables below hard-code the same value).
taille = 500
# Load the size-500 samples; the fitting cells below read columns 1 to 100, one per sample.
sample_b = CSV.read("sample-d1-b.csv", DataFrame);

#### Gibbs

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_b).
params_b = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes_MH(PearsonType1b, sample_b[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_b, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_b.b, [.025, .975])
quantile_α = quantile(params_b.α, [.025, .975])
quantile_β = quantile(params_b.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df12 = DataFrame(
    taille = 500,
    b = mean(params_b.b), α = mean(params_b.α), β = mean(params_b.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df12

#### NUTS

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_b).
params_bn = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes(PearsonType1b, sample_b[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_bn, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_bn.b, [.025, .975])
quantile_α = quantile(params_bn.α, [.025, .975])
quantile_β = quantile(params_bn.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df12n = DataFrame(
    taille = 500,
    b = mean(params_bn.b), α = mean(params_bn.α), β = mean(params_bn.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df12n


---
### Size = 1000

In [None]:
# Sample size for this section (the summary tables below hard-code the same value).
taille = 1000
# Load the size-1000 samples; the fitting cells below read columns 1 to 100, one per sample.
sample_c = CSV.read("sample-d1-c.csv", DataFrame);

#### Gibbs

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_c).
params_c = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes_MH(PearsonType1b, sample_c[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_c, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_c.b, [.025, .975])
quantile_α = quantile(params_c.α, [.025, .975])
quantile_β = quantile(params_c.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df13 = DataFrame(
    taille = 1000,
    b = mean(params_c.b), α = mean(params_c.α), β = mean(params_c.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

# Display the summary. The cell previously ended with `df13b`, a name that is never
# defined, so it raised an UndefVarError after doing all the fitting work.
df13

#### NUTS

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_c).
params_cn = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes(PearsonType1b, sample_c[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_cn, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_cn.b, [.025, .975])
quantile_α = quantile(params_cn.α, [.025, .975])
quantile_β = quantile(params_cn.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df13n = DataFrame(
    taille = 1000,
    b = mean(params_cn.b), α = mean(params_cn.α), β = mean(params_cn.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df13n


---
### Size = 2000

In [None]:
# Sample size for this section (the summary tables below hard-code the same value).
taille = 2000
# Load the size-2000 samples; the fitting cells below read columns 1 to 100, one per sample.
sample_d = CSV.read("sample-d1-d.csv", DataFrame);

#### Gibbs

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_d).
params_d = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes_MH(PearsonType1b, sample_d[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_d, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_d.b, [.025, .975])
quantile_α = quantile(params_d.α, [.025, .975])
quantile_β = quantile(params_d.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df14 = DataFrame(
    taille = 2000,
    b = mean(params_d.b), α = mean(params_d.α), β = mean(params_d.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df14

#### NUTS

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_d).
params_dn = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes(PearsonType1b, sample_d[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_dn, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_dn.b, [.025, .975])
quantile_α = quantile(params_dn.α, [.025, .975])
quantile_β = quantile(params_dn.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df14n = DataFrame(
    taille = 2000,
    b = mean(params_dn.b), α = mean(params_dn.α), β = mean(params_dn.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df14n


---
### Size = 5000

In [None]:
# Sample size for this section (the summary tables below hard-code the same value).
taille = 5000
# Load the size-5000 samples; the fitting cells below read columns 1 to 100, one per sample.
sample_e = CSV.read("sample-d1-e.csv", DataFrame);

#### Gibbs

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_e).
params_e = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes_MH(PearsonType1b, sample_e[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_e, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_e.b, [.025, .975])
quantile_α = quantile(params_e.α, [.025, .975])
quantile_β = quantile(params_e.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df15 = DataFrame(
    taille = 5000,
    b = mean(params_e.b), α = mean(params_e.α), β = mean(params_e.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df15

#### NUTS

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_e).
params_en = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes(PearsonType1b, sample_e[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_en, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_en.b, [.025, .975])
quantile_α = quantile(params_en.α, [.025, .975])
quantile_β = quantile(params_en.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df15n = DataFrame(
    taille = 5000,
    b = mean(params_en.b), α = mean(params_en.α), β = mean(params_en.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df15n


---
### Size = 8000

In [None]:
# Sample size for this section (the summary tables below hard-code the same value).
taille = 8000
# Load the size-8000 samples; the fitting cells below read columns 1 to 100, one per sample.
sample_f = CSV.read("sample-d1-f.csv", DataFrame);

#### Gibbs

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_f).
params_f = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    draws = fit_bayes_MH(PearsonType1b, sample_f[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_f, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_f.b, [.025, .975])
quantile_α = quantile(params_f.α, [.025, .975])
quantile_β = quantile(params_f.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df16 = DataFrame(
    taille = 8000,
    b = mean(params_f.b), α = mean(params_f.α), β = mean(params_f.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df16

#### NUTS

In [None]:
# One row of (b, α, β) estimates per sample (columns 1 to 100 of sample_f).
params_fn = DataFrame(b = Float64[], α = Float64[], β = Float64[])

for col in 1:100
    # Module-qualified call, kept as written in this cell.
    draws = PMP.fit_bayes(PearsonType1b, sample_f[!, col])
    # Each of the three returned components is reduced to its mean.
    # NOTE(review): presumably draws[1], draws[2], draws[3] hold b, α, β — confirm in PMP.
    row = DataFrame(b = mean(draws[1]), α = mean(draws[2]), β = mean(draws[3]))
    append!(params_fn, row)
    print(col)  # progress indicator
end

# Empirical 2.5% and 97.5% quantiles of the per-sample estimates.
quantile_b = quantile(params_fn.b, [.025, .975])
quantile_α = quantile(params_fn.α, [.025, .975])
quantile_β = quantile(params_fn.β, [.025, .975])

# One-row summary: sample size, mean estimates, then the quantile bounds.
df16n = DataFrame(
    taille = 8000,
    b = mean(params_fn.b), α = mean(params_fn.α), β = mean(params_fn.β),
    bq2_5 = quantile_b[1], bq97_5 = quantile_b[2],
    αq2_5 = quantile_α[1], αq97_5 = quantile_α[2],
    βq2_5 = quantile_β[1], βq97_5 = quantile_β[2],
)

df16n


---
## Graphics
#### Gibbs

In [None]:
# Stack the six one-row Gibbs summaries into one table, ordered by sample size.
# vcat builds a new DataFrame. The previous `append!(df11, ...)` grew df11 in place
# (df1 was just another name for it), so re-running the cell kept adding duplicate rows.
df1 = vcat(df11, df12, df13, df14, df15, df16)

# Mean estimate of b against sample size, with:
#   - a dotted black horizontal line at 50, the value of b used for this distribution;
#   - red lines for the 2.5% and 97.5% quantiles of the per-sample estimates.
# The axes are clipped to y in [45, 75] and x >= 90.
p1 = plot(
    df1, x=:taille, y=:b, Geom.line, Geom.point,
    layer(yintercept=[50], Theme(default_color=color("black")), Geom.hline(style=:dot)),
    layer(x=:taille, y=:bq2_5, Geom.line, Theme(default_color=color("red"))),
    layer(x=:taille, y=:bq97_5, Geom.line, Theme(default_color=color("red"))),
    Guide.Title("Gibbs"), Guide.xlabel("Size"),
    Coord.cartesian(ymin=45, ymax=75, xmin=90),
    style(major_label_font_size=16pt, minor_label_font_size=14pt),
)

#### NUTS

In [None]:
# Stack the six one-row NUTS summaries into one table, ordered by sample size.
# vcat builds a new DataFrame. The previous `append!(df11n, ...)` grew df11n in place
# (df1n was just another name for it), so re-running the cell kept adding duplicate rows.
df1n = vcat(df11n, df12n, df13n, df14n, df15n, df16n)

# Mean estimate of b against sample size, with:
#   - a dotted black horizontal line at 50, the value of b used for this distribution;
#   - red lines for the 2.5% and 97.5% quantiles of the per-sample estimates.
# The axes are clipped to y in [45, 75] and x >= 90.
p1n = plot(
    df1n, x=:taille, y=:b, Geom.line, Geom.point,
    layer(yintercept=[50], Theme(default_color=color("black")), Geom.hline(style=:dot)),
    layer(x=:taille, y=:bq2_5, Geom.line, Theme(default_color=color("red"))),
    layer(x=:taille, y=:bq97_5, Geom.line, Theme(default_color=color("red"))),
    Guide.Title("NUTS"), Guide.xlabel("Size"),
    Coord.cartesian(ymin=45, ymax=75, xmin=90),
    style(major_label_font_size=16pt, minor_label_font_size=14pt),
)