# Bayesian Inference

In [None]:
using Distributions
using Random
using Gadfly
using CSV
using DataFrames

## MCMC for GEV parameter estimation
The data are the values recorded at Saint-Jérôme for the 24 h duration.

In [None]:
# Read the station file and keep the "24 h" column as the observation vector.
# `CSV.File` + `DataFrame(...)` is used instead of `CSV.read(path)` because recent
# CSV.jl releases require an explicit sink argument for `CSV.read`.
csv = CSV.File("7037400.csv");
df = DataFrame(csv)
# The column name contains a space, so it has to be built with `Symbol(...)`.
data = df[:, Symbol("24 h")]
n = length(data);

#### Iterations
The parameters μ, σ and ξ will be estimated over k iterations

#### Initial values

μ = 0.0  
ϕ = log(σ)  
ϕ = 0.0  
ξ = 0.0

#### step
μ - 3.0  
ϕ - 0.2  
ξ - 0.2

In [None]:
# Number of iterations of the sampler.
k = 2000

# Parameter labels; the position of a label is the chain index used everywhere else.
p = ["μ", "σ", "ξ"]

# One chain per parameter, each starting from the initial value 0.0.
# The second chain holds ϕ = log(σ); it is exponentiated wherever σ is needed.
θs = [[0.0] for _ in p]

# Standard deviation of the normal proposal for each chain (μ, ϕ, ξ).
step = [3.0, 0.2, 0.2]

# Distribution from which the acceptance threshold is drawn.
u = Uniform(0, 1)

# Acceptance flags: acc[i][iteration] is set to 1 when the proposal for chain i is kept.
acc = [zeros(Int64, k) for _ in p];

#### Functions
`getDistribution` returns the distribution for f(x|θ) using latest approximations  
`getMutatedDistribution` returns the distribution for f(x|θ*) with the correct parameter mutated

In [None]:
"""
    getDistribution()

Return the `GeneralizedExtremeValue` distribution built from the latest value of each
chain in the global `θs`. The second chain stores the log of the scale, so it is
exponentiated before being passed to the constructor.
"""
function getDistribution()
    μ, ϕ, ξ = last.(θs)
    return GeneralizedExtremeValue(μ, exp(ϕ), ξ)
end

# One constructor per chain index: entry i builds the GEV distribution in which the
# i-th parameter is replaced by the candidate value and the other two keep the latest
# value of their chain. Candidates for the second entry are on the log scale.
getMutatedDistribution = [
    candidate -> GeneralizedExtremeValue(candidate, exp(last(θs[2])), last(θs[3])),
    candidate -> GeneralizedExtremeValue(last(θs[1]), exp(candidate), last(θs[3])),
    candidate -> GeneralizedExtremeValue(last(θs[1]), exp(last(θs[2])), candidate)
];

### Random Step Metropolis Hastings
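Each proposal is drawn from a normal distribution centred on the current value of the parameter, with a standard deviation equal to that parameter's step (3.0 for μ, 0.2 for ϕ and 0.2 for ξ).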
#### Acceptance probability
The probabilities involved in the acceptance ratio are too close to 0, so multiplying them together is numerically unstable. The logarithms of these probabilities are taken and added together instead, which stabilizes the numerical values.
#### Prior law
Improper laws:

f(μ) ∝ 1  
f(ϕ) ∝ 1  
f(ξ) ∝ 1

They multiply the probability by 1, or equivalently add log(1) = 0 to the log of the probability. Therefore, they do not modify the result, and the likelihood alone can be used to compute the acceptance probability.

In [None]:
# Metropolis-Hastings, one parameter at a time: for every iteration each chain
# receives exactly one new entry (the accepted proposal or a copy of its last value).
for iteration in 2:k
    for i in eachindex(p)
        # Normal proposal centred on the current value of chain i.
        θ = rand(Normal(θs[i][end], step[i]))

        # Log-likelihood of the data under the proposed and the current parameters.
        # `logpdf` is evaluated directly rather than `log(pdf(...))`, so a density
        # that would underflow to 0 does not turn into a spurious -Inf.
        logpP = sum(logpdf.(getMutatedDistribution[i](θ), data))
        logpC = sum(logpdf.(getDistribution(), data))

        # Log acceptance probability, capped at log(1) = 0. If both log-likelihoods
        # are -Inf the difference is NaN; the comparison below is then false and the
        # proposal is rejected.
        logα = min(zero(logpP), logpP - logpC)
        logu = log(rand(u))

        if logu < logα
            push!(θs[i], θ)
            acc[i][iteration] = 1
        else
            # Rejected: repeat the current value so the chain keeps one entry per iteration.
            push!(θs[i], θs[i][end])
        end
    end
end

#### Estimation of parameters
The parameters are estimated using the mean of their values after a certain threshold. This threshold is selected by looking at the graph and selecting the start point where the lines are stable.

In [None]:
# Burn-in cutoff: entries before this index are left out of the estimates.
start = 250

# Chains restricted to the retained range; the second chain is mapped back from
# log(σ) to σ.
stableμ = θs[1][start:end]
stableσ = exp.(θs[2][start:end])
stableξ = θs[3][start:end]

μmean = mean(stableμ)
σmean = mean(stableσ)
ξmean = mean(stableξ)

# One report per parameter: heading, sample mean, spread and acceptance rate.
reports = (
    ("μ", μmean, stableμ, acc[1]),
    ("\nσ", σmean, stableσ, acc[2]),
    ("\nξ", ξmean, stableξ, acc[3]),
)
for (heading, average, chain, accepted) in reports
    println(heading)
    println("Mean : ", average)
    println("Standard deviation : ", std(chain))
    println("Acceptance : ", mean(accepted[start:end]))
end


# Trace of each chain over all k iterations (σ is shown on its natural scale).
layerμ = layer(x = 1:k, y = θs[1], Geom.line, Theme(default_color="green"))
layerσ = layer(x = 1:k, y = exp.(θs[2]), Geom.line, Theme(default_color="purple"))
layerξ = layer(x = 1:k, y = θs[3], Geom.line, Theme(default_color="orange"))

# The vertical line marks the burn-in cutoff used for the estimates above.
plot(
    layerμ, layerσ, layerξ,
    Guide.title("MCMC"),
    Coord.cartesian(ymin = -1),
    Guide.manual_color_key("Parameter", ["μ", "σ", "ξ"], ["green", "purple", "orange"]),
    Geom.vline(style=[:solid]);
    xintercept = [start],
)