# GEV
## Data : Saint-Jérôme, 24h

In [None]:
using CSV
using DataFrames
using Extremes
using Distributions
using Gadfly

### Data loading

In [None]:
# Parse the data file with CSV.File: the one-argument `CSV.read(path)` form was removed
# in later CSV.jl releases (an explicit sink is required), whereas `CSV.File(path)` is
# accepted by both old and new versions and can be handed directly to `DataFrame`.
csv = CSV.File("7037400.csv");

In [None]:
# Materialise the parsed CSV as a DataFrame.
df = DataFrame(csv)
# Extract a copy of the column named "24 h"; the name contains a space, hence the
# explicit `Symbol(...)` constructor. This series is what the models below are fitted to.
d = df[:, Symbol("24 h")];

### Model validation functions

Call `getModelValidationPlots` to display all three diagnostic plots (probability, quantile and return level) side by side.

In [None]:
"""
    getProbabilityPlot(z, m, i, gev)

Build the probability plot: empirical probabilities `i / (m + 1)` on the x-axis against
the probabilities given by the fitted model `gev` at the observations `z` on the y-axis.

# Arguments
- `z`: observations at which the model CDF is evaluated; paired elementwise with `i`.
- `m`: sample size used in the plotting position `i / (m + 1)`.
- `i`: ranks of the observations.
- `gev`: fitted generalized extreme value distribution.

# Returns
A Gadfly plot with the (empirical, model) points in green and the unit diagonal as a
reference line.
"""
function getProbabilityPlot(
    z::AbstractArray{<:Real},
    m::Integer,
    i::AbstractArray{<:Real},
    gev::GeneralizedExtremeValue,
)
    empirical = @. i / (m + 1)
    # Use the distribution's own CDF instead of the closed-form expression: the explicit
    # formula divides by ξ (silently wrong for ξ = 0) and raises a negative base to a
    # fractional power (DomainError) for observations outside the support.
    model = cdf.(gev, z)

    points = layer(x = empirical, y = model, Geom.point, Theme(default_color="green"))
    diagonal = layer(x = 0:0.5:1, y = 0:0.5:1, Geom.line)

    return plot(
        points,
        diagonal,
        Guide.xlabel("Empirical"),
        Guide.ylabel("Model"),
        Guide.title("Probability plot"),
    )
end

"""
    getQuantilePlot(z, m, i, gev)

Build the quantile plot: quantiles of the fitted model `gev` at the plotting positions
`i / (m + 1)` on the x-axis against the observations `z` on the y-axis.

# Arguments
- `z`: observations; paired elementwise with `i`.
- `m`: sample size; also the index of the last model quantile used for the diagonal.
- `i`: ranks of the observations.
- `gev`: fitted generalized extreme value distribution.

# Returns
A Gadfly plot with the (model, empirical) points in green and a diagonal reference line
spanning the first to the `m`-th model quantile.
"""
function getQuantilePlot(
    z::AbstractArray{<:Real},
    m::Integer,
    i::AbstractArray{<:Real},
    gev::GeneralizedExtremeValue,
)
    # Delegate to the distribution's quantile function: the hand-written inverse divides
    # by ξ and therefore breaks for ξ = 0.
    modelquantiles = quantile.(gev, i ./ (m + 1))
    lower, upper = modelquantiles[1], modelquantiles[m]

    points = layer(x = modelquantiles, y = z, Geom.point, Theme(default_color="green"))
    # Two endpoints are enough to draw the diagonal; a stepped range would have a zero
    # step (and throw) when there is a single observation.
    diagonal = layer(x = [lower, upper], y = [lower, upper], Geom.line)

    return plot(
        points,
        diagonal,
        Guide.title("Quantile Plot"),
        Guide.xlabel("Model"),
        Guide.ylabel("Empirical"),
        Coord.cartesian(xmin = lower, ymin = z[1]),
    )
end

"""
    getReturnLevelPlot(gev)

Build the return level plot for return periods 2 to 10.

The quantiles of `gev` at probabilities `1 - 1/T` are drawn as green points against
`log(T)`. Three reference curves are added, using the same `μ` and `σ` as `gev` but with
the shape parameter fixed at -0.2, 0.0 and 0.2.
"""
function getReturnLevelPlot(gev::GeneralizedExtremeValue)
    periods = 2:10
    logperiods = log.(periods)
    probabilities = 1 .- 1 ./ periods

    fitted = layer(
        x = logperiods,
        y = quantile.(gev, probabilities),
        Geom.point,
        Theme(default_color="green"),
    )

    # One line layer per reference shape, keeping the fitted location and scale.
    references = map([-0.2, 0.0, 0.2]) do shape
        reference = GeneralizedExtremeValue(gev.μ, gev.σ, shape)
        return layer(x = logperiods, y = quantile.(reference, probabilities), Geom.line)
    end

    return plot(
        fitted,
        references...,
        Guide.title("Return Level Plot"),
        Guide.xlabel("Return Period"),
        Guide.ylabel("Return Level"),
        Coord.cartesian(xmin = logperiods[1]),
    )
end

"""
    getModelValidationPlots(data, gev)

Display the probability plot, the quantile plot and the return level plot of the fitted
distribution `gev` side by side.

# Arguments
- `data`: observations the model was fitted to; any real-valued vector is accepted and
  it is not modified.
- `gev`: fitted generalized extreme value distribution.

# Returns
The three Gadfly plots stacked horizontally.

# Throws
- `ArgumentError` if `data` is empty.
"""
function getModelValidationPlots(data::AbstractVector{<:Real}, gev::GeneralizedExtremeValue)
    isempty(data) && throw(ArgumentError("data must contain at least one observation"))

    # Work on a sorted Float64 copy so the caller's vector is left untouched and the
    # plotting helpers always receive a plain `Vector{Float64}`.
    z = sort!(collect(Float64, data))
    m = length(z)
    i = collect(1.0:m)  # ranks 1.0, 2.0, ..., m matching the sorted observations

    probabilityPlot = getProbabilityPlot(z, m, i, gev)
    quantilePlot = getQuantilePlot(z, m, i, gev)
    returnLevelPlot = getReturnLevelPlot(gev)

    return hstack(probabilityPlot, quantilePlot, returnLevelPlot)
end;

### Using the Extremes library to estimate the GEV parameters

#### Maximum Likelihood

In [None]:
# Fit a GEV distribution to the series `d` with `gevfit`
# (presumably the maximum-likelihood estimator from Extremes — confirm against its docs).
gevML = gevfit(d)

In [None]:
# Show the probability, quantile and return level plots for the `gevML` fit.
getModelValidationPlots(d, gevML)

#### Bayesian Inference

In [None]:
# Run the Bayesian fit; `gevBs` is iterated below as a collection of objects exposing
# `μ`, `σ` and `ξ` (presumably one GEV distribution per posterior draw — confirm).
gevBs = gevfitbayes(d, stepSize = [3.5, 0.2, 0.15])

# Average each parameter over the collection and build a single point-estimate model.
μ, σ, ξ = map(name -> mean([getproperty(g, name) for g in gevBs]), (:μ, :σ, :ξ))
gevB = GeneralizedExtremeValue(μ, σ, ξ)

In [None]:
# Show the probability, quantile and return level plots for the `gevB` model built
# from the averaged parameters.
getModelValidationPlots(d, gevB)

### Interpretation

In [None]:
# Return periods (in years) and the matching non-exceedance probabilities 1 - 1/T.
T = 2:10
p = 1 .- 1 ./ T

# Position in `T` of the return period to describe (1 selects T = 2).
index = 1

# Two equivalent readings of the same return level, printed as one message.
print(
    "Every year, the value $(quantile.(gevML, p[index])) has $((1 - p[index]) * 100)% chance of being exceeded",
    "\nor\n",
    "The value $(quantile.(gevML, p[index])) should be exceeded on average every $(T[index]) years",
)