# Coles TODO : CHECK ALL RESULTS

In [None]:
using DataFrames
using Extremes
using Distributions
using Gadfly

### Model validation functions

Call `validationplots` to display all four diagnostic plots (probability, quantile, return level and density) in a single grid.

In [None]:
"""
    probabilityplot(z, m, i, gev)

Return the probability plot of the sample `z`: the empirical probabilities
`i / (m + 1)` on the x-axis against the model probabilities `cdf(gev, z)` on the
y-axis, drawn as green points together with the unit diagonal.
"""
function probabilityplot(z::Array{Float64}, m::Int64, i::Array{Float64}, gev::GeneralizedExtremeValue) #TODO : strucEVA
    empirical = i ./ (m + 1)
    model = map(zₖ -> cdf(gev, zₖ), z)

    # Reference diagonal: a perfect fit puts every point on it.
    ticks = 0.0:0.5:1.0
    points = layer(x = empirical, y = model, Geom.point, Theme(default_color="green"))
    diagonal = layer(x = ticks, y = ticks, Geom.line)

    return plot(
        points,
        diagonal,
        Guide.xlabel("Empirical"),
        Guide.ylabel("Model"),
        Guide.title("Probability plot"),
    )
end

"""
    quantileplot(z, m, i, gev)

Return the quantile plot of the sample `z`: the model quantiles
`quantile(gev, i / (m + 1))` on the x-axis against the values of `z` on the y-axis,
drawn as green points together with a diagonal spanning the model quantiles.
"""
function quantileplot(z::Array{Float64}, m::Int64, i::Array{Float64}, gev::GeneralizedExtremeValue)
    modelquantiles = map(k -> quantile(gev, k / (m + 1)), i)

    # The diagonal runs from the first to the m-th model quantile.
    lo = modelquantiles[1]
    hi = modelquantiles[m]
    diagonal = lo:(hi - lo)/2:hi

    points = layer(x = modelquantiles, y = z, Geom.point, Theme(default_color="green"))
    reference = layer(x = diagonal, y = diagonal, Geom.line)

    return plot(
        points,
        reference,
        Guide.title("Quantile Plot"),
        Guide.xlabel("Model"),
        Guide.ylabel("Empirical"),
        Coord.cartesian(xmin = lo, ymin = z[1]),
    )
end

"""
    returnlevelplot(data, gev)

Return the return level plot of `data`.

For the return periods `T = 2:0.2:10`, the empirical quantiles of order `1 - 1/T`
(read from the sorted sample) are drawn as green points against `log(T)`. Three
reference curves are added, giving the quantiles of a `GeneralizedExtremeValue`
with the location and scale of `gev` and shape -0.2, 0.0 and 0.2.

Throws an `ArgumentError` if `data` is empty.
"""
function returnlevelplot(data::Array{Float64}, gev::GeneralizedExtremeValue)
    n = length(data)
    n > 0 || throw(ArgumentError("data must not be empty"))

    sorted = sort(data)
    T = 2:0.2:10
    logT = log.(T)
    p = @. 1 - 1 / T

    # Rank of the order statistic used as the empirical quantile of order p.
    # Clamped to 1:n because round(p * n) can be 0 for very small samples.
    ranks = @. clamp(round(Int, p * n), 1, n)
    empirical = sorted[ranks]
    points = layer(x = logT, y = empirical, Geom.point, Theme(default_color="green"))

    # NOTE(review): the curves use fixed shapes rather than the fitted ξ of `gev`;
    # confirm that this is what the validation plot is meant to show.
    shape = [-0.2, 0.0, 0.2]
    curves = [
        layer(x = logT, y = quantile.(GeneralizedExtremeValue(gev.μ, gev.σ, ξ), p), Geom.line)
        for ξ in shape
    ]

    return plot(points, curves..., Guide.title("Return Level Plot"), Guide.xlabel("Period"), Guide.ylabel("Level"),
        Coord.cartesian(xmin = logT[1]))
end

"""
    densityplot(gev, data, start, step, finish)

Return the density plot: a histogram of `data` scaled to a density (bar height =
count / (n * step)) overlaid with the pdf of `gev` evaluated on `start:0.01:finish`.

The bars are centred on `(start + step/2):step:(finish + step/2)`. Each bin is the
half-open interval `[left, right)`, so an observation lying exactly on a bin edge is
counted once (in the bin to its right) and not in both neighbouring bars.
"""
function densityplot(gev::GeneralizedExtremeValue, data::Array{Float64}, start::Float64, step::Float64, finish::Float64)
    n = length(data)

    centers = (start + step / 2):step:(finish + step / 2)
    # One shared set of edges, so that adjacent bins neither overlap nor leave gaps.
    edges = range(start; step = step, length = length(centers) + 1)
    counts = [count(x -> edges[k] <= x < edges[k + 1], data) for k in eachindex(centers)]
    heights = counts ./ (n * step)
    bars = layer(x = centers, y = heights, Geom.BarGeometry)

    grid = start:0.01:finish
    curve = layer(x = grid, y = pdf.(gev, grid), Geom.line, Theme(default_color = "Green"))

    return plot(curve, bars, Guide.title("Density Plot"), Guide.xlabel("z"), Guide.ylabel("f(z)"))
end

"""
    validationplots(data, gev, dstart, dstep, dfinish)

Display the four diagnostic plots of the fitted distribution `gev` for `data` in a
2×2 grid: probability and quantile plots on the first row, return level and density
plots on the second. `dstart`, `dstep` and `dfinish` are forwarded to `densityplot`.
"""
function validationplots(data::Array{Float64}, gev::GeneralizedExtremeValue, dstart::Float64, dstep::Float64, dfinish::Float64)
    # The probability and quantile plots work on the order statistics and their ranks.
    ordered = sort(data)
    nobs = length(ordered)
    ranks = Float64.(1:nobs)

    topleft = probabilityplot(ordered, nobs, ranks, gev)
    topright = quantileplot(ordered, nobs, ranks, gev)
    bottomleft = returnlevelplot(data, gev)
    bottomright = densityplot(gev, data, dstart, dstep, dfinish)

    return gridstack([topleft topright; bottomleft bottomright])
end;

### Utility function

`printparams` prints the parameter estimates of a fitted block maxima model

In [None]:
"""
    printparams(eva)

Print one line per entry of `eva.model.paramindex`, in the form `name : estimate`,
where the estimate is read from `eva.θ̂` at the position(s) stored for that entry.
"""
function printparams(eva::Extremes.BlockMaxima)
    estimates = eva.θ̂
    foreach(pairs(eva.model.paramindex)) do (name, position)
        println(name, " : ", estimates[position])
    end
end;

## 3.4.1 Annual Maximum Sea-levels at Port Pirie

In [None]:
# Load the "portpirie" dataset into a DataFrame and keep its SeaLevel column
# as the sample used by the following cells.
raw = load("portpirie")
df = DataFrame(raw)
data = df[:, :SeaLevel];

In [None]:
# Fit the model with `gevfit`, then extract the fitted distribution object
# that the plotting helpers expect.
gevEVA = gevfit(data)
gev = Extremes.getdistribution(gevEVA)

In [None]:
# Diagnostic plots; the last three arguments are the density plot's
# start (3.5), bin step (0.1) and finish (4.7).
validationplots(data, gev, 3.5, 0.1, 4.7)

## 3.4.2 Glass Fiber Strength Example 

In [None]:
# Load the "glass" dataset and keep its Strength column with the sign flipped:
# the series is analysed as minima, and negating it turns minima into maxima.
raw = load("glass")
df = DataFrame(raw)
data = -1 * df[:, :Strength]; # minima

In [None]:
# Fit the model to the negated strengths and extract the fitted distribution.
gevEVA = gevfit(data)
gev = Extremes.getdistribution(gevEVA)

In [None]:
# Diagnostic plots on the negated scale; the density plot uses
# start -2.5, bin step 0.15 and finish -0.4.
validationplots(data, gev, -2.5, 0.15, -0.4)

## 4.4.1 Daily Rainfall Data

In [None]:
# Load the "rain" dataset and keep its Rainfall column.
raw = load("rain")
df = DataFrame(raw)
data = df[:, :Rainfall]

# Keep only the observations above the threshold, expressed as excesses over it.
threshold = 30
exceedances = data[data.>threshold] .- threshold

# NOTE(review): the threshold is already subtracted from `exceedances` and is also
# passed to `gpfit`; confirm that `gpfit` does not subtract it a second time.
gpEVA = Extremes.gpfit(exceedances, threshold = [threshold]) # TODO: Export
gp = Extremes.getdistribution(gpEVA)

In [None]:
# TODO: VALIDATION PLOTS

## 4.4.2 Dow Jones Index Series

In [None]:
# Load the "dowjones" dataset and keep its Index column.
raw = load("dowjones")
df = DataFrame(raw)
data = df[:, :Index]

# Transform the index to percentage log-returns,
#     100 * (log(x[i]) - log(x[i-1])),
# so that a positive value is a rise of the index. `diff` subtracts each element
# from the next one, which gives this difference directly and with the right sign.
data = 100 * diff(log.(data))

# Keep only the returns above the threshold, expressed as excesses over it.
threshold = 2
exceedances = data[data.>threshold] .- threshold

# NOTE(review): the threshold is already subtracted from `exceedances` and is also
# passed to `gpfit`; confirm that `gpfit` does not subtract it a second time.
gpEVA = Extremes.gpfit(exceedances, threshold = [threshold]) # TODO : export
gp = Extremes.getdistribution(gpEVA)

In [None]:
# TODO: VALIDATION PLOTS

## 6.3.1 Annual Maximum Sea-levels

In [None]:
# 0.95 quantile of the χ² distribution with 1 degree of freedom; the cells below
# print each deviance statistic D next to this value.
quantileχ²₁ = quantile(Chisq(1), 0.95)

### Port Pirie

In [None]:
# Load the "portpirie" dataset and keep its SeaLevel column.
raw = load("portpirie")
df = DataFrame(raw)
data = df[:, :SeaLevel]

# Time covariate: the observation index 1, 2, ..., length(data).
t = collect(1:length(data))

# Dictionary handed to `gevfit` when covariates are used: the response (:data),
# the covariate (:t) and the number of observations (:n).
d = Dict(:data => data, :t => t, :n => length(data));

#### Stationary

In [None]:
# Fit without covariates, print the estimates and extract the fitted distribution.
gevEVA = gevfit(data)
printparams(gevEVA)
gev = Extremes.getdistribution(gevEVA)

# Log-likelihood of `data` under the fitted distribution (sum of the log densities).
mlogls = sum(@. log(pdf(gev, data)));

#### μ linear

In [None]:
# Fit with :t as covariate for μ (no covariate for ϕ and ξ), print the estimates
# and extract the fitted distribution(s).
cov = Dict(:μ => [:t], :ϕ => Symbol[], :ξ => Symbol[])
EVA = gevfit(d, :data, Covariate = cov)
printparams(EVA)
linμgev = Extremes.getdistribution(EVA);

# Log-likelihood of `data` under the fit with linear μ.
mloglμlinear = sum(@. log(pdf(linμgev, data)))

# Deviance statistic against the stationary fit (`mlogls`).
D = 2(mloglμlinear - mlogls)

# The sign and the conclusion are derived from the comparison itself, so the
# printed text can never contradict the printed numbers.
println()
println("linear μ vs. stationnary μ")
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁)
println(significant ? "Linear is a better representation" : "Stationnary is a better representation")

### Fremantle

In [None]:
# Load the "fremantle" dataset and keep its SeaLevel, Year and SOI columns.
raw = load("fremantle")
df = DataFrame(raw)
data = df[:, :SeaLevel]
year = df[:, :Year]
soi = df[:, :SOI]

# Time covariates: the observation index 1, 2, ..., length(data) and its square.
t = collect(1:length(data))
t2 = t.^2

# Dictionary handed to `gevfit` when covariates are used: the response (:data),
# the candidate covariates (:t, :t2, :soi) and the number of observations (:n).
d = Dict(:data => data, :t => t, :t2 => t2, :soi => soi, :n => length(data));

#### Stationary

In [None]:
# Fit without covariates, print the estimates and extract the fitted distribution.
gevEVA = gevfit(data)
printparams(gevEVA)
gev = Extremes.getdistribution(gevEVA)

# Log-likelihood of `data` under the fitted distribution (sum of the log densities).
mloglstationnary = sum(@. log(pdf(gev, data)));

#### μ linear

In [None]:
# Fit with :t as covariate for μ (no covariate for ϕ and ξ), print the estimates
# and extract the fitted distribution(s).
cov = Dict(:μ => [:t], :ϕ => Symbol[], :ξ => Symbol[])
EVA = gevfit(d, :data, Covariate = cov)
printparams(EVA)
linμgev = Extremes.getdistribution(EVA);

# Log-likelihood of `data` under the fit with linear μ.
mloglμlinear = sum(@. log(pdf(linμgev, data)))

# Deviance statistic against the stationary fit. The sign and the conclusion are
# derived from the comparison itself, so they cannot contradict the numbers.
println()
println("linear μ vs. stationnary μ")
D = 2(mloglμlinear - mloglstationnary)
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁)
println(significant ? "Linear is a better representation" : "Stationnary is a better representation")

In [None]:
# Observed sea levels against the year of observation.
l1 = layer(x = year, y = data)

# Estimates attached to μ in the last fit stored in `EVA`; this cell expects
# `EVA` to be the linear-μ fit, with the intercept first and the slope second.
μs = EVA.θ̂[EVA.model.paramindex[:μ]]
println("μ̂  = ", μs)

β̂₀ = μs[1]
β̂₁ = μs[2]

# Evaluate the fitted trend on `t`, the covariate the model was fitted with.
# `t` is the observation index 1..n, which differs from `year - 1897` (by one at
# the first observation, and by more wherever a year is missing from the record).
l = @. β̂₀ + β̂₁ * t

l2 = layer(x = year, y = l, Geom.line, Theme(default_color = "green"))

plot(l2, l1, Coord.cartesian(xmin = year[1]),
    Guide.title("Fitted estimates for μ"), Guide.xlabel("Year"), Guide.ylabel("Sea-level"))

####  μ quadratic

In [None]:
# Fit with :t and :t2 as covariates for μ (no covariate for ϕ and ξ), print the
# estimates and extract the fitted distribution(s).
cov = Dict(:μ => [:t, :t2], :ϕ => Symbol[], :ξ => Symbol[])
EVA = gevfit(d, :data, Covariate = cov)
printparams(EVA)
quadμgev = Extremes.getdistribution(EVA);

# Log-likelihood of `data` under the fit with quadratic μ.
mloglμquadratic = sum(@. log(pdf(quadμgev, data)))

# Deviance statistic against the linear-μ fit. The sign and the conclusion are
# derived from the comparison itself, so they cannot contradict the numbers.
println()
println("quadratic μ vs. linear μ")
D = 2(mloglμquadratic - mloglμlinear)
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁)
println(significant ? "Quadratic is a better representation" : "Linear is a better representation")

#### σ linear

In [None]:
# Fit with :t as covariate for both μ and ϕ (no covariate for ξ), print the
# estimates and extract the fitted distribution(s).
cov = Dict(:μ => [:t], :ϕ => [:t], :ξ => Symbol[])
EVA = gevfit(d, :data, Covariate = cov)
printparams(EVA)
linσgev = Extremes.getdistribution(EVA);

# Log-likelihood of `data` under the fit with linear μ and linear ϕ.
mloglμlinearσlinear = sum(@. log(pdf(linσgev, data)))

# Deviance statistic against the linear-μ fit. The sign and the conclusion are
# derived from the comparison itself, so they cannot contradict the numbers.
println()
println("linear σ vs. stationnary σ")
D = 2(mloglμlinearσlinear - mloglμlinear)
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁)
println(significant ? "Linear is a better representation" : "Stationnary is a better representation")

#### μ SOI

In [None]:
# Fit with :soi as covariate for μ (no covariate for ϕ and ξ), print the
# estimates and extract the fitted distribution(s).
cov = Dict(:μ => [:soi], :ϕ => Symbol[], :ξ => Symbol[])
EVA = gevfit(d, :data, Covariate = cov)
printparams(EVA)
soiμgev = Extremes.getdistribution(EVA);

# Log-likelihood of `data` under the fit with μ depending on SOI.
mloglSOIμ = sum(@. log(pdf(soiμgev, data)))

# Deviance statistic against the stationary fit. The sign and the conclusion are
# derived from the comparison itself, so they cannot contradict the numbers.
println()
println("SOI μ vs. stationnary μ")
D = 2(mloglSOIμ - mloglstationnary)
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁)
println(significant ? "SOI is a better representation" : "Stationnary is a better representation")

#### μ linear + SOI

In [None]:
# Fit with :t and :soi as covariates for μ (no covariate for ϕ and ξ), print the
# estimates and extract the fitted distribution(s).
cov = Dict(:μ => [:t, :soi], :ϕ => Symbol[], :ξ => Symbol[])
EVA = gevfit(d, :data, Covariate = cov)
printparams(EVA)
linsoiμgev = Extremes.getdistribution(EVA);

# Log-likelihood of `data` under the fit with μ depending on t and SOI.
mloglSOIlinearμ = sum(@. log(pdf(linsoiμgev, data)))

# Deviance statistic against the linear-μ fit. The sign and the conclusion are
# derived from the comparison itself, so they cannot contradict the numbers.
println()
println("linear + SOI μ vs. linear μ")
D = 2(mloglSOIlinearμ - mloglμlinear)
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁)
println(significant ? "Linear + SOI is a better representation" : "Linear is a better representation")

## 6.3.4 Daily Rainfall Data 

In [None]:
# Load the "rain" dataset and keep its Rainfall column for the analysis
# planned in the next cell.
raw = load("rain")
df = DataFrame(raw)
data = df[:, :Rainfall];

In [None]:
# TODO : pareto + \sigma linear + deviance statistic