# Coles

In [None]:
using CSV
using DataFrames
using Extremes
using Distributions
using Gadfly

### Model validation functions

Call `validationplots` to display all four diagnostic graphs (probability, quantile, return level and density plots) together in a single grid.

In [None]:
"""
    probabilityplot(z, m, i, gev)

Build the probability plot comparing the empirical distribution function with the
distribution function of `gev`.

# Arguments
- `z`: observations; the caller in this file passes them sorted in increasing order.
- `m`: number of observations.
- `i`: ranks paired element-wise with `z` (the caller in this file passes `1.0, …, m`).
- `gev`: distribution whose cdf is evaluated at `z`.

# Returns
A Gadfly plot with the points `(i / (m + 1), cdf(gev, z))` in green and a reference
line through `(0, 0)`, `(0.5, 0.5)` and `(1, 1)`.
"""
function probabilityplot(z::Array{Float64}, m::Int64, i::Array{Float64}, gev::GeneralizedExtremeValue)
    # Empirical probabilities i / (m + 1).
    G̃ = @. i / (m + 1)
    # `cdf` evaluates the same GEV distribution function as the closed form
    # exp(-(1 + ξ (z - μ) / σ)^(-1/ξ)), but also covers ξ == 0 (where the closed form
    # divides by zero) and observations outside the support (where the closed form
    # raises a negative base to a fractional power and throws a DomainError).
    Ĝ = cdf.(gev, z)

    l1 = layer(x = G̃, y = Ĝ, Geom.point, Theme(default_color="green"))
    # Unit diagonal: a perfect fit puts every point on this line.
    l2 = layer(x = 0.0:0.5:1.0, y = 0.0:0.5:1.0, Geom.line)

    return plot(l1, l2, Guide.xlabel("Empirical"), Guide.ylabel("Model"), Guide.title("Probability plot"))
end

"""
    quantileplot(z, m, i, gev)

Build the quantile plot comparing the quantiles of `gev` with the observations.

# Arguments
- `z`: observations; the caller in this file passes them sorted in increasing order.
- `m`: number of observations; `z` and `i` are indexed up to `m`.
- `i`: ranks paired element-wise with `z` (the caller in this file passes `1.0, …, m`).
- `gev`: distribution whose quantiles are evaluated at the probabilities `i / (m + 1)`.

# Returns
A Gadfly plot with the points `(quantile(gev, i / (m + 1)), z)` in green and a
reference line joining the first and the `m`-th model quantile.
"""
function quantileplot(z::Array{Float64}, m::Int64, i::Array{Float64}, gev::GeneralizedExtremeValue)
    # `quantile` evaluates the same inverse cdf as the closed form
    # μ - σ/ξ * (1 - (-log(p))^(-ξ)) and additionally covers ξ == 0, where the
    # closed form divides by zero.
    Ĝ⁻¹ = quantile.(gev, i ./ (m + 1))

    l1 = layer(x = Ĝ⁻¹, y = z, Geom.point, Theme(default_color="green"))
    # Two end points are enough to draw the diagonal. Building it as a range with
    # step (last - first) / 2 throws when that step is zero (e.g. m == 1).
    diagonal = [Ĝ⁻¹[1], Ĝ⁻¹[m]]
    l2 = layer(x = diagonal, y = diagonal, Geom.line)

    return plot(l1, l2, Guide.title("Quantile Plot"), Guide.xlabel("Model"), Guide.ylabel("Empirical"),
        Coord.cartesian(xmin = Ĝ⁻¹[1], ymin = z[1]))
end

"""
    returnlevelplot(data, gev)

Build a return level plot: empirical quantiles of `data` (green points) and GEV
quantile curves, both drawn against `log(T)` for return periods `T = 2:0.2:10`.

# Arguments
- `data`: observations (sorted internally; the argument is not modified).
- `gev`: distribution supplying the location `μ` and scale `σ` of the drawn curves.

# Returns
A Gadfly plot containing the empirical points and one line per shape value in
`[-0.2, 0.0, 0.2]`.

# Throws
- `ArgumentError` if `data` is empty.
"""
function returnlevelplot(data::Array{Float64}, gev::GeneralizedExtremeValue)
    isempty(data) && throw(ArgumentError("data must contain at least one observation"))

    sorted = sort(data)
    n = length(data)
    T = 2:0.2:10
    logT = log.(T)
    # Non-exceedance probability associated with each return period.
    p = @. 1 - 1 / T

    # Empirical quantile: the round(p * n)-th order statistic. For n == 1 the
    # rounded rank is 0, so clamp it into 1:n to keep the index valid.
    ranks = clamp.(round.(Int, p .* n), 1, n)
    empirical = sorted[ranks]
    points = layer(x = logT, y = empirical, Geom.point, Theme(default_color="green"))

    # NOTE(review): the fitted shape gev.ξ is not used here; the curves are drawn
    # for the fixed shapes below with the fitted μ and σ — confirm this is intended.
    shape = [-0.2, 0.0, 0.2]
    curves = [
        layer(x = logT, y = quantile.(GeneralizedExtremeValue(gev.μ, gev.σ, ξ), p), Geom.line)
        for ξ in shape
    ]

    return plot(points, curves..., Guide.title("Return Level Plot"), Guide.xlabel("Period"), Guide.ylabel("Level"),
        Coord.cartesian(xmin = logT[1]))
end

"""
    densityplot(gev, data, start, step, finish)

Build a density plot: a normalized histogram of `data` overlaid with the pdf of `gev`.

# Arguments
- `gev`: distribution whose pdf is drawn (green line) on `start:0.01:finish`.
- `data`: observations to bin.
- `start`: left edge of the first histogram bin.
- `step`: bin width; must be positive.
- `finish`: the bin centres run from `start + step/2` up to `finish + step/2`.

# Returns
A Gadfly plot with the pdf line and bars of height `count / (n * step)`, where
`n = length(data)`.

# Throws
- `ArgumentError` if `step` is not positive.
"""
function densityplot(gev::GeneralizedExtremeValue, data::Array{Float64}, start::Float64, step::Float64, finish::Float64)
    step > 0 || throw(ArgumentError("step must be positive, got $step"))
    n = length(data)

    # Bin centres.
    zb = (start + step/2):step:(finish + step/2)
    # One shared grid of bin edges with half-open bins [edge_k, edge_k+1): every
    # observation inside the grid is counted exactly once. Bins closed on both
    # sides count an observation lying on a shared edge in two neighbouring bins.
    edges = range(start; step = step, length = length(zb) + 1)
    counts = [count(x -> edges[k] <= x < edges[k + 1], data) for k in eachindex(zb)]
    # Normalize so that the bar areas sum to the fraction of binned observations.
    db = counts ./ (n * step)
    lb = layer(x = zb, y = db, Geom.BarGeometry)

    zl = start:0.01:finish
    dl = pdf.(gev, zl)
    ll = layer(x = zl, y = dl, Geom.line, Theme(default_color = "Green"))

    return plot(ll, lb, Guide.title("Density Plot"), Guide.xlabel("z"), Guide.ylabel("f(z)"))
end

"""
    validationplots(data, gev, dstart, dstep, dfinish)

Assemble the four diagnostic plots for `gev` against `data` into a 2×2 grid:
probability and quantile plots on the first row, return level and density plots on
the second. `dstart`, `dstep` and `dfinish` are forwarded to `densityplot` as its
`start`, `step` and `finish` arguments.
"""
function validationplots(data::Array{Float64}, gev::GeneralizedExtremeValue, dstart::Float64, dstep::Float64, dfinish::Float64)
    # Order statistics and their ranks, shared by the probability and quantile plots.
    ordered = sort(data)
    nobs = length(ordered)
    ranks = collect(Float64, 1:nobs)

    topleft = probabilityplot(ordered, nobs, ranks, gev)
    topright = quantileplot(ordered, nobs, ranks, gev)
    # These two plots receive the unsorted sample.
    bottomleft = returnlevelplot(data, gev)
    bottomright = densityplot(gev, data, dstart, dstep, dfinish)

    return gridstack([topleft topright; bottomleft bottomright])
end;

## 3.4.1 Annual Maximum Sea-levels at Port Pirie

In [None]:
# Read portpirie.csv into a DataFrame and keep its `SeaLevel` column as the sample.
# NOTE(review): newer CSV.jl releases require an explicit sink argument for
# `CSV.read` — confirm against the package version this notebook is run with.
csv = CSV.read("portpirie.csv")
df = DataFrame(csv)
data = df[:, :SeaLevel];

In [None]:
# Fit a GEV model to `data` with Extremes' `gevfit`, then extract the fitted
# distribution object that is passed to the plotting functions.
gevEVA = gevfit(data)
gev = Extremes.getdistribution(gevEVA)

In [None]:
# Diagnostic plots for the fit; the last three arguments are the start, bin width
# and end used by the density plot.
validationplots(data, gev, 3.5, 0.1, 4.7)

## 3.4.2 Glass Fiber Strength Example 

In [None]:
# Read glass.csv (no header row, so the single column is named `Column1`).
csv = CSV.read("glass.csv", header=false)
df = DataFrame(csv)
# The sign is flipped — presumably so that minima of the original series can be
# modelled as maxima; confirm against the text being reproduced.
data = -1 * df[:, :Column1]; # minima

In [None]:
# Fit a GEV model to the negated sample with Extremes' `gevfit`, then extract the
# fitted distribution object that is passed to the plotting functions.
gevEVA = gevfit(data)
gev = Extremes.getdistribution(gevEVA)

In [None]:
# Diagnostic plots for the fit; the last three arguments are the start, bin width
# and end used by the density plot (negative because the sample was negated).
validationplots(data, gev, -2.5, 0.15, -0.4)

## 6.3.1 Annual Maximum Sea-levels

In [None]:
# 0.95 quantile of the χ² distribution with 1 degree of freedom; the deviance
# statistics printed in the cells below are compared against this value.
quantileχ²₁ = quantile(Chisq(1), 0.95)

### Port Pirie

In [None]:
# Read portpirie.csv into a DataFrame and keep its `SeaLevel` column as the sample.
csv = CSV.read("portpirie.csv")
df = DataFrame(csv)
data = df[:, :SeaLevel];

In [None]:
# Fit the stationary GEV model and extract the fitted distribution.
gevEVA = gevfit(data)
gev = Extremes.getdistribution(gevEVA)

# Log-likelihood of the stationary fit. `logpdf` is used instead of `log(pdf(...))`
# so the logarithm is not taken of a density value that may have underflowed.
mlogls = sum(logpdf.(gev, data))
mloglμlinear = 4.37 # TODO μ linear

# Deviance statistic of the linear-μ model against the stationary model.
D = 2(mloglμlinear - mlogls)

println("linear μ vs. stationnary μ")
# Derive the printed comparison from the values instead of hard-coding it, so the
# output cannot contradict the numbers it shows.
significant = D > quantileχ²₁
println(D, significant ? " > " : " < ", quantileχ²₁, significant ? " - ✓" : " - X")

### Fremantle

In [None]:
# Read fremantle.csv into a DataFrame and pull out the three columns used below:
# the sea-level sample plus the `Year` and `SOI` columns.
csv = CSV.read("fremantle.csv")
df = DataFrame(csv)
data = df[:, :SeaLevel];
year = df[:, :Year];
soi = df[:, :SOI];

In [None]:
"""
    printdeviancetest(title, mloglfull, mloglnested, threshold)

Print `title`, then the deviance statistic `D = 2(mloglfull - mloglnested)` next to
`threshold`, with the comparison sign and the ✓ / X mark derived from `D > threshold`.
Return `D`.
"""
function printdeviancetest(title, mloglfull, mloglnested, threshold)
    D = 2(mloglfull - mloglnested)
    significant = D > threshold
    println(title)
    println(D, significant ? " > " : " < ", threshold, significant ? " - ✓" : " - X")
    return D
end

# Fit the stationary GEV model and extract the fitted distribution.
gevEVA = gevfit(data)
gev = Extremes.getdistribution(gevEVA)

# μ linear

# Log-likelihood of the stationary fit. `logpdf` is used instead of `log(pdf(...))`
# so the logarithm is not taken of a density value that may have underflowed.
mloglstationnary = sum(logpdf.(gev, data))
mloglμlinear = 49.9  # TODO μ linear

D = printdeviancetest("linear μ vs. stationnary μ", mloglμlinear, mloglstationnary, quantileχ²₁)

# μ quadratic

mloglμquadratic = 50.6 # TODO μ quadratic

D = printdeviancetest("\nquadratic μ vs. linear μ", mloglμquadratic, mloglμlinear, quantileχ²₁)

# σ linear

mloglμlinearσlinear = 50.7 # TODO μ linear, σ linear

D = printdeviancetest("\nlinear σ vs. stationnary σ", mloglμlinearσlinear, mloglμlinear, quantileχ²₁)
# TODO : print params

# μ SOI

mloglSOIμ = 47.2 # TODO μ SOI

D = printdeviancetest("\nSOI μ vs. stationnary μ", mloglSOIμ, mloglstationnary, quantileχ²₁)

# μ linear + SOI

mloglSOIlinearμ = 53.9 # TODO μ linear + SOI

D = printdeviancetest("\nlinear + SOI μ vs. linear μ", mloglSOIlinearμ, mloglμlinear, quantileχ²₁)
# TODO : print params

#### Linear case

In [None]:
# Reference year subtracted from `year` in the linear trend.
t₁ = 1897

# Hard-coded intercept and slope of the linear model for μ.
β̂₀ = 1.38 # TODO μ linear
β̂₁ = 0.00203 # TODO μ linear

# Trend evaluated at every observed year.
l = β̂₀ .+ β̂₁ .* (year .- t₁)

# Observations (no geometry given, so Gadfly's default is used) and the trend line.
l1 = layer(x = year, y = data)
l2 = layer(x = year, y = l, Geom.line, Theme(default_color = "green"))

plot(
    l2,
    l1,
    Coord.cartesian(xmin = year[1]),
    Guide.title("Fitted estimates for μ"),
    Guide.xlabel("Year"),
    Guide.ylabel("Sea-level"),
)

## 6.3.2 Race Time Data

In [None]:
# TODO : Find data...

## 6.3.4 Daily Rainfall Data 

In [None]:
# Read rain.csv (no header row, so the single column is named `Column1`) and keep
# that column as the sample.
csv = CSV.read("rain.csv", header=false)
df = DataFrame(csv)
data = df[:, :Column1];

In [None]:
# TODO : pareto + \sigma linear + deviance statistic

## 6.3.5 Wooster Temperature Data

In [None]:
# Read wooster.csv (no header row, so the single column is named `Column1`).
csv = CSV.read("wooster.csv", header=false)
df = DataFrame(csv)
# The sign is flipped — presumably so that minima of the original series can be
# modelled as maxima; confirm against the text being reproduced.
data = -1 * df[:, :Column1]; # minima

In [None]:
# TODO : seasons? keep ξ same but calculate individuals μ and σ