Compute the validator for each analysis file, for different values of the correlation length $L$.

In [1]:
using DIVAnd
using PyPlot
using Proj4
using DelimitedFiles
using PyCall
using Dates
using NCDatasets
# Project helper scripts (presumably they define the data-reading, coordinate
# transformation and validation helpers called in the cells below -- confirm).
include("../scripts/PhytoInterp.jl")
include("../scripts/validate_probability.jl")
# NOTE(review): `doplot` is not referenced in the visible cells -- confirm it is still needed.
doplot = true

true

## Files and directories

In [2]:
# Directory holding the CSV files and local directory for the other data files
csvdir = "/data/EMODnet/Biology/phytoplankton/CSV-split/"
datadir = "./data/"
datafilelist = readdir(csvdir);

# Keep only the files whose name contains "analysis"
anaylsisfilelist = filter(fname -> occursin("analysis", fname), datafilelist)

@info("Working on $(length(anaylsisfilelist)) files");
figdir = "./figures/validation/"

# Create the data and figure directories when they do not exist yet
isdir(datadir) || mkpath(datadir)
isdir(figdir) || mkpath(figdir);

┌ Info: Working on 200 files
└ @ Main In[2]:8


## Prepare mask

In [3]:
# Grid on which the heatmaps are computed: step of 0.1 in both directions
# (presumably degrees of longitude/latitude -- confirm).
longrid = -2.:0.1:10.
latgrid = 51.:0.1:56.
# Bathymetry file from which the mask is built
bathname = joinpath(datadir, "gebco_30sec_4.nc")
# NOTE(review): bx, by and b are not used in the visible cells -- confirm this call is still needed.
bx, by, b = load_bath(bathname, true, longrid, latgrid)
# The metrics (pm, pn) are taken from this call; the mask and (xi, yi) it returns
# are overwritten by the next statement.
mask, (pm, pn),(xi, yi) = DIVAnd.DIVAnd_rectdom(longrid, latgrid);
# Replace mask and coordinates with those derived from the bathymetry file
# (last argument 0.0: presumably the depth level separating land and sea -- confirm).
xi, yi, mask = DIVAnd.load_mask(bathname, true, longrid, latgrid, 0.0);
# Coordinate grids built from xi and yi; read as globals by compute_validator.
xx, yy = ndgrid(xi, yi);

In [4]:
"""
    compute_validator(CLlist, lon, lat, occurs, lonv, latv, occursv)

Return a vector holding one validator value per correlation length in `CLlist`.

For each length `L`, a heatmap is computed with `DIVAnd_heatmap` for the presence
points (`occurs .== 1`) and another one for all the other points. The two heatmaps are
combined into a relative density, weighted by the number of points in each group, and
that field is scored with `validate_probability` against the validation observations.

# Arguments
- `CLlist`: collection of correlation lengths to test.
- `lon`, `lat`: coordinates of the analysis observations.
- `occurs`: occurrence flag of each analysis observation; a value of 1 means presence.
- `lonv`, `latv`, `occursv`: coordinates and occurrence flags of the validation
  observations, passed unchanged to `validate_probability`.

# Notes
The function reads the globals `mask`, `pm`, `pn`, `xx`, `yy`, `longrid` and `latgrid`
defined in the mask-preparation cell.
"""
function compute_validator(CLlist, lon, lat, occurs, lonv, latv, occursv)

    data_presence = occurs .== 1
    data_absence = .!(data_presence)

    npre = sum(data_presence)
    nabs = sum(data_absence)
    @info("Presence: $(npre), absence: $(nabs)");

    # Every observation gets the same unit weight
    inflation = ones(length(lon))

    # The subsets do not depend on the correlation length: extract them only once
    pos_presence = (lon[data_presence], lat[data_presence])
    pos_absence = (lon[data_absence], lat[data_absence])
    inflation_presence = inflation[data_presence]
    inflation_absence = inflation[data_absence]

    validatorlist = ones(length(CLlist))

    for (i, L) in enumerate(CLlist)
        # Only the first output (the density field) of DIVAnd_heatmap is needed
        dens_presence = first(DIVAnd_heatmap(mask, (pm, pn), (xx, yy),
            pos_presence, inflation_presence, L, nmax=20000))
        dens_absence = first(DIVAnd_heatmap(mask, (pm, pn), (xx, yy),
            pos_absence, inflation_absence, L, nmax=20000))

        # Weight each density by its number of observations, so that the
        # result reflects the relative amount of presences and absences
        reldens = npre .* dens_presence ./
            (npre .* dens_presence .+ nabs .* dens_absence)

        # The field is passed directly to the validator; the former call to
        # `reinterp_field` produced a value that was never used and was removed.
        validatorlist[i] = validate_probability((longrid, latgrid), reldens,
            (lonv, latv), occursv)
    end
    return validatorlist
end

compute_validator (generic function with 1 method)

## Main loop

In [5]:
"""
    plot_CL_validator(CLlist, validatorlist, figname)

Plot the validator values `validatorlist` against the correlation lengths `CLlist`
and save the figure to `figname` (300 dpi, tight bounding box).

The figure created here is always closed, even when plotting or saving fails, so that
calling the function in a long loop does not accumulate open figures.
"""
function plot_CL_validator(CLlist::AbstractArray, validatorlist::AbstractArray, figname)
    fig = PyPlot.figure(figsize=(10, 8))
    try
        plot(CLlist, validatorlist, "ko--")
        xlabel("Correlation length (°)")
        title("Validator ", loc="left", rotation=0, ha="right")
        PyPlot.savefig(figname, dpi=300, bbox_inches="tight")
    finally
        # Close the figure created above rather than whichever figure is current
        PyPlot.close(fig)
    end
    return nothing
end

plot_CL_validator (generic function with 1 method)

In [6]:
# Correlation lengths to test and file in which the scores are written
CLlist = [0.05, 0.1, 0.5, 1., 1.5, 2., 5.];
validationfile = "../product/validation_scores_weighted.dat"

# Make sure the output directory exists before opening the file
mkpath(dirname(validationfile))

# Open file and write tested correlation lengths
open(validationfile, "w") do io
    write(io, "L", "\t", join(CLlist, "\t"), "\n")

    for datafile in anaylsisfilelist
        speciesname = get_species_name(datafile)
        speciesslug = get_species_slug(datafile)
        @info("Working on $(speciesname)")

        # The validation file has the same name as the analysis file, with
        # "analysis" replaced by "validation". A distinct variable name is used
        # so it is not confused with the output file `validationfile`.
        validationdatafile = replace(datafile, "analysis" => "validation")
        validationpath = joinpath(csvdir, validationdatafile)
        if !isfile(validationpath)
            # Skip this species: reading a missing file would abort the whole loop
            @error("Validation file not found", validationpath)
            continue
        end

        # Data reading
        dates, lons, lats, occurs = read_data_phyto(joinpath(csvdir, datafile));
        datesv, lonv, latv, occursv = read_data_phyto(validationpath);

        # Conversion of coordinates
        # We have to go from `EPSG:32361` to `WGS84`.
        lon, lat = transform_coords(lons, lats)
        lonv, latv = transform_coords(lonv, latv)

        # Compute the validator for each correlation length
        validatorlist = compute_validator(CLlist, lon, lat, occurs, lonv, latv, occursv);

        figname = joinpath(figdir, "$(speciesslug)_validador_weighted")
        plot_CL_validator(CLlist, validatorlist, figname)

        # One line per species: name followed by the rounded validator values
        write(io, speciesname, "\t", join(round.(validatorlist, digits=3), "\t"), "\n")

    end # end of the loop
end

┌ Info: Working on Actinocyclus octonarius var. octonarius
└ @ Main In[6]:11
┌ Info: Presence: 1173, absence: 7714
└ @ Main In[4]:8
┌ Info: Working on Actinocyclus senarius
└ @ Main In[6]:11
┌ Info: Presence: 3676, absence: 5894
└ @ Main In[4]:8
┌ Info: Working on Actinocyclus
└ @ Main In[6]:11
┌ Info: Presence: 4281, absence: 7069
└ @ Main In[4]:8
┌ Info: Working on Actinoptychus
└ @ Main In[6]:11
┌ Info: Presence: 882, absence: 14536
└ @ Main In[4]:8
┌ Info: Working on Amphidinium
└ @ Main In[6]:11
┌ Info: Presence: 798, absence: 10747
└ @ Main In[4]:8
┌ Info: Working on Archaeperidinium minutum
└ @ Main In[6]:11
┌ Info: Presence: 1113, absence: 6289
└ @ Main In[4]:8
┌ Info: Working on Archaeperidinium
└ @ Main In[6]:11
┌ Info: Presence: 1042, absence: 7400
└ @ Main In[4]:8
┌ Info: Working on Asterionella glacialis
└ @ Main In[6]:11
┌ Info: Presence: 4232, absence: 14861
└ @ Main In[4]:8
┌ Info: Working on Asterionella kariana
└ @ Main In[6]:11
┌ Info: Presence: 1579, absence: 7945
└

┌ Info: Working on Heterocapsa
└ @ Main In[6]:11
┌ Info: Presence: 5595, absence: 5400
└ @ Main In[4]:8
┌ Info: Working on Katodinium glaucum
└ @ Main In[6]:11
┌ Info: Presence: 3068, absence: 5040
└ @ Main In[4]:8
┌ Info: Working on Katodinium
└ @ Main In[6]:11
┌ Info: Presence: 3036, absence: 7510
└ @ Main In[4]:8
┌ Info: Working on Lauderia annulata
└ @ Main In[6]:11
┌ Info: Presence: 2195, absence: 16586
└ @ Main In[4]:8
┌ Info: Working on Lauderia
└ @ Main In[6]:11
┌ Info: Presence: 2145, absence: 16727
└ @ Main In[4]:8
┌ Info: Working on Leptocylindrus danicus
└ @ Main In[6]:11
┌ Info: Presence: 3232, absence: 15028
└ @ Main In[4]:8
┌ Info: Working on Leptocylindrus minimus
└ @ Main In[6]:11
┌ Info: Presence: 1861, absence: 6670
└ @ Main In[4]:8
┌ Info: Working on Leptocylindrus
└ @ Main In[6]:11
┌ Info: Presence: 4574, absence: 15742
└ @ Main In[4]:8
┌ Info: Working on Leucocryptos marina
└ @ Main In[6]:11
┌ Info: Presence: 1738, absence: 4640
└ @ Main In[4]:8
┌ Info: Working on

┌ Info: Working on Rhizosolenia fragilissima f. fragilissima
└ @ Main In[6]:11
┌ Info: Presence: 1655, absence: 17075
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia hebetata
└ @ Main In[6]:11
┌ Info: Presence: 941, absence: 8177
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia imbricata
└ @ Main In[6]:11
┌ Info: Presence: 2044, absence: 7692
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia setigera
└ @ Main In[6]:11
┌ Info: Presence: 4656, absence: 14993
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia shrubsolei
└ @ Main In[6]:11
┌ Info: Presence: 3206, absence: 4212
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia stolterfothii
└ @ Main In[6]:11
┌ Info: Presence: 1169, absence: 5148
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia styliformis
└ @ Main In[6]:11
┌ Info: Presence: 1559, absence: 15855
└ @ Main In[4]:8
┌ Info: Working on Rhizosolenia
└ @ Main In[6]:11
┌ Info: Presence: 11595, absence: 9901
└ @ Main In[4]:8
┌ Info: Working on Scenedesmus
└ @ Main In[6]:11
┌ Info: Presen