This notebook is designed to create the _benthos_ interpolated maps using `DIVAnd`.      
The data file was prepared by P. Herman (Deltares).

In [23]:
using DIVAnd
using PyPlot
using Proj4
using DelimitedFiles
using PyCall
using Dates
using NCDatasets
using Pkg
include("../scripts/BenthosInterp.jl")
Pkg.status("DIVAnd")

# Switches selecting which outputs are produced by the cells below
doplot = false       # set to 'true' to create the plots (bathymetry, gridded fields)
doplotdata = true    # set to 'true' to plot the observations (presence/absence locations)
usecartopy = true    # set to 'true' if plots are created using Cartopy
writenc = true;     # set to 'true' to write netCDF files with the results

[32m[1mStatus[22m[39m `~/.julia/environments/v1.5/Project.toml`
 [90m [efc8151c] [39m[37mDIVAnd v2.6.5[39m


## Files and directories

In [2]:
figdir = "../product/figures/1-UniformL/"       # directory where the figures are saved
outputdir = "../product/netCDF/"                # directory where the netCDF results are written
datadir = "../data/"                            # directory holding the input files
datafile = joinpath(datadir, "specs4Diva.csv")  # observation file, downloaded when missing
# `mkpath` leaves existing directories untouched; `datadir` has to exist before the download
foreach(mkpath, (figdir, outputdir, datadir))
isfile(datafile) ? @info("File already downloaded") : download("https://dox.ulg.ac.be/index.php/s/vNQcvqjW8RzdNBt/download", datafile)

┌ Info: File already downloaded
└ @ Main In[2]:7


In [3]:
# Longitude/latitude bounds of the interpolation domain
domain = [-16., 9., 45., 66.]; # [West East South North]
# Grid spacing along longitude and latitude (same units as `domain`)
Δlon = 0.1
Δlat = 0.1

0.1

## Prepare mask
### Interpolation grid

In [4]:
# Regularly spaced coordinates of the interpolation grid, built from the domain bounds
longrid = range(domain[1], domain[2], step=Δlon)
latgrid = range(domain[3], domain[4], step=Δlat)

45.0:0.1:66.0

### Download bathymetry file

In [5]:
bathname = joinpath(datadir, "gebco_30sec_4.nc")
# Fetch the bathymetry file only when no local copy exists
if isfile(bathname)
    # Local copy found: nothing to download
    @info("Bathymetry file already downloaded")
else
    download("https://dox.ulg.ac.be/index.php/s/RSwm4HPHImdZoQP/download", bathname)
end

┌ Info: Bathymetry file already downloaded
└ @ Main In[5]:5


### Read bathymetry

In [6]:
# Read the bathymetry for the grid defined by `longrid` and `latgrid`.
# NOTE(review): the second argument (`true`) is presumably DIVAnd's `isglobal` flag — confirm
bx, by, b = load_bath(bathname, true, longrid, latgrid)
@show size(b)

# Optional figure of the bathymetry, saved in `figdir`
if doplot
    fig = PyPlot.figure()
    ax = PyPlot.subplot(111)
    # `b` is transposed before being passed to `pcolor`; the colour scale starts at 0
    pcolor(bx,by,b', vmin=0., cmap=PyPlot.cm.gist_earth); 
    colorbar(orientation="vertical")
    title("Depth (m)")
    savefig(joinpath(figdir, "benthos_bathy.jpg"), dpi=300, bbox_inches="tight")
    show()
end

size(b) = (251, 211)


### Metrics

In [7]:
# Grid metric (pm, pn); the other outputs of `DIVAnd_rectdom` are not needed because
# the coordinates and the mask are taken from the bathymetry file on the next line
_, (pm, pn), _ = DIVAnd.DIVAnd_rectdom(longrid, latgrid);
# NOTE(review): the last argument (0.0) is presumably the depth level separating
# land from sea — confirm against the DIVAnd documentation
xi, yi, mask = DIVAnd.load_mask(bathname, true, longrid, latgrid, 0.0);
# 2D coordinate arrays of the grid nodes
xx, yy = ndgrid(xi, yi);

## Interpolation
### Data reading

Loop over all the species: 
1. read the data;
2. compute the heatmaps of the presence and of the absence data;
3. derive the probability field as:
```
d = npre * dens_pre / (npre * dens_pre + nabs * dens_abs)
```
where 
* npre and nabs are the numbers of presence and absence observations, respectively;
* dens_pre is the heatmap obtained with the presence data only; 
* dens_abs is the heatmap obtained with the absence data only. 

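The reason for this equation is that each heatmap is computed so that its integral over the domain is 1, whatever the number of observations. Multiplying each heatmap by its number of observations restores the relative weight of the presence and absence data before they are combined. 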

In [8]:
# Set correlation length
Lvalues = [0.01, 0.05, 0.1, 0.5, 1.]
L = 0.1

0.1

In [32]:
include("../scripts/BenthosInterp.jl")

write_nc_error

In [34]:
speciesnamelist = get_species_list(datafile)
for species in speciesnamelist
    # For each species: read the observations, grid them, then plot/save the results
    speciesname = String(species)
    speciesslug = get_species_slug(speciesname)
    @info(speciesslug)
    @info("Working on species $(speciesname)");
    lon_pre, lat_pre, lon_abs, lat_abs = read_coords_species(datafile, species);
    npre = length(lon_pre)
    nabs = length(lon_abs)

    @info("Number of presence: $(npre), number of absence: $(nabs)")

    # Plot the data locations
    if doplotdata
        make_plot_presence_absence(lon_pre, lat_pre, lon_abs, lat_abs, speciesname,
            dlat=4., dlon=6.,
            figname=joinpath(figdir, "$(speciesslug)_data.jpg"),
            usecartopy=usecartopy)  # follow the notebook switch, like `plot_heatmap` below
    end

    # One heatmap for the presences and one for the absences, each observation having
    # a weight of 1; only the first output (the density field) is used afterwards
    @info("Computing heatmaps")
    dens_pre, _, _, _ = DIVAnd_heatmap(mask, (pm, pn), (xx, yy),
        (lon_pre, lat_pre), ones(npre), L);
    dens_abs, _, _, _ = DIVAnd_heatmap(mask, (pm, pn), (xx, yy),
        (lon_abs, lat_abs), ones(nabs), L);

    # Probability field: each heatmap is weighted by its number of observations
    d = npre .* dens_pre ./ (npre .* dens_pre .+ nabs .* dens_abs);

    # The error field is computed from all the observation locations (presence + absence)
    @info("Computing error field with CPME")
    lon = [lon_pre ; lon_abs]
    lat = [lat_pre ; lat_abs]

    # NOTE(review): 0.5 and 5. are presumably the correlation length and the
    # noise-to-signal ratio expected by `DIVAnd_cpme`; the length differs from `L` — confirm
    cpme = DIVAnd_cpme(mask, (pm, pn), (xx, yy), (lon, lat),
        ones(length(lon)), 0.5, 5.);

    if doplot
        plot_heatmap(longrid, latgrid, d, lon_pre, lat_pre, lon_abs, lat_abs,
            "$(species): probability",
            figname=joinpath(figdir, "$(speciesslug)_density.jpg"),
            usecartopy=usecartopy)
        #plot_error(longrid, latgrid, cpme, "$(species)", 
        #    joinpath(figdir, "$(speciesslug)_error.png"))
    end

    if writenc
        # The probability field and its error field are stored in the same file
        ncfile = joinpath(outputdir, "$(speciesslug)_density.nc")

        @info("Creating the netCDF file with results")
        create_nc_results(ncfile, longrid, latgrid, d, speciesname, domain=domain);

        @info("Adding error field to netCDF file")
        write_nc_error(ncfile, cpme);
    end

end

┌ Info: Abra_alba
└ @ Main In[34]:6
┌ Info: Working on species Abra_alba
└ @ Main In[34]:7
┌ Info: Column index for Abra_alba: 5
└ @ Main /home/ctroupin/Projects/EMODnet/EMODnet-Biology-Benthos-Interpolated-Maps/scripts/BenthosInterp.jl:21
┌ Info: Number of presence: 14068, number of absence: 65977
└ @ Main In[34]:12


In [40]:
# Display the directory in which the netCDF files are written
outputdir

"../product/netCDF/"

Create a new field that takes into account the error field:
* if error is zero, we take the gridded field as it is;
* if error is close to 1, it means we don't have observations so the new field is zero (assuming that we don't have observations means there is nothing to observe);
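* if error is between 0 and 1, we calculate the field as an intermediate value between these two cases (TODO: the formula is missing from this cell and still has to be written down).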

## Variable correlation length

In [30]:
?NCDatasets.defVar

```
defVar(ds::NCDataset,name,vtype,dimnames; kwargs...)
defVar(ds::NCDataset,name,data,dimnames; kwargs...)
```

Define a variable with the name `name` in the dataset `ds`.  `vtype` can be Julia types in the table below (with the corresponding NetCDF type). The parameter `dimnames` is a tuple with the names of the dimension.  For scalar this parameter is the empty tuple `()`. The variable is returned (of the type CFVariable).

Instead of providing the variable type one can directly give also the data `data` which will be used to fill the NetCDF variable. In this case, the dimensions with the appropriate size will be created as required using the names in `dimnames`.

If `data` is a vector or array of `DateTime` objects, then the dates are saved as double-precision floats and units "days since 1900-01-01 00:00:00" (unless a time unit is specifed with the `attrib` keyword as described below). Dates are converted to the default calendar in the CF conversion which is the mixed Julian/Gregorian calendar.

## Keyword arguments

  * `fillvalue`: A value filled in the NetCDF file to indicate missing data.  It will be stored in the _FillValue attribute.
  * `chunksizes`: Vector integers setting the chunk size. The total size of a chunk must be less than 4 GiB.
  * `deflatelevel`: Compression level: 0 (default) means no compression and 9 means maximum compression. Each chunk will be compressed individually.
  * `shuffle`: If true, the shuffle filter is activated which can improve the compression ratio.
  * `checksum`: The checksum method can be `:fletcher32` or `:nochecksum` (checksumming is disabled, which is the default)
  * `attrib`: An iterable of attribute name and attribute value pairs, for example a `Dict`, `DataStructures.OrderedDict` or simply a vector of pairs (see example below)
  * `typename` (string): The name of the NetCDF type required for [vlen arrays](https://web.archive.org/save/https://www.unidata.ucar.edu/software/netcdf/netcdf-4/newdocs/netcdf-c/nc_005fdef_005fvlen.html)

`chunksizes`, `deflatelevel`, `shuffle` and `checksum` can only be set on NetCDF 4 files.

## NetCDF data types

| NetCDF Type | Julia Type |
| -----------:| ----------:|
|     NC_BYTE |       Int8 |
|    NC_UBYTE |      UInt8 |
|    NC_SHORT |      Int16 |
|      NC_INT |      Int32 |
|    NC_INT64 |      Int64 |
|    NC_FLOAT |    Float32 |
|   NC_DOUBLE |    Float64 |
|     NC_CHAR |       Char |
|   NC_STRING |     String |

## Example:

In this example, `scale_factor` and `add_offset` are applied when the `data` is saved.

```julia-repl
julia> using DataStructures
julia> data = randn(3,5)
julia> NCDataset("test_file.nc","c") do ds
          defVar(ds,"temp",data,("lon","lat"), attrib = OrderedDict(
             "units" => "degree_Celsius",
             "add_offset" => -273.15,
             "scale_factor" => 0.1,
             "long_name" => "Temperature"
          ))
       end;
```

!!! note
    If the attributes `_FillValue`, `add_offset`, `scale_factor`, `units` and `calendar` are used, they should be defined when calling `defVar` by using the parameter `attrib` as shown in the example above.

