### Script to calculate optimal interpolation parameters

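* The script loops over the variable names listed in the `for varname = [...]` loop of the last cell.
* For each variable, it estimates the optimal noise-to-signal ratio (e, DIVAnd's `epsilon2`) and the optimal correlation length (l, in meters) by cross-validation, separately for each full year from 2000 to 2016.
* These optimal parameters are stored, one value per line, in the following files (relative to the working directory):
    * `<varname>/<varname>_newe.txt`
    * `<varname>/<varname>_newl.txt`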

In [1]:
# Restrict BLAS to a single thread for the rest of the session.
BLAS.set_num_threads(1)

In [2]:
# Record the Julia version and platform details this notebook was run with.
versioninfo()

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2630L 0 @ 2.00GHz
  WORD_SIZE: 

In [3]:
# List every installed package with its version, one "name: version" entry per line.
for (p, v) in Pkg.installed()
    print("$p: $v\n")
end

64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, sandybridge)


In [4]:
# Show the current working directory before switching to the data directory.
pwd()

"/home/lennerts/Diva_product_scripts_2018"

In [5]:
# Switch to the data directory; the relative file names used in the cells below
# (bathymetry file, "bigfile_*.txt" inputs, per-variable output folders) resolve
# against it.
cd("//data/20180306_OOPS/")

In [6]:
# Confirm that the working directory is now the data directory.
pwd()

"/data/20180306_OOPS"

In [7]:
using DIVAnd
using PyPlot
using NCDatasets
using DataStructures

# Load a more efficient version of sparse matrix multiplication
include(joinpath(Pkg.dir("DIVAnd"),"src","override_ssmult.jl"))

Libz: 0.4.0
AxisAlgorithms: 0.3.0
OffsetArrays: 0.6.0
HTTP: 0.6.12
CondaBinDeps: 0.1.0
OceanPlot: 0.0.0-
Nullables: 0.0.6
ZMQ: 0.6.3
DataStructures: 0.8.4
Compat: 1.0.0
EzXML: 0.7.1
ShowItLikeYouBuildIt: 0.2.0
SpecialFunctions: 0.6.0
Blosc: 0.5.0
ZipFile: 0.6.0
FixedPointNumbers: 0.4.6
SHA: 0.5.7
Missings: 0.2.10
MAT: 0.4.0
HDF5: 0.9.3
ColorTypes: 0.6.7
BufferedStreams: 0.4.1
MbedTLS: 0.5.12
SortingAlgorithms: 0.2.1
Conda: 1.0.0
PyCall: 1.17.1
WoodburyMatrices: 0.3.0
JSON: 0.17.2
StatsBase: 0.23.1
IJulia: 1.9.1
PyPlot: 2.6.0
BinDeps: 0.8.8
Parameters: 0.9.1
DIVAnd: 2.0.0+
Mustache: 0.3.3
Reexport: 0.1.0
CMakeWrapper: 0.1.0
URIParser: 0.3.1
Interpolations: 0.8.0
BinaryProvider: 0.3.3
IniFile: 0.4.0
LaTeXStrings: 1.0.0
DataArrays: 0.7.0
VersionParsing: 1.1.1
Colors: 0.8.2
MacroTools: 0.4.2
NCDatasets: 0.3.0
Ratios: 0.2.0


[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/lennerts/.julia/lib/v0.6/HTTP.ji for module HTTP.
[39m[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /home/lennerts/.julia/lib/v0.6/PyPlot.ji for module PyPlot.
[39m

### yearlists_ function

In [8]:
"""
    yearlists_(dataset_range, total_window_yrs)

Return every window of `total_window_yrs` consecutive years that fits inside
`dataset_range`, in the order of the window start years.

# Arguments
- `dataset_range`: the years covered by the data set, e.g. `2000:2012`.
- `total_window_yrs`: the number of years in each window, e.g. `10`.

# Returns
A `Vector{UnitRange{Int64}}` with `length(dataset_range) - total_window_yrs + 1`
entries. The vector is empty when the window is longer than `dataset_range`.

# Examples
`yearlists_(2000:2012, 10)` returns `[2000:2009, 2001:2010, 2002:2011, 2003:2012]`.
"""
function yearlists_(dataset_range, total_window_yrs)
    n_windows = length(dataset_range) - total_window_yrs + 1

    # A typed comprehension keeps the element type fixed even when no window fits
    # (n_windows <= 0 simply yields an empty vector) and avoids the uninitialised
    # `Array{T}(n)` constructor, which only exists in Julia 0.6 and earlier.
    return UnitRange{Int64}[
        dataset_range[i]:(dataset_range[i] + total_window_yrs - 1) for i in 1:n_windows
    ]
end

yearlists_ (generic function with 1 method)

### Plotting function

In [9]:

"""
    plotres(timeindex, sel, fit, erri)

Draw a two-panel figure for one time step: the near-surface observations on top
and the first level of the analysed field below, both with the same colour range.

# Arguments
- `timeindex`: label of the time step; only used in the title of the top panel.
- `sel`: mask over the observations; it is combined with `depth .< 5` to pick
  the points shown in the top panel.
- `fit`: the interpolated field; `fit[:, :, 1]` is drawn in the bottom panel.
- `erri`: error field used to index `fit`; grid points where it exceeds 0.3 are
  not drawn.

Besides its arguments, the function reads the global variables `value`, `lon`,
`lat` and `depth` (observations), `lonr` and `latr` (grid axes) and `bx`, `by`
and `b` (bathymetry), so these must be defined before it is called.
"""
function plotres(timeindex,sel,fit,erri) # fit = interpolated field
    # Work on a copy so the caller's field is not modified.
    masked = copy(fit)
    # Only plot where the error is below 0.3. The broadcast assignment `.=` is
    # used because the scalar form `A[mask] = x` is rejected by Julia 0.7 and
    # later; on Julia 0.6 both forms behave the same.
    masked[erri .> 0.3] .= NaN

    figure(figsize = (10,8))
    subplot(2,1,1)
    title("$(timeindex) - surface")

    # select the data near the surface
    selsurface = sel .& (depth .< 5)
    surfacevalues = value[selsurface]
    # The observed range fixes the colour scale of both panels.
    vmin, vmax = extrema(surfacevalues)

    # plot the data
    scatter(lon[selsurface],lat[selsurface],10,surfacevalues;
            vmin = vmin, vmax = vmax)
    xlim(minimum(lonr),maximum(lonr))
    ylim(minimum(latr),maximum(latr))
    colorbar()
    # Grey filled contour of the bathymetry between the levels -1e5 and 0.
    contourf(bx,by,b', levels = [-1e5,0],colors = [[.5,.5,.5]])

    # plot the analysis
    subplot(2,1,2)
    pcolor(lonr,latr,masked[:,:,1]';
           vmin = vmin, vmax = vmax)
    colorbar()
    contourf(bx,by,b', levels = [-1e5,0],colors = [[.5,.5,.5]])
end

plotres (generic function with 1 method)

## Parameters

Download the bathymetry file (skipped when it is already present in the working directory).

In [10]:
# Local name of the bathymetry file; it is fetched once and reused afterwards.
bathname = "gebco_30sec_4.nc"

if isfile(bathname)
    info("Bathymetry file already downloaded")
else
    download("https://b2drop.eudat.eu/s/ACcxUEZZi6a4ziR/download",bathname)
end

# Flag marking the bathymetry as a global data set.
bathisglobal = true

true

[1m[36mINFO: [39m[22m[36mBathymetry file already downloaded
[39m

Grid resolution and creation of the land-sea mask.

In [11]:
# Grid step, identical in longitude and latitude.
dx = dy = 0.1
lonr = -75.:dx:20.; # the range of longitudes (start:step:end)
latr = 35.:dy:75.; # the range of latitudes (start:step:end)
# Always write the decimal point here: integer values caused an error in DIVA.
depthr = [-1., 1.]

# create mask
# (DIVAnd.domain replaced the earlier divand_rectdom(lonr,latr) call after an
# error with diva3d.)
# The `bathisglobal` flag defined together with `bathname` is passed on instead
# of a hard-coded `true`, so changing the flag actually takes effect here.
mask,(pm,pn),(xi,yi) = DIVAnd.domain(bathname, bathisglobal, lonr, latr)
# Bathymetry on the same grid; plotres uses it to draw the grey filled contour.
bx,by,b = load_bath(bathname,bathisglobal,lonr,latr)


(-75.0:0.1:20.0, 35.0:0.1:75.0, [2475.89 1803.55 … 290.672 483.172; 2698.02 2466.61 … 291.781 466.875; … ; 3013.61 2988.84 … 173.094 126.891; 2998.63 2965.13 … 146.625 140.734])

### Optimisation loop over:
* the different variables (varname)
* the different year ranges (yearlists -> yr)
* the different seasons (monthlists -> mo)

In [None]:
# First guesses handed to the cross-validation. DIVAnd_cv returns multiplicative
# factors that are applied to these values to obtain the optimal parameters.
epsilon2 = 2 # 0.5^-1 (old value: 0.5);
len = 150_000 # IN  METERS

# Months and years
monthlists = [[1,2,3,4,5,6,7,8,9,10,11,12]] # a single "season" covering all months
yearlists = yearlists_(2000:2016, 1);        # one single-year window per year

for varname = ["log_tem_lon","log_cal_hel", "log_tot_lar","log_tot_sma",
                "log_oit_spp", "log_cal_fin","ratio_large_to_small",
                "log_aca_spp", "log_met_luc", "log_chli"]

    # i counts the (year range, season) combinations of the current variable.
    i = 1
    newl = ones(length(yearlists)*length(monthlists))
    newe = ones(length(yearlists)*length(monthlists))

    # Results of each variable go into a folder of their own.
    varDir = joinpath(pwd(),varname)
    mkpath(varDir)
    efile = joinpath(varDir,"$(varname)_newe.txt")
    lfile = joinpath(varDir,"$(varname)_newl.txt")

    @show varname
    t1 = now()
    @show t1

    # read data
    fname = "bigfile_$(varname).txt"

    if !isfile(fname)
        error("ERROR: File not found")
    else
        info("File found -> OK!")
    end

    value,lon,lat,depth,time,ids = loadbigfile(fname)
    checkobs((lon,lat,depth,time), value, ids)

    # The results are appended step by step below, so that a partial run still
    # leaves usable output. Remove the files of a previous run first; otherwise
    # every re-run would add a second set of values to the old ones. This is done
    # only after the input was read successfully.
    for resultfile in (efile, lfile)
        isfile(resultfile) && rm(resultfile)
    end

    #### make selection for each timestep
    for yr = yearlists
        for mo = monthlists
            print("start $(i)  ")
            TS = TimeSelectorYearListMonthList([yr],[mo])
            @show TS
            t1 = now() # keep track of time
            @show t1

            # subset data based on timestep: observations shallower than 1
            # whose month is in `mo` and whose year is in `yr`
            sel = (depth .< 1) .&
                  [Dates.month(t) in mo for t in time] .&
                  [Dates.year(t) in yr for t in time]

            value_sel = value[sel]
            lon_sel = lon[sel]
            lat_sel = lat[sel]
            @show (size(value_sel))

            #### run optimization for noise-to-signal ratio
            # optimal parameter selection with mode '0' = default
            # NOTE(review): the trailing arguments 2, 3, 0 are presumably nl, ne
            # and the cross-validation method -- confirm against the DIVAnd_cv
            # documentation.
            # The mean of the selected observations is removed before the fit.
            @time bestfactorl,bestfactore, cvval,cvvalues, x2Ddata,y2Ddata,cvinter,xi2D,yi2D =
            DIVAnd_cv(mask,(pm,pn),(xi,yi),(lon_sel,lat_sel),value_sel .- mean(value_sel),len,epsilon2,2,3,0);

            @show newl[i]=len*bestfactorl
            @show newe[i]=epsilon2*bestfactore

            # Append the result of this time step right away (one value per line).
            open(efile, "a") do io
                writedlm(io, newe[i])
            end

            open(lfile, "a") do io
                writedlm(io, newl[i])
            end

            i = i+1

        end # mo
    end # yr

end # varname