### Script to combine all the separate NetCDF files for one species per season

* The script loops over the variable names in _varname_
* creates a new directory /_varname_/netcdf_season/
* and combines the data of the NetCDF files in /_varname_/netcdf_all into 4 NetCDF files (one per season), named varname_StartyearEndyear_season.nc (with "log_" removed from varname)

where season is a number between 1 and 4:

* 1: months 1-3
* 2: months 4-6
* 3: months 7-9
* 4: months 10-12
 

In [1]:
# Record the Julia build and the installed package versions for reproducibility.
versioninfo()
for (pkgname, pkgversion) in Pkg.installed()
    # one "name: version" line per installed package
    print("$pkgname: $pkgversion\n")
end

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2630L 0 @ 2.00GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, sandybridge)


In [2]:
# Switch to the project data directory; the last pwd() is the value shown as the
# cell result, confirming the directory the relative paths below are resolved from.
pwd()
cd("//data/20180306_OOPS/")
pwd()

"/data/20180306_OOPS"

Libz: 0.4.0
AxisAlgorithms: 0.3.0
OffsetArrays: 0.6.0
HTTP: 0.6.12
CondaBinDeps: 0.1.0
OceanPlot: 0.0.0-
Nullables: 0.0.6
ZMQ: 0.6.3
DataStructures: 0.8.4
Compat: 1.0.0
EzXML: 0.7.1
ShowItLikeYouBuildIt: 0.2.0
SpecialFunctions: 0.6.0
Blosc: 0.5.0
ZipFile: 0.6.0
FixedPointNumbers: 0.4.6
SHA: 0.5.7
Missings: 0.2.10
MAT: 0.4.0
HDF5: 0.9.3
ColorTypes: 0.6.7
BufferedStreams: 0.4.1
MbedTLS: 0.5.12
SortingAlgorithms: 0.2.1
Conda: 1.0.0
PyCall: 1.17.1
WoodburyMatrices: 0.3.0
JSON: 0.17.2
StatsBase: 0.23.1
IJulia: 1.9.1
PyPlot: 2.6.0
BinDeps: 0.8.8
Parameters: 0.9.1
DIVAnd: 2.0.0+
Mustache: 0.3.3
Reexport: 0.1.0
CMakeWrapper: 0.1.0
URIParser: 0.3.1
Interpolations: 0.8.0
BinaryProvider: 0.3.3
IniFile: 0.4.0
LaTeXStrings: 1.0.0
DataArrays: 0.7.0
VersionParsing: 1.1.1
Colors: 0.8.2
MacroTools: 0.4.2
NCDatasets: 0.3.0
Ratios: 0.2.0


In [3]:
using NCDatasets
using DIVAnd

### 1 create netcdf from template

In [4]:
# Adjusted from the output of the function 'ncgen'
# ncgen("test.nc", newfname = "test3.nc")

In [5]:
"""
    create_netcdf_season_template(varname, yearrange, season, varncsDir; nlon=951, nlat=401)

Create an empty NetCDF file for one variable and one season, ready to receive
one time slice per year.

The file is created in `varncsDir` and named
`<varname with "log_" removed>_<first year><last year>_<season>.nc`.
It defines

* the dimensions `lon`, `lat`, `time` (one entry per element of `yearrange`) and `nv` (2),
* the coordinate variables `lon`, `lat`, `time` and `climatology_bounds`,
* four `Float32` fields on `(lon, lat, time)`: `varname`, `varname_L1`,
  `varname_L2` and `varname_relerr`,
* the global attributes of the product.

No data values are written; only the structure and the attributes are defined.

# Arguments
- `varname`: name of the analysed variable, e.g. `"log_cal_fin"`.
- `yearrange`: indexable collection of years; its first and last elements go
  into the file name and its length sets the size of the `time` dimension.
- `season`: season identifier, only used in the file name.
- `varncsDir`: directory in which the file is created.

# Keywords
- `nlon`, `nlat`: sizes of the `lon` and `lat` dimensions (default 951 and 401).

Returns `nothing`.
"""
function create_netcdf_season_template(varname, yearrange, season, varncsDir;
                                       nlon = 951, nlat = 401)
    # Adjusted from the output of the function 'ncgen'
    # ncgen("test.nc", newfname = "test3.nc")

    # Three-argument `replace` is the Julia 0.6 form used by this notebook.
    shortname = replace(varname, "log_", "")
    label = replace(varname, '_', ' ')
    ncname = "$(shortname)_$(yearrange[1])$(yearrange[end])_$(season).nc"

    fillvalue = Float32(9.96921e36)
    timeunits = "days since 1900-01-01 00:00:00"
    griddims = ("lon", "lat", "time")

    ds = Dataset(joinpath(varncsDir, ncname), "c")
    try
        println("creating netcdf: $(ncname)")

        # Dimensions

        ds.dim["lon"] = nlon
        ds.dim["lat"] = nlat
        ds.dim["time"] = length(yearrange)
        ds.dim["nv"] = 2

        # Coordinate variables

        nclon = defVar(ds, "lon", Float64, ("lon",))
        nclon.attrib["units"] = "degrees_east"
        nclon.attrib["standard_name"] = "longitude"
        nclon.attrib["long_name"] = "longitude"

        nclat = defVar(ds, "lat", Float64, ("lat",))
        nclat.attrib["units"] = "degrees_north"
        nclat.attrib["standard_name"] = "latitude"
        nclat.attrib["long_name"] = "latitude"

        nctime = defVar(ds, "time", Float64, ("time",))
        nctime.attrib["units"] = timeunits
        nctime.attrib["standard_name"] = "time"
        nctime.attrib["long_name"] = "time"
        nctime.attrib["calendar"] = "standard"
        nctime.attrib["climatology"] = "climatology_bounds"

        ncclimatology_bounds = defVar(ds, "climatology_bounds", Float64, ("nv", "time",))
        ncclimatology_bounds.attrib["units"] = timeunits

        # Analysed field

        nclog = defVar(ds, "$(varname)", Float32, griddims)
        nclog.attrib["units"] = "logab"
        nclog.attrib["standard_name"] = "$(varname)"
        nclog.attrib["long_name"] = "$(label)"
        nclog.attrib["_FillValue"] = fillvalue
        nclog.attrib["missing_value"] = fillvalue
        nclog.attrib["cell_methods"] = "time: mean within years time: mean over years"

        # Masked copies of the field: L1 and L2 differ only in the relative
        # error threshold quoted in their long_name.
        for (level, threshold) in (("L1", "0.3"), ("L2", "0.5"))
            ncmasked = defVar(ds, "$(varname)_$(level)", Float32, griddims)
            ncmasked.attrib["units"] = "logab"
            ncmasked.attrib["standard_name"] = "$(varname)_$(level)"
            ncmasked.attrib["long_name"] = "$(label) masked using relative error threshold $(threshold)"
            ncmasked.attrib["_FillValue"] = fillvalue
            ncmasked.attrib["missing_value"] = fillvalue
        end

        # Relative error field

        nclog_relerr = defVar(ds, "$(varname)_relerr", Float32, griddims)
        nclog_relerr.attrib["units"] = "1"
        nclog_relerr.attrib["long_name"] = "Relative error of $(label)"
        nclog_relerr.attrib["valid_min"] = Float32(0.0)
        nclog_relerr.attrib["valid_max"] = Float32(1.0)
        nclog_relerr.attrib["_FillValue"] = fillvalue
        nclog_relerr.attrib["missing_value"] = fillvalue

        # Global attributes

        ds.attrib["Conventions"] = "CF-1.6"
        ds.attrib["title"] = "DIVA 3D analysis of $(label)"
        ds.attrib["file_name"] = "$(varname).nc"
        ds.attrib["product_id"] = "16342e64-c6e3-11e8-320c-055e34a07963"
        ds.attrib["date"] = "2018-10-03T10:05:26"
        ds.attrib["project"] = "EMODnet Biology"
        ds.attrib["institution_urn"] = "SDN::EDMO::422"
        ds.attrib["production"] = "Flanders Marine Institute"
        ds.attrib["Author_e-mail"] = "Lennert Schepers <lennert.schepers@vliz.be>"
        ds.attrib["source"] = "SAHFOS / Marine Biological Association (UK)"
        ds.attrib["comment"] = "..."
        ds.attrib["product_version"] = "2.0"
    finally
        # Close the file even if one of the definitions above throws, so a
        # failed run does not leave an open handle on a half-written file.
        close(ds)
    end

    return nothing
end

create_netcdf_season_template (generic function with 1 method)

##### populate netcdf  - per season

In [None]:
# Build, for every variable and every season, one NetCDF file holding the yearly
# analyses stacked along the time dimension.
#
# Input : <varname>/netcdf_all/<short>_<year><year>_<months>.nc   (one per year and season)
# Output: <varname>/netcdf_season/<short>_<first><last>_<season>.nc
# where <short> is varname with "log_" removed and <months> is the month range
# of the season ("0103", "0406", "0709" or "1012").
@show t1 = now()

yearrange = 1958:2016
for varname = ["log_tem_lon","log_cal_hel", "log_tot_lar","log_tot_sma",
               "log_oit_spp", "log_cal_fin","ratio_large_to_small",
               "log_aca_spp", "log_met_luc", "log_chli"]

    varDir = joinpath(pwd(),varname)             # species directory
    varncDir = joinpath(varDir,"netcdf_all")     # subdirectory with all separate netcdfs
    varncsDir = joinpath(varDir,"netcdf_season") # subdirectory for combined netcdfs
    mkpath(varncsDir)

    # file names use the variable name without "log_" (Julia 0.6 `replace` form)
    shortname = replace(varname,"log_","")

    for season = 1:4
        print("season: $(season)  ")

        # month range of the season as it appears in the yearly file names
        season_str = ("0103", "0406", "0709", "1012")[season]

        # create template
        println("creating netcdf template ...")
        create_netcdf_season_template(varname, yearrange, season, varncsDir)

        # append data to the NetCDF that the template call has just created
        Dataset(joinpath(varncsDir,"$(shortname)_$(yearrange[1])$(yearrange[end])_$(season).nc"),"a") do ds

            # i is the position of the year along the output time dimension
            for (i, year) in enumerate(yearrange)
                print("\nyear: $(year) ")

                fname = joinpath(varncDir,"$(shortname)_$(year)$(year)_$(season_str).nc")

                # each yearly file is opened exactly once
                Dataset(fname,"r") do nc

                    # the coordinates and the product id are taken from the first year only
                    if i == 1
                        ds["lat"][:] = nc["lat"][:]
                        ds["lon"][:] = nc["lon"][:]
                        ds.attrib["product_id"] = nc.attrib["product_id"]
                    end

                    # NOTE(review): the time value is read and written through the
                    # regular variable interface, so the conversion to the Float64
                    # stored on disk is presumably done by NCDatasets from the
                    # "units" attribute of each file -- confirm.
                    ds["time"][i] = nc["time"][1]
                    # `.var` is used for the bounds, i.e. the values are copied as
                    # stored; NOTE(review): this assumes the input uses the same
                    # time units as the template -- confirm.
                    ds["climatology_bounds"][:,i] = nc["climatology_bounds"].var[:,1]

                    # the input fields are indexed with four indices; the first
                    # element of the 3rd and 4th dimension is taken as the slice
                    ds["$(varname)"][:,:, i] = nc["$(varname)"][:,:,1,1]
                    ds["$(varname)_L1"][:,:, i] = nc["$(varname)_L1"][:,:,1,1]
                    ds["$(varname)_L2"][:,:, i] = nc["$(varname)_L2"][:,:,1,1]
                    ds["$(varname)_relerr"][:,:, i] = nc["$(varname)_relerr"][:,:,1,1]

                end # single netcdf
            end # year
        end # ds closed
    end # season

end # varname
@show t2=now()
@show Dates.canonicalize(Dates.CompoundPeriod(t2-t1))