In [1]:
#%reset -f
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.collections import PolyCollection #for plots polygons as rasters
import xarray as xr
from datetime import datetime, timedelta as delta
from matplotlib import path
from scipy.spatial import KDTree, cKDTree #c implementation is faster (to find nearest neighbor)
import os
import dask as da
from tqdm import tqdm
import glob
from scipy.interpolate import griddata #interpolation in space for non-uniform grids

In [2]:
#configuration -------------------------------------------------------------
#input directories
home_dir = "/export/lv4/user/jfajardourbina/"
dir_vel = f"{home_dir}dws_ulf_getm_2D_depth_avg/data/velocity/"
dir_dws_bound = f"{home_dir}dws_ulf_getm_2D_depth_avg/experiments_post_proc/analysis_eulerian_data_36years/data_dws_boundaries/"
#labels of the lagrangian experiment (they are part of the name of the tracks directory)
savee = 'everyM2'   #track data was saved every m2
deploy = 'everyM2'  #a set of particles was deployed every m2
minTsim = 60        #minimum time of simulation (days)
maxTsim = 91        #maximum time of simulation (days)
dir_tracks = f"{home_dir}dws_ulf_getm_2D_depth_avg/experiments_post_proc/lagrangian_simulation_36years/exp-deployHighVolume_coords-xcyc_save-{savee}_deploy-{deploy}_Tsim-{minTsim}-{maxTsim}d/tracks/"
#input files
files_vel_root = "RE.DWS200m.uvz."
file_dws_bound0 = "dws_boundaries_contour0.nc"  #contour of DWS are the land points closest to ocean points
#parameters
npa_per_dep = 12967          #number of particles per deployment
m2 = int(12.42*3600+2)       #m2 period in seconds
dx = 400/1e3                 #particle grid resolution along x (multiplied by 1e3 wherever the grid is built)
dy = 400/1e3                 #particle grid resolution along y
#
#output directories
dir_post_proc_data = f"{home_dir}dws_ulf_getm_2D_depth_avg/experiments_post_proc/lagrangian_simulation_36years/machine_learning_github/Lagrangian_ML/post_proc_data/"
dir_net_displacement = "net_displacement/"

In [3]:
#DWS boundaries ----
names_tr = ["marsdiep","eierlandsgat","vlie","borndiep","pinkegat","watershed1"]
names_islands = ["coast","texel","vlieland","terschelling","ameland","schiermonnikoog"]
#contour0 (inlets and boundaries along coast and islands)
dsb0 = xr.open_dataset(dir_dws_bound+file_dws_bound0)  #float64
bdr_dws0 = dsb0.bdr_dws.values  #points that define DWS

#any velocity file is enough here: only the grid coordinates and the bathymetry are read ---
year_sim = 2009
month_sim = 5
file_vel_path = f"{dir_vel}{files_vel_root}{year_sim}{month_sim:02d}01.nc"  #:02d pads the month with a leading zero
ds = xr.open_dataset(file_vel_path,chunks={'xc':-1,'yc':-1,'time':110})  #chunks every 36h
xc = ds.xc
yc = ds.yc
h = ds.bathymetry.load()
#mask: 1 where the bathymetry is finite (ocean), 0 elsewhere (land)
mask = xr.where(np.isfinite(h),1,0)

In [5]:
#nearest-neighbour gridding of particle values on an extended domain (the padding avoids boundary problems when using convolution)
def gridding_particles(var,x0,y0):
    """Put one value per particle on a regular grid that extends beyond the particle positions.

    Parameters
    ----------
    var : values to grid, one per particle, in the same order as x0 and y0.
    x0, y0 : particle positions. The grid spacing is the module-level dx, dy multiplied by 1e3.

    Returns
    -------
    xgrid0, ygrid0 : 2D coordinates of the extended grid (np.meshgrid output).
    valgrid : 2D array with the shape of xgrid0; NaN in every cell that received no particle.
    """
    pad=10  #number of grid steps added on each side of the particle min/max positions
    step_x=dx*1e3
    step_y=dy*1e3
    x_axis=np.arange(x0.min()-step_x*pad, x0.max()+step_x*(pad+1), step_x)
    y_axis=np.arange(y0.min()-step_y*pad, y0.max()+step_y*(pad+1), step_y)
    xgrid0,ygrid0=np.meshgrid(x_axis,y_axis)
    #tree built with all the nodes of the extended grid (flattened in row-major order)
    grid_nodes=np.column_stack((xgrid0.ravel(),ygrid0.ravel()))
    tree=cKDTree(grid_nodes)
    #flat index of the grid node closest to every particle
    _,nearest=tree.query(np.c_[x0,y0], k=1)
    #start from an all-NaN grid and fill only the nodes that hold a particle
    flat_vals=np.full(xgrid0.size,np.nan,dtype=xgrid0.dtype)
    flat_vals[nearest]=var
    valgrid=flat_vals.reshape(xgrid0.shape)
    return xgrid0,ygrid0,valgrid

In [1]:
#years to process (both ends included)
year_ini = 1980
year_end = 2015
years = np.arange(year_ini, year_end+1)

In [7]:
%%time
#it takes about 10 min for the 36 years
#
#For every year: read the track files of that year, compute for each deployment the net
#displacement of the particles between the release and (it+1) saved steps later, put it on the
#extended regular grid with nearest neighbour, and save all the deployments of the year in one netcdf file.
dir_out_nc=dir_post_proc_data+dir_net_displacement
os.makedirs(dir_out_nc,exist_ok=True) #to_netcdf does not create missing directories
it=0 #it = 0(displacement after M2 from fixed grid ini positions), 1(displacement after 2xM2), ...
extend_grid=10 #so from particle min max positions extend grid 10*dx (to not have problems with convolution)

for year in years:

    print(year)
    files_track_paths=sorted(glob.glob(f'{dir_tracks}{year}/*.nc'))
    if len(files_track_paths)==0:
        #nothing to concatenate and no deployment dates for this year
        print(f"no track files found in {dir_tracks}{year}/, skipping this year")
        continue

    dx_months=[]; dy_months=[] #gridded displacements, one array (deploys,y,x) per file
    nonan=[] #number of nan found in the gridded data of every file
    for i,file_track_path in enumerate(files_track_paths):
        #
        #all the reads happen inside the with-block, so the file is closed only after its data is in memory
        with xr.open_dataset(file_track_path, chunks={'traj': npa_per_dep}) as dst0: #float32, a bit faster than npa_per_dep*10 when .compute data for this case
            #
            #get number of deployments for this file
            num_deploys_1m0=dst0.sizes['traj']//npa_per_dep
            #ini_dep / end_dep = 0(1st dep), 1(2nd dep), 2...
            ini_dep=0 #select index of initial deployment
            end_dep=num_deploys_1m0-1 #select index of last deployment
            #
            if i==0:
                #build grid (like the one of displacements) from the initial positions of the 1st deployment
                x0=dst0.x.isel(traj=range(npa_per_dep),obs=0).values
                y0=dst0.y.isel(traj=range(npa_per_dep),obs=0).values
                xmin=x0.min();xmax=x0.max();ymin=y0.min();ymax=y0.max()
                xgrid=np.arange(xmin-dx*1e3*extend_grid,xmax+dx*1e3*(extend_grid+1),dx*1e3,dtype='float32')
                ygrid=np.arange(ymin-dy*1e3*extend_grid,ymax+dy*1e3*(extend_grid+1),dy*1e3,dtype='float32')
                xgrid0,ygrid0=np.meshgrid(xgrid,ygrid)
                points=np.array([x0,y0]).T
                #mask = 1 in the grid cells that hold a particle and nan elsewhere.
                #It only depends on the initial positions, so it is built once per year and
                #from ones (not from the data), then a nan in the data can not leak into other deployments
                _,_,mask_par=gridding_particles(np.ones(npa_per_dep),x0,y0)
                assert mask_par.shape==xgrid0.shape, "mask and output grid have different shapes"
                tdep_ini=dst0.time.isel(obs=0,traj=ini_dep*npa_per_dep).values #initial date of deployment for this year
            if i==len(files_track_paths)-1:
                tdep_end=dst0.time.isel(obs=0,traj=(end_dep+1)*npa_per_dep-1).values #final date of deployment for this year
            #
            # get the displacements for all the deployments of this file-------
            deploys=np.arange(npa_per_dep*ini_dep,npa_per_dep*(end_dep+1))
            #net displacement from initial time of release until the it time (positions in m, so /1e3 gives km)
            dxx=(dst0.x.isel(traj=deploys,obs=[0,it+1]).diff("obs")/1e3).isel(obs=0).values
            dyy=(dst0.y.isel(traj=deploys,obs=[0,it+1]).diff("obs")/1e3).isel(obs=0).values
        #
        #grid data with nearest----
        #it is faster than our gridding method and the same results
        dx_grid0=np.reshape(dxx,(num_deploys_1m0,npa_per_dep)) #(deploys,npa_per_dep)
        dx_grid0=np.moveaxis(griddata(points, dx_grid0.T, (xgrid0, ygrid0), method='nearest'),-1,0) #(deploys,y,x)
        dy_grid0=np.reshape(dyy,(num_deploys_1m0,npa_per_dep))
        dy_grid0=np.moveaxis(griddata(points, dy_grid0.T, (xgrid0, ygrid0), method='nearest'),-1,0)
        #check no nans in gridding (counted before masking, so it reflects nans of the original data)
        nonan.append(np.sum(np.isnan(dx_grid0))+np.sum(np.isnan(dy_grid0)))
        #now put nan to the values that grid data put to the regions out DWS domain due to the nearest method
        dx_grid0*=mask_par; dy_grid0*=mask_par
        #
        #save data
        dx_months.append(dx_grid0); dy_months.append(dy_grid0)

    print(np.sum(np.array(nonan))) #should be always 0, so no nan in original data

    #Saving all the files in 1 file for this year-----
    #
    t_dep=np.arange(tdep_ini,tdep_end+np.timedelta64(1,'s'),m2,dtype='datetime64[s]') #only for this year
    dx_grid=np.concatenate(dx_months,axis=0) #(time_dep,y,x)
    dy_grid=np.concatenate(dy_months,axis=0)
    if len(t_dep)!=dx_grid.shape[0]:
        #xarray would also fail below, but with a less explicit message
        raise ValueError(f"{year}: {len(t_dep)} deployment dates but {dx_grid.shape[0]} gridded deployments")

    dsout = xr.Dataset()
    #global coords and attrs---
    dsout.coords["time"] = t_dep
    dsout["time"].attrs['description'] = 'initial date of deployments, values every M2'
    dsout.coords["y"] = ygrid
    dsout["y"].attrs['description'] = 'y-position in meter'
    dsout.coords["x"] = xgrid
    dsout["x"].attrs['description'] = 'x-position in meter'
    #
    dsout.attrs["year_of_deployments"] = f"{year}"
    dsout.attrs["npar"] = f"number of particles per deployment = {npa_per_dep}"
    #
    #variables---
    #
    dsout["it"] = it+1
    dsout["it"].attrs['long_name'] = 'net displacement between [t0, t0+it*M2]'
    dsout["m2"] = m2
    dsout["m2"].attrs['long_name'] = 'm2 tidal period in seconds'
    #
    dsout["dx"] = (("time","y","x"),dx_grid)
    dsout["dx"].attrs['long_name'] = 'net displacement along x-axis'
    dsout["dx"].attrs['units'] = 'km' #the position differences were divided by 1e3 above
    #
    dsout["dy"] = (("time","y","x"),dy_grid)
    dsout["dy"].attrs['long_name'] = 'net displacement along y-axis'
    dsout["dy"].attrs['units'] = 'km' #the position differences were divided by 1e3 above
    #
    file_out_nc=f"{year}_net_displacement_during_{it+1}M2_for_convlstm.nc"
    dsout.to_netcdf(dir_out_nc+file_out_nc)
    dsout.close(); del dsout