In [7]:
from itertools import compress
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr

In [8]:
#load data with xarray
# NOTE(review): absolute, machine-specific path - adjust it to wherever the
# Delft3D4 trim NetCDF file lives on the machine running this notebook.
file_path = r"d:\klein\DELFT3D4_model_converter\_Delft3D4_input_data\trim-Model-k01-with topography-eco.nc"
# Open the NetCDF file as an xarray Dataset; later cells subset this object.
xr_file = xr.open_dataset(file_path)
# Show the text summary (dimensions, coordinates, data variables) as a sanity check.
print(xr_file)

<xarray.Dataset> Size: 2GB
Dimensions:        (MC: 407, NC: 29, M: 407, N: 29, LSTSCI: 2, SIG_LYR: 1,
                    SIG_INTF: 2, KMAXOUT: 2, KMAXOUT_RESTR: 1, time: 1276,
                    LSED: 1, KMAX: 1, LSEDTOT: 1, nlyr: 1, nlyrp1: 2, avgtime: 1)
Coordinates:
    XZ             (M, N) float64 94kB ...
    YZ             (M, N) float64 94kB ...
  * SIG_LYR        (SIG_LYR) float32 4B -0.5
  * SIG_INTF       (SIG_INTF) float32 8B 0.0 -1.0
  * KMAXOUT        (KMAXOUT) int32 8B 0 1
  * KMAXOUT_RESTR  (KMAXOUT_RESTR) int32 4B 0
  * time           (time) datetime64[ns] 10kB 2020-05-01T01:00:00 ... 2020-10...
Dimensions without coordinates: MC, NC, M, N, LSTSCI, LSED, KMAX, LSEDTOT,
                                nlyr, nlyrp1, avgtime
Data variables: (12/48)
    XCOR           (MC, NC) float64 94kB ...
    YCOR           (MC, NC) float64 94kB ...
    ALFAS          (M, N) float32 47kB ...
    KCU            (MC, N) int32 47kB ...
    KCV            (M, NC) int32 47kB ...
    KCS 

In [9]:
#Set aside and drop variables that are challenging for writing with xarray

# Keep stand-alone copies of the variables removed below (plus the grid
# variable) so that they can be attached to the subset again later on.
mftavg = xr_file["MFTAVG"].copy()
moravg = xr_file["MORAVG"].copy()
grid = xr_file["grid"].copy()

# Remove the variables that might be challenging for writing.
# NOTE: this rebinds xr_file, so re-running this cell without reloading the
# data fails on the copies above (the variables are already gone by then).
xr_file = xr_file.drop_vars(["MFTAVG", "MORAVG"])  # "NAMCON", "GRAVITY" are kept

# The grid variable stays in the dataset; dropping it is disabled:
#xr_file = xr_file.drop_vars(["grid"])

In [10]:
#Subset on time and area to reduce size

def row_in_list(row, check_set):
    '''
    Assess whether every non-NaN value of a row is contained in check_set.

    row: pandas object offering dropna() and tolist() (e.g. a Series).
    check_set: container that supports membership tests (the "in" operator).

    Returns True when all remaining values are members of check_set, which
    includes the case where nothing is left after dropping the NaN's;
    returns False as soon as one value is not a member.
    '''
    for value in row.dropna().tolist():
        if value not in check_set:
            return False
    return True

#Subset on time to reduce size (label-based slice on the time coordinate)
sub_file = xr_file.sel(time = slice('2020-05-03','2020-05-04'))
time_attrs = sub_file["time"].attrs
#time_attrs["_FillValue"] = False
# With the override above disabled, this re-attaches the unchanged attributes.
sub_file["time"] = sub_file["time"].assign_attrs(time_attrs)


#Subset on bounding box: flag every (MC, NC) node whose XCOR/YCOR lies inside the box
x_cor = sub_file['XCOR']
y_cor = sub_file['YCOR']
bool_nodes_needed = (y_cor >= 34.805) & (y_cor <= 34.81) &\
    (x_cor >= 135.79)  & (x_cor <= 135.81)

# Collapse the 2-D mask to one flag per grid line: a line along MC (or NC) is
# kept as soon as any of its nodes falls inside the bounding box.
bool_nodes_needed_M = bool_nodes_needed.any(dim = "NC")
bool_nodes_needed_N = bool_nodes_needed.any(dim = "MC")

# Sorted integer positions of the grid lines to keep.
M_nodes_needed = np.flatnonzero(bool_nodes_needed_M.values)
N_nodes_needed = np.flatnonzero(bool_nodes_needed_N.values)
if M_nodes_needed.size == 0 or N_nodes_needed.size == 0:
    # Fail loudly instead of silently producing (and later writing) an empty dataset.
    raise ValueError("Bounding box selects no grid nodes; check the XCOR/YCOR limits.")

# The M/MC and N/NC dimensions carry no coordinates, so select by position.
# The same positions are applied to M and MC, and to N and NC, so that all
# variables keep matching shapes.
sub_file = sub_file.isel(M = M_nodes_needed, MC = M_nodes_needed,
                         N = N_nodes_needed, NC = N_nodes_needed)

#Check
print(sub_file)

<xarray.Dataset> Size: 4MB
Dimensions:        (MC: 81, NC: 28, M: 81, N: 28, LSTSCI: 2, SIG_LYR: 1,
                    SIG_INTF: 2, KMAXOUT: 2, KMAXOUT_RESTR: 1, time: 16,
                    LSED: 1, KMAX: 1, LSEDTOT: 1, nlyr: 1, nlyrp1: 2, avgtime: 1)
Coordinates:
    XZ             (M, N) float64 18kB ...
    YZ             (M, N) float64 18kB ...
  * SIG_LYR        (SIG_LYR) float32 4B -0.5
  * SIG_INTF       (SIG_INTF) float32 8B 0.0 -1.0
  * KMAXOUT        (KMAXOUT) int32 8B 0 1
  * KMAXOUT_RESTR  (KMAXOUT_RESTR) int32 4B 0
  * time           (time) datetime64[ns] 128B 2020-05-03T01:00:00 ... 2020-05...
Dimensions without coordinates: MC, NC, M, N, LSTSCI, LSED, KMAX, LSEDTOT,
                                nlyr, nlyrp1, avgtime
Data variables: (12/46)
    XCOR           (MC, NC) float64 18kB 135.8 135.8 135.8 ... 135.8 135.8 135.8
    YCOR           (MC, NC) float64 18kB 34.8 34.8 34.8 ... 34.81 34.81 34.81
    ALFAS          (M, N) float32 9kB ...
    KCU            (MC, N) i

In [11]:
#re-add morphology and grid to the subset
# MFTAVG is not re-added; its assignment is disabled: sub_file["MFTAVG"] = mftavg
for var_name, saved_var in (("MORAVG", moravg), ("grid", grid)):
    sub_file[var_name] = saved_var

In [12]:
#write to file example
output_path = Path("rawdata/delft3d4flow_output_example.nc")
# Writing fails when the target folder is missing (e.g. on a fresh checkout),
# so create it first; this is a no-op when the folder already exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
sub_file.to_netcdf(output_path)