Write a zarr store (and a netCDF copy) from a set of per-year netCDF files

In [None]:
import xarray as xr
from os.path import join
from datetime import timedelta, datetime
import pandas as pd
import numpy as np

In [None]:
# Read the river-mouth attribute table and the river-mouth -> GTSM station coupling.

# ddir = r'/scratch/cama/output'
ddir = r'/home/glofris1/VIDI/experiments/CaMaFlood_e2o/output'
# gtsm_dir = r'/scratch/gtsm'
gtsm_dir = join(ddir, '../GTSM')
# out_dir = r'/scratch/dash/compound'
out_dir = join(ddir, '../analysis')

# threshold on the long-term mean discharge [m3/s];
# only rows with q_mean strictly above it are kept
qmin = 0

attrs_path = join(ddir, 'rivmth+gtsm_attrs.csv')
attrs = pd.read_csv(attrs_path, index_col=0)
print(len(attrs))  # number of rows before filtering
exceeds_qmin = attrs['q_mean'] > qmin
attrs = attrs.loc[exceeds_qmin]
print(len(attrs))  # number of rows after filtering
index = attrs.index.values

# reindex so the coupling rows align one-to-one (and in order) with the filtered attributes
coupling_path = join(gtsm_dir, r'cmf_gtsm_75km.csv')
coupling = pd.read_csv(coupling_path, index_col='rivmth_idx')
coupling = coupling.reindex(index)
gtsm_stations = coupling['gtsm_idx'].values

In [None]:
# Combine the daily GTSM netCDF files into a single dataset, attach CF-style
# metadata and write the result to both a zarr store and a netCDF file.
fn_out = join(out_dir, 'GTSR_day.zarr')
out_chunks = {'time': -1, 'station_id': 100}

# old name -> new name; applied after the original 'station_id' variable is dropped,
# so the 'stations' dimension can take over that name
rm_coords = {
    'station_x_coordinate': 'lon',
    'station_y_coordinate': 'lat',
    'stations': 'station_id',
    'mean_surge_seas': 'seasonal_surge'
}

ds_gtsm = xr.open_mfdataset(join(gtsm_dir, r"gtsm_select_*_day.nc"))
# label the station dimension with the GTSM indices from the coupling table
# NOTE(review): assumes the stations in the files are ordered like `coupling` -- confirm
ds_gtsm['stations'] = xr.Variable(['stations'], gtsm_stations)
ds_gtsm = ds_gtsm.transpose('stations', 'time')

## set coordinates
# drop_vars is the non-deprecated replacement for Dataset.drop when removing variables
ds_gtsm = ds_gtsm.drop_vars(['station_id', 'max_surge_seas', 'min_surge_seas']).rename(rm_coords)
ds_gtsm['lon'].attrs = dict(
    long_name = "longitude",
    units = "degrees_east",
    standard_name = "longitude"
)
ds_gtsm['lat'].attrs = dict(
    long_name = "latitude",
    units = "degrees_north",
    standard_name = "latitude"
)
# scalar variable that only carries the grid-mapping attributes referenced
# through grid_mapping="crs" on the variables below
ds_gtsm['crs'] = 0
ds_gtsm['crs'].attrs = dict(
    grid_mapping_name = "latitude_longitude",
    longitude_of_prime_meridian = 0.0,
    semi_major_axis = 6378137.0,
    inverse_flattening = 298.257223563,
    epsg = 4326
)
ds_gtsm['station_id'].attrs = dict(
    long_name = "GTSM station index",
    units = "-",
)
ds_gtsm['station_name'].attrs = dict(
    long_name = "GTSM station name",
    units = "-",
)

## add variables and set metadata
ds_gtsm['rivmth_id'] = xr.Variable(['station_id'], coupling.index.values)
ds_gtsm['rivmth_id'].attrs = dict(
    long_name = "CaMa-Flood river mouth index",
    units = "-",
)

# .values keeps this consistent with the other per-station variables below
ds_gtsm['egm_offset'] = xr.Variable(['station_id'], coupling['gtsm_egm_offset'].values)
ds_gtsm['egm_offset'].attrs.update(
    long_name = 'vertical offset between msl and the egm96 datum',
    positive = 'up',
    units = 'm',
    grid_mapping = "crs"
)
ds_gtsm['rivmth_to_station_distance'] = xr.Variable(['station_id'], coupling['dist'].values)
ds_gtsm['rivmth_to_station_distance'].attrs.update(
    long_name = 'horizontal distance from CaMa-Flood river mouth to the coupled GTSM station',
    units = 'm'
)
ds_gtsm['tidal_range'] = xr.Variable(['station_id'], attrs['tidal_range'].values)
ds_gtsm['tidal_range'].attrs.update(
    standard_name = 'sea_surface_height_amplitude_due_to_geocentric_ocean_tide',
    long_name = 'FES2012 tidal range',
    units = 'm'
)

# variable name -> (standard_name, long_name) for the sea level variables;
# all of them share units 'm' and the 'crs' grid mapping
sea_level_attrs = {
    'high_wl': (
        'sea_surface_height_above_mean_sea_level',
        'daily maximum sea surface level from surge+tide components',
    ),
    'low_wl': (
        'sea_surface_height_above_mean_sea_level',
        'daily minimum sea surface level from surge+tide components',
    ),
    'high_tide': (
        'sea_surface_height_due_to_tide',
        'daily maximum astronomical tide',
    ),
    'low_tide': (
        'sea_surface_height_due_to_tide',
        'daily minimum astronomical tide',
    ),
    'max_surge': (
        'sea_surface_height_due_to_surge',
        'daily maximum surge (barotropic variations of MSL)',
    ),
    'mean_surge': (
        'sea_surface_height_due_to_surge',
        'daily mean surge (barotropic variations of MSL)',
    ),
    'min_surge': (
        'sea_surface_height_due_to_surge',
        'daily minimum surge (barotropic variations of MSL)',
    ),
    'seasonal_surge': (
        'sea_surface_height_due_to_surge',
        'daily 90-day moving average surge (barotropic variations of MSL)',
    ),
    'skew_surge': (
        'sea_surface_height_due_to_surge',
        'daily difference between high_wl and high_tide (skewed non-tidal residual)',
    ),
}
for var_name, (std_name, descr) in sea_level_attrs.items():
    ds_gtsm[var_name].attrs.update(
        standard_name = std_name,
        long_name = descr,
        units = 'm',
        grid_mapping = "crs"
    )

# global attributes
ds_gtsm.attrs = dict(
    title = 'global tide and surge reanalysis (GTSR) excerpt',
    source = 'GTSM (Muis et al. 2016)',
    institution = 'Institute for Environmental Studies (IVM) - Vrije Universiteit Amsterdam',
    author = 'Dirk Eilander (dirk.eilander@vu.nl)',
    # CF defines this global attribute with a capital C
    Conventions = "CF-1.7",
    date_created = str(datetime.now().date()),
    history = 'created using xarray v' + xr.__version__,
)

# rechunk once and reuse the result for both output formats
ds_out = ds_gtsm.chunk(out_chunks)
ds_out.to_zarr(fn_out, mode='w')
ds_out.to_netcdf(fn_out.replace('.zarr', '.nc'), mode='w')
ds_gtsm

In [None]:
# Bare last expression: shows the dataset repr as the cell output for a final visual check.
ds_gtsm