In [None]:
import pandas as pd
import geopandas as gp
import xarray as xr
import numpy as np
from os.path import join, basename
import glob
import gistools
import rasterio

In [None]:
# --- directories (NOTE: absolute, machine-specific paths) ---
map_dir = r'/home/dirk/models/CaMa-Flood_v3.6.2/map/global_15min'
gtsm_dir = r'/home/dirk/datasets/GTSM'

# --- thresholds; both are encoded in the outlets file name below ---
min_uparea = 1e9  # 1000 km2
max_dist = 10e3  # 10km

# --- I/O ---
outlets_basename = 'rivmth_upa{:.0e}_dist{:.0e}.txt'.format(min_uparea, max_dist)
fn_outlets = join(map_dir, outlets_basename)
# fn_gtsm_ts = join(gtsm_dir, 'global_model_waterlevel_1980_masked.nc')
fn_gtsm_shp = join(gtsm_dir, 'gtsm_valid.csv')

In [None]:
# Load the table of valid GTSM stations and turn it into a GeoDataFrame,
# using the station x/y coordinate columns for the geometry.
df = pd.read_csv(fn_gtsm_shp)
gdf_gtsm = (
    gistools
    .pandas2geopandas(df, x_col='station_x_coordinate', y_col='station_y_coordinate')
    .rename(columns={'stations': 'idx'})  # station identifier is called 'idx' from here on
)
gdf_gtsm.head()

In [None]:
# Load the river-mouth points (first csv column is the index) as a GeoDataFrame.
df_rivmth = pd.read_csv(fn_outlets, index_col=0)
gdf_rivmth = gistools.pandas2geopandas(df_rivmth)

# Order by 15min grid cell (col, row) and then by upstream area; the original
# index becomes a regular column, with 'id' renamed to 'rivmth_idx'.
gdf_rivmth = (
    gdf_rivmth
    .sort_values(['col_15min', 'row_15min', 'uparea'])
    .reset_index()
    .rename(columns={'id': 'rivmth_idx'})
)
print(len(gdf_rivmth))
gdf_rivmth.head()

In [None]:
from gistools.vector_tools import sjoin_nn

# Couple every river mouth to its nearest GTSM station; columns coming from
# the station table get the 'gtsm' prefix. max_dist=75e3 corresponds to the
# '75km' tag used in the output file names.
gdf_merge = sjoin_nn(gdf_rivmth, gdf_gtsm, max_dist=75e3, prefix='gtsm')

# delete stations that did not get coupled (non-finite gtsm_idx)
is_coupled = np.isfinite(gdf_merge['gtsm_idx'])
gdf_merge = gdf_merge[is_coupled]
gdf_merge = gdf_merge.reset_index().drop(columns=['index', 'gtsm_index'])

# the fresh 0..n-1 index identifies a (river mouth, station) couple
gdf_merge.index.name = 'couple_id'
gdf_merge['gtsm_idx'] = gdf_merge['gtsm_idx'].astype(int)
print(len(gdf_merge))

# couple every station to one (the largest) riv mouth
# gdf_merge = gdf_merge.sort_values(['gtsm_idx', 'uparea']).drop_duplicates('gtsm_idx', keep='last')
# print(len(gdf_merge))
gdf_merge.head()

In [None]:
# Store the river-mouth coordinates (first vertex of each geometry) as plain columns.
rivmth_xy = [geom.coords[0] for geom in gdf_merge.geometry]
gdf_merge['rivmth_lon'], gdf_merge['rivmth_lat'] = zip(*rivmth_xy)

In [None]:
# add egm correction: sample the offset raster at every coupled GTSM station
fn = r'/home/dirk/datasets/EGM_correctie/EGM96_offset_filled.tif'
lats = gdf_merge['gtsm_lat'].values
lons = gdf_merge['gtsm_lon'].values
with rasterio.open(fn) as ds:
    # station coordinates -> raster (row, col) indices
    rows, cols = ds.index(lons, lats)
    rows = np.asarray(rows)
    cols = np.asarray(cols)
    # read the first band once, then pick one value per station
    egm_band = ds.read(1)
    gdf_merge['gtsm_egm_offset'] = egm_band[rows, cols]

We save the coupled stations (1:n) to a GIS (GeoJSON) file and a csv file. The csv file is used to create an nc file of the selected GTSM stations (some of which are duplicates). mask_resample_fes and mask_resample_gtsm then create the tide, surge and total water level nc files that are used to force CMF.

In [None]:
import os
from shapely.geometry import LineString

fn_out = join(gtsm_dir, 'cmf_gtsm_75km.geojson')
# Remove a stale output before writing (presumably because the GeoJSON driver
# does not overwrite an existing file -- TODO confirm).
if os.path.isfile(fn_out):
    os.unlink(fn_out)

# River-mouth coordinate = first vertex of the current geometry. Taking only
# the first vertex (instead of all coords) keeps this cell idempotent: on a
# re-run the geometry is already a line whose first vertex is still the river
# mouth, so no extra vertex gets appended.
rivmth_xy = [geom.coords[0] for geom in gdf_merge.geometry]
gdf_merge['lon'], gdf_merge['lat'] = zip(*rivmth_xy)

# Replace each point by a two-vertex line: river mouth -> coupled GTSM station.
ls = [LineString([xy, (lon, lat)]) for (xy, lon, lat) in
      zip(rivmth_xy, gdf_merge.gtsm_lon, gdf_merge.gtsm_lat)]
gdf_merge['geometry'] = ls
gdf_merge.to_file(fn_out, driver='GeoJSON')
# gdf_merge.head()

In [None]:
# Column renames for the csv export; keys that are not present in the frame
# are silently ignored by DataFrame.rename.
rm_dict = {
    'col_15min': 'cmf_col_15min',
    'row_15min': 'cmf_row_15min',
    'lat_15min': 'cmf_lat_15min',
    'lon_15min': 'cmf_lon_15min',
    'index': 'cmf_rivmth_id',
    'gtsm_dist': 'dist',
}
gdf_renamed = gdf_merge.rename(columns=rm_dict)
gdf_out = gdf_renamed.drop(columns='geometry')

# same base name as the GeoJSON output, csv extension
fn_csv = fn_out.replace('.geojson', '.csv')
gdf_out.to_csv(fn_csv)

In [None]:
# River mouths that did not end up in any couple. The vectorised isin-mask
# keeps the order of gdf_rivmth (like the former per-element `not in` scan)
# but avoids its O(n*m) cost and keeps the id dtype when nothing is left over.
is_coupled = gdf_rivmth['rivmth_idx'].isin(gdf_merge['rivmth_idx'])
rivmth_not_coupled = gdf_rivmth.loc[~is_coupled, 'rivmth_idx'].values

# Select those river mouths by id and write them next to the main output.
gdf_rivmth_not = gdf_rivmth.set_index('rivmth_idx').loc[rivmth_not_coupled, :]
gdf_rivmth_not.to_file(fn_out.replace('.geojson', '_notcoupled.geojson'), driver='GeoJSON')

In [None]:
# sanity check: (river mouths without a station, all river mouths, couples)
n_not_coupled = len(rivmth_not_coupled)
n_rivmth = len(gdf_rivmth)
n_couples = len(gdf_merge)
n_not_coupled, n_rivmth, n_couples

In [None]:
# %matplotlib notebook
# gdf_merge['gtsm_dist'].plot(kind='hist')

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
from gistools.plot_tools import basemap

fig, ax = basemap(figsize=(8, 4), gridlines=True)
ax=ax[0]
ax.coastlines()
# gdf_rivmth.plot(ax=ax, column='uparea', cmap='Blues', alpha=1, legend=True, vmin=1e8, vmax=1e12, s=20)
gdf_rivmth.set_index('rivmth_idx').loc[rivmth_not_coupled, :].plot(ax=ax, c='red') #column='uparea', cmap='Oranges', alpha=1, legend=True, vmin=1e8, vmax=1e12, s=15)

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
from gistools.plot_tools import basemap

fig, ax = basemap(figsize=(8, 4), gridlines=True)
ax=ax[0]
ax.coastlines()
gdf_merge.plot(ax=ax, color='c', linewidth=3)
gdf_rivmth.set_index('rivmth_idx').loc[gdf_merge.rivmth_idx, :].plot(ax=ax, color='b')
gdf_gtsm.set_index('idx').loc[gdf_merge.gtsm_idx, :].plot(ax=ax, color='r')

Write the CMF input reference coupling file, which is ordered the same way as the nc file. Because of the 1:n (rivmouth to GTSM) coupling, nc files with duplicate timeseries for surge, tide and water level have to be created.

In [None]:
# save binary output data
# (rasterio is already imported in the notebook's first cell)
ref_bin_fn = join(map_dir, 'cmf_gtsm_75km.bin')
tif_fn = join(map_dir, 'cmf_gtsm_75km.tif')
fn_lonlat = join(map_dir, "lonlat.tif")

# Grid shape and raster profile are taken from lonlat.tif; the output raster
# is single-band.
with rasterio.open(fn_lonlat, 'r') as src:
    shape = src.shape
    profile = src.profile.copy()
    profile['count'] = 1

# 0-based grid indices of every couple, in gdf_merge (couple_id) order.
cama_iy = gdf_merge['row_15min'].values
cama_ix = gdf_merge['col_15min'].values

# (n_couples, 2) int32 table of [ix, iy], shifted by one to fortran indexing.
# Built in one vectorised step instead of growing an array with np.append.
ref = np.column_stack([cama_ix + 1, cama_iy + 1]).astype(np.int32)
ref.tofile(ref_bin_fn)

# Raster holding the coupled gtsm_idx per grid cell, -9999 elsewhere.
# Several couples can share one cell; keep the LAST one in gdf_merge order
# explicitly, because NumPy does not guarantee which value wins when a fancy
# index assignment hits the same element more than once.
refmap = np.full(shape, -9999, dtype=profile['dtype'])
last_per_cell = gdf_merge.drop_duplicates(['row_15min', 'col_15min'], keep='last')
refmap[last_per_cell['row_15min'].values,
       last_per_cell['col_15min'].values] = last_per_cell['gtsm_idx'].values

# save refmap to geotiff
with rasterio.open(tif_fn, 'w', **profile) as dst:
    dst.write(refmap, 1)

ref

In [None]:
# check Hiroaki's coupling matrix
# ref_bin_fn = join(map_dir, 'ref_global.bin')
# tif_fn = join(map_dir, 'ref_global.tif')
# ref = np.fromfile(ref_bin_fn, dtype=np.int32).reshape(-1,2)

# refmap = np.ones(shape, dtype=profile['dtype']) * -9999
# for idx, (cama_ix, cama_iy) in enumerate(ref):
#     if cama_ix == -9999: continue
#     refmap[cama_iy-1, cama_ix-1] = idx

# # save refmap to geotiff
# with rasterio.open(tif_fn, 'w', **profile) as dst:
#     dst.write(refmap, 1)
    
# np.sum(ref[:, 0]!=-9999)