In [None]:
from os.path import join, basename, dirname
import numpy as np
import os
import pandas as pd
from os.path import join
# local libraries
from cmftools import get_outlets, get_catchments

In [None]:
# spatial libraries
import geopandas as gp 
from  shapely.geometry import Point
import rasterio
import numpy as np

def sample_map(x, y, fn_map, fn_catmxy=None, layer=1):
    """Sample one band of a raster file at the given point locations.

    Parameters
    ----------
    x, y : scalar or array-like
        Point coordinates. Without `fn_catmxy` they are passed to the
        raster's own ``index`` method, i.e. they are taken to be in the
        coordinate system of `fn_map` (the original code comment calls these
        "low resolution lat lon coordinates").
    fn_map : str
        Path of the raster file to sample.
    fn_catmxy : str, optional
        When given, row/col indices and the validity mask are obtained from
        ``model_index(x, y, fn_catmxy)`` instead.
        NOTE(review): ``model_index`` is not defined or imported in the
        visible part of this notebook -- confirm where it comes from.
    layer : int, optional
        1-based band number to read (default 1).

    Returns
    -------
    numpy.ndarray
        One value per point; NaN where the point is flagged invalid
        (outside the raster extent in the default branch).

    Raises
    ------
    IOError
        If `fn_map` is not an existing file.
    """
    if not os.path.isfile(fn_map):
        raise IOError("{} file not found".format(fn_map))
    with rasterio.open(fn_map, 'r') as src:
        if fn_catmxy is not None:
            # translate the coordinates to row/col via the catmxy index
            rows, cols, inside = model_index(x, y, fn_catmxy)
        else:
            # coordinates are looked up directly on the raster grid
            rows, cols = src.index(x, y)
            rows = np.atleast_1d(rows).astype(int)
            cols = np.atleast_1d(cols).astype(int)
            n_rows, n_cols = src.shape
            inside = (rows >= 0) & (rows < n_rows) & (cols >= 0) & (cols < n_cols)
        # NaN-initialised output; multiplying by NaN promotes integer bands to float
        values = np.ones(rows.size, dtype=src.dtypes[layer-1]) * np.nan
        # reading the full band and fancy-indexing it is faster than
        # rasterio's sample() as long as the band fits into memory
        band = src.read(layer)
        values[inside] = band[rows[inside], cols[inside]]
    return values

## update 191203

Add more metadata from the CMF map, derived with the pyflwdir package.

In [None]:
# spatial libraries
import rasterio.transform
# flow direction library
import pyflwdir # https://gitlab.com/deltares/wflow/pyflwdir

In [None]:
# read some cama-flood maps and parse flow direction data
# Purpose of this cell: derive a basin raster from the nextxy flow direction
# map and write it to 'flw_basins.tif' inside map_dir. That file is sampled
# further down in this section through sample_dict['basin_idx'].

map_dir = r'/home/dirk/models/cama-flood_bmi_v3.6.2_nc/map/global_15min'

fn_nextxy = join(map_dir, 'nextxy.tif')
with rasterio.open(fn_nextxy, 'r') as src:
    # read() without a band index returns all bands of the file
    nextxy = src.read()
    # NOTE: `transform` is not referenced again in the visible cells
    transform = src.transform
    flw = pyflwdir.FlwdirRaster(data=nextxy, ftype='nextxy')
# presumably one basin id per grid cell -- confirm against the pyflwdir docs;
# cast to int32 to match the dtype set on the output profile below
basins = flw.basins().astype(np.int32)
# NOTE: elevtn, rivlen and grarea are read here but not used in the visible cells
with rasterio.open(join(map_dir, 'elevtn.tif'), 'r') as src:
    elevtn = src.read(1)
with rasterio.open(join(map_dir, 'rivlen_grid.tif'), 'r') as src:
    rivlen = src.read(1)
with rasterio.open(join(map_dir, 'grarea.tif'), 'r') as src:
    grarea = src.read(1)
    # the profile of grarea.tif serves as template for the output file
    profile = src.profile
    
# override dtype and nodata value of the template for the integer basin map
profile.update(dtype=np.int32, nodata=-9999)
with rasterio.open(join(map_dir, 'flw_basins.tif'), 'w', **profile) as dst:
    dst.write(basins, 1)

In [None]:
# read simulation (meta) data
# Adds a 'basin_idx' column to the coupling table by sampling the basin map
# at the 15 arcmin CMF coordinates, then writes the result to a new csv file.
root = r'/scratch/compound_hotspots/'
ddir = join(root, 'data', '4-postprocessed')

# input: the table written by the "update 191019" section further down in this notebook
fn_csv_coupling = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191017.csv')
# output: the same table extended with the sampled columns
fn_csv_coupling2 = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191203.csv')

coupling = pd.read_csv(fn_csv_coupling, index_col='index')
lon, lat = coupling['cmf_lon_15min'], coupling['cmf_lat_15min']
sample_dict = {}
sample_dict['basin_idx'] = join(map_dir, 'flw_basins.tif')
for name in sample_dict:
    # builtin int replaces the removed np.int alias (same dtype).
    # NOTE(review): sample_map returns NaN for points outside the raster and
    # casting NaN to int is undefined -- confirm all locations are in domain.
    coupling[name] = sample_map(lon, lat, sample_dict[name]).astype(int)
# write the table that received the new column(s); the original wrote
# `coupling_sel`, a frame left over from another cell without 'basin_idx'
coupling.to_csv(fn_csv_coupling2, float_format='%.6f')


## update 191019
Fix the selection of stations whose uparea is too large.

In [None]:
# fixing error in uparea of earlier files
# Restrict the coupling table to the river mouths listed in the updated
# rivmth file and take over the attribute columns of that file.
root = r'/scratch/compound_hotspots'
ddir = join(root, 'data')
fn_csv_coupling = join(root, 'src', '1-prepare', r'cmf_gtsm_75km.csv')
fn_csv_coupling2 = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191017.csv')
fn_csv_rivmth = join(root, 'src', '1-prepare', r'rivmth_upa1e+09_dist1e+04_update191017.csv')

rivmth = pd.read_csv(fn_csv_rivmth, index_col=0)
rivmth = rivmth.drop(columns=['col_15min', 'row_15min', 'lat_15min', 'lon_15min'])
coupling = pd.read_csv(fn_csv_coupling, index_col='rivmth_idx')
coupling = coupling.drop(columns=['cmf_col_15min', 'cmf_row_15min'])

# keep only couplings whose river mouth index occurs in the rivmth table
# (reindex on the bare values, then drop the all-NaN rows of unmatched ids)
coupling_sel = coupling.reindex(rivmth.index.values).dropna(axis=0)
rivmth_sel = rivmth.reindex(coupling_sel.index)
# overwrite / append every rivmth attribute column
coupling_sel = coupling_sel.assign(**{col: rivmth_sel[col] for col in rivmth.columns})
coupling_sel = coupling_sel.assign(
    rivwth=coupling_sel['rivwth'].round(0),
    dist=coupling_sel['dist'].round(0),
)
# integer id/size columns, indexed and sorted by couple_id, index labelled 'index'
coupling_sel = (
    coupling_sel
    .reset_index()
    .astype({name: int for name in ['couple_id', 'gtsm_idx', 'rivwth', 'rivmth_idx', 'dist']})
    .set_index('couple_id')
    .sort_index()
    .rename_axis('index')
)
coupling_sel.to_csv(fn_csv_coupling2, float_format='%.6f')

In [None]:
# write list with locations which are incorrectly used
# i.e. the couplings whose river mouth index is absent from the `rivmth` table
fn_csv_coupling2 = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191017_wrong.csv')
coupling = pd.read_csv(fn_csv_coupling, index_col='rivmth_idx')
coupling = coupling.drop(columns=['cmf_col_15min', 'cmf_row_15min'])

# river mouth ids of the coupling table that do not occur in rivmth
idx_false = coupling.index[~coupling.index.isin(rivmth.index)].tolist()
coupling_false = (
    coupling
    .reindex(idx_false)
    .dropna(axis=0)
    .drop(columns=['uparea', 'rivwth'])
    .assign(dist=lambda df: df['dist'].round(0))
    .reset_index()
    .astype({name: int for name in ['couple_id', 'gtsm_idx', 'rivmth_idx', 'dist']})
    .set_index('couple_id')
    .sort_index()
    .rename_axis('index')
)
coupling_false.to_csv(fn_csv_coupling2, float_format='%.6f')

## previous

Retrieve the CMF outlet locations and sample their attributes (uparea, rivwth, rivhgt, elevtn, dist2coast) from the maps.

In [None]:
# get outlets
outlets = get_outlets(fn_nextxy, fn_lonlat, sample_dict={}, fn_out=fn_outlets_all, res_str='15min')
print(len(outlets))
outlets.head()

In [None]:
map_dir = r'/home/dirk/models/cama-flood_bmi_v3.6.2_nc/map/global_15min'
NE_dir = r'/home/dirk/datasets/NaturalEarth'
# I/O
fn_nextxy = join(map_dir, "nextxy.tif")
fn_lonlat = join(map_dir, "lonlat.tif")
fn_outlets_all = join(map_dir, 'rivmth_all.txt')
# sample metadata from
names = ['uparea', 'rivwth', 'rivhgt', 'elevtn']
sample_dict = {name: join(map_dir, "{}.tif".format(name)) for name in names}
sample_dict

In [None]:
# Read the pre-filtered outlet list and attach map attributes to every outlet.
min_uparea = 1e9  # 1000 km2
max_dist = 10e3  # 10km
fn_outlets = join(
    map_dir,
    'rivmth_upa{:.0e}_dist{:.0e}.txt'.format(min_uparea, max_dist),
)
outlets = pd.read_csv(fn_outlets, index_col=0)

# attributes on the 15 arcmin model grid, sampled at the 15min coordinates
lon, lat = outlets['lon_15min'], outlets['lat_15min']
for name, fn_map in sample_dict.items():
    outlets[name] = sample_map(lon, lat, fn_map)

# distance to coast, sampled at the high resolution outlet coordinates
lon, lat = outlets['lon'], outlets['lat']
fn_dist2coast = join(NE_dir, 'ne_10m_dist2coast_eucl_1min.tif')
outlets['dist2coast'] = sample_map(lon, lat, fn_dist2coast)

In [None]:
# filter: keep outlets with uparea above min_uparea and dist2coast below max_dist
min_uparea = 1e9  # 1000 km2
max_dist = 10e3  # 10km
fn_outlets = join(map_dir, 'rivmth_upa{:.0e}_dist{:.0e}_update191017.csv'.format(min_uparea, max_dist))
# .copy() gives an independent frame, so the unit conversion below neither
# triggers pandas' SettingWithCopyWarning nor depends on view/copy semantics
outlets_select = outlets[
    np.logical_and(outlets['uparea'].values > min_uparea, outlets['dist2coast'].values < max_dist)
].copy()
# 1e9 is annotated as 1000 km2 above, so uparea is in m2; store it in km2
outlets_select['uparea'] = outlets_select['uparea'] / 1e6
outlets_select.to_csv(fn_outlets, float_format='%.6f')

# display the selection, largest uparea first
outlets_select.sort_values(by='uparea', ascending=False)