# Load waterbodies

This notebook loads waterbodies time series surface areas and other features. The end result is an HDF5 file organised by drainage region.

## Setup

In [3]:
%config IPython.use_jedi = False

### Load modules

In [4]:
%matplotlib widget

from pathlib import Path

import joblib
import fiona
import numpy as np
import geopandas as gpd
import pandas as pd
from tqdm.notebook import tqdm
import h5py

### Load data

In [5]:
# waterbody_shp_path = Path('/g/data/r78/cek156/dea-notebooks/Scientific_workflows/DEAWaterbodies/AusAllTime01-005HybridWaterbodies/AusWaterBodiesFINAL.shp')
waterbody_shp_path = Path('/g/data/r78/cek156/dea-notebooks/Scientific_workflows/DEAWaterbodies/NLIDGGSData/DEAwaterbody_withStreamData_andGAwaterbodynames.shp')
waterbody_csv_path = Path('/g/data/r78/cek156/dea-notebooks/Scientific_workflows/DEAWaterbodies/timeseries_aus_uid/')
surface_area_threshold = 50

In [6]:
waterbody_shapes = gpd.read_file(waterbody_shp_path).to_crs('EPSG:3577')

Join with the BOM drainage divisions. I grabbed these from the v2.1.1 Geofabric Reporting Regions and converted them from gdb + WGS84 to GeoJSON + Australian Albers in QGIS.

In [7]:
drainage = gpd.read_file('bom_drainagedivisions_v2p1p1.geojson')

Join these with the BOM river regions. Same source as the above.

In [8]:
waterbody_shapes = gpd.sjoin(waterbody_shapes, drainage, how='left', op='within', lsuffix='', rsuffix='_bom_drainage')

In [9]:
riverregions = gpd.read_file('bom_riverregions_v2p1p1.geojson')

In [10]:
waterbody_shapes = gpd.sjoin(waterbody_shapes, riverregions, how='left', op='within', lsuffix='', rsuffix='_bom_riverregions')

Finally join these with the GA 1997 River Basins dataset, aggregated by basin to remove states.

In [11]:
basins = gpd.read_file('river_basins_ga_1997_aggregated.geojson')

In [12]:
waterbody_shapes = gpd.sjoin(waterbody_shapes, basins, how='left', op='within', lsuffix='', rsuffix='_ga_basins')

Filter out the columns we care about.

In [13]:
columns = [
    'UID', 'Stream', 'Hierarchy', 'Perennial', 'FEATURETYP', 'TEXTNOTE', 'geometry', 'DivNumber', 'Division_', 'RivRegNum', 'RivRegName',
    'BNAME', 'BNUM', 'RNAME', 'RNUM', 'DNAME', 'DNUM']

In [14]:
waterbody_shapes = waterbody_shapes[columns]

In [15]:
waterbody_shapes.head()

Unnamed: 0,UID,Stream,Hierarchy,Perennial,FEATURETYP,TEXTNOTE,geometry,DivNumber,Division_,RivRegNum,RivRegName,BNAME,BNUM,RNAME,RNUM,DNAME,DNUM
0,rj25uzs2b,,Minor,Non Perennial,Aquaculture Area,aquafarm abandoned,"POLYGON ((349050.000 -1570100.000, 349075.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
1,rj2hhbdvq,,Minor,Non Perennial,Aquaculture Area,aquafarm abandoned,"POLYGON ((349375.000 -1569775.000, 349425.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
2,rj25ur05e,,,,Aquaculture Area,aquafarm abandoned,"POLYGON ((346950.000 -1570150.000, 346975.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
3,rj25ux1z6,,,,Aquaculture Area,aquafarm abandoned,"POLYGON ((347825.000 -1570650.000, 347900.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
4,rj25uxbfx,,,,Aquaculture Area,aquafarm abandoned,"POLYGON ((347600.000 -1570000.000, 347625.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX


Where there was only partial intersection, waterbodies have not been allocated to a drainage division, river region, or basin. Remedy this by manually iterating through and assigning them to the region with maximum overlap.

In [16]:
missing_D = waterbody_shapes.Division_.isnull()
missing_R = waterbody_shapes.RivRegNum.isnull()
missing_B = waterbody_shapes.BNAME.isnull()

print(missing_D.sum(), 'waterbodies are missing divisions')
print(missing_R.sum(), 'waterbodies are missing rivers')
print(missing_B.sum(), 'waterbodies are missing basins')

2925 waterbodies are missing divisions
3486 waterbodies are missing rivers
2036 waterbodies are missing basins


In [17]:
subjoin_D = gpd.sjoin(waterbody_shapes[missing_D], drainage, how='left', op='intersects')
# Take the first one, whatever it is.
subjoin_D = subjoin_D.reset_index().drop_duplicates(subset=['UID'], keep='first')
subjoin_D = subjoin_D.set_index('UID')

In [18]:
subjoin_R = gpd.sjoin(waterbody_shapes[missing_R], drainage, how='left', op='intersects')
subjoin_R = subjoin_R.reset_index().drop_duplicates(subset=['UID'], keep='first')
subjoin_R = subjoin_R.set_index('UID')

subjoin_B = gpd.sjoin(waterbody_shapes[missing_B], drainage, how='left', op='intersects')
subjoin_B = subjoin_B.reset_index().drop_duplicates(subset=['UID'], keep='first')
subjoin_B = subjoin_B.set_index('UID')

In [19]:
waterbody_shapes = waterbody_shapes.set_index('UID')

In [20]:
waterbody_shapes.loc[subjoin_D.index]['DivNumber'] = subjoin_D.DivNumber_right
waterbody_shapes.loc[subjoin_D.index]['Division_'] = subjoin_D.Division

In [21]:
waterbody_shapes

Unnamed: 0_level_0,Stream,Hierarchy,Perennial,FEATURETYP,TEXTNOTE,geometry,DivNumber,Division_,RivRegNum,RivRegName,BNAME,BNUM,RNAME,RNUM,DNAME,DNUM
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
rj25uzs2b,,Minor,Non Perennial,Aquaculture Area,aquafarm abandoned,"POLYGON ((349050.000 -1570100.000, 349075.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
rj2hhbdvq,,Minor,Non Perennial,Aquaculture Area,aquafarm abandoned,"POLYGON ((349375.000 -1569775.000, 349425.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
rj25ur05e,,,,Aquaculture Area,aquafarm abandoned,"POLYGON ((346950.000 -1570150.000, 346975.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
rj25ux1z6,,,,Aquaculture Area,aquafarm abandoned,"POLYGON ((347825.000 -1570650.000, 347900.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
rj25uxbfx,,,,Aquaculture Area,aquafarm abandoned,"POLYGON ((347600.000 -1570000.000, 347625.000 ...",12,Carpentaria Coast,4,ROPER RIVER,ROPER RIVER,3,ROPER,A,GULF OF CARPENTARIA,IX
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rhehehne4,FIERY CREEK,Major,Non Perennial,,,"POLYGON ((773425.000 -2041850.000, 773425.000 ...",12,Carpentaria Coast,13,NICHOLSON-LEICHHARDT RIVERS,LEICHHARDT RIVER,13,LEICHHARDT,D,GULF OF CARPENTARIA,IX
rhehtm1un,SANDY CREEK,Major,Non Perennial,,,"POLYGON ((783350.000 -2040950.000, 783400.000 ...",12,Carpentaria Coast,13,NICHOLSON-LEICHHARDT RIVERS,LEICHHARDT RIVER,13,LEICHHARDT,D,GULF OF CARPENTARIA,IX
rhehttv5k,,Minor,Non Perennial,,,"POLYGON ((784725.000 -2040875.000, 784750.000 ...",12,Carpentaria Coast,13,NICHOLSON-LEICHHARDT RIVERS,LEICHHARDT RIVER,13,LEICHHARDT,D,GULF OF CARPENTARIA,IX
rhehm1tgq,,Minor,Non Perennial,,,"POLYGON ((782150.000 -2048100.000, 782175.000 ...",12,Carpentaria Coast,13,NICHOLSON-LEICHHARDT RIVERS,LEICHHARDT RIVER,13,LEICHHARDT,D,GULF OF CARPENTARIA,IX


In [22]:
waterbody_shapes.loc[subjoin_R.index]['RivRegNum'] = subjoin_R.RivRegNum
waterbody_shapes.loc[subjoin_R.index]['RivRegName'] = subjoin_R.RivRegName

In [23]:
waterbody_shapes.loc[subjoin_B.index]['BNAME'] = subjoin_B.BNAME
waterbody_shapes.loc[subjoin_B.index]['BNUM'] = subjoin_B.BNUM
waterbody_shapes.loc[subjoin_B.index]['RNAME'] = subjoin_B.RNAME
waterbody_shapes.loc[subjoin_B.index]['RNUM'] = subjoin_B.RNUM
waterbody_shapes.loc[subjoin_B.index]['DNAME'] = subjoin_B.DNAME
waterbody_shapes.loc[subjoin_B.index]['DNUM'] = subjoin_B.DNUM

In [133]:
waterbody_shapes.to_file('waterbodies_joined_drainage_basins.shp')

Load the time series data for each waterbody.

In [10]:
all_time_series = []
for i, shape in tqdm(waterbody_shapes.iterrows(), total=len(waterbody_shapes)):
    uid = shape.UID
    csv_path = waterbody_csv_path / uid[:4] / f'{uid}.csv'
    try:
        time_series = pd.read_csv(csv_path)
    except FileNotFoundError:
        print('Couldn\'t find', uid)
        time_series = all_time_series[-1].copy()
        time_series['pc_wet'] = np.nan
        time_series['px_wet'] = np.nan
    # Relabel the third column to something consistent, and rename all columns to something
    # easier to access.
    time_series.rename(columns={
        'Observation Date': 'date',
        'Wet pixel percentage': 'pc_wet',
        time_series.columns[2]: 'px_wet',
        }, inplace=True)
    # Convert time strings into datetimes.
    time_series.date = pd.to_datetime(time_series.date)
    # Store the actual number of pixels too.
    n_pixels = shape.geometry.area // (25 ** 2)
    time_series.attrs['px_tot'] = n_pixels  # attrs is experimental.
    all_time_series.append(time_series)

HBox(children=(FloatProgress(value=0.0, max=295902.0), HTML(value='')))




KeyboardInterrupt: 

Or if those are already loaded...

In [24]:
all_time_series = joblib.load('all_time_series.joblib')

In [25]:
len(all_time_series)

295902

In [26]:
waterbodies = waterbody_shapes

In [27]:
assert len(all_time_series) == len(waterbody_shapes)

## Interpolate histories

The next bit of code is memory-intensive, so only operate on one drainage division at a time.

In [28]:
divisions = waterbodies.Division_.unique()
divisions = [d if isinstance(d, str) else 'None' for d in divisions]
divisions

['Carpentaria Coast',
 'None',
 'Pilbara-Gascoyne',
 'Tanami-Timor Sea Coast',
 'South Australian Gulf',
 'Tasmania',
 'South West Coast',
 'South East Coast (Victoria)',
 'South East Coast (NSW)',
 'Murray-Darling Basin',
 'Lake Eyre Basin',
 'South Western Plateau',
 'North East Coast',
 'North Western Plateau']

Define the time range to interpolate over:

In [29]:
dates = np.arange(np.datetime64('1986-08-16'), np.datetime64('2020-07-19'), 1)

Initialise the HDF5 file.

In [30]:
n_all_basins = 0

In [31]:
with h5py.File('interpolated_waterbodies_by_division_and_basin_fixeduids.h5', 'w') as f:
    for d in divisions:
        print(d)
        g = f.require_group(d)
        for basin in waterbodies[waterbodies.Division_ == d].BNAME.unique():
            if basin is None:
                basin = 'NONE'
            try:
                g.require_group(basin)
            except AttributeError:
                # nan - there was no matching basin
                basin = 'NONE'
                g.require_group(basin)
            print(f'\t/{basin}')
            n_all_basins += 1

Carpentaria Coast
	/ROPER RIVER
	/EMBLEY RIVER
	/NONE
	/WATSON RIVER
	/ARCHER RIVER
	/COLEMAN RIVER
	/HOLROYD RIVER
	/FLINDERS RIVER
	/LEICHHARDT RIVER
	/MORNING INLET
	/NICHOLSON RIVER
	/GILBERT RIVER
	/NORMAN RIVER
	/STAATEN RIVER
	/MITCHELL RIVER (WA)
	/HERBERT RIVER
	/TORRES STRAIT ISLANDS
	/McARTHUR RIVER
	/GROOTE EYLANDT
	/CALVERT RIVER
	/WALKER RIVER
	/SETTLEMENT CREEK
	/MORNINGTON ISLAND
	/LIMMEN BIGHT RIVER
	/KOOLATONG RIVER
	/ROSIE RIVER
	/ROBINSON RIVER
	/TOWNS RIVER
	/BARKLY
	/DALY RIVER
	/DUCIE RIVER
	/JARDINE RIVER
	/WENLOCK RIVER
	/BUCKINGHAM RIVER
	/GEORGINA RIVER
	/WISO
None
Pilbara-Gascoyne
	/GREENOUGH RIVER
	/PORT HEDLAND COAST
	/ONSLOW COAST
	/LYNDON-MINILYA RIVERS
	/WOORAMEL RIVER
	/GASCOYNE RIVER
	/FORTESCUE RIVER
	/MURCHISON RIVER
	/SALT LAKE
	/ASHBURTON RIVER
	/NONE
	/YARRA YARRA LAKES
	/SANDY DESERT
Tanami-Timor Sea Coast
	/FINNISS RIVER
	/ADELAIDE RIVER
	/MARY RIVER (WA)
	/EAST ALLIGATOR RIVER
	/MACKAY
	/DALY RIVER
	/WISO
	/NONE
	/LENNARD RIVER
	/DRYSDALE RIVE

Then do the interpolation for each basin.

In [None]:
dt_index = pd.DatetimeIndex(dates)

for (div, basin), wbs in tqdm(waterbodies.groupby(['Division_', 'BNAME']), total=n_all_basins):
    in_division_indices = np.arange(len(all_time_series))[(waterbodies.Division_ == div) & (waterbodies.BNAME == basin)]
#     in_division_wbs = waterbodies[waterbodies.Division__dea_wb == division]
    
    if basin is None:
        basin = 'NONE'

    # Initialise the HDF5 array.
    with h5py.File('interpolated_waterbodies_by_division_and_basin_fixeduids.h5', 'r+') as f:
        group = f[div][basin]
        hds_pc = group.require_dataset('pc_wet', (len(wbs), len(dt_index)), dtype='float16')
        hds_uid = group.require_dataset('uid', data=wbs.index.values.astype('S9'), dtype='S9', shape=wbs.index.shape)

        # Round every date to the nearest day and set date to be the index.
        # Note that we also have to drop the timezone, which pandas assumes is UTC.
        # If pandas did not assume it was UTC - maybe it assumed UTC+11 for example - then this would also do
        # a conversion into UTC, which is probably not what we want.
        for i, history_i in enumerate(tqdm(in_division_indices, position=1, leave=False)):
            history = all_time_series[history_i]
            history.date = history.date.dt.round('1d')
            history = history.set_index('date', drop=True)
            history.index = history.index.tz_convert(None)
            # Merge duplicate dates into one.
            history = history.groupby('date').mean()
            # Then reindex with the full list of dates.
            history = history.reindex(dt_index) 
            # Linearly interpolate.
            history = history.interpolate(limit_direction='both')
            # Finally, store it in the HDF5 dataset.
            hds_pc[i] = history.pc_wet.astype('float32')

HBox(children=(FloatProgress(value=0.0, max=304.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=845.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=586.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1752.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=351.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=861.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3285.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1997.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=189.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=683.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=137.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=605.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1320.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=530.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=5539.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=328.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=971.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1676.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2569.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1143.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2466.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=499.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1438.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2492.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=53.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=539.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=639.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=140.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2937.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=9158.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=9178.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1675.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=73.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3729.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=462.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1071.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=5511.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=214.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=515.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=600.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=4417.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=938.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=321.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=597.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=598.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=9196.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2323.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2095.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1905.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=244.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3187.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1491.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2233.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3494.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1202.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=37.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=971.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2425.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=5421.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2294.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=973.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=6949.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2129.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=901.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=608.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=114.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=153.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1294.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=4878.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1727.0), HTML(value='')))

In [None]:
dates_str = list(map(str, dt_index.values.astype('datetime64[D]')))

In [None]:
with h5py.File('interpolated_waterbodies_by_division_and_basin_fixeduids.h5', 'r+') as f:
    f['dates'] = np.array(dates_str).astype('S10')

## Verification

Check everything is loaded OK:

In [27]:
waterbodies = wbs

In [28]:
waterbody_shapes = waterbodies

In [44]:
h5 = h5py.File('interpolated_waterbodies_by_division_and_basin.h5', 'r')

In [45]:
divbasins = {i for i in wbs[['Division_', 'BNAME']].itertuples(index=False)}

In [54]:
for db in divbasins:
    z = h5[db.Division_ or 'None'][db.BNAME]
    assert z['pc_wet'][()].any()

In [55]:
h5.close()