In [1]:
from pathlib import Path
import numpy as np

import geopandas as gpd
import h3pandas
import pandas as pd
import rasterio
from shapely.geometry import Point


In [9]:
# Directory layout used by the rest of the notebook (paths are relative to
# the notebook's working directory).
output_dir = Path('../data/output')
# Create the output directory itself; `parents[0]` would only create its
# parent ('../data') and leave 'output' missing when results are written.
output_dir.mkdir(parents=True, exist_ok=True)
discrete_dir = Path('../data/discrete')
continuous_dir = Path('../data/continuous')

In [7]:
def h3index_raster(file, output_dir, stem=None, operation='max', resolution=7, crs='EPSG:2193'):
    """Aggregate band 1 of a raster onto H3 cells and write the result as Parquet.

    Every pixel of the first band becomes a point located at the pixel
    centre, the points are reprojected to EPSG:4326, and their values are
    aggregated per H3 cell with ``h3pandas``' ``geo_to_h3_aggregate``.

    Parameters
    ----------
    file : str or pathlib.Path
        Raster file to read; anything ``rasterio.open`` accepts as a path.
    output_dir : str or pathlib.Path
        Directory the Parquet file is written to. It is created if missing.
    stem : str, optional
        Prefix for the output file name. ``None`` (or any falsy value) gives
        an empty prefix, so the name starts with ``_``.
    operation : str or callable, default 'max'
        Aggregation passed through to ``geo_to_h3_aggregate``.
    resolution : int, default 7
        H3 resolution passed to ``geo_to_h3_aggregate``.
    crs : default 'EPSG:2193'
        CRS assigned to the pixel coordinates before reprojection. The CRS
        stored in the raster file (if any) is not consulted.

    Returns
    -------
    None
        The result is written to ``<output_dir>/<stem>_<file stem>.parquet``.

    Notes
    -----
    The band is read unmasked, so any nodata pixels take part in the
    aggregation like ordinary values.
    """
    file = Path(file)
    output_dir = Path(output_dir)

    with rasterio.open(file) as src:
        array = src.read(1)
        # Capture the affine transform while the dataset is still open and
        # use this copy below instead of touching the closed dataset.
        transform = src.transform

    height, width = array.shape
    # Default 'xy' meshgrid indexing yields (height, width) grids whose
    # row-major flattening lines up with array.ravel().
    cols, rows = np.meshgrid(np.arange(width), np.arange(height))
    xs, ys = rasterio.transform.xy(transform, rows.ravel(), cols.ravel())

    # Vectorised point construction avoids a Python-level loop per pixel.
    df = gpd.GeoDataFrame(
        {'value': array.ravel()},
        geometry=gpd.points_from_xy(xs, ys),
        crs=crs,
    )
    df = df.to_crs(4326)
    h3 = df.h3.geo_to_h3_aggregate(resolution, operation=operation)

    output_dir.mkdir(parents=True, exist_ok=True)
    # Build the prefix outside the f-string: reusing the same quote type
    # inside an f-string is a syntax error before Python 3.12.
    prefix = stem or ''
    h3.to_parquet(output_dir / f'{prefix}_{file.stem}.parquet')

In [10]:
%%time
def mode(x):
    """Return the entry labelled 0 of ``pd.Series.mode(x)``."""
    return pd.Series.mode(x)[0]


# Index up to the first ten discrete rasters (in sorted path order),
# aggregating each H3 cell with `mode`.
discrete_files = sorted(discrete_dir.glob('*.asc'))[:10]
for file in discrete_files:
    h3index_raster(file, output_dir, stem='discrete', operation=mode)

CPU times: user 3.09 s, sys: 49.3 ms, total: 3.14 s
Wall time: 3.17 s


In [12]:
%%time
# Index up to the first ten continuous rasters (in sorted path order),
# aggregating each H3 cell with the mean.
continuous_files = sorted(continuous_dir.glob('*.asc'))[:10]
for file in continuous_files:
    h3index_raster(file, output_dir, stem='continuous', operation='mean')

CPU times: user 2.66 s, sys: 31.1 ms, total: 2.69 s
Wall time: 2.73 s
