In [1]:
import rasterio
import geopandas as gpd
from shapely.geometry import Point
import h3pandas
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
# Folder that receives one parquet file per input raster.
output_dir = Path('combined_h3')
# Create it up front (idempotent) so the to_parquet calls in the processing
# cells do not fail on a fresh checkout where the folder does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)

In [3]:
%%time
input_dir = Path('discrete')
r_files = list(sorted(input_dir.glob('*.asc')))
for file in r_files:
    with rasterio.open(file) as src:
        array = src.read(1)
        transform = src.transform
    rows, cols = array.shape
    xs, ys = np.indices((cols, rows))  
    xs_flat = xs.flatten()
    ys_flat = ys.flatten()
    values_flat = array.flatten()
    coords = rasterio.transform.xy(transform, xs_flat, ys_flat)
    points = list(zip(coords[0], coords[1], values_flat))
    geometry = [Point(x, y) for x, y, _ in points]
    df = gpd.GeoDataFrame(points, columns=['X', 'Y', 'Value'], geometry=geometry)
    df=df.drop(columns=['X','Y']).set_crs(2193)
    df['Value']=pd.to_numeric(df['Value'], downcast='integer')
    df=df.to_crs(4326)
    h3=df.h3.geo_to_h3_aggregate(12, operation='max')
    h3.to_parquet(f'{(output_dir)}/discrete_{(file.stem)}.parquet')

CPU times: total: 36.7 s
Wall time: 1min 56s


In [4]:
%%time
input_dir = Path('continuous')
r_files = list(sorted(input_dir.glob('*.asc')))
for file in r_files:
    with rasterio.open(file) as src:
        array = src.read(1)
        transform = src.transform
    rows, cols = array.shape
    xs, ys = np.indices((cols, rows))  
    xs_flat = xs.flatten()
    ys_flat = ys.flatten()
    values_flat = array.flatten()
    coords = rasterio.transform.xy(transform, xs_flat, ys_flat)
    points = list(zip(coords[0], coords[1], values_flat))
    geometry = [Point(x, y) for x, y, _ in points]
    df = gpd.GeoDataFrame(points, columns=['X', 'Y', 'Value'], geometry=geometry)
    df=df.drop(columns=['X','Y']).set_crs(2193)
    df=df.to_crs(4326)
    h3=df.h3.geo_to_h3_aggregate(12, operation='mean')
    h3.to_parquet(f'{(output_dir)}/continuous_{(file.stem)}.parquet')

CPU times: total: 31.3 s
Wall time: 1min 46s
