In [None]:
import rasterio
import geopandas as gpd
import numpy as np
from rasterstats import zonal_stats
from shapely.ops import transform as shapely_transform
import pyproj
from shapely.geometry import box, Polygon, MultiPolygon
import h3

In [None]:
# Path to a GeoJSON file delimiting the area of interest.
# Set to None to fall back to the raster's bounding box instead.
AOI = "metropolia_gornoslasko-zaglebiowska_8269826.geojson"
# H3 resolution of the hexagons that end up in the output table
# (passed as `target_hex_size` when the AOI is converted to hexes).
MAP_HEX_SIZE = 9

In [None]:
def hex_to_polygon(h3_index):
    """Build a polygon outlining a single H3 cell.

    Args:
        h3_index: H3 cell index accepted by ``h3.cell_to_boundary``.

    Returns:
        A ``Polygon`` whose vertices are in (lon, lat) order, i.e. the
        x/y order expected for planar geometries.
    """
    lat_lng_ring = h3.cell_to_boundary(h3_index)
    # The boundary comes back as (lat, lng) pairs; flip each to (lon, lat).
    lon_lat_ring = [(vertex[1], vertex[0]) for vertex in lat_lng_ring]
    return Polygon(lon_lat_ring)

In [None]:
# Raster whose pixel classes are aggregated per hexagon. The same path is later
# passed to zonal_stats and reused (with a .csv suffix) for the export.
tiff_path = "GHS_BUILT_C_MSZ_E2018_GLOBE_R2023A_54009_10_V1_0_R4_C20.tif"
with rasterio.open(tiff_path) as src:
    # Extent of the raster; used to build the fallback AOI when no GeoJSON is given.
    bounds = src.bounds
    crs = src.crs
    # NOTE(review): this loads the entire first band into memory, yet neither
    # `raster_data` nor `transform` (nor `crs`) is referenced by any other cell
    # visible here - consider dropping them if nothing else depends on them.
    raster_data = src.read(1)
    transform = src.transform

In [None]:
# Projected CRS the hexagons are converted to before the zonal statistics.
# NOTE(review): hard-coded; presumably matches the raster's own CRS
# (the file name contains 54009) - confirm against `src.crs`.
mollweide = pyproj.CRS("ESRI:54009")
# Geographic lon/lat CRS in which the H3 hexagons are generated.
wgs84 = pyproj.CRS("EPSG:4326")

In [None]:
def get_hexes_for_polygon(polygon, target_hex_size=7, fill_hex_size=10):
    """Return the H3 cells at ``target_hex_size`` that cover ``polygon``.

    The polygon is first filled with finer cells (``fill_hex_size``) and every
    fine cell is then mapped to its parent at ``target_hex_size``. A target
    cell is therefore included as soon as at least one of its fine descendants
    is returned by ``h3.h3shape_to_cells`` for the polygon.

    Args:
        polygon: Polygon exposing ``is_empty``, ``exterior`` and ``interiors``
            (shapely-style), with ring coordinates in (lon, lat[, z]) order.
        target_hex_size: H3 resolution of the returned cells.
        fill_hex_size: H3 resolution used for the intermediate fill. It is
            raised to ``target_hex_size`` when smaller, because a cell has no
            parent at a finer resolution than its own.

    Returns:
        set: H3 cell indexes at ``target_hex_size``; empty for an empty polygon.
    """
    if polygon.is_empty:
        return set()

    def ring_to_lat_lng(ring):
        # H3 expects (lat, lng); index explicitly so an optional Z value is dropped
        # instead of being shuffled into the lat/lng positions.
        return tuple((coord[1], coord[0]) for coord in ring.coords)

    outer = ring_to_lat_lng(polygon.exterior)
    # Pass interior rings as holes so enclaves cut out of the polygon stay excluded.
    holes = [ring_to_lat_lng(ring) for ring in polygon.interiors]
    h3_polygon = h3.LatLngPoly(outer, *holes)

    fill_resolution = max(fill_hex_size, target_hex_size)
    fill_hexes = h3.h3shape_to_cells(h3_polygon, fill_resolution)
    return {h3.cell_to_parent(h, target_hex_size) for h in fill_hexes}

In [None]:
# Coordinate-conversion callable from `mollweide` to `wgs84`.
# always_xy=True keeps the axis order as (x, y) / (lon, lat) on both sides.
project = pyproj.Transformer.from_crs(
    mollweide,
    wgs84,
    always_xy=True,
).transform

In [None]:
# Rectangle spanning the raster extent, in the raster's own coordinates.
bbox = box(
    minx=bounds.left,
    miny=bounds.bottom,
    maxx=bounds.right,
    maxy=bounds.top,
)
# Same rectangle after applying `project` to each of its vertices.
bbox_wgs84 = shapely_transform(project, bbox)
# Plain list of (x, y) vertex tuples of the converted rectangle.
bbox_coords = list(bbox_wgs84.exterior.coords)

In [None]:
# Choose the area to process: the raster's bounding box when no AOI file is
# configured, otherwise the geometry stored in the AOI file.
if not AOI:
    geometry = Polygon(shell=bbox_coords)
else:
    gdf_aoi = gpd.read_file(AOI)
    # Only the first feature of the file is used; any further features are ignored.
    geometry = gdf_aoi["geometry"].iloc[0]

In [None]:
# Display the selected area as a quick visual sanity check.
geometry

In [None]:
# Accumulator for the H3 cells covering the area of interest; filled in the next cell.
aoi_hexes = set()

In [None]:
# Convert the area of interest into H3 cells at MAP_HEX_SIZE.
# Any geometry type other than (Multi)Polygon leaves `aoi_hexes` untouched.
if isinstance(geometry, MultiPolygon):
    # Hex each part separately and merge the results into the shared set.
    for part in geometry.geoms:
        part_hexes = get_hexes_for_polygon(polygon=part, target_hex_size=MAP_HEX_SIZE)
        aoi_hexes.update(part_hexes)
elif isinstance(geometry, Polygon):
    aoi_hexes = get_hexes_for_polygon(polygon=geometry, target_hex_size=MAP_HEX_SIZE)

In [None]:
# One polygon per H3 cell. `aoi_hexes` is iterated twice without being modified
# in between, so ids and polygons line up row by row.
hex_polygons = list(map(hex_to_polygon, aoi_hexes))
gdf = gpd.GeoDataFrame(
    data={'h3_index': list(aoi_hexes)},
    geometry=hex_polygons,
    crs="EPSG:4326",
)
gdf.head()

In [None]:
# Reproject the hexagons to `mollweide` before overlaying them on the raster.
gdf = gdf.to_crs(crs=mollweide)
gdf.head()

In [None]:
# Raster class value -> column name used in the zonal-statistics output.
# NOTE(review): the numeric suffixes 1-5 presumably correspond to classes defined
# by the raster's data specification - confirm against the dataset documentation.
cmap = {
    # Non-built classes
    0: "other",
    1: "low vegetation", 2: "medium vegetation", 3: "high vegetation",
    4: "water", 5: "road",
    # Residential classes
    11: "residential 1", 12: "residential 2", 13: "residential 3",
    14: "residential 4", 15: "residential 5",
    # Non-residential classes
    21: "non-residential 1", 22: "non-residential 2", 23: "non-residential 3",
    24: "non-residential 4", 25: "non-residential 5",
}

In [None]:
# Count raster pixels per class inside every hexagon.
# categorical=True yields one count per class value, category_map renames the
# class values to the labels in `cmap`, and geojson_out=True returns features
# that still carry the input properties (e.g. h3_index) next to the counts.
stats = zonal_stats(
    gdf,
    tiff_path,
    categorical=True,
    category_map=cmap,
    geojson_out=True,
)

In [None]:
# Turn the GeoJSON-like features returned by zonal_stats into a table.
stats_gdf = gpd.GeoDataFrame.from_features(features=stats)
stats_gdf.head()

In [None]:
# Count columns of the exported table, in output order.
# These must stay identical to the labels used as values in `cmap`.
data_columns = (
    # Non-built classes
    ["other", "low vegetation", "medium vegetation", "high vegetation", "water", "road"]
    # Residential classes
    + ["residential 1", "residential 2", "residential 3", "residential 4", "residential 5"]
    # Non-residential classes
    + [
        "non-residential 1",
        "non-residential 2",
        "non-residential 3",
        "non-residential 4",
        "non-residential 5",
    ]
)

In [None]:
# Build the export table: the hexagon id followed by one pixel count per class.
#
# reindex() is used instead of plain column selection because the zonal statistics
# only produce a column for classes that actually occur somewhere in the area;
# selecting a class that never occurs (or selecting from an empty result) would
# raise a KeyError. reindex() creates such columns filled with NaN instead.
gdf_output = (
    stats_gdf
    .reindex(columns=["h3_index"] + data_columns)
    # A missing count means the class does not occur in that hexagon.
    .fillna(0)
    # Counts are whole pixels; the NaN handling above leaves them as floats.
    .astype({column: "int" for column in data_columns})
)
# Kept as the last top-level expression so the preview is actually rendered.
gdf_output.head()

In [None]:
# Write the table next to the raster (same name, .csv suffix),
# without a header row and without the index column.
if gdf_output.empty:
    print("No data to export")
else:
    print("Exporting data to .csv file")
    gdf_output.to_csv(
        tiff_path.replace(".tif", ".csv"),
        header=False,
        index=False,
    )