H3 Python bindings (`h3-py`): https://github.com/uber/h3-py

deck.gl global grid examples: https://deck.gl/examples/global-grids


```
conda config --add channels conda-forge
conda install h3-py
```

In [None]:
import os

import h3
import pyarrow.dataset as ds
from tqdm import tqdm

# H3 resolution used for binning; higher resolution means smaller bins.
resolution = 4

# Ship categories, in the column order used for the CSV header.
shipTypes = ["Cargo", "Passenger", "Tanker"]

# Location of the AIS dataset. Set the AIS_DATA_PATH environment variable to
# point the notebook at another copy of the data; the original location is
# kept as the fallback so existing runs behave as before.
dataPath = os.environ.get("AIS_DATA_PATH", "/home/joajohan/ecommons/ais_global_2025")
dataset = ds.dataset(dataPath)

Start by inspecting the dataset:

In [None]:
# Peek at a handful of rows only, instead of converting a whole record batch
# to pandas (which also raised StopIteration on an empty dataset).
dataset.head(5).to_pandas()

Find the total number of rows, which is used as the progress-bar total further down:

In [None]:
# Let pyarrow count the rows itself rather than iterating over every batch
# in Python and summing the batch sizes by hand.
totalRowCount = dataset.count_rows()
totalRowCount

In [None]:
# Helpers

def shipTypeFromNumber(i):
    """Translate a numeric ship type code into its category name.

    Codes from 60 up to 69 map to "Passenger", up to 79 to "Cargo" and up to
    89 to "Tanker".

    Raises:
        ValueError: if the code is below 60 or above 89.
    """
    if i >= 60:
        # Upper bounds are checked in ascending order; the first one that
        # fits decides the category.
        for upperBound, label in ((69, "Passenger"), (79, "Cargo"), (89, "Tanker")):
            if i <= upperBound:
                return label
    raise ValueError("Unknown ship type number: {}".format(i))

def iterateRow(dataset, columns):
    """Yield the rows of `dataset` one at a time, restricted to `columns`.

    Batches are read with ``dataset.to_batches(columns=columns)``; each batch
    is expanded with ``to_pylist()`` and its items are yielded unchanged.
    """
    batches = dataset.to_batches(columns=columns)
    for recordBatch in batches:
        yield from recordBatch.to_pylist()

In [None]:
# Per-cell tally: H3 cell id -> {ship type name -> number of rows seen}.
grid = {}

rowStream = iterateRow(dataset, ["lat", "lon", "ais_ship_type"])
for row in tqdm(rowStream, total=totalRowCount):
    shipType = shipTypeFromNumber(row['ais_ship_type'])
    cellId = h3.latlng_to_cell(row['lat'], row['lon'], resolution)
    # Create the cell's counter dict on first sight, then bump this ship type.
    cellCounts = grid.setdefault(cellId, {})
    cellCounts[shipType] = cellCounts.get(shipType, 0) + 1

In [None]:
import csv
def writeOutput(path, grid, shipTypeNames=None):
    """Write the per-cell ship counts to a CSV file.

    The header is ``id`` followed by one ``count<ShipType>`` column per ship
    type. Every following row holds a grid key and its counts in that same
    column order; ship types missing for a cell are written as 0.

    Args:
        path: Destination file; it is created or overwritten.
        grid: Mapping of cell id -> {ship type name -> count}. It is only
            read, never modified.
        shipTypeNames: Ship types to export, in column order. Defaults to the
            notebook-level ``shipTypes`` list.
    """
    if shipTypeNames is None:
        shipTypeNames = shipTypes
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Write header
        writer.writerow(['id'] + ["count" + s for s in shipTypeNames])
        # Write rows
        for cellId, counts in grid.items():
            # Look counts up by name so the values line up with the header
            # regardless of the order in which ship types were first seen
            # for this cell (dict insertion order is not header order).
            writer.writerow([cellId] + [counts.get(s, 0) for s in shipTypeNames])

In [None]:
# Export the aggregated per-cell counts as CSV.
writeOutput(path="../map/counts.csv", grid=grid)