# EXPORT

In [None]:
from pathlib import Path

# Source rasters are read from per-provider sub-folders of `input_dir`;
# the extracted per-pixel tables are written to `output_dir`.
input_dir = Path("../../data/raster")
output_dir = Path("../../data/parquet")
# parents=True: do not fail when intermediate folders are missing.
output_dir.mkdir(parents=True, exist_ok=True)

# glob() yields files in a filesystem-dependent order. Sorting within each
# provider (google first, then bing) keeps the processing order stable, so
# the counter-based output file names are reproducible between runs.
raster_files = (
    sorted((input_dir / 'google').glob("*.tif"))
    + sorted((input_dir / 'bing').glob("*.tif"))
)

In [None]:
import rasterio
import pandas as pd

from tqdm import tqdm

def raster2gpkg(input_dir):
    """Convert one raster tile into a table with one row per valid pixel.

    Despite its name, this function neither writes a file nor produces a
    GeoPackage: it returns a pandas DataFrame that the caller may persist.

    Parameters
    ----------
    input_dir : pathlib.Path
        Path to a single raster *file* (not a directory). It must expose
        ``.stem``, which is stored in the ``tile_id`` column.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``B01`` .. ``B64`` (the first 64 bands), ``x`` and ``y``
        (pixel-centre coordinates obtained from the raster's affine
        transform), ``class`` (the 65th band) and ``tile_id``. Rows are in
        row-major pixel order and band columns keep the raster's dtype.
        ``None`` is returned when the raster contains no valid pixel.

    Raises
    ------
    IndexError
        If the raster has valid pixels but fewer than 65 bands.

    Notes
    -----
    A pixel is dropped as soon as *any* of its bands equals the dataset's
    nodata value. A NaN nodata value is matched with ``isnan`` because NaN
    never compares equal to itself.
    """
    import numpy as np

    n_features = 64  # bands 1..64 become B01..B64; band 65 becomes "class"

    with rasterio.open(input_dir) as src:
        data = src.read()  # shape: (bands, rows, cols)
        nodata = src.nodata
        transform = src.transform

    # Per-pixel flag: True when at least one band holds the nodata value.
    if nodata is None:
        invalid = np.zeros(data.shape[1:], dtype=bool)
    elif np.isnan(nodata):
        invalid = np.isnan(data).any(axis=0)
    else:
        invalid = (data == nodata).any(axis=0)

    # nonzero() walks the mask in row-major order, i.e. the same order as a
    # nested "for row / for col" loop.
    row_idx, col_idx = np.nonzero(~invalid)
    if row_idx.size == 0:
        return None

    # (bands, n_valid) matrix holding every band of every kept pixel.
    pixels = data[:, row_idx, col_idx]

    # Pixel centres sit half a pixel away from the upper-left corner.
    col_centre = col_idx + 0.5
    row_centre = row_idx + 0.5

    columns = {f"B{b + 1:02}": pixels[b] for b in range(n_features)}
    # Affine forward transform written out explicitly:
    #   x = a * col + b * row + c,   y = d * col + e * row + f
    columns['x'] = col_centre * transform.a + row_centre * transform.b + transform.c
    columns['y'] = col_centre * transform.d + row_centre * transform.e + transform.f
    columns["class"] = pixels[n_features]
    columns["tile_id"] = input_dir.stem  # scalar, broadcast to every row

    return pd.DataFrame(columns)

In [None]:
# Write one parquet file per raster that produced at least one valid pixel.
# Files are numbered consecutively; rasters yielding no rows are skipped
# and do not consume a number.
counter = 0

for raster_file in tqdm(raster_files):
    df = raster2gpkg(raster_file)
    if df is None:
        continue

    df.to_parquet(output_dir / f"{counter}.pq", index=False)
    counter += 1

100%|██████████| 8098/8098 [36:05<00:00,  3.74it/s]
