In [1]:
import sys
import pathlib
import os
from skmap.catalog import DataCatalog
from skmap.loader import TiledDataLoader
from skmap.overlay import SpaceOverlay, SpaceTimeOverlay
from skmap.misc import find_files, GoogleSheet, ttprint
from osgeo.gdal import BuildVRT, SetConfigOption
import random
import pandas as pd
import time
import skmap_bindings as sb
import numpy as np
from shapely.geometry import Point
from geopandas import gpd 


### prepare the data

In [2]:
# Load the point table that will be overlaid on the raster layers.
df = pd.read_parquet('/mnt/primus/xuemeng_tmp_harbour/overlay_tmp/soil_cec.cleaned.oh_l2.pq')
# df['site_key'] = df['site_key'].astype('str')

# Distinct values of the `time` column, converted to plain Python ints.
years = list(map(int, df['time'].unique().tolist()))

### Configure the overlay and build the layer catalog

In [3]:
# --- Run configuration ---
# One HTTP endpoint per host, 192.168.49.30 through 192.168.49.46, port 8333.
base_path = [f'http://192.168.49.{host}:8333' for host in range(30, 47)]
# Options handed to the overlay run via its `gdal_opts` argument.
GDAL_OPTS = dict(GDAL_HTTP_VERSION='1.0', CPL_VSIL_CURL_ALLOWED_EXTENSIONS='.tif')
max_ram_mb = 750_000
n_threads = 96

# --- Google Sheet with the layer definitions ---
# NOTE(review): the key path is presumably a service-account credential file;
# confirm it is readable in the environment where this notebook runs.
gsheet_key = '/mnt/apollo/stac/gaia-319808-913d36b5fca4.json'
gsheet_url = 'https://docs.google.com/spreadsheets/d/1lNTpzdHBG5dirYj46iBDRJMk_YAV0Um2ovBc8v3dR9w/edit?gid=78425683#gid=78425683'
gsheet = GoogleSheet(gsheet_key, gsheet_url, verbose=False)

# --- Catalog ---
# Build the catalog from the sheet's `eu_soil_prop` definition for the years
# found in the data, then write it to a JSON file.
catalog = DataCatalog.create_catalog(
    catalog_def=gsheet.eu_soil_prop,
    years=years,
    base_path=base_path,
)
catalog.save_json('soil.prop_eu.json')

Year 2000 not available for layer wv_mcd19a2v061_n_1km_s_YYYY0101_YYYY0131_go_epsg.4326_v20230619, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_m_500m_s_YYYY0101_YYYY1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_mx_500m_s_YYYY0101_YYYY1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_std_500m_s_YYYY0101_YYYY1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_m_500m_s_YYYY0101_YYYY1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_mx_500m_s_YYYY0101_YYYY1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_std_500m_s_YYYY0101_YYYY1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer wv_mc

### overlay

In [4]:
# Convert the point table into a GeoDataFrame in EPSG:4326.
print('data size before overlay', df.shape)
# Build all point geometries from the lon/lat columns in one vectorized call
# rather than constructing one shapely Point per row in a Python loop.
geometry = gpd.points_from_xy(df['lon'], df['lat'])
df = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

data size before overlay (409617, 41)


In [5]:
# Set up the space-time overlay for the points against the catalog layers and
# report how long the set-up took.
start = time.time()
space_overlay = SpaceTimeOverlay(
    points=df,
    col_date='time',
    catalog=catalog,
    raster_tiles='ard2_final_status.gpkg',
    tile_id_col='TILE',
    n_threads=n_threads,
    verbose=True,
)
elapsed_s = time.time() - start

print(f"Extraction of overlay meta-data: {elapsed_s:.2f} s")


[10:50:01] Overlay 28199 points from 2000 in 598 raster layers
[10:50:01] Reading ard2_final_status.gpkg
[10:50:30] Scanning blocks of 598 layers
[10:50:30] Finding query pixels for 0eedd36ec93fd9c99387cc8f5c320801 (34 layers)
[10:50:30] Finding query pixels for 1111cac5fd6669660413a5daaf395e08 (11 layers)
[10:50:31] Finding query pixels for 20b4790b3a160a2ac8adefedfe07fbe7 (1 layers)
[10:50:31] Finding query pixels for 2bff5e510f1e47be04d69728dd453b6b (123 layers)
[10:50:32] Finding query pixels for 2fbfe950e4c22a02ba7e3b52884be34f (3 layers)
[10:50:32] Finding query pixels for 435d45ba442271d360c4ea7ec0c92bc6 (1 layers)
[10:50:33] Finding query pixels for 4febf6577f1f162c52ad482e075d0c76 (7 layers)
[10:50:35] Finding query pixels for 56140668da1ff98e935fb06418a942f2 (1 layers)
[10:50:37] Finding query pixels for 660960b8a39c6afeae9171be064ae057 (139 layers)
[10:50:38] Finding query pixels for 67d1c8ae1f1ff92ae4147038afb08e2b (10 layers)
[10:50:39] Finding query pixels for 6df3e8a2717

In [6]:
# Run the overlay and report the elapsed time and the shape of the result.
# The variable name `ovelayed_data` (sic) is kept as-is because other cells
# may refer to it.
start = time.time()
ovelayed_data = space_overlay.run(
    gdal_opts=GDAL_OPTS,
    max_ram_mb=max_ram_mb,
    out_file_name="soil_overlaid.pq",
)
elapsed_s = time.time() - start
print(f"Reading overlayed layers: {elapsed_s:.2f} s")
print('data size: ', ovelayed_data.shape)

[11:28:07] Running the overlay for 2000
[11:28:07] Loading and sampling 34 raster layers for group 0eedd36ec93fd9c99387cc8f5c320801
[11:28:08] Loading and sampling 11 raster layers for group 1111cac5fd6669660413a5daaf395e08
[11:28:10] Loading and sampling 1 raster layers for group 20b4790b3a160a2ac8adefedfe07fbe7
[11:28:10] Loading and sampling 123 raster layers for group 2bff5e510f1e47be04d69728dd453b6b
[11:28:12] Loading and sampling 3 raster layers for group 2fbfe950e4c22a02ba7e3b52884be34f
[11:28:14] Loading and sampling 1 raster layers for group 435d45ba442271d360c4ea7ec0c92bc6
[11:28:15] Loading and sampling 7 raster layers for group 4febf6577f1f162c52ad482e075d0c76
[11:28:16] Loading and sampling 1 raster layers for group 56140668da1ff98e935fb06418a942f2
[11:28:18] Loading and sampling 139 raster layers for group 660960b8a39c6afeae9171be064ae057
[11:28:51] Loading and sampling 10 raster layers for group 67d1c8ae1f1ff92ae4147038afb08e2b
[11:28:52] Loading and sampling 1 raster la