In [1]:
import sys
import pathlib
import os
from skmap.catalog import DataCatalog
from skmap.loader import TiledDataLoader
from skmap.overlay import SpaceOverlay, SpaceTimeOverlay
from skmap.misc import find_files, GoogleSheet, ttprint
from osgeo.gdal import BuildVRT, SetConfigOption
import random
import pandas as pd
import time
import skmap_bindings as sb
import numpy as np
from shapely.geometry import Point
from geopandas import gpd 

# Version tag of this run; it is interpolated into the name of the overlay
# output file (soc.snr_series_overlaid_<version>.pq) further down.
version = 'v20250301'

### prepare the data

In [3]:
# Annual steps 2000..2023 (the upper bound of the range is exclusive); the
# entries stay numpy integers, exactly as np.arange produces them.
years = [*np.arange(2000, 2024)]

# Table of samples that will be overlaid with the raster layers.
df = pd.read_parquet('./time.series_lucas_plausible.pq')
# Disabled: cast of the site identifier to string.
# df['site_key'] = df['site_key'].astype('str')

### extract the layers

In [4]:
# Resource settings used by the overlay cells further down.
n_threads = 96
max_ram_mb = 750000

# One HTTP endpoint per host: last address octet 30..46, all on port 8333.
# NOTE(review): addresses are hard-coded to an internal network.
base_path = [
    f'http://192.168.49.{gaia_id}:8333'
    for gaia_id in range(30, 47)
]

# GDAL configuration options passed to the overlay run.
GDAL_OPTS = {
    'GDAL_HTTP_VERSION': '1.0',
    'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif',
}

# Google Sheet that holds the catalog definition. The first argument is the
# path of a JSON key file, the second the sheet URL.
# NOTE(review): both are hard-coded here; consider reading them from
# configuration / environment instead.
gsheet_url = 'https://docs.google.com/spreadsheets/d/1lNTpzdHBG5dirYj46iBDRJMk_YAV0Um2ovBc8v3dR9w/edit?gid=78425683#gid=78425683'
gsheet_key = '/mnt/apollo/stac/gaia-319808-913d36b5fca4.json'
gsheet = GoogleSheet(gsheet_key, gsheet_url, verbose=False)

# Build the layer catalog from the `soc_change_snr` attribute of the sheet
# for the requested years and base endpoints.
catalog = DataCatalog.create_catalog(
    catalog_def=gsheet.soc_change_snr,
    years=years,
    base_path=base_path,
    replace_group_feat_name=True,
)
# Optional export of the catalog, currently disabled:
# catalog.save_json('soil.prop_eu.json')

Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_m_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_mx_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_std_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_std_gf_500m_s_{year}0101_{year}1231_go_epsg.4326_v20250116, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_m_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_mx_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_std_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagat

### overlay

In [6]:
# Turn the sample table into a GeoDataFrame with one point per row, built
# from the `lon_mean` / `lat_mean` columns and tagged as EPSG:4326.
print('data size before overlay', df.shape)

# points_from_xy creates the whole geometry array in one vectorised call
# instead of constructing a shapely Point per row in Python; the result is
# assigned positionally, like the list it replaces. No in-cell import is
# needed any more (shapely's Point is already imported in the first cell).
geometry = gpd.points_from_xy(df['lon_mean'], df['lat_mean'])
df = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

data size before overlay (10204, 29)


In [11]:
# Test run: set up the spatial overlay of the points against the catalog and
# report how long the set-up (overlay meta-data extraction) takes.
start = time.time()
space_overlay = SpaceOverlay(
    points=df,
    catalog=catalog,
    raster_tiles='./ard2_final_status.gpkg',
    tile_id_col='TILE',
    n_threads=n_threads,
    verbose=True,
    # col_date='time',  # left disabled, as in the original call
)
elapsed = time.time() - start

print(f"Extraction of overlay meta-data: {elapsed:.2f} s")


[10:55:45] Reading ./ard2_final_status.gpkg
[11:00:27] Scanning blocks of 2267 layers
[11:00:27] Finding query pixels for 0eedd36ec93fd9c99387cc8f5c320801 (34 layers)
[11:00:27] Finding query pixels for 1111cac5fd6669660413a5daaf395e08 (11 layers)
[11:00:29] Finding query pixels for 1c15fdb5dcc20e9188b97bbf7d255ad8 (23 layers)
[11:00:29] Finding query pixels for 20b4790b3a160a2ac8adefedfe07fbe7 (1 layers)
[11:00:30] Finding query pixels for 2bff5e510f1e47be04d69728dd453b6b (123 layers)
[11:00:30] Finding query pixels for 2fbfe950e4c22a02ba7e3b52884be34f (3 layers)
[11:00:37] Finding query pixels for 435d45ba442271d360c4ea7ec0c92bc6 (1 layers)
[11:00:42] Finding query pixels for 4febf6577f1f162c52ad482e075d0c76 (7 layers)
[11:01:00] Finding query pixels for 56140668da1ff98e935fb06418a942f2 (1 layers)
[11:01:11] Finding query pixels for 60c20556ded90f9aa6601c805ff37e11 (10 layers)
[11:01:40] Finding query pixels for 660960b8a39c6afeae9171be064ae057 (1000 layers)
[11:01:57] Finding query 

In [12]:
# Run the overlay and time it. The output file name carries the version tag;
# presumably run() writes the result to that file as well as returning it
# (confirm against the skmap documentation).
start = time.time()
ovelayed_data = space_overlay.run(
    gdal_opts=GDAL_OPTS,
    max_ram_mb=max_ram_mb,
    out_file_name=f"soc.snr_series_overlaid_{version}.pq",
)
elapsed = time.time() - start

print(f"Reading overlayed layers: {elapsed:.2f} s")
print('data size: ', ovelayed_data.shape)

[11:04:35] Loading and sampling 34 raster layers for group 0eedd36ec93fd9c99387cc8f5c320801
[11:04:36] Loading and sampling 11 raster layers for group 1111cac5fd6669660413a5daaf395e08
[11:04:51] Loading and sampling 23 raster layers for group 1c15fdb5dcc20e9188b97bbf7d255ad8
[11:04:52] Loading and sampling 1 raster layers for group 20b4790b3a160a2ac8adefedfe07fbe7
[11:04:53] Loading and sampling 123 raster layers for group 2bff5e510f1e47be04d69728dd453b6b
[11:05:02] Loading and sampling 3 raster layers for group 2fbfe950e4c22a02ba7e3b52884be34f
[11:05:21] Loading and sampling 1 raster layers for group 435d45ba442271d360c4ea7ec0c92bc6
[11:05:23] Loading and sampling 7 raster layers for group 4febf6577f1f162c52ad482e075d0c76
[11:05:49] Loading and sampling 1 raster layers for group 56140668da1ff98e935fb06418a942f2
[11:06:04] Loading and sampling 10 raster layers for group 60c20556ded90f9aa6601c805ff37e11
[11:07:18] Loading and sampling 1000 raster layers for group 660960b8a39c6afeae9171b