In [1]:
import sys
import pathlib
import os
from skmap.catalog import DataCatalog
from skmap.loader import TiledDataLoader
from skmap.overlay import SpaceOverlay, SpaceTimeOverlay
from skmap.misc import find_files, GoogleSheet, ttprint
from osgeo.gdal import BuildVRT, SetConfigOption
import random
import pandas as pd
import time
import skmap_bindings as sb
import numpy as np
from shapely.geometry import Point
from geopandas import gpd 

# Run/version tag; it is interpolated into the overlay output file name
# (soc.snr_overlaid_<version>.pq) further down in the notebook.
version = 'v20250301'

### prepare the data

In [2]:
# Load the point table that will be overlaid on the raster layers.
df = pd.read_parquet('./material/time.series_lucas_plausible.pq')
# Disabled cast kept for reference:
# df['site_key'] = df['site_key'].astype('str')

# Years handed to the catalog builder: 2000 up to and including 2023.
year_axis = np.arange(start=2000, stop=2024, step=1)
years = list(year_axis)

### extract the layers

In [3]:
# Raster endpoints: one URL per host in 192.168.49.30 .. 192.168.49.46 (port 8333).
base_path = [f'http://192.168.49.{gaia_id}:8333' for gaia_id in range(30,47)]
# GDAL configuration options; passed to the overlay run as `gdal_opts`.
GDAL_OPTS = {'GDAL_HTTP_VERSION': '1.0', 'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif'}
# Resource limits handed to the overlay (RAM budget in MB, worker threads).
max_ram_mb = 750000
n_threads = 96

# read in gsheet
# The service-account key lives outside the repository. Its location can be
# overridden with the GSHEET_KEY environment variable so the notebook also
# runs on machines where the default mount point does not exist.
gsheet_key = os.environ.get('GSHEET_KEY', '/mnt/apollo/stac/gaia-319808-913d36b5fca4.json')
if not os.path.isfile(gsheet_key):
    # Fail early with an actionable message instead of an error raised
    # from inside the GoogleSheet helper.
    raise FileNotFoundError(
        f"Google Sheet key file not found: '{gsheet_key}'. "
        "Set the GSHEET_KEY environment variable to the path of the JSON key."
    )
gsheet_url = 'https://docs.google.com/spreadsheets/d/1lNTpzdHBG5dirYj46iBDRJMk_YAV0Um2ovBc8v3dR9w/edit?gid=78425683#gid=78425683'
gsheet = GoogleSheet(gsheet_key, gsheet_url, verbose=False)

# create catalog
# The catalog definition is read from the `soc_change_snr` attribute of the
# sheet object and built for the `years` and `base_path` defined above.
catalog = DataCatalog.create_catalog(catalog_def=gsheet.soc_change_snr, years=years, base_path=base_path, replace_group_feat_name=True)
# catalog.save_json('soil.prop_eu.json')

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/apollo/stac/gaia-319808-913d36b5fca4.json'

### overlay

In [4]:
# `Point` is already imported in the first cell; the per-row Point construction
# is replaced by geopandas' vectorized constructor, so no import is needed here.
print('data size before overlay', df.shape)
# Build point geometries from the lon/lat columns in a single vectorized call
# (x = longitude, y = latitude) and attach them with CRS EPSG:4326.
geometry = gpd.points_from_xy(df['lon'], df['lat'])
df = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

data size before overlay (412840, 59)


In [5]:
# --- test: build the space-time overlay and time its construction ---

start = time.time()
# Arguments for the overlay: points come from `df`, dates from its 'time'
# column, and the raster tiles from the local GeoPackage keyed by 'TILE'.
overlay_kwargs = dict(
    col_date='time',
    points=df,
    catalog=catalog,
    raster_tiles='./ard2_final_status.gpkg',
    verbose=True,
    n_threads=n_threads,
    tile_id_col='TILE',
)
space_overlay = SpaceTimeOverlay(**overlay_kwargs)

elapsed_s = time.time() - start
print(f"Extraction of overlay meta-data: {elapsed_s:.2f} s")


[13:06:02] Overlay 28121 points from 2000 in 404 raster layers
[13:06:02] Reading ./ard2_final_status.gpkg
[13:06:23] Scanning blocks of 404 layers
[13:06:23] Finding query pixels for 0eedd36ec93fd9c99387cc8f5c320801 (34 layers)
[13:06:23] Finding query pixels for 1111cac5fd6669660413a5daaf395e08 (11 layers)
[13:06:24] Finding query pixels for 1c15fdb5dcc20e9188b97bbf7d255ad8 (1 layers)
[13:06:24] Finding query pixels for 20b4790b3a160a2ac8adefedfe07fbe7 (1 layers)
[13:06:24] Finding query pixels for 2bff5e510f1e47be04d69728dd453b6b (123 layers)
[13:06:25] Finding query pixels for 2fbfe950e4c22a02ba7e3b52884be34f (3 layers)
[13:06:25] Finding query pixels for 435d45ba442271d360c4ea7ec0c92bc6 (1 layers)
[13:06:26] Finding query pixels for 4febf6577f1f162c52ad482e075d0c76 (7 layers)
[13:06:28] Finding query pixels for 56140668da1ff98e935fb06418a942f2 (1 layers)
[13:06:30] Finding query pixels for 60c20556ded90f9aa6601c805ff37e11 (10 layers)
[13:06:33] Finding query pixels for 660960b8a39

In [7]:
# Run the overlay and report wall-clock time and the shape of the result.
start = time.time()
# Output file name carries the run version tag.
out_file_name = f"soc.snr_overlaid_{version}.pq"
# NOTE(review): `ovelayed_data` is misspelled; the name is kept unchanged in
# case later cells refer to it.
ovelayed_data = space_overlay.run(
    gdal_opts=GDAL_OPTS,
    max_ram_mb=max_ram_mb,
    out_file_name=out_file_name,
)
elapsed_s = time.time() - start
print(f"Reading overlayed layers: {elapsed_s:.2f} s")
print('data size: ', ovelayed_data.shape)

[12:33:02] Running the overlay for 2000
[12:33:02] Loading and sampling 34 raster layers for group 0eedd36ec93fd9c99387cc8f5c320801
[12:33:03] Loading and sampling 11 raster layers for group 1111cac5fd6669660413a5daaf395e08
[12:33:04] Loading and sampling 1 raster layers for group 1c15fdb5dcc20e9188b97bbf7d255ad8
[12:33:04] Loading and sampling 1 raster layers for group 20b4790b3a160a2ac8adefedfe07fbe7
[12:33:04] Loading and sampling 123 raster layers for group 2bff5e510f1e47be04d69728dd453b6b
[12:33:05] Loading and sampling 3 raster layers for group 2fbfe950e4c22a02ba7e3b52884be34f
[12:33:06] Loading and sampling 1 raster layers for group 435d45ba442271d360c4ea7ec0c92bc6
[12:33:07] Loading and sampling 7 raster layers for group 4febf6577f1f162c52ad482e075d0c76
[12:33:08] Loading and sampling 1 raster layers for group 56140668da1ff98e935fb06418a942f2
[12:33:09] Loading and sampling 10 raster layers for group 60c20556ded90f9aa6601c805ff37e11
[12:33:11] Loading and sampling 57 raster lay