In [1]:
import sys
import pathlib
import os
from skmap.catalog import DataCatalog
# from skmap.loader import TiledDataLoader
from skmap.overlay import SpaceOverlay, SpaceTimeOverlay
from skmap.misc import find_files, GoogleSheet, ttprint
from osgeo.gdal import BuildVRT, SetConfigOption
import random
import pandas as pd
import time
import skmap_bindings as sb
import numpy as np
from shapely.geometry import Point
from geopandas import gpd 

# Release tag embedded in the input file name below (and reused wherever
# `version` is interpolated later in the notebook).
version = 'v20250521'

# Years 2000..2018 inclusive -- np.arange excludes the stop value (2019).
# Kept as a list of NumPy integers, exactly as before.
years = list(np.arange(2000, 2019))

# Load the sample table for this release and report its (rows, columns).
df = pd.read_parquet(f'./srs_lucas.plausible_{version}.pq')
print(df.shape)

(10204, 21)


### Build the catalog of layers to extract

In [2]:
# --- Runtime configuration ---------------------------------------------------
# One HTTP endpoint per storage host: 192.168.49.30 .. 192.168.49.46, port 8333.
base_path = [
    f'http://192.168.49.{gaia_id}:8333'
    for gaia_id in range(30, 47)
]

# GDAL configuration options, handed to the overlay run as `gdal_opts`.
GDAL_OPTS = {
    'GDAL_HTTP_VERSION': '1.0',
    'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif',
}

n_threads = 96        # passed to SpaceOverlay as n_threads
max_ram_mb = 750000   # passed to the overlay run as max_ram_mb

# --- Layer definitions from the Google Sheet ---------------------------------
# NOTE(review): gsheet_key is an absolute path to a service-account JSON file
# on a mounted volume -- confirm it exists on the machine running this cell.
gsheet_url = 'https://docs.google.com/spreadsheets/d/1lNTpzdHBG5dirYj46iBDRJMk_YAV0Um2ovBc8v3dR9w/edit?gid=78425683#gid=78425683'
gsheet_key = '/mnt/apollo/stac/gaia-319808-913d36b5fca4.json'
gsheet = GoogleSheet(gsheet_key, gsheet_url, verbose=False)

# --- Catalog -----------------------------------------------------------------
# Build the catalog from the sheet's `soc_change_snr` definition for the
# requested years, then persist it next to the notebook as JSON.
catalog = DataCatalog.create_catalog(
    catalog_def=gsheet.soc_change_snr,
    years=years,
    base_path=base_path,
    replace_group_feat_name=True,
)
catalog.save_json('overlay_snr_srs.json')

Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_m_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_mx_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_std_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_std_gf_500m_s_{year}0101_{year}1231_go_epsg.4326_v20250116, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_m_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_mx_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_std_500m_s_{year}0101_{year}1231_go_epsg.4326_v20240616, propagat

### Overlay: sample the catalog layers at the point locations

In [3]:
# Turn the sample table into a GeoDataFrame so it can be handed to the
# overlay as `points`. Coordinates are read from the lon_mean / lat_mean
# columns and tagged as EPSG:4326.
print('data size before overlay', df.shape)

# gpd.points_from_xy builds all point geometries in one vectorized call
# (x = longitude, y = latitude). It replaces the per-row Point(...) loop,
# so the shapely import that was repeated in this cell is no longer needed.
geometry = gpd.points_from_xy(df['lon_mean'], df['lat_mean'])
df = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

data size before overlay (10204, 21)


In [4]:
# Set up the spatial overlay and time the set-up step.
# With verbose=True the constructor logs its progress (tile file reading,
# URL checks, block scanning, query-pixel lookup per layer group).
start = time.time()

space_overlay = SpaceOverlay(
    points=df,
    catalog=catalog,
    raster_tiles='./ard2_final_status.gpkg',
    tile_id_col='TILE',
    n_threads=n_threads,
    verbose=True,
    # col_date='time' is intentionally not passed (it was disabled in this call).
)

print(f"Extraction of overlay meta-data: {(time.time() - start):.2f} s")


[12:53:05] Reading ./ard2_final_status.gpkg
[12:53:22] 0 out of 1737 URLs returning 404
[12:55:10] Scanning blocks of 1737 layers
[12:55:10] Finding query pixels for 0eedd36ec93fd9c99387cc8f5c320801 (34 layers)
[12:55:10] Finding query pixels for 1111cac5fd6669660413a5daaf395e08 (11 layers)
[12:55:12] Finding query pixels for 1c15fdb5dcc20e9188b97bbf7d255ad8 (19 layers)
[12:55:12] Finding query pixels for 20b4790b3a160a2ac8adefedfe07fbe7 (1 layers)
[12:55:13] Finding query pixels for 2bff5e510f1e47be04d69728dd453b6b (123 layers)
[12:55:14] Finding query pixels for 2fbfe950e4c22a02ba7e3b52884be34f (3 layers)
[12:55:20] Finding query pixels for 435d45ba442271d360c4ea7ec0c92bc6 (1 layers)
[12:55:27] Finding query pixels for 4febf6577f1f162c52ad482e075d0c76 (7 layers)
[12:55:45] Finding query pixels for 56140668da1ff98e935fb06418a942f2 (1 layers)
[12:55:57] Finding query pixels for 660960b8a39c6afeae9171be064ae057 (681 layers)
[12:56:16] Finding query pixels for 67d1c8ae1f1ff92ae4147038afb

In [5]:
# Run the overlay itself and time it. The call receives the GDAL options,
# the RAM budget and an output file name tagged with the current `version`.
# The result name `ovelayed_data` (sic) is kept unchanged in case later
# cells refer to it.
start = time.time()

ovelayed_data = space_overlay.run(
    gdal_opts=GDAL_OPTS,
    max_ram_mb=max_ram_mb,
    out_file_name=f"srs_overlaid_{version}.pq",
)

print(f"Reading overlayed layers: {(time.time() - start):.2f} s")
print(f'data size: ', ovelayed_data.shape)

[12:57:46] Loading and sampling 34 raster layers for group 0eedd36ec93fd9c99387cc8f5c320801


OMP: Hint Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set).


[12:57:47] Loading and sampling 11 raster layers for group 1111cac5fd6669660413a5daaf395e08
[12:57:59] Loading and sampling 19 raster layers for group 1c15fdb5dcc20e9188b97bbf7d255ad8
[12:58:00] Loading and sampling 1 raster layers for group 20b4790b3a160a2ac8adefedfe07fbe7
[12:58:00] Loading and sampling 123 raster layers for group 2bff5e510f1e47be04d69728dd453b6b
[12:58:05] Loading and sampling 3 raster layers for group 2fbfe950e4c22a02ba7e3b52884be34f
[12:58:23] Loading and sampling 1 raster layers for group 435d45ba442271d360c4ea7ec0c92bc6
[12:58:24] Loading and sampling 7 raster layers for group 4febf6577f1f162c52ad482e075d0c76
[12:58:47] Loading and sampling 1 raster layers for group 56140668da1ff98e935fb06418a942f2
[12:59:14] Loading and sampling 681 raster layers for group 660960b8a39c6afeae9171be064ae057
[14:30:26] Loading and sampling 10 raster layers for group 67d1c8ae1f1ff92ae4147038afb08e2b
[14:30:31] Loading and sampling 1 raster layers for group 6df3e8a271732db3b96070129