In [1]:
import sys
import pathlib
import os
from skmap.catalog import s3_setup, DataCatalog
from skmap.loader import TiledDataLoader
from skmap.overlay import SpaceOverlay, SpaceTimeOverlay
from skmap.misc import find_files, GoogleSheet, ttprint
import random
import pandas as pd
import time
import skmap_bindings as sb
import numpy as np
# --- Input samples ---------------------------------------------------------
# Directory holding the input samples; later cells also write results here.
folder = '/mnt/primus/xuemeng_tmp_harbour/data/'
# `folder` already ends with '/', so join the parts instead of concatenating
# them with another '/' (which produced '.../data//000_data_ocd.pq').
data_file = os.path.join(folder, '000_data_ocd.pq')
df = pd.read_parquet(data_file)

# Distinct values of the 'time' column as plain ints, in order of appearance.
# Missing values are skipped because int(NaN) raises ValueError.
years = [int(year) for year in df['time'].dropna().unique()]

# --- Raster access settings ------------------------------------------------
# One HTTP endpoint per host 192.168.49.30 ... 192.168.49.46, all on port 8333.
base_path = [f'http://192.168.49.{gaia_id}:8333' for gaia_id in range(30, 47)]
# Passed as `gdal_opts` when the overlay is run.
GDAL_OPTS = {'GDAL_HTTP_VERSION': '1.0', 'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif'}
# Passed as `max_ram_mb` when the overlay is run.
max_ram_mb = 500000
# Passed as `n_threads` when the overlay object is created.
n_threads = 96

# --- Layer catalog -----------------------------------------------------------
# NOTE(review): the key file looks like a Google service-account credential;
# keep it out of version control and confirm the path is valid on this host.
gsheet_key = '/mnt/apollo/stac/gaia-319808-913d36b5fca4.json'
gsheet_url = 'https://docs.google.com/spreadsheets/d/1lNTpzdHBG5dirYj46iBDRJMk_YAV0Um2ovBc8v3dR9w/edit?gid=78425683#gid=78425683'
gsheet = GoogleSheet(gsheet_key, gsheet_url, verbose=False)
json_out_path = 'soc_catalog.json'

# Build the catalog from the `eu_ocd` definition of the sheet for the sampled
# years, then store it as JSON in the working directory.
# The cell output reports layers with missing years ("Year 2000 not available
# for layer ..., propagating year 2001").
catalog = DataCatalog.create_catalog(catalog_def=gsheet.eu_ocd, years=years, base_path=base_path)
catalog.save_json(json_out_path)

# Environment hints kept from the original notebook (disabled):
# os.environ['PROJ_LIB'] = '/opt/conda/share/proj'
# /home/opengeohub/.local/bin

Year 2000 not available for layer wv_mcd19a2v061_n_1km_s_YYYY0101_YYYY0131_go_epsg.4326_v20230619, propagating year 2001
Year 2000 not available for layer bare.soil_mcd43a4.fc_m_500m_s_YYYY0101_YYYY1231_eu_epsg.3035_v20240616, propagating year 2001
Year 2000 not available for layer bare.soil_mcd43a4.fc_mx_500m_s_YYYY0101_YYYY1231_eu_epsg.3035_v20240616, propagating year 2001
Year 2000 not available for layer bare.soil_mcd43a4.fc_std_500m_s_YYYY0101_YYYY1231_eu_epsg.3035_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_m_500m_s_YYYY0101_YYYY1231_eu_epsg.3035_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_mx_500m_s_YYYY0101_YYYY1231_eu_epsg.3035_v20240616, propagating year 2001
Year 2000 not available for layer non.photosynthetic.veg_mcd43a4.fc_std_500m_s_YYYY0101_YYYY1231_eu_epsg.3035_v20240616, propagating year 2001
Year 2000 not available for layer photosynthetic.veg_mcd43a4.fc_m_

In [2]:
# Turn the sample table into a GeoDataFrame with one point per row, built from
# the 'lon'/'lat' columns and tagged as EPSG:4326.

# Uncomment for a quick test run on a small random subset:
# df = df.sample(10)

from shapely.geometry import Point  # no longer used here; kept in the namespace for other cells
import geopandas as gpd  # documented import form (was: `from geopandas import gpd`)

# points_from_xy builds the whole geometry array in one vectorized call
# instead of creating one shapely Point per row in a Python loop.
df = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['lon'], df['lat']),
    crs="EPSG:4326",
)
df

Unnamed: 0,time,lat,lon,hzn_dep,id,ref,nuts0,lc_survey,ocd,geometry
0,2018.0,43.170890,-3.001301,5.0,1001,ParcelasINES,ES,,16.596297,POINT (-3.00130 43.17089)
1,2018.0,43.126201,-3.127953,5.0,1002,ParcelasINES,ES,,16.348176,POINT (-3.12795 43.12620)
2,2018.0,43.125850,-3.062770,5.0,1003,ParcelasINES,ES,,6.556407,POINT (-3.06277 43.12585)
3,2018.0,43.124164,-3.004227,5.0,1004,ParcelasINES,ES,,5.328774,POINT (-3.00423 43.12416)
4,2018.0,43.080842,-3.001301,5.0,1005,ParcelasINES,ES,,10.693605,POINT (-3.00130 43.08084)
...,...,...,...,...,...,...,...,...,...,...
1874458,2010.0,46.481772,10.141516,5.0,04_01_00_2_1640975_1999_2020,GLanCE,,Rock,0.000000,POINT (10.14152 46.48177)
1874458,2012.0,46.481772,10.141516,5.0,04_01_00_2_1640975_1999_2020,GLanCE,,Rock,0.000000,POINT (10.14152 46.48177)
1874458,2014.0,46.481772,10.141516,5.0,04_01_00_2_1640975_1999_2020,GLanCE,,Rock,0.000000,POINT (10.14152 46.48177)
1874458,2016.0,46.481772,10.141516,5.0,04_01_00_2_1640975_1999_2020,GLanCE,,Rock,0.000000,POINT (10.14152 46.48177)


In [4]:
# Create the space-time overlay for the sample points against the catalog.
# With verbose=True the constructor logs its progress (see the cell output:
# "Overlay ... points from ... in ... raster layers", "Finding query pixels ...").

# perf_counter is a monotonic clock, so the elapsed time cannot be distorted
# by system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
space_overlay = SpaceTimeOverlay(
    col_date='time',      # column of `df` used as the date of each sample
    points=df,            # alternative input tried earlier: data_file
    catalog=catalog,
    verbose=True,
    n_threads=n_threads,
    # Optional arguments left disabled in the original notebook:
    # raster_tiles='ard2_final_status.gpkg',
    # tile_id_col='TILE',
)
print(f"Extraction of overlay meta-data: {(time.perf_counter() - start):.2f} s")


[14:16:04] Overlay 68 points from 2000 in 547 raster layers
[14:16:05] Scanning blocks of 547 layers
[14:16:05] Finding query pixels for 11117604574f51a6657e1a79c880997c (4 layers)
[14:16:05] Finding query pixels for 19c216e0d0d5c40dda2d85333be38eef (5 layers)
[14:16:06] Finding query pixels for 2fdd9b8bca8ffaf994f8fd4cc1482e00 (1 layers)
[14:16:24] Finding query pixels for 3450b653822b31a34fc31516788af952 (11 layers)
[14:16:24] Finding query pixels for 3bafbaefdf706bd9f2957546820622c9 (10 layers)
[14:16:24] Finding query pixels for 4311588f8ad1b173e54b2c56c2b75794 (15 layers)
[14:16:26] Finding query pixels for 74475bf9755bf3957c165bcb63815022 (2 layers)
[14:16:28] Finding query pixels for 74abec3b41ddc086f3d7abacb8ea5b3e (1 layers)
[14:16:28] Finding query pixels for 7cd181496f429fa03497a8de622e6a45 (6 layers)
[14:16:32] Finding query pixels for 8a2478377a7119f5849f41012d8146ec (139 layers)
[14:16:33] Finding query pixels for 8a9008d12d74c4106e26e9ed22377cb2 (1 layers)
[14:16:33] Fin

In [5]:
# Run the overlay and keep the returned table; the result is displayed as the
# cell output and written to disk by the next cell.

# perf_counter is a monotonic clock, so the elapsed time cannot be distorted
# by system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
ovelayed_data = space_overlay.run(
    gdal_opts=GDAL_OPTS,
    max_ram_mb=max_ram_mb,
    out_file_name="ovelayed_year.pq",
)
print(f"Reading overlayed layers: {(time.perf_counter() - start):.2f} s")
ovelayed_data

[16:02:21] Running the overlay for 2000
[16:02:21] Loading and sampling 547 raster layers for group 11117604574f51a6657e1a79c880997c
[16:02:22] Loading and sampling 547 raster layers for group 19c216e0d0d5c40dda2d85333be38eef
[16:02:24] Loading and sampling 547 raster layers for group 2fdd9b8bca8ffaf994f8fd4cc1482e00
[16:02:24] Loading and sampling 547 raster layers for group 3450b653822b31a34fc31516788af952
[16:02:26] Loading and sampling 547 raster layers for group 3bafbaefdf706bd9f2957546820622c9
[16:02:27] Loading and sampling 547 raster layers for group 4311588f8ad1b173e54b2c56c2b75794
[16:02:27] Loading and sampling 547 raster layers for group 74475bf9755bf3957c165bcb63815022
[16:02:28] Loading and sampling 547 raster layers for group 74abec3b41ddc086f3d7abacb8ea5b3e
[16:02:28] Loading and sampling 547 raster layers for group 7cd181496f429fa03497a8de622e6a45
[16:02:29] Loading and sampling 547 raster layers for group 8a2478377a7119f5849f41012d8146ec
[16:02:40] Loading and samplin

Unnamed: 0,time,lat,lon,hzn_dep,id,ref,nuts0,lc_survey,ocd,wv_mcd19a2v061_n_1km_s_YYYY0901_YYYY0930_go_epsg.4326_v20230619,...,pos.openess.bareearth_ensemble_m_120m_s_20000101_20221231_eu_epsg.3035_v20240501,lithology.74.lithology_egdi.1m_c_250m_s_20000101_20221231_eu_epsg.3035_v20240530.tif,CHELSA_pr_11_1981-2010_V.2.1,bioclim.var_chelsa.bio7_m_1km_s_19810101_20101231_eu_epsg.3035_v20230822,ai_chelsa_m_1km_s_19810101_20101231_eu_epsg.3035_v20240531,lithology.113.lithology_egdi.1m_c_250m_s_20000101_20221231_eu_epsg.3035_v20240530.tif,rsds.max_chelsa_m_1km_s_19810101_20101231_eu_epsg.3035_v20240531,twi.bareearth_ensemble_m_120m_s_20000101_20221231_eu_epsg.3035_v20240501,lithology.11.lithology_egdi.1m_c_250m_s_20000101_20221231_eu_epsg.3035_v20240530.tif,lithology.66.lithology_egdi.1m_c_250m_s_20000101_20221231_eu_epsg.3035_v20240530.tif
0,2000,60.301940,5.897585,5.0,04_20_00_2_1289073_1999_2018,GLanCE,,Rock,0.000000,14.0,...,133.0,0.0,4801.0,232.0,10.278099,0.0,17987.0,396.0,0.0,0.0
1,2000,58.354949,6.391051,5.0,04_20_00_2_1289074_1999_2018,GLanCE,,Rock,0.000000,6.0,...,153.0,0.0,2799.0,196.0,5.078445,0.0,20239.0,405.0,0.0,0.0
2,2000,61.133326,4.927744,5.0,04_20_00_2_1289075_1999_2018,GLanCE,,Rock,0.000000,19.0,...,142.0,0.0,3328.0,127.0,12.791590,0.0,18995.0,413.0,0.0,0.0
3,2000,60.114546,6.840408,5.0,04_10_00_2_1289076_1999_2018,GLanCE,,Rock,0.000000,18.0,...,152.0,0.0,3688.0,228.0,9.931071,0.0,20761.0,581.0,0.0,0.0
4,2000,67.609693,15.974328,5.0,04_10_00_2_1289077_1999_2018,GLanCE,,Rock,0.000000,22.0,...,124.0,0.0,1556.0,274.0,4.791328,0.0,18082.0,376.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47281,2020,28.338085,-14.081805,20.0,906,ParcelasCOS,ES,,2.020088,42.0,...,146.0,0.0,219.0,144.0,0.155697,0.0,26659.0,-32768.0,0.0,0.0
47282,2020,28.069033,-14.389319,20.0,907,ParcelasCOS,ES,,2.859832,31.0,...,149.0,0.0,208.0,76.0,0.278570,0.0,26407.0,-32768.0,0.0,0.0
47283,2020,28.068781,-15.610405,20.0,908,ParcelasCOS,ES,,77.543449,27.0,...,150.0,0.0,606.0,148.0,0.469543,0.0,27192.0,-32768.0,0.0,0.0
47284,2020,28.061905,-16.526363,20.0,909,ParcelasCOS,ES,,4.913090,23.0,...,154.0,0.0,483.0,89.0,0.411882,0.0,27550.0,-32768.0,0.0,0.0


In [6]:
# Persist the overlay result next to the input data.
# `folder` already ends with '/', so join the parts instead of concatenating
# them with another '/' (which produced '.../data//001_data_overlayed.pq').
ovelayed_data.to_parquet(os.path.join(folder, '001_data_overlayed.pq'))