In [None]:
!pip install hda -U
!pip install xarray

In [None]:
# Directory (relative to the working directory) that holds the downloaded
# WEkEO NetCDF files; used for both the download and the later file glob.
wekeo_dir = "sample_data/wekeo_dir"

In [None]:
import pandas as pd

# Cap rendered DataFrame output at 20 rows so large tables do not flood the
# notebook.
pd.options.display.max_rows = 20

In [None]:
import os

# Read the WEkEO credentials from the environment variables named "user" and
# "password" instead of hardcoding them. Either value is None when its
# variable is not set.
user = os.environ.get("user")
password = os.environ.get("password")

In [None]:
from hda import Client, Configuration

# Bundle the credentials read from the environment and hand them to the HDA
# configuration; the resulting client is used for the search and download.
credentials = {"user": user, "password": password}
conf = Configuration(**credentials)
hda_client = Client(config=conf)

In [None]:
# Search request sent to the HDA client: 10-daily 1 km NDVI (PROBA-V, VITO,
# archived/nominal products) inside the bounding box and time window below.
query = dict(
    dataset_id="EO:CLMS:DAT:CLMS_GLOBAL_NDVI_1KM_V2_10DAILY_NETCDF",
    productType="NDVI",
    productionStatus="ARCHIVED",
    acquisitionType="NOMINAL",
    platform="PROBA-V",
    processingCenter="VITO",
    resolution="1000",
    # NOTE(review): presumably [west, south, east, north] in degrees - confirm
    # against the HDA API documentation.
    bbox=[-10, 30, 180, 90],
    startdate="2017-12-31T04:00:00.000Z",
    enddate="2024-09-01T06:00:00.000Z",
    # Paging parameters: page size and index of the first returned item.
    itemsPerPage=200,
    startIndex=0,
)

In [None]:
# Make sure the download target exists before anything is written to it; on a
# fresh runtime the directory has not been created yet, and the later glob
# over f"./{wekeo_dir}/*.nc" only finds files that actually landed there.
# NOTE(review): hda may silently fall back to the current working directory
# when download_dir is missing - confirm against the installed hda version.
os.makedirs(wekeo_dir, exist_ok=True)

# Run the search described by `query` and download every matching product.
matches = hda_client.search(query)
matches.download(download_dir=wekeo_dir)

In [None]:
import xarray as xr

# Open every downloaded NetCDF file in the download directory as a single
# dataset.
nc_glob = f"./{wekeo_dir}/*.nc"
ds = xr.open_mfdataset(nc_glob)

In [None]:
# Down-sample the grid: average blocks of 56 latitude x 70 longitude cells.
# boundary='pad' keeps incomplete edge blocks instead of raising.
coarsen_ds = ds.coarsen(lat=56, lon=70, boundary='pad').mean()

# Integer YYYYMM key per time step, used below as the monthly grouping key.
time_coord = coarsen_ds['time']
coarsen_ds['year_month'] = time_coord.dt.year * 100 + time_coord.dt.month

# Neighbour features: NDVI shifted by one step along a single dimension.
# Any NaN in the shifted array (including the edge introduced by the shift)
# is replaced by the cell's own NDVI value.
# NOTE(review): the T/B (top/bottom) and R/L (right/left) naming assumes a
# particular ordering of the lat/lon coordinates - confirm against the data.
ndvi = coarsen_ds['NDVI']
neighbour_shifts = {
    'OLD_NDVI': {'time': 1},   # previous time step
    'RNDVI': {'lon': -1},
    'LNDVI': {'lon': 1},
    'TNDVI': {'lat': -1},
    'BNDVI': {'lat': 1},
}
for feature_name, offset in neighbour_shifts.items():
    coarsen_ds[feature_name] = ndvi.shift(**offset).fillna(ndvi)

# Average all time steps that fall into the same year/month.
grouped_ds = coarsen_ds.groupby('year_month').mean()

In [None]:
# Persist the monthly-aggregated dataset through the h5netcdf backend.
# invalid_netcdf=True lets h5netcdf write content that is not strictly valid
# netCDF rather than raising.
output_path = "ndvi_train.nc4"
grouped_ds.to_netcdf(
    output_path,
    engine="h5netcdf",
    invalid_netcdf=True,
)