## Tech preamble

In [1]:
import os
import pycurl
import intake
from urllib.parse import urlparse
from pathlib import Path

## Choose location to store the data

In [2]:
# Publish the absolute path of the local data store through the environment
# variable that the download step reads.
os.environ["ESM_VFC_DATA_DIR"] = os.fspath(
    Path("..", "esm_vfc_data_store").resolve()
)

## Use curl to retrieve the data

We assume that there's a list of `data_urls` given in the metadata section of each entry.

In [3]:
def fetch_data(catalog_entry, force_download=False):
    """Download all files listed in a catalog entry's ``data_urls`` metadata.

    Files are stored in ``$ESM_VFC_DATA_DIR/<catalog name>/<file name>``, where
    the file name is the last component of the URL path. Each download is
    first written to a ``<file name>.part`` file and only moved to its final
    name after the transfer succeeded, so an aborted or failed transfer never
    leaves a truncated file that a later call would mistake for a complete one.

    Parameters
    ----------
    catalog_entry
        Object providing ``cat.name`` (used as sub-directory name) and
        ``metadata["data_urls"]`` (iterable of URL strings).
    force_download : bool, optional
        If True, download even when the target file already exists.
        Defaults to False.

    Raises
    ------
    KeyError
        If ``ESM_VFC_DATA_DIR`` is not set or the entry has no ``data_urls``.
    ValueError
        If a URL has no file name in its path.
    pycurl.error
        If a transfer fails (including HTTP status codes >= 400).
    """
    # set output directory and ensure it exists
    output_dir = Path(os.environ["ESM_VFC_DATA_DIR"]) / catalog_entry.cat.name
    output_dir.mkdir(parents=True, exist_ok=True)

    # for all urls, get data
    for url in catalog_entry.metadata["data_urls"]:

        file_name = Path(urlparse(url).path).name
        if not file_name:
            # Without a file name the target would be the directory itself.
            raise ValueError(f"Cannot derive a file name from URL {url!r}")
        output_file = output_dir / file_name

        if output_file.exists() and not force_download:
            print(f"No need to download {output_file}")
            continue

        print(f"downloading {output_file} ... ", end="")
        partial_file = output_file.with_name(output_file.name + ".part")
        c = pycurl.Curl()
        try:
            with open(partial_file, "wb") as f:
                c.setopt(c.URL, url)
                # Follow redirects instead of saving the redirect response.
                c.setopt(c.FOLLOWLOCATION, True)
                # Raise on HTTP errors instead of saving the error page.
                c.setopt(c.FAILONERROR, True)
                c.setopt(c.WRITEDATA, f)
                c.perform()
        except BaseException:
            # Also covers KeyboardInterrupt: never keep a truncated download.
            print("failed")
            if partial_file.exists():
                partial_file.unlink()
            raise
        finally:
            c.close()

        # Transfer finished and the file is closed: move it into place.
        partial_file.replace(output_file)
        print("done")

## Open the catalog and fetch the data

In [4]:
# Open the intake catalog file for the NEMO GYRE test case.
cat = intake.open_catalog(
    "../catalogs/NEMO_GYRE_Test.yaml"
)

In [5]:
# Iterating over the catalog yields entry names; look each entry up and
# download its files, overwriting any files that are already present.
for entry_name in cat:
    entry = cat[entry_name]
    fetch_data(entry, force_download=True)

downloading /Users/wrath/src/github.com/ESM-VFC/esm-vfc-catalogs/esm_vfc_data_store/NEMO_GYRE_Test/GYRE_5d_00010101_00011230_grid_T.nc ... done
downloading /Users/wrath/src/github.com/ESM-VFC/esm-vfc-catalogs/esm_vfc_data_store/NEMO_GYRE_Test/GYRE_5d_00010101_00011230_grid_U.nc ... done
downloading /Users/wrath/src/github.com/ESM-VFC/esm-vfc-catalogs/esm_vfc_data_store/NEMO_GYRE_Test/GYRE_5d_00010101_00011230_grid_V.nc ... done
downloading /Users/wrath/src/github.com/ESM-VFC/esm-vfc-catalogs/esm_vfc_data_store/NEMO_GYRE_Test/GYRE_5d_00010101_00011230_grid_W.nc ... done
downloading /Users/wrath/src/github.com/ESM-VFC/esm-vfc-catalogs/esm_vfc_data_store/NEMO_GYRE_Test/mesh_mask.nc ... done


## Open the datasets

In [6]:
# Map every catalog entry name to the result of its `to_dask()` call.
datasets = dict(
    (entry_name, source.to_dask()) for entry_name, source in cat.items()
)

In [7]:
# Display the dictionary of opened datasets, keyed by catalog entry name.
datasets

{'NEMO_GYRE_Test_grid_T': <xarray.Dataset>
 Dimensions:            (deptht: 31, tbnds: 2, time_counter: 72, x: 32, y: 22)
 Coordinates:
   * deptht             (deptht) float32 4.9752655 15.096427 ... 4451.2593
   * time_counter       (time_counter) float64 2.16e+05 6.48e+05 ... 3.089e+07
 Dimensions without coordinates: tbnds, x, y
 Data variables:
     nav_lon            (y, x) float32 dask.array<chunksize=(22, 32), meta=np.ndarray>
     nav_lat            (y, x) float32 dask.array<chunksize=(22, 32), meta=np.ndarray>
     time_counter_bnds  (time_counter, tbnds) float64 dask.array<chunksize=(72, 2), meta=np.ndarray>
     votemper           (time_counter, deptht, y, x) float32 dask.array<chunksize=(72, 31, 22, 32), meta=np.ndarray>
     vosaline           (time_counter, deptht, y, x) float32 dask.array<chunksize=(72, 31, 22, 32), meta=np.ndarray>
     sosstsst           (time_counter, y, x) float32 dask.array<chunksize=(72, 22, 32), meta=np.ndarray>
     sosaline           (time_coun