# **This notebook aims to load data from [cmems data store](https://data.marine.copernicus.eu/products) and adapt it to the pangeo-fish format**

## **1. Copernicus actual data format, from the following [notebook](https://github.com/IAOCEA/pangeo-fish/blob/main/notebooks/pangeo-fish.ipynb)**

In [1]:
# Import necessary libraries and modules.
import xarray as xr
from pint_xarray import unit_registry as ureg
from pangeo_fish.io import open_tag
import hvplot.xarray

In [3]:
# catalog_url specifies the URL for the catalog for reference data used.
catalog_url = "https://data-taos.ifremer.fr/kerchunk/ref-copernicus.yaml"

# bbox, bounding box, defines the latitude and longitude range for the analysis area.


# relative_depth_threshold defines the acceptable fish depth relative to the maximum tag depth.
# It determines whether the fish can be considered to be in a certain location based on depth.
relative_depth_threshold = 0.8

In [4]:
# Import necessary libraries
import intake
from pangeo_fish.io import open_copernicus_catalog

# Open and clean reference model
cat = intake.open_catalog(catalog_url)
# true_model = open_copernicus_catalog(cat)

In [5]:
chunks=None

In [6]:
true_model = (
    cat.data(type="TEM", chunks=None)
    .to_dask()
    .rename({"thetao": "TEMP"})
    .get(["TEMP"])
    .assign_coords({"time": lambda ds: ds["time"].astype("datetime64[ns]")})).assign(
            {
                "XE": cat.data(type="SSH", chunks=chunks).to_dask().get("zos"),
                "H0": (
                    cat.data_tmp(type="mdt", chunks=chunks)
                    .to_dask()
                    .get("deptho")
                    .rename({"latitude": "lat", "longitude": "lon"})
                ),
                "mask": (
                    cat.data_tmp(type="mdt", chunks=chunks)
                    .to_dask()
                    .get("mask")
                    .rename({"latitude": "lat", "longitude": "lon"})
                ),
            }
        )

  'dims': dict(self._ds.dims),
  'dims': dict(self._ds.dims),


In [7]:
true_model

## **2. Loading data from this [cmems dataset](https://data.marine.copernicus.eu/product/IBI_MULTIYEAR_PHY_005_002/description)**

In [2]:
import copernicusmarine

In [3]:
name = "cmems_mod_ibi_phy_my_0.083deg"

catalogue = copernicusmarine.describe(
include_datasets=True,
contains = [name],
)

ERROR - 2024-04-11T09:14:25Z - Client version 1.0.10 is not compatible with current backend service. Please update to the latest client version.
ERROR - 2024-04-11T09:14:25Z - Client version 1.0.10 is not compatible with current backend service. Please update to the latest client version.


In [10]:
for value in catalogue['products'][0]['datasets']:
    print(value["dataset_id"])

cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m
cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m
cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m
cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m
cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m
cmems_mod_ibi_phy_my_0.083deg-3D_static


In [11]:
all_uri_dict = {
    "cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m": {
        "arco-geo-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m_202012/timeChunked.zarr",
        "arco-time-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m_202012/geoChunked.zarr"
    },
    "cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m": {
        "arco-geo-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m_202211/timeChunked.zarr",
        "arco-time-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m_202211/geoChunked.zarr"
    },
    "cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m": {
        "arco-geo-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/timeChunked.zarr",
        "arco-time-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/geoChunked.zarr"
    },
    "cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m": {
        "arco-geo-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m_202012/timeChunked.zarr",
        "arco-time-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m_202012/geoChunked.zarr"
    },
    "cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m": {
        "arco-geo-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/timeChunked.zarr",
        "arco-time-series": "https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/geoChunked.zarr"
    },
    "cmems_mod_ibi_phy_my_0.083deg-3D_static": {
        "static-arco": "https://s3.waw3-1.cloudferro.com:443/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_static_202012--ext--bathy/static.zarr"
    }
}


In [12]:
for cat in all_uri_dict:
    print(cat,'\n')
    for service in all_uri_dict[cat].keys():
        if not service == 'original-files':
            print(service,"\n",all_uri_dict[cat][service])
            print("\n")
            ds = xr.open_dataset(all_uri_dict[cat][service],engine="zarr")
            display(ds.data_vars)
            print('\n')

cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m 

arco-geo-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m_202012/timeChunked.zarr




Data variables:
    mlotst   (time, latitude, longitude) float64 212GB ...
    thetao   (time, latitude, longitude) float64 212GB ...
    ubar     (time, latitude, longitude) float64 212GB ...
    uo       (time, latitude, longitude) float64 212GB ...
    vbar     (time, latitude, longitude) float64 212GB ...
    vo       (time, latitude, longitude) float64 212GB ...
    zos      (time, latitude, longitude) float64 212GB ...



arco-time-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-2D_PT1H-m_202012/geoChunked.zarr




Data variables:
    mlotst   (time, latitude, longitude) float64 212GB ...
    thetao   (time, latitude, longitude) float64 212GB ...
    ubar     (time, latitude, longitude) float64 212GB ...
    uo       (time, latitude, longitude) float64 212GB ...
    vbar     (time, latitude, longitude) float64 212GB ...
    vo       (time, latitude, longitude) float64 212GB ...
    zos      (time, latitude, longitude) float64 212GB ...



cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m 

arco-geo-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m_202211/timeChunked.zarr




Data variables:
    bottomT_mean                (time, latitude, longitude) float32 5MB ...
    bottomT_standard_deviation  (time, latitude, longitude) float32 5MB ...
    mlotst_mean                 (time, latitude, longitude) float32 5MB ...
    mlotst_standard_deviation   (time, latitude, longitude) float32 5MB ...
    so_mean                     (time, elevation, latitude, longitude) float32 250MB ...
    so_standard_deviation       (time, elevation, latitude, longitude) float32 250MB ...
    thetao_mean                 (time, elevation, latitude, longitude) float32 250MB ...
    thetao_standard_deviation   (time, elevation, latitude, longitude) float32 250MB ...
    uo_mean                     (time, elevation, latitude, longitude) float32 250MB ...
    uo_standard_deviation       (time, elevation, latitude, longitude) float32 250MB ...
    vo_mean                     (time, elevation, latitude, longitude) float32 250MB ...
    vo_standard_deviation       (time, elevation, latitud



arco-time-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D-climatology_P1M-m_202211/geoChunked.zarr




Data variables:
    bottomT_mean                (time, latitude, longitude) float32 5MB ...
    bottomT_standard_deviation  (time, latitude, longitude) float32 5MB ...
    mlotst_mean                 (time, latitude, longitude) float32 5MB ...
    mlotst_standard_deviation   (time, latitude, longitude) float32 5MB ...
    so_mean                     (time, elevation, latitude, longitude) float32 250MB ...
    so_standard_deviation       (time, elevation, latitude, longitude) float32 250MB ...
    thetao_mean                 (time, elevation, latitude, longitude) float32 250MB ...
    thetao_standard_deviation   (time, elevation, latitude, longitude) float32 250MB ...
    uo_mean                     (time, elevation, latitude, longitude) float32 250MB ...
    uo_standard_deviation       (time, elevation, latitude, longitude) float32 250MB ...
    vo_mean                     (time, elevation, latitude, longitude) float32 250MB ...
    vo_standard_deviation       (time, elevation, latitud



cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m 

arco-geo-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/timeChunked.zarr




Data variables:
    bottomT  (time, latitude, longitude) float64 9GB ...
    mlotst   (time, latitude, longitude) float64 9GB ...
    so       (time, elevation, latitude, longitude) float64 442GB ...
    thetao   (time, elevation, latitude, longitude) float64 442GB ...
    uo       (time, elevation, latitude, longitude) float64 442GB ...
    vo       (time, elevation, latitude, longitude) float64 442GB ...
    zos      (time, latitude, longitude) float64 9GB ...



arco-time-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/geoChunked.zarr




Data variables:
    bottomT  (time, latitude, longitude) float64 9GB ...
    mlotst   (time, latitude, longitude) float64 9GB ...
    so       (time, elevation, latitude, longitude) float64 442GB ...
    thetao   (time, elevation, latitude, longitude) float64 442GB ...
    uo       (time, elevation, latitude, longitude) float64 442GB ...
    vo       (time, elevation, latitude, longitude) float64 442GB ...
    zos      (time, latitude, longitude) float64 9GB ...



cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m 

arco-geo-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m_202012/timeChunked.zarr




Data variables:
    bottomT  (time, latitude, longitude) float64 290MB ...
    mlotst   (time, latitude, longitude) float64 290MB ...
    so       (time, elevation, latitude, longitude) float64 15GB ...
    thetao   (time, elevation, latitude, longitude) float64 15GB ...
    uo       (time, elevation, latitude, longitude) float64 15GB ...
    vo       (time, elevation, latitude, longitude) float64 15GB ...
    zos      (time, latitude, longitude) float64 290MB ...



arco-time-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1M-m_202012/geoChunked.zarr




Data variables:
    bottomT  (time, latitude, longitude) float64 290MB ...
    mlotst   (time, latitude, longitude) float64 290MB ...
    so       (time, elevation, latitude, longitude) float64 15GB ...
    thetao   (time, elevation, latitude, longitude) float64 15GB ...
    uo       (time, elevation, latitude, longitude) float64 15GB ...
    vo       (time, elevation, latitude, longitude) float64 15GB ...
    zos      (time, latitude, longitude) float64 290MB ...



cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m 

arco-geo-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/timeChunked.zarr




Data variables:
    bottomT  (time, latitude, longitude) float64 24MB ...
    mlotst   (time, latitude, longitude) float64 24MB ...
    so       (time, elevation, latitude, longitude) float64 1GB ...
    thetao   (time, elevation, latitude, longitude) float64 1GB ...
    uo       (time, elevation, latitude, longitude) float64 1GB ...
    vo       (time, elevation, latitude, longitude) float64 1GB ...
    zos      (time, latitude, longitude) float64 24MB ...



arco-time-series 
 https://s3.waw3-1.cloudferro.com/mdl-arco-geo-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1Y-m_202211/geoChunked.zarr




Data variables:
    bottomT  (time, latitude, longitude) float64 24MB ...
    mlotst   (time, latitude, longitude) float64 24MB ...
    so       (time, elevation, latitude, longitude) float64 1GB ...
    thetao   (time, elevation, latitude, longitude) float64 1GB ...
    uo       (time, elevation, latitude, longitude) float64 1GB ...
    vo       (time, elevation, latitude, longitude) float64 1GB ...
    zos      (time, latitude, longitude) float64 24MB ...



cmems_mod_ibi_phy_my_0.083deg-3D_static 

static-arco 
 https://s3.waw3-1.cloudferro.com:443/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_static_202012--ext--bathy/static.zarr




Data variables:
    deptho       (latitude, longitude) float32 417kB ...
    deptho_lev   (latitude, longitude) float32 417kB ...
    mask_thetao  (elevation, latitude, longitude) int8 5MB ...
    mask_uo      (elevation, latitude, longitude) int8 5MB ...
    mask_vo      (elevation, latitude, longitude) int8 5MB ...





In [13]:
from pprint import pprint
import xarray as xr
import copernicusmarine as copernicusmarine

In [14]:
uris_by_key = {}  # Dictionary to store URIs by key

def get_copernicus_data(name,format='arco-geo-series' ):
    #format='arco-geo-series' or 'arco-time-series'
    catalogue = copernicusmarine.describe(
    include_datasets=True,
    contains = [name],
    )
    for value in catalogue['products'][0]['datasets']:
        dataset_id = value['dataset_id']
        if any(substring in dataset_id for substring in ['static', '2D_PT1H-m', '3D_PT1H-m','-3D_P1D-m']):
            uris = []
            for service in value['versions'][0]['parts'][0]['services']:
                service_name = service['service_type']['service_name']
                if service_name in [format,"arco-time-series","static-arco"]:
                    uris.append(service['uri'])
            uris_by_key[dataset_id] = uris
            
    
#    bbox = {"latitude": [48, 49], "longitude": [-6, -4]} 
    #.sel(latitude=slice(*bbox["latitude"]), longitude=slice(*bbox["longitude"]))
    print("thetao", uris_by_key[name+'-3D_P1D-m'][0])
    thetao=(
        xr.open_dataset(uris_by_key[name+'-3D_P1D-m'][0],
        engine='zarr')[['thetao']]
    #.sel(latitude=slice(*bbox["latitude"]), longitude=slice(*bbox["longitude"]))
    )
    print("zos", uris_by_key[name+'-3D_P1D-m'][0])
    zos=(
        xr.open_dataset(uris_by_key[name+'-3D_P1D-m'][0],
        engine='zarr').zos
         #.sel(latitude=slice(*bbox["latitude"]), longitude=slice(*bbox["longitude"]))
        )
    print("deptho", uris_by_key[name+'-3D_static'][0])
    deptho=(
        xr.open_dataset(uris_by_key[name+'-3D_static'][0]
        ,engine="zarr").deptho
        #.sel(latitude=slice(*bbox["latitude"]), longitude=slice(*bbox["longitude"]))
        )

    deptho["latitude"] = thetao["latitude"]
    mask=deptho.isnull()
    print('zos min,max value',zos.shape,zos.latitude[0:2].data,zos.latitude[-3:-1].data)
    print('deptho min,max value',deptho.shape,deptho.latitude[0:2].data,deptho.latitude[-3:-1].data)
    print('thetao min,max value',thetao.thetao.shape,thetao.latitude[0:2].data,thetao.latitude[-3:-1].data)

    

    ds=(
        thetao.rename({"thetao": "TEMP"})
        .assign(
                {
                    "XE": zos,
                    "H0": deptho,
                    "mask": mask,
                }
        )).rename({"latitude": "lat", "longitude": "lon", "elevation": "depth"})
    return ds
    

In [15]:
model = get_copernicus_data(name='cmems_mod_ibi_phy_my_0.083deg')

ERROR - 2024-04-10T15:42:17Z - Client version 1.0.10 is not compatible with current backend service. Please update to the latest client version.
ERROR - 2024-04-10T15:42:17Z - Client version 1.0.10 is not compatible with current backend service. Please update to the latest client version.
thetao https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/timeChunked.zarr
zos https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/timeChunked.zarr
deptho https://s3.waw3-1.cloudferro.com:443/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_static_202012--ext--bathy/static.zarr
zos min,max value (10589, 361, 289) [26.       26.083334] [55.833332 55.916664]
deptho min,max value (361, 289) [26.       26.083334] [55.833332 55.916664]
thetao min,max value (10589, 50, 361, 289) [26.       26.083334] [55.833332 55.916664]


In [16]:
model

In [4]:
thetao = xr.open_dataset("https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/timeChunked.zarr",engine="zarr")[["thetao"]]

In [5]:
thetao

In [18]:
zos = xr.open_dataset("https://s3.waw3-1.cloudferro.com/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m_202012/timeChunked.zarr",engine="zarr").zos

In [12]:
deptho = xr.open_dataset("https://s3.waw3-1.cloudferro.com:443/mdl-arco-time-032/arco/IBI_MULTIYEAR_PHY_005_002/cmems_mod_ibi_phy_my_0.083deg-3D_static_202012--ext--bathy/static.zarr",engine="zarr")

In [14]:
deptho.latitude

In [20]:
thetao.thetao

In [34]:
deptho.latitude [-1] - thetao.latitude[-1] 

### **Plotting the model data**

In [21]:
model.TEMP.isel(time=0,depth=-1).hvplot(x='lon',y='lat')

In [22]:
model.H0.hvplot()

In [23]:
model.XE.isel(time=0).hvplot()

# **3. Adding the dynamic depth and bathymetry**
This section is under developpement

In [24]:
from pangeo_fish.io import broadcast_variables
model["depth"] = abs(model["depth"]).sortby(variables="depth")
model = model.assign(
        {
            "dynamic_depth": lambda ds: (ds["depth"] + ds["XE"]).assign_attrs(
                {"units": "m", "positive": "down"}
            ),
            "dynamic_bathymetry": lambda ds: (ds["H0"] + ds["XE"]).assign_attrs(
                {"units": "m", "positive": "down"}
            ),
        }
    ).pipe(broadcast_variables, {"lat": "latitude", "lon": "longitude"})

MemoryError: Unable to allocate 412. GiB for an array with shape (50, 10589, 361, 289) and data type float64