In [1]:
import requests
import sys
from os.path import join, expanduser
from pathlib import Path

import boto3
import fsspec
import numpy as np
import s3fs
import xarray as xr
import zarr
from dask.distributed import Client

user_home_dir = expanduser('~')
sys.path.insert(0,join(user_home_dir,'ECCOv4-py'))
import ecco_v4_py as ecco

In [2]:
# Attach to a Dask scheduler that is already running on this machine.
# NOTE(review): the port in this address looks session-specific; it will most
# likely have to be updated before the notebook can be re-run from a fresh kernel.
client = Client(
    address="tcp://127.0.0.1:42659",
)
# Leave the client as the last expression so the notebook renders its summary.
client

0,1
Connection method: Direct,
Dashboard: http://127.0.0.1:8787/status,

0,1
Comm: tcp://127.0.0.1:42659,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: 58 minutes ago,Total memory: 61.46 GiB

0,1
Comm: tcp://127.0.0.1:38015,Total threads: 2
Dashboard: http://127.0.0.1:32955/status,Memory: 15.37 GiB
Nanny: tcp://127.0.0.1:38763,
Local directory: /tmp/dask-scratch-space/worker-40e5wf6a,Local directory: /tmp/dask-scratch-space/worker-40e5wf6a
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 383.55 MiB,Spilled bytes: 0 B
Read bytes: 23.11 kiB,Write bytes: 15.51 kiB

0,1
Comm: tcp://127.0.0.1:33451,Total threads: 2
Dashboard: http://127.0.0.1:38079/status,Memory: 15.37 GiB
Nanny: tcp://127.0.0.1:41399,
Local directory: /tmp/dask-scratch-space/worker-cg0rfb70,Local directory: /tmp/dask-scratch-space/worker-cg0rfb70
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 380.64 MiB,Spilled bytes: 0 B
Read bytes: 21.13 kiB,Write bytes: 13.52 kiB

0,1
Comm: tcp://127.0.0.1:42521,Total threads: 2
Dashboard: http://127.0.0.1:34073/status,Memory: 15.37 GiB
Nanny: tcp://127.0.0.1:45567,
Local directory: /tmp/dask-scratch-space/worker-oo9imz6z,Local directory: /tmp/dask-scratch-space/worker-oo9imz6z
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 381.08 MiB,Spilled bytes: 0 B
Read bytes: 22.75 kiB,Write bytes: 15.15 kiB

0,1
Comm: tcp://127.0.0.1:40555,Total threads: 2
Dashboard: http://127.0.0.1:34921/status,Memory: 15.37 GiB
Nanny: tcp://127.0.0.1:34419,
Local directory: /tmp/dask-scratch-space/worker-fn6dhh_5,Local directory: /tmp/dask-scratch-space/worker-fn6dhh_5
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 380.10 MiB,Spilled bytes: 0 B
Read bytes: 24.38 kiB,Write bytes: 16.78 kiB


In [3]:
# Use this for the netcdf files stored on an s3 bucket
def get_credentials(use_earthdata: bool = False):
    """
    Obtain AWS S3 credentials, either from the local boto3 credential chain or
    from the PO.DAAC credential service.

    With ``use_earthdata=False`` (the default) the credentials that boto3
    resolves for the current session are used. With ``use_earthdata=True`` the
    credentials are requested from the PO.DAAC service at
    https://archive.podaac.earthdata.nasa.gov/s3credentials; per the PO.DAAC
    tutorial (https://podaac.github.io/tutorials/external/July_2022_Earthdata_Webinar.html)
    this relies on your EDL (Earthdata Login) credential being available in the
    .netrc file.

    Returns:
    =======

    credentials: a dictionary holding at least the keys 'accessKeyId',
        'secretAccessKey' and 'sessionToken'

    Raises:
    ======

    RuntimeError: boto3 could not find any AWS credentials (use_earthdata=False)
    requests.HTTPError: the PO.DAAC service answered with an error status
        (use_earthdata=True)
    """
    # PO.DAAC / NASA EarthData path.
    # (Original notes: NASA EarthData hosts the ECCO V4r4 fields.)
    if use_earthdata:
        response = requests.get(
            'https://archive.podaac.earthdata.nasa.gov/s3credentials',
            timeout=60,  # never hang the kernel on an unresponsive service
        )
        # Surface HTTP failures directly instead of a confusing JSON/Key error later.
        response.raise_for_status()
        return response.json()

    # Local AWS credential chain.
    # (Original notes: a 'public' AWS s3 bucket hosts the V4r5 fields; they will
    # eventually move to PO.DAAC.)
    resolved = boto3.Session().get_credentials()
    if resolved is None:
        raise RuntimeError(
            "boto3 could not locate any AWS credentials; configure them "
            "(environment variables, ~/.aws/credentials or an instance role) "
            "or call get_credentials(use_earthdata=True)."
        )

    # Take one consistent snapshot so that a credential refresh cannot happen
    # between reading the key, the secret and the token.
    frozen = resolved.get_frozen_credentials()
    return {
        'secretAccessKey': frozen.secret_key,
        'accessKeyId': frozen.access_key,
        'sessionToken': frozen.token,
    }
    

def init_S3FileSystem(use_earthdata: bool = False, requester_pays: bool = True):
    """
    Build an authenticated s3fs file system together with the credentials used for it.

    The credentials come from ``get_credentials(use_earthdata=...)``. When
    ``use_earthdata`` is true the ``requester_pays`` argument is ignored and the
    file system is always created with ``requester_pays=False``.

    Returns:
    =======

    s3: an ``s3fs.S3FileSystem`` created with ``anon=False`` and the fetched key,
        secret and session token
    credentials: the dictionary returned by ``get_credentials``

    """
    credentials = get_credentials(use_earthdata=use_earthdata)

    fs_options = dict(
        # requester-pays is forced off on the EarthData path
        requester_pays=False if use_earthdata else requester_pays,
        anon=False,
        key=credentials['accessKeyId'],
        secret=credentials['secretAccessKey'],
        token=credentials['sessionToken'],
    )

    return s3fs.S3FileSystem(**fs_options), credentials


def list_s3_fsspec_ecco(mzz_dir: Path) -> None:
    """Print the sorted paths of every ``*.json`` file found directly in ``mzz_dir``."""
    json_files = [entry for entry in mzz_dir.glob("*.json")]
    ordered = np.sort(json_files)
    print(ordered)


def get_s3_fsspec_ecco(data_file: str, mzz_dir: Path, **kwargs) -> xr.Dataset:
    """
    Open one reference JSON from ``mzz_dir`` as an xarray dataset via the zarr engine.

    Parameters:
    ==========

    data_file: file name of the reference JSON inside ``mzz_dir``
    mzz_dir: directory that holds the reference JSON files
    **kwargs: forwarded unchanged to ``xr.open_dataset`` (e.g. ``chunks``)

    Returns:
    =======

    ds: the dataset returned by ``xr.open_dataset``
    """
    reference_json = str(mzz_dir / data_file)
    # The referenced data is read from S3 with signed, requester-pays requests.
    s3_options = {"anon":False, "requester_pays":True}

    reference_fs = fsspec.filesystem(
        "reference",
        fo=reference_json,
        remote_protocol="s3",
        remote_options=s3_options,
        skip_instance_cache=True,
    )
    # NOTE(review): the flag is flipped after construction, presumably because
    # the zarr FsspecStore expects an asynchronous file system -- confirm.
    reference_fs.asynchronous = True

    return xr.open_dataset(
        zarr.storage.FsspecStore(reference_fs),
        engine='zarr',
        consolidated=False,
        **kwargs,
    )


def llc_to_latlon(
    xc,
    yc,
    field,
    min_lat: float = -90,
    max_lat: float = 90,
    dlat: float = 1,
    min_lon: float = -180,
    max_lon: float = 180,
    dlon: float = 1,
    method: str | None = None,
    radius: float | None = None,
):
    """
    Resample ``field`` onto a regular lat/lon grid with ``ecco.resample_to_latlon``
    and wrap the result in a labelled ``xr.DataArray``.

    Parameters:
    ==========

    xc, yc: source grid coordinates handed to ``ecco.resample_to_latlon``
        (presumably cell-centre longitudes/latitudes -- confirm)
    field: the field to resample; must expose ``dims``, ``time`` and ``attrs``,
        and ``field["Z"]`` when it has a ``"k"`` dimension
    min_lat, max_lat, dlat: latitude bounds and spacing of the target grid
    min_lon, max_lon, dlon: longitude bounds and spacing of the target grid
    method: mapping method; ``None`` selects ``"nearest_neighbor"``
    radius: radius of influence; ``None`` selects ``120000``

    Returns:
    =======

    An ``xr.DataArray`` with dims ``("time", "lat", "lon")``, plus ``"depth"``
    when ``field`` has a ``"k"`` dimension, carrying the attributes of ``field``.
    """
    mapping_method = "nearest_neighbor" if method is None else method
    influence_radius = 120000 if radius is None else radius

    lon_grid, lat_grid, _, _, resampled = ecco.resample_to_latlon(
        xc,
        yc,
        field,
        min_lat,
        max_lat,
        dlat,
        min_lon,
        max_lon,
        dlon,
        fill_value=np.nan,
        mapping_method=mapping_method,
        radius_of_influence=influence_radius,
    )

    # 1-D axes: latitudes from the first column, longitudes from the first row
    # of the returned 2-D centre grids.
    dims = ["time", "lat", "lon"]
    coords = dict(
        time=field.time,
        lat=lat_grid[:, 0],
        lon=lon_grid[0],
    )
    # Fields with a vertical index additionally get a trailing depth axis.
    if "k" in field.dims:
        dims.append("depth")
        coords["depth"] = field["Z"]

    resampled_da = xr.DataArray(data=resampled, dims=dims, coords=coords)
    resampled_da.attrs = field.attrs
    return resampled_da

In [4]:
# Authenticated S3 file system built from the local AWS credentials, with
# requester-pays enabled.
s3, credentials = init_S3FileSystem(
    use_earthdata=False,
    requester_pays=True,
)
# Directory holding the reference JSON files that are opened further below.
mzz_dir = Path("/efs_ecco/mzz-jsons-V4r5/MZZ_day_mean_native")

In [5]:
# Surface freshwater-flux dataset: tile 4, first 100 time steps.
ds_precip = (
    get_s3_fsspec_ecco(
        "OCEAN_AND_ICE_SURFACE_FW_FLUX_day_mean_native_llc090_ECCOV4r5.json",
        mzz_dir,
        chunks=dict(time=1, tile=13, i=90, j=90, k=50),
    )
    .sel(tile=4)
    .isel(time=slice(100))
)

# Temperature/salinity dataset: tile 4, first vertical level (k=0), first 100 time steps.
ds_ts = (
    get_s3_fsspec_ecco(
        "OCEAN_TEMPERATURE_SALINITY_day_mean_native_llc090_ECCOV4r5.json",
        mzz_dir,
        chunks=dict(time=1, tile=13, i=90, j=90, k=50),
    )
    .sel(tile=4)
    .isel(k=0)
    .isel(time=slice(100))
)

In [6]:
# Show the subset temperature/salinity dataset (notebook rich display).
ds_ts

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 31.64 kiB 31.64 kiB Shape (90, 90) (90, 90) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",90  90,

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(90, 90, 4)","(90, 90, 4)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 126.56 kiB 126.56 kiB Shape (90, 90, 4) (90, 90, 4) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",4  90  90,

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(90, 90, 4)","(90, 90, 4)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 31.64 kiB 31.64 kiB Shape (90, 90) (90, 90) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",90  90,

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 31.64 kiB 31.64 kiB Shape (90, 90) (90, 90) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",90  90,

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(90, 90, 4)","(90, 90, 4)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 126.56 kiB 126.56 kiB Shape (90, 90, 4) (90, 90, 4) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",4  90  90,

Unnamed: 0,Array,Chunk
Bytes,126.56 kiB,126.56 kiB
Shape,"(90, 90, 4)","(90, 90, 4)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 31.64 kiB 31.64 kiB Shape (90, 90) (90, 90) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",90  90,

Unnamed: 0,Array,Chunk
Bytes,31.64 kiB,31.64 kiB
Shape,"(90, 90)","(90, 90)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
Array Chunk Bytes 4 B 4 B Shape () () Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (2,) (2,) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,200 B,200 B
Shape,"(50,)","(50,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 200 B 200 B Shape (50,) (50,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",50  1,

Unnamed: 0,Array,Chunk
Bytes,200 B,200 B
Shape,"(50,)","(50,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,204 B,204 B
Shape,"(51,)","(51,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 204 B 204 B Shape (51,) (51,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",51  1,

Unnamed: 0,Array,Chunk
Bytes,204 B,204 B
Shape,"(51,)","(51,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,200 B,200 B
Shape,"(50,)","(50,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 200 B 200 B Shape (50,) (50,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",50  1,

Unnamed: 0,Array,Chunk
Bytes,200 B,200 B
Shape,"(50,)","(50,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.56 kiB,16 B
Shape,"(100, 2)","(1, 2)"
Dask graph,100 chunks in 3 graph layers,100 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 1.56 kiB 16 B Shape (100, 2) (1, 2) Dask graph 100 chunks in 3 graph layers Data type datetime64[ns] numpy.ndarray",2  100,

Unnamed: 0,Array,Chunk
Bytes,1.56 kiB,16 B
Shape,"(100, 2)","(1, 2)"
Dask graph,100 chunks in 3 graph layers,100 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.09 MiB,31.64 kiB
Shape,"(100, 90, 90)","(1, 90, 90)"
Dask graph,100 chunks in 5 graph layers,100 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.09 MiB 31.64 kiB Shape (100, 90, 90) (1, 90, 90) Dask graph 100 chunks in 5 graph layers Data type float32 numpy.ndarray",90  90  100,

Unnamed: 0,Array,Chunk
Bytes,3.09 MiB,31.64 kiB
Shape,"(100, 90, 90)","(1, 90, 90)"
Dask graph,100 chunks in 5 graph layers,100 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.09 MiB,31.64 kiB
Shape,"(100, 90, 90)","(1, 90, 90)"
Dask graph,100 chunks in 5 graph layers,100 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.09 MiB 31.64 kiB Shape (100, 90, 90) (1, 90, 90) Dask graph 100 chunks in 5 graph layers Data type float32 numpy.ndarray",90  90  100,

Unnamed: 0,Array,Chunk
Bytes,3.09 MiB,31.64 kiB
Shape,"(100, 90, 90)","(1, 90, 90)"
Dask graph,100 chunks in 5 graph layers,100 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# Resample the freshwater flux onto the default 1-degree grid over
# 20S-10N, 90E-120E and label the result.
da_oceFWflx = llc_to_latlon(
    ds_precip["XC"],
    ds_precip["YC"],
    ds_precip["oceFWflx"],
    min_lat=-20,
    max_lat=10,
    min_lon=90,
    max_lon=120,
).rename("surface_freshwater_flux")

In [7]:
# Resample THETA (already restricted to k=0) onto the default 1-degree grid
# over 20S-10N, 90E-120E and label the result.
da_theta = llc_to_latlon(
    ds_ts["XC"],
    ds_ts["YC"],
    ds_ts["THETA"],
    min_lat=-20,
    max_lat=10,
    min_lon=90,
    max_lon=120,
).rename("sea_surface_temperature")

In [8]:
# Resample SALT (already restricted to k=0) onto the default 1-degree grid
# over 20S-10N, 90E-120E and label the result.
da_salt = llc_to_latlon(
    ds_ts["XC"],
    ds_ts["YC"],
    ds_ts["SALT"],
    min_lat=-20,
    max_lat=10,
    min_lon=90,
    max_lon=120,
).rename("sea_surface_salinity")

In [9]:
# Combine the three named, resampled fields into a single dataset.
ds = xr.merge(
    objects=[
        da_oceFWflx,
        da_theta,
        da_salt,
    ],
)

In [10]:
# Persist the merged dataset as a netCDF file on the shared file system.
ds.to_netcdf(
    path="/efs_ecco/ascherer/datasets/surface_sal_temp_fw_daily_latlon_10N-20S_90E-120E.nc",
)