In [2]:
# Inspect one ATL24 HDF5 file (depth) from downloads/depth/sample

from pathlib import Path
import os, re, textwrap

# 1) Locate downloads/depth/sample walking up from the current notebook
def find_depth_sample_dir(start: Path | None = None) -> Path:
    start = (start or Path.cwd()).resolve()
    for parent in [start, *start.parents]:
        cand = parent / "downloads" / "depth" / "sample"
        if cand.is_dir():
            return cand
    raise FileNotFoundError(f"Could not find 'downloads/depth/sample' starting from {start}")

# Pick the file to inspect: the first HDF5 file (in sorted path order) found
# directly inside the sample folder.
sample_dir = find_depth_sample_dir()
files = sorted([*sample_dir.glob("*.h5"), *sample_dir.glob("*.hdf5")])
if not files:
    # Raise explicitly instead of using `assert`: assertions are stripped under
    # `python -O`, which would turn an empty folder into an opaque IndexError
    # on the indexing line below.
    raise FileNotFoundError(f"No .h5/.hdf5 files in {sample_dir}")
h5_path = files[0]  # open the first one (change if you want)

def fmt_size(b):
    """Format a byte count as a human-readable string with 1024-based units.

    Args:
        b: Size in bytes (int or float). Negative values are scaled by
            magnitude and keep their sign.

    Returns:
        A string such as ``"94.3 MB"``: one decimal place, thousands
        separators, and a unit from B, KB, MB, GB, TB. Values too large for
        GB are always reported in TB.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    size = b
    for unit in units[:-1]:
        # Decide on the value as it will be *displayed* (one decimal place):
        # otherwise 1048575 bytes would render as "1,024.0 KB" rather than
        # rolling over to "1.0 MB". abs() lets negative sizes scale as well.
        if round(abs(size), 1) < 1024:
            return f"{size:,.1f} {unit}"
        size /= 1024.0
    return f"{size:,.1f} {units[-1]}"

# Announce the folder that was searched and the chosen file with its on-disk size.
print(f"📂 Folder: {sample_dir}")
print(f"📄 Opening: {h5_path.name}  ({fmt_size(h5_path.stat().st_size)})\n")

# 2) Try opening with xarray (optional; may fail if it's not netCDF-like)
# This step is best-effort: every failure is reported and the cell carries on
# to the h5py inspection below.
try:
    import xarray as xr
except Exception as e:  # broad on purpose: a broken install can raise more than ImportError
    print("⚠️ xarray not available or failed to import:", e)
else:
    tried, last_err, ds = [], None, None
    for eng in ("netcdf4", "h5netcdf", "scipy"):
        try:
            # Keep only the open call inside the try so that a later display
            # problem is not mistaken for "this engine cannot read the file".
            ds = xr.open_dataset(h5_path, engine=eng, decode_cf=False, mask_and_scale=False)
        except Exception as e:
            # Backends fail in different ways (missing package, unsupported
            # format), so remember the error and move on to the next engine.
            tried.append(eng)
            last_err = e
        else:
            break

    if ds is None:
        print(f"⚠️ xarray could not open this file with engines {tried}. "
              f"Last error:\n{textwrap.fill(str(last_err), 100)}")
    else:
        print(f"✅ xarray.open_dataset succeeded with engine='{eng}'")
        try:
            display(ds)
        except Exception as e:
            print("⚠️ xarray opened the file but the dataset could not be displayed:", e)
        finally:
            # Always release the file handle, and say so if that fails rather
            # than swallowing the error silently.
            try:
                ds.close()
            except Exception as e:
                print("⚠️ Closing the xarray dataset failed:", e)

# 3) Always show the HDF5 structure with h5py (groups, datasets, shapes, dtypes, root attrs)
import h5py

MAX_ITEMS = 300  # keep output manageable
print("\n🔎 HDF5 tree (groups and datasets):")
with h5py.File(h5_path, "r") as f:
    # Root attributes (values longer than 160 characters are shortened)
    if f.attrs:
        print("\n— Root attributes —")
        for k, v in f.attrs.items():
            v_str = str(v)
            if len(v_str) > 160:
                v_str = v_str[:157] + "..."
            print(f"  * {k}: {v_str}")

    print("\n— Structure —")
    counter = {"n": 0}  # mutable counter to avoid global/nonlocal

    def visit(name, obj):
        """Print one tree entry; stop the walk once MAX_ITEMS were printed."""
        if counter["n"] >= MAX_ITEMS:
            # visititems() aborts the traversal at the first non-None return
            # value and hands that value back to the caller. Returning None
            # here would keep walking the rest of the file for nothing.
            return True
        if isinstance(obj, h5py.Dataset):
            print(f"[D] {name}  shape={obj.shape}  dtype={obj.dtype}")
        else:
            print(f"[G] {name}")
        counter["n"] += 1
        return None

    if f.visititems(visit):
        # Tell the reader the listing is incomplete instead of ending silently.
        print(f"  ... (listing truncated after {MAX_ITEMS} items)")

    # Heuristic: highlight datasets with lat/lon/time/depth-like names
    print("\n— Name hints (possible geovars) —")
    # The multi-letter keys match as substrings. The single letters x / y must
    # stand alone as a token (not touching another letter or digit); a bare
    # "x|y" would flag every path that merely contains those letters, e.g.
    # everything under "ancillary_data/".
    pat = re.compile(
        r"lat|lon|long|time|depth|elev|(?<![a-z0-9])[xy](?![a-z0-9])",
        re.IGNORECASE,
    )
    hints = []

    def collect(name, obj):
        """Record (path, shape, dtype) of every dataset whose path matches pat."""
        if isinstance(obj, h5py.Dataset) and pat.search(name):
            hints.append((name, obj.shape, str(obj.dtype)))

    f.visititems(collect)
    if hints:
        for n, sh, dt in hints[:40]:
            print(f"  ~ {n}  shape={sh}  dtype={dt}")
        if len(hints) > 40:
            print(f"  ... and {len(hints)-40} more")
    else:
        print("  (no obvious matches)")

print("\nDone.")


📂 Folder: C:\Users\Crist\Desktop\NASA\tag-and-satellite-data-model\downloads\depth\sample
📄 Opening: ATL24_20190614091833_11780301_006_02_001_01.h5  (94.3 MB)

✅ xarray.open_dataset succeeded with engine='netcdf4'



🔎 HDF5 tree (groups and datasets):

— Structure —
[G] ancillary_data
[D] ancillary_data/atlas_sdp_gps_epoch  shape=()  dtype=float64
[D] ancillary_data/data_end_utc  shape=()  dtype=object
[D] ancillary_data/data_start_utc  shape=()  dtype=object
[D] ancillary_data/end_cycle  shape=()  dtype=int32
[D] ancillary_data/end_delta_time  shape=()  dtype=float64
[D] ancillary_data/end_geoseg  shape=()  dtype=int32
[D] ancillary_data/end_gpssow  shape=()  dtype=float64
[D] ancillary_data/end_gpsweek  shape=()  dtype=int32
[D] ancillary_data/end_orbit  shape=()  dtype=int32
[D] ancillary_data/end_region  shape=()  dtype=int32
[D] ancillary_data/end_rgt  shape=()  dtype=int32
[D] ancillary_data/granule_end_utc  shape=()  dtype=object
[D] ancillary_data/granule_start_utc  shape=()  dtype=object
[D] ancillary_data/release  shape=()  dtype=object
[D] ancillary_data/resource  shape=()  dtype=object
[D] ancillary_data/sliderule_commit  shape=()  dtype=object
[D] ancillary_data/sliderule_environment 