# Tanager HDF5 explorer

Browse the two Tanager HDF5 products (radiance + surface reflectance). This notebook only reports structure/types and attributes; it does not print dataset values.

In [None]:

from pathlib import Path
import os, sys, h5py, numpy as np

NOTEBOOK_ROOT = Path.cwd().resolve()
REPO_ROOT = NOTEBOOK_ROOT
while not (REPO_ROOT / 'scripts').exists() and REPO_ROOT.parent != REPO_ROOT:
    REPO_ROOT = REPO_ROOT.parent
if not (REPO_ROOT / 'scripts').exists():
    raise RuntimeError('Could not locate repository root containing scripts directory.')

os.environ.setdefault('PYTHONPATH', str(REPO_ROOT))
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# <<< Update these paths >>>
RAD_PATH = REPO_ROOT / 'test_data' / 'tanager' / 'GHG-plumes' / '20250423_134026_31_4001' / 'basic_radiance_hdf5__20250423_134026_31_4001_basic_radiance_hdf5.h5'
SR_PATH  = REPO_ROOT / 'test_data' / 'tanager' / 'GHG-plumes' / '20250423_134026_31_4001' / 'basic_sr_hdf5__20250423_134026_31_4001_basic_sr_hdf5.h5'
MAX_DEPTH = 3  # change to None for full tree

print('Notebook root:', NOTEBOOK_ROOT)
print('Repo root:', REPO_ROOT)
print('Radiance path:', RAD_PATH)
print('Surface reflectance path:', SR_PATH)
for p in [RAD_PATH, SR_PATH]:
    if not p.exists():
        print('WARNING: missing', p)


Notebook root: /mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/notebooks
Repo root: /mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas
Radiance path: /mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_radiance_hdf5__20250423_134026_31_4001_basic_radiance_hdf5.h5
Surface reflectance path: /mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_sr_hdf5__20250423_134026_31_4001_basic_sr_hdf5.h5


In [23]:

def walk_tree(h5obj, max_depth=None):
    lines = []
    def _rec(name, obj, depth):
        if max_depth is not None and depth > max_depth:
            return
        indent = '  ' * depth
        label = name.split('/')[-1] if name else '/'
        if isinstance(obj, h5py.Dataset):
            lines.append(f"{indent}- {label} [dataset] shape={obj.shape} dtype={obj.dtype}")
        else:
            lines.append(f"{indent}+ {label} [group]")
            for key, child in obj.items():
                _rec(f"{name}/{key}" if name else key, child, depth+1)
    _rec('', h5obj, 0)
    return ''.join(lines)

def list_attrs(obj):
    return {k: obj.attrs[k] for k in obj.attrs}


In [24]:

for label, path in [('Radiance', RAD_PATH), ('Surface reflectance', SR_PATH)]:
    if not path.exists():
        print(f"{label}: file missing -> {path}")
        continue
    print(f"=== {label} ({path}) ===")
    with h5py.File(path, 'r') as f:
        print(walk_tree(f, max_depth=MAX_DEPTH))
        attrs = list_attrs(f)
        if attrs:
            print('Root attributes:')
            for k,v in attrs.items():
                print(f"  {k}: {v}")
        else:
            print('Root attributes: none')


=== Radiance (/mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_radiance_hdf5__20250423_134026_31_4001_basic_radiance_hdf5.h5) ===
+ / [group]  + HDFEOS [group]    + ADDITIONAL [group]      + FILE_ATTRIBUTES [group]    + SWATHS [group]      + HYP [group]  + HDFEOS INFORMATION [group]    - StructMetadata.0 [dataset] shape=() dtype=|S32000
Root attributes: none
=== Surface reflectance (/mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_sr_hdf5__20250423_134026_31_4001_basic_sr_hdf5.h5) ===
+ / [group]  + HDFEOS [group]    + ADDITIONAL [group]      + FILE_ATTRIBUTES [group]    + SWATHS [group]      + HYP [group]  + HDFEOS INFORMATION [group]    - StructMetadata.0 [dataset] shape=() dtype=|S32000
Root attributes: none


In [25]:

# Search for water vapour-related datasets/attributes in both files
KEYWORDS = ['vapor', 'vapour', 'wv']
for label, path in [('Radiance', RAD_PATH), ('Surface reflectance', SR_PATH)]:
    if not path.exists():
        print(f"{label}: missing -> {path}")
        continue
    print(f"=== Searching {label} ({path}) ===")
    with h5py.File(path, 'r') as f:
        hits = []
        def _walk(name, obj):
            lname = name.lower()
            if any(k in lname for k in KEYWORDS):
                kind = 'dataset' if isinstance(obj, h5py.Dataset) else 'group'
                hits.append((name, kind, obj.shape if isinstance(obj, h5py.Dataset) else None))
            for ak in obj.attrs:
                akl = ak.lower()
                if any(k in akl for k in KEYWORDS):
                    hits.append((name + ' @' + ak, 'attr', obj.attrs[ak]))
        f.visititems(_walk)
        if hits:
            for h in hits:
                print(h)
        else:
            print('No vapor-related names found.')


=== Searching Radiance (/mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_radiance_hdf5__20250423_134026_31_4001_basic_radiance_hdf5.h5) ===
No vapor-related names found.
=== Searching Surface reflectance (/mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_sr_hdf5__20250423_134026_31_4001_basic_sr_hdf5.h5) ===
('HDFEOS/SWATHS/HYP/Data Fields/column_water_vapour', 'dataset', (640, 607))


In [26]:

# Summaries: median SZA (deg) from radiance; median WV (g/cm^2) from SR
import numpy as np, h5py
if RAD_PATH.exists():
    with h5py.File(RAD_PATH, 'r') as f:
        sza = np.asarray(f['HDFEOS/SWATHS/HYP/Data Fields/sun_zenith'])
        print('SZA deg mean/median/min/max:', float(np.nanmean(sza)), float(np.nanmedian(sza)), float(np.nanmin(sza)), float(np.nanmax(sza)))
else:
    print('Radiance path missing:', RAD_PATH)
if SR_PATH.exists():
    with h5py.File(SR_PATH, 'r') as f:
        if 'HDFEOS/SWATHS/HYP/Data Fields/column_water_vapour' in f:
            wv = np.asarray(f['HDFEOS/SWATHS/HYP/Data Fields/column_water_vapour'])
            print('WV g/cm^2 mean/median/min/max:', float(np.nanmean(wv)), float(np.nanmedian(wv)), float(np.nanmin(wv)), float(np.nanmax(wv)))
        else:
            print('column_water_vapour not found in SR file')
else:
    print('Surface reflectance path missing:', SR_PATH)


SZA deg mean/median/min/max: 41.84391403198242 41.843955993652344 41.64802169799805 42.03989791870117
WV g/cm^2 mean/median/min/max: 2.4816229343414307 2.4799857139587402 2.3340890407562256 2.658592700958252


In [27]:

# Choose which file to inspect: 'rad' or 'sr'
FILE_SELECTOR = 'sr'
# Default path per file; override if needed
if FILE_SELECTOR == 'rad':
    INSPECT_PATH = 'HDFEOS/SWATHS/HYP/Data Fields/toa_radiance'
else:
    INSPECT_PATH = 'HDFEOS/SWATHS/HYP/Data Fields/surface_reflectance'

sel_path = RAD_PATH if FILE_SELECTOR == 'rad' else SR_PATH
if not sel_path.exists():
    print('Selected file missing:', sel_path)
else:
    print(f'Inspecting {sel_path}Path: {INSPECT_PATH}')
    with h5py.File(sel_path, 'r') as f:
        if INSPECT_PATH not in f:
            print('Path not found. Available Data Fields keys:')
            df = f['HDFEOS/SWATHS/HYP/Data Fields']
            print(list(df.keys()))
        else:
            obj = f[INSPECT_PATH]
            print('Type:', type(obj))
            if isinstance(obj, h5py.Dataset):
                print('Shape:', obj.shape, 'dtype:', obj.dtype)
            else:
                print('Members:', list(obj.keys()))
            attrs = list_attrs(obj)
            if attrs:
                print('Attributes:')
                for k,v in attrs.items():
                    print(f"  {k}: {v}")
            else:
                print('No attributes')


Inspecting /mnt/d/Lavoro/Assegno_Ricerca_Sapienza/CLEAR_UP/CH4_detection/Matched_filter_approach/hygas/test_data/tanager/GHG-plumes/20250423_134026_31_4001/basic_sr_hdf5__20250423_134026_31_4001_basic_sr_hdf5.h5Path: HDFEOS/SWATHS/HYP/Data Fields/surface_reflectance
Type: <class 'h5py._hl.dataset.Dataset'>
Shape: (426, 640, 607) dtype: float32
Attributes:
  Unit: Unitless
  _FillValue: -9999.0
  fwhm: [5.39 5.42 5.45 5.48 5.52 5.55 5.58 5.6  5.63 5.66 5.69 5.72 5.75 5.77
 5.8  5.82 5.85 5.88 5.9  5.92 5.95 5.97 5.99 6.02 6.04 6.06 6.08 6.1
 6.13 6.15 6.17 6.19 6.2  6.22 6.24 6.26 6.28 6.3  6.31 6.33 6.35 6.36
 6.38 6.39 6.41 6.42 6.44 6.45 6.46 6.48 6.49 6.5  6.52 6.53 6.54 6.55
 6.56 6.57 6.59 6.6  6.61 6.62 6.62 6.63 6.64 6.65 6.66 6.67 6.68 6.68
 6.69 6.7  6.71 6.71 6.72 6.72 6.73 6.74 6.74 6.75 6.75 6.76 6.76 6.76
 6.77 6.77 6.78 6.78 6.78 6.79 6.79 6.79 6.79 6.8  6.8  6.8  6.8  6.8
 6.8  6.8  6.8  6.81 6.81 6.81 6.81 6.81 6.81 6.81 6.81 6.8  6.8  6.8
 6.8  6.8  6.8  6.8  6.79 6.79