# Demo Notebook to Test Metadata Functions

In [6]:
import pandas as pd
import os
import xarray as xr
import socket
import sys
from pathlib import Path
from typing import Dict, Any

In [2]:

def get_python_path():
    """
    Locate the default Python utilities directory for the current machine.

    The machine is identified by its hostname, which is looked up in a
    table of known hosts to find a base directory. The utilities directory
    is "<base>/nadata/python".

    Returns the utilities directory as a Path when the host is known and
    the directory exists. Otherwise prints a diagnostic message and
    returns None.
    """
    # Known machines and the base directory that holds the code on each
    known_hosts = {
        "NECMAC04363461.local": "/Users/kimberly.hyde/Documents/",  # Mac laptop
        "nefscsatdata": "/mnt/EDAB_Archive/",                       # Satdata
        "guihyde": "/mnt/EDAB_Archive/"                             # Kim's Satdata container
    }

    host = socket.gethostname()
    root = known_hosts.get(host)

    if root:
        utilities_dir = Path(root) / "nadata/python"
        if utilities_dir.is_dir():
            print(f"Default utilities path: {utilities_dir}")
            return utilities_dir

        print(f"Directory not found: {utilities_dir}")
        return None

    print(f"Unknown hostname: {host}")
    return None

# Put the machine-specific utilities directory at the front of the import path
# so the local `utilities` package can be imported below.
python_path = get_python_path()
# get_python_path() returns None for an unknown host or a missing directory;
# skip the insert in that case instead of adding the literal string "None".
if python_path is not None and str(python_path) not in sys.path:
    sys.path.insert(0, str(python_path))

from utilities import date_utilities, gridding_utilities, file_utilities, import_utilities, calc_daylength, metadata_utilities

Default utilities path: /Users/kimberly.hyde/Documents/nadata/python


## Read Metadata Spreadsheet

In [5]:
from utilities import get_python_dir

# NOTE(review): `dir` shadows the Python builtin of the same name for the rest
# of the session; kept as-is because later cells may read it.
# Presumably get_python_dir(resources=True) returns the project's resources
# directory -- confirm against the utilities package.
dir = get_python_dir(resources=True)
metapath = os.path.join(dir,'metadata','EDAB_metadata.xlsx')

# sheet_name=None makes read_excel return a dict of {sheet name: DataFrame}
# covering every sheet in the workbook.
metadict = pd.read_excel(metapath,sheet_name=None)
# Stack all sheets into a single table with a fresh 0..n-1 index.
allmeta = pd.concat(metadict.values(), ignore_index=True)
print(allmeta.head())

                     TAG                                              VALUE  \
0            Conventions                          CF-1.11, COARDS, ACDD-1.3   
1  Metadata _Conventions                     Unidata Dataset Discovery v1.0   
2        acknowledgement  The data are sponsored by NOAA and may be free...   
3            institution  DOC | NOAA | National Marine Fisheries Service...   
4           creator_type                                             person   

  Name Email  URL Initials geospatial_bounds summary history Dataset  ...  \
0  NaN   NaN  NaN      NaN               NaN     NaN     NaN     NaN  ...   
1  NaN   NaN  NaN      NaN               NaN     NaN     NaN     NaN  ...   
2  NaN   NaN  NaN      NaN               NaN     NaN     NaN     NaN  ...   
3  NaN   NaN  NaN      NaN               NaN     NaN     NaN     NaN  ...   
4  NaN   NaN  NaN      NaN               NaN     NaN     NaN     NaN  ...   

  units valid_min valid_max comment reference wavelengths plot

In [None]:
def read_metadata_lookup(
    excel_path: str,
    key_column: str = 'Attribute',
    value_column: str = 'Value',
) -> Dict[str, Dict[str, Any]]:
    """
    Read an Excel workbook of metadata mappings, one mapping per sheet.

    Parameters:
        excel_path: Path to the Excel workbook.
        key_column: Name of the column holding attribute names
            (default 'Attribute').
        value_column: Name of the column holding attribute values
            (default 'Value').

    Returns:
        A dictionary {sheet_name: {attribute_name: value, ...}, ...} with one
        entry per sheet, in workbook order.

    Raises:
        ValueError: If any sheet lacks either of the two required columns.
    """
    metadata_dict = {}
    # Use the workbook as a context manager so its file handle is released
    # even when a sheet fails validation part-way through.
    with pd.ExcelFile(excel_path) as xls:
        for sheet in xls.sheet_names:
            df = xls.parse(sheet)
            if key_column not in df.columns or value_column not in df.columns:
                raise ValueError(
                    f"Sheet '{sheet}' must contain '{key_column}' and '{value_column}' columns."
                )
            metadata_dict[sheet] = dict(zip(df[key_column], df[value_column]))
    return metadata_dict

In [None]:
def extract_netcdf_metadata(nc_path: str) -> Dict[str, Any]:
    """
    Return the global attributes of a NetCDF file as a plain dictionary.

    The file is opened with xarray and closed again before returning; the
    result is a copy of the dataset's `attrs` mapping.
    """
    with xr.open_dataset(nc_path) as dataset:
        global_attrs = {name: value for name, value in dataset.attrs.items()}
    return global_attrs

In [None]:
def update_netcdf_metadata(nc_path: str, updates: Dict[str, Any], output_path: str = None) -> None:
    """
    Update or add global attributes in a NetCDF file.

    Parameters:
        nc_path: Path of the NetCDF file to read.
        updates: Attribute names and values to merge into the global
            attributes; existing attributes with the same name are replaced.
        output_path: Where to write the result. If omitted (or empty), the
            original file is overwritten.

    Returns:
        None. The updated dataset is written to disk.
    """
    output_path = output_path or nc_path

    if Path(output_path).resolve() == Path(nc_path).resolve():
        # Overwriting in place: a lazily opened dataset still holds the source
        # file open, and writing to that same file fails or corrupts it.
        # load_dataset reads everything into memory and closes the file first.
        ds = xr.load_dataset(nc_path)
        ds.attrs.update(updates)
        ds.to_netcdf(output_path)
        return

    # Separate output file: keep lazy loading so the data is streamed from the
    # source while being written.
    with xr.open_dataset(nc_path) as ds:
        ds.attrs.update(updates)
        ds.to_netcdf(output_path)

In [None]:
def apply_metadata_updates(nc_path: str, excel_path: str, sheet: str = 'global', output_path: str = None) -> None:
    """
    Apply the metadata stored on one sheet of an Excel workbook to a NetCDF file.

    The whole workbook is read with read_metadata_lookup, the mapping for
    `sheet` is selected, and it is passed to update_netcdf_metadata together
    with `nc_path` and `output_path`.

    Raises KeyError if `sheet` is not one of the workbook's sheets.
    """
    lookup_by_sheet = read_metadata_lookup(excel_path)

    if sheet in lookup_by_sheet:
        update_netcdf_metadata(nc_path, lookup_by_sheet[sheet], output_path)
        return

    raise KeyError(f"Sheet '{sheet}' not found in Excel file.")

In [None]:
# Example invocation: copy the attributes from the "global" sheet of the
# lookup workbook onto the NetCDF file and write the result to a new file,
# leaving the input untouched.
# NOTE(review): the file names look like placeholders -- point them at real
# files before running. Every sheet in the workbook must have 'Attribute' and
# 'Value' columns, otherwise read_metadata_lookup raises ValueError.
apply_metadata_updates(
    nc_path="data.nc",
    excel_path="metadata_lookup.xlsx",
    sheet="global",  # or any other sheet name
    output_path="data_updated.nc"
)