# Notebook to demonstrate the new EDAB Python utility functions

In [1]:
# Import libraries
import importlib
import os
import socket
import sys
from pathlib import Path

import numpy as np
import xarray as xr



## Load all utility functions

In [2]:

def get_python_path():
    """Locate the default Python code directory for the current machine.

    The machine is identified by its hostname, which selects a base
    directory from a fixed table; the code directory is ``nadata/python``
    beneath that base.

    Returns:
        pathlib.Path: the code directory, when the hostname is known and the
        directory exists.
        None: when the hostname is not in the table or the directory is
        missing. A message explaining which case occurred is printed.
    """
    # Base directory per known machine, keyed by hostname
    known_hosts = {
        "NECMAC04363461.local": "/Users/kimberly.hyde/Documents/",  # Mac laptop
        "nefscsatdata": "/mnt/EDAB_Archive/",                       # Satdata
        "guihyde": "/mnt/EDAB_Archive/",                            # Kim's Satdata container
    }
    machine = socket.gethostname()

    root = known_hosts.get(machine)
    if root:
        candidate = Path(root) / "nadata/python"
        if candidate.is_dir():
            print(f"Default utilities path: {candidate}")
            return candidate
        print(f"Directory not found: {candidate}")
        return None

    print(f"Unknown hostname: {machine}")
    return None

python_path = get_python_path()
# get_python_path() returns None for an unknown host or a missing directory;
# str(None) would otherwise put the literal entry "None" on sys.path.
if python_path is not None and str(python_path) not in sys.path:
    sys.path.insert(0, str(python_path))

from utilities import date_utilities, gridding_utilities, file_utilities, import_utilities

Default utilities path: /Users/kimberly.hyde/Documents/nadata/python


### Show the default python directory and get the names of the functions

In [3]:
# Show the default Python directory reported by the utilities package
from utilities import get_python_dir
get_python_dir()

'/Users/kimberly.hyde/Documents/nadata/python/utilities'

In [4]:
# List each .py file in the utilities folder together with the functions it provides
from utilities import get_pyfile_functions
get_pyfile_functions()

{'import_utilities': ['get_python_dir',
  'get_pyfile_functions',
  'import_utility_functions'],
 'date_utilities': ['format_date', 'get_dates', 'get_dates_from_source_files'],
 'file_utilities': ['dataset_defaults',
  'product_defaults',
  'get_datasets_source',
  'get_dataset_dirs',
  'get_dataset_products',
  'get_prod_files',
  'parse_dataset_info',
  'get_dataset_vars',
  'set_file_permissions',
  'file_make'],
 'gridding_utilities': ['hash_grid',
  'get_regrid_weights',
  'regrid_dataset',
  'regrid_wrapper']}

## Use file_utilities functions to find datasets and files

In [5]:
from utilities import get_datasets_source
# Auto-detect the default datasets directory
print(get_datasets_source())

# Manually select the 'laptop' datasets directory
print(get_datasets_source(preferred="laptop"))

# Manually select 'server'; falls back to the defaults if it is not available
print(get_datasets_source(preferred="server"))

/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
✗ Preferred input source 'server' not available — falling back to defaults.
/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/


## Show the dataset and product defaults

In [6]:
from utilities import dataset_defaults
# Display the defaults for every dataset.
# NOTE(review): the tuples look like (version, map type, product) — confirm in file_utilities.
dataset_defaults()

{'ACSPO': ('V2.8.1', 'MAPPED_2KM', 'SST'),
 'ACSPONRT': ('V2.8.1', 'MAPPED_2KM', 'SST'),
 'AVHRR': ('V5.3', 'MAPPED_4KM', 'SST'),
 'CORALSST': ('V3.1', 'MAPPED_5KM', 'SST'),
 'GLOBCOLOUR': ('V4.2.1', 'MAPPED_4KM', 'CHL1'),
 'MUR': ('V4.1', 'MAPPED_1KM', 'SST'),
 'OCCCI': ('V6.0', 'MAPPED_4KM', 'CHL'),
 'OISST': ('V2', 'MAPPED_25KM', 'SST')}

In [7]:
from utilities import product_defaults
# Display the defaults for every product.
# NOTE(review): the tuples look like (product, dataset, SOURCE/OUTPUT, period) — confirm in file_utilities.
product_defaults()

{'CHL': ('CHL', 'OCCCI', 'SOURCE', ''),
 'CHLOR_A': ('CHLOR_A', 'OCCCI', 'OUTPUT', 'DAILY'),
 'SST': ('SST', 'ACSPO', 'SOURCE', ''),
 'PPD': ('PPD', 'OCCCI', 'OUTPUT', 'DAILY'),
 'PSC': ('PSC', 'OCCCI', 'OUTPUT', 'DAILY'),
 'PAR': ('PAR', 'GLOBCOLOUR', 'SOURCE', ''),
 'CHL_TEMP': ('CHL1', 'GLOBCOLOUR', 'SOURCE', ''),
 'SST_TEMP': ('SST', 'ACSPONRT', 'SOURCE', ''),
 'CHL_FRONTS': ('CHL_FRONTS', 'OCCCI', 'OUTPUT', 'DAILY'),
 'SST_FRONTS': ('SST_FRONTS', 'ACSPO', 'OUTPUT', 'DAILY'),
 'FRONTS': ('SST_FRONTS', 'ACSPO', 'OUTPUT', 'DAILY')}

## Get dataset directories

In [8]:
from utilities import get_dataset_dirs
# Display the SOURCE_DATA / OUTPUT directories reported for each dataset
get_dataset_dirs()

{'GLOBCOLOUR': {'SOURCE_DATA': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/GLOBCOLOUR/V4.2.1/SOURCE_DATA'},
 'OCCCI': {'SOURCE_DATA': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA',
  'OUTPUT': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT'},
 'CORALSST': {'SOURCE_DATA': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA'}}

In [9]:
from utilities import get_dataset_products

# Walk the nested mapping returned for OCCCI:
# source type -> map type -> product -> path
dirs = get_dataset_products('OCCCI')
for source_type in dirs:
    print(f"\n🔷 Source Type: {source_type}")
    map_types = dirs[source_type]
    for map_type in map_types:
        print(f"  📦 Map Type: {map_type}")
        products = map_types[map_type]
        for product in products:
            path = products[product]
            print(f"    🧪 Product: {product}")
            print(f"      📍 Path: {path}")



🔷 Source Type: SOURCE_DATA
  📦 Map Type: MAPPED_4KM_DAILY
    🧪 Product: CHL
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL
    🧪 Product: RRS
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/RRS
  📦 Map Type: BINNED_1KM_DAILY
    🧪 Product: IOP
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/BINNED_1KM_DAILY/IOP

🔷 Source Type: OUTPUT
  📦 Map Type: MAPPED_4KM
    🧪 Product: PPD
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/MAPPED_4KM/PPD


### Get product files

In [10]:
from utilities import get_prod_files
# List the files for the 'chl' product using its default dataset
get_prod_files('chl')

📦 Found 30 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL


['/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980129-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980128-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980117-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980116-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980123-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1

In [11]:
# SST defaults to the ACSPO dataset (see product_defaults), which is not among the
# dataset directories found on this machine, so no files are returned here.
get_prod_files('SST')


❌ ACSPO' not found in available sources.
⚠ No product structure found for dataset 'ACSPO'.


In [12]:
# Override the default dataset to request SST from CORALSST instead
get_prod_files('SST',dataset='CORALSST')

📦 Found 32 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST


['/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980116.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980122.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980106.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980126.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980112.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980102.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980127.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE

In [13]:
# PPD is listed in product_defaults as an OCCCI 'OUTPUT' product with period 'DAILY'
get_prod_files('PPD')

📦 Found 1 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/MAPPED_4KM/PPD/DAILY


['/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/MAPPED_4KM/PPD/DAILY/D_20220101-OCCCI-V6.0-GLOBAL_MAPPED-PPD.nc']

### Filename dates

In [3]:
# NOTE(review): the function listing shown earlier in this notebook names
# `get_dates_from_source_files` in date_utilities, not `get_source_file_dates`;
# confirm this import still resolves on a fresh kernel.
from utilities import get_source_file_dates
from utilities import get_prod_files

# Collect the file lists for the chl and par products (default datasets)
cfiles = get_prod_files('chl')
pfiles = get_prod_files('par')

# Print the date strings that get_source_file_dates returns for each file list
print(get_source_file_dates(cfiles))
print(get_source_file_dates(pfiles))

📦 Found 30 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL
📦 Found 31 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/GLOBCOLOUR/V4.2.1/SOURCE_DATA/MAPPED_4KM_DAILY/PAR
['19980129', '19980128', '19980117', '19980116', '19980123', '19980122', '19980101', '19980107', '19980106', '19980110', '19980111', '19980124', '19980125', '19980109', '19980108', '19980103', '19980102', '19980120', '19980121', '19980114', '19980115', '19980127', '19980126', '19980119', '19980118', '19980113', '19980112', '19980131', '19980104', '19980105']
['19980119', '19980117', '19980102', '19980126', '19980128', '19980101', '19980114', '19980130', '19980125', '19980123', '19980112', '19980109', '19980107', '19980120', '19980104', '19980111', '19980110', '19980105', '19980121', '19980106', '19980108', '19980113', '19980122', '19980124', '19980131', '19980115', '19980129', '19980127', '19980103', '19980116', '19980118']


### Get dates

In [3]:
from utilities import get_dates
# Request dates for [2023, 2024] with format='datetime'.
# NOTE(review): presumably the list is a year range and the result holds datetime values — confirm in date_utilities.
dates = get_dates([2023,2024],format='datetime')

### Regrid Demo

In [None]:
from utilities import get_prod_files, regrid_wrapper

# Resolve the location of each product (getfilepath=True). The CHL result is
# joined with "*.nc" below, so it is presumably a directory — confirm in file_utilities.
chl_path = get_prod_files("chl", dataset="OCCCI", getfilepath=True)
sst_path = get_prod_files("sst", dataset="CORALSST", getfilepath=True)
par_path = get_prod_files("par", dataset="GLOBCOLOUR", getfilepath=True)

# Regrid SST and PAR, passing the CHL location as the first argument each time
sst_ds = regrid_wrapper(chl_path, sst_path, source_vars=["analysed_sst"])
par_ds = regrid_wrapper(chl_path, par_path, source_vars=["PAR_mean"])

# Open every CHL NetCDF file as a single dataset
chl_nc_pattern = os.path.join(chl_path, "*.nc")
chl_ds = xr.open_mfdataset(chl_nc_pattern)


In [None]:
def compare_grids(ds1, ds2, tol=1e-6):
    """Return True when two datasets have matching ``lat`` and ``lon`` coordinates.

    Args:
        ds1, ds2: objects indexable by "lat" and "lon" whose items expose a
            ``.values`` array (e.g. xarray datasets).
        tol: absolute tolerance passed to ``np.allclose`` as ``atol``
            (numpy's default relative tolerance still applies).

    Returns:
        bool: True only if both coordinates have identical shapes and all
        values agree within tolerance.
    """
    for coord in ("lat", "lon"):
        first = np.asarray(ds1[coord].values)
        second = np.asarray(ds2[coord].values)
        # np.allclose broadcasts its inputs: grids of different size would either
        # raise ValueError or be compared against a repeated value and falsely
        # "match". A shape mismatch is simply "grids differ".
        if first.shape != second.shape:
            return False
        if not np.allclose(first, second, atol=tol):
            return False
    return True

# Report whether each regridded dataset shares CHL's lat/lon coordinates.
# NOTE: the ✅ in the label is printed unconditionally — read the True/False value.
print("✅ SST grid matches CHL:", compare_grids(sst_ds, chl_ds))
print("✅ PAR grid matches CHL:", compare_grids(par_ds, chl_ds))

def grid_summary(ds, name):
    """Print the shape and min/max of a dataset's ``lat`` and ``lon`` coordinates.

    Args:
        ds: object exposing ``lat`` and ``lon`` attributes, each with ``shape``
            and ``min()``/``max()`` results that carry a ``.values`` number.
        name: label shown in the header line.
    """
    print(f"📦 {name} grid:")

    lat = ds.lat
    lat_lo = lat.min().values
    lat_hi = lat.max().values
    print(f"  lat: shape={lat.shape}, range=({lat_lo:.2f}, {lat_hi:.2f})")

    lon = ds.lon
    lon_lo = lon.min().values
    lon_hi = lon.max().values
    print(f"  lon: shape={lon.shape}, range=({lon_lo:.2f}, {lon_hi:.2f})")

# Summarise the CHL grid first, then the two regridded datasets
for grid_ds, grid_name in ((chl_ds, "CHL"), (sst_ds, "SST"), (par_ds, "PAR")):
    grid_summary(grid_ds, grid_name)

In [None]:
# Old steps to do a global import of all functions


def global_import(function_map, verbose=False):
    """Import the named functions from each module into this module's globals.

    Args:
        function_map: mapping of module name -> iterable of function names to
            pull from that module. An empty or None map imports nothing.
        verbose: accepted for backward compatibility; it does not currently
            change what is printed. NOTE(review): decide whether the
            per-function success lines should be gated on this flag.

    Returns:
        dict: name -> object for every function that was imported. Modules
        that cannot be imported and names that are missing are reported with
        a printed message and skipped.
    """
    imported = {}
    if not function_map:
        return imported

    for module_name, function_names in function_map.items():
        try:
            module = importlib.import_module(module_name)
        except ModuleNotFoundError:
            print(f"❌ Module '{module_name}' could not be imported.")
            continue

        for name in function_names:
            # Only the attribute lookup is guarded, so an unrelated
            # AttributeError cannot be misreported as a missing function.
            try:
                func = getattr(module, name)
            except AttributeError:
                print(f"⚠ Function '{name}' not found in '{module_name}'")
                continue

            globals()[name] = func
            imported[name] = func
            print(f"✅ Imported: {name} from {module_name}")

    return imported
    

# Get the default Python code directory for this machine (None if it cannot be resolved)
util_path = get_python_path()

# Add to sys.path. Entries must be strings: a Path object never compares equal to
# the existing string entries and is ignored by the import system, so convert first.
if util_path is not None and str(util_path) not in sys.path:
    sys.path.append(str(util_path))

# Find the default utility functions and import into global
# NOTE(review): earlier cells import from the `utilities` package; confirm that
# `import_utilities` is importable as a top-level module from this path.
from import_utilities import get_pyfile_functions
functions = get_pyfile_functions()
gl = global_import(functions)

# Import local calc functions
calc_functions = get_pyfile_functions(os.getcwd(),"calc")
gl = global_import(calc_functions)

