# Notebook to demonstrate the new EDAB Python utility functions

In [1]:
# Import libraries
import os
import xarray as xr

## Load all utility functions

In [2]:

# Import the initial (bootstrap) utility function
from import_utilities import import_utility_functions

# Find all utility functions; the result is a dict mapping
# function name -> function object (see the cell output)
functions = import_utility_functions()

# Load all utility functions into the notebook's global namespace so the
# cells below can call them directly by name
globals().update(functions)

# Display the mapping to show which functions are now available
functions

{'get_python_dir': <function import_utilities.get_python_dir()>,
 'get_pyfile_functions': <function import_utilities.get_pyfile_functions(directory=None, search_string=None)>,
 'import_utility_functions': <function import_utilities.import_utility_functions(directory=None, function_map=None)>,
 'dataset_defaults': <function getfiles.dataset_defaults()>,
 'product_defaults': <function getfiles.product_defaults()>,
 'get_datasets_source': <function getfiles.get_datasets_source(preferred=None)>,
 'get_dataset_dirs': <function getfiles.get_dataset_dirs(dataset=None)>,
 'get_dataset_products': <function getfiles.get_dataset_products(dataset, dataset_map=None)>,
 'get_prod_files': <function getfiles.get_prod_files(prod, dataset=None, dataset_version=None, dataset_type=None, dataset_map=None, prod_type=None, period=None)>}

### Show the default python directory and get the names of the functions

In [7]:
# Get the default Python utilities directory (the path is displayed as the cell output)
get_python_dir()

'/Users/kimberly.hyde/Documents/nadata/python/utilities/py'

In [8]:
# Get the names of the functions defined in each .py file in the utilities folder
# (displayed as a dict mapping module name -> list of function names)
get_pyfile_functions()

{'import_utilities': ['get_python_dir',
  'get_pyfile_functions',
  'import_utility_functions'],
 'getfiles': ['dataset_defaults_source',
  'product_defaults',
  'get_datasets_source',
  'get_dataset_dirs',
  'get_dataset_products',
  'get_prod_files']}

## Use getfiles.py functions to find datasets and files

In [9]:
# Auto-detect the default datasets directory (no preference given)
print(get_datasets_source())

# Manually select the 'laptop' datasets directory
print(get_datasets_source(preferred="laptop"))

# Manually select 'server'; if that source is not available the function
# falls back to the defaults (the laptop directory in the output below)
print(get_datasets_source(preferred="server"))

✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
✓ Using specified input directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
✗ Preferred input source 'server' not available — falling back to defaults.
✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/


## Show the dataset and product defaults

In [7]:
# Default datasets source location: the call prints a status line and the
# returned directory path is displayed as the cell output
get_datasets_source()

✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/


'/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/'

In [4]:
# Show the default settings for each dataset, keyed by dataset name.
# NOTE(review): each value looks like (version, map type, product) — confirm
# the field meanings against getfiles.py.
dataset_defaults()

{'ACSPO': ('V2.8.1', 'MAPPED_2KM_DAILY', 'SST'),
 'ACSPONRT': ('V2.8.1', 'MAPPED_2KM_DAILY', 'SST'),
 'AVHRR': ('V5.3', 'MAPPED_4KM_DAILY', 'SST'),
 'CORALSST': ('V3.1', 'MAPPED_5KM_DAILY', 'SST'),
 'GLOBCOLOUR': ('V4.2.1', 'MAPPED_4KM_DAILY', 'CHL1'),
 'MUR': ('V4.1', 'MAPPED_1KM_DAILY', 'SST'),
 'OCCCI': ('V6.0', 'MAPPED_4KM_DAILY', 'CHL'),
 'OISST': ('V2', 'MAPPED_25KM_DAILY', 'SST')}

In [5]:
# Show the default settings for each product, keyed by product name.
# NOTE(review): each value looks like (product, dataset, SOURCE/OUTPUT type,
# period) — confirm the field meanings against getfiles.py.
product_defaults()

{'CHL': ('CHL', 'OCCCI', 'SOURCE', ''),
 'CHLOR_A': ('CHLOR_A', 'OCCCI', 'OUTPUT', 'DAILY'),
 'SST': ('SST', 'ACSPO', 'SOURCE', ''),
 'PPD': ('PPD', 'OCCCI', 'OUTPUT', 'DAILY'),
 'PSC': ('PSC', 'OCCCI', 'OUTPUT', 'DAILY'),
 'PAR': ('PAR', 'GLOBCOLOUR', 'SOURCE', ''),
 'CHL_TEMP': ('CHL1', 'GLOBCOLOUR', 'SOURCE', ''),
 'SST_TEMP': ('SST', 'ACSPONRT', 'SOURCE', ''),
 'CHL_FRONTS': ('CHL_FRONTS', 'OCCCI', 'OUTPUT', 'DAILY'),
 'SST_FRONTS': ('SST_FRONTS', 'ACSPO', 'OUTPUT', 'DAILY'),
 'FRONTS': ('SST_FRONTS', 'ACSPO', 'OUTPUT', 'DAILY')}

## Get dataset directories

In [17]:
# List the directories (SOURCE_DATA and, where present, OUTPUT) found for each
# dataset under the default datasets source
get_dataset_dirs()

✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/


{'GLOBCOLOUR': {'SOURCE_DATA': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/GLOBCOLOUR/V4.2.1/SOURCE_DATA'},
 'OCCCI': {'SOURCE_DATA': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA',
  'OUTPUT': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT'},
 'CORALSST': {'SOURCE_DATA': '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA'}}

In [22]:
# Walk the nested mapping returned for OCCCI and print it as an indented tree:
# source type -> map type -> product -> path.
# NOTE(review): in the OUTPUT branch of the printed result the "Map Type" level
# shows a product name and the "Product" level shows a period, so these labels
# may not fit that branch — confirm against get_dataset_products.
dirs = get_dataset_products('OCCCI')
for src_label, by_map_type in dirs.items():
    print(f"\n🔷 Source Type: {src_label}")
    for map_label, product_paths in by_map_type.items():
        print(f"  📦 Map Type: {map_label}")
        for prod_name, prod_path in product_paths.items():
            # One call, two lines: identical text to printing each line separately
            print(
                f"    🧪 Product: {prod_name}",
                f"      📍 Path: {prod_path}",
                sep="\n",
            )


✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/

🔷 Source Type: SOURCE_DATA
  📦 Map Type: MAPPED_4KM_DAILY
    🧪 Product: CHL
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL
    🧪 Product: RRS
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/RRS
  📦 Map Type: BINNED_1KM_DAILY
    🧪 Product: IOP
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/BINNED_1KM_DAILY/IOP

🔷 Source Type: OUTPUT
  📦 Map Type: PPD
    🧪 Product: DAILY
      📍 Path: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/PPD/DAILY


### Get product files

In [3]:
# Get the files for the CHL product using its default dataset.
# The lowercase name is reported as 'CHL' in the status output, so the
# product lookup appears to be case-insensitive.
get_prod_files('chl')

✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
✅ Found path for 'CHL' → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL
📦 Found 30 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL


['/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980129-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980128-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980117-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980116-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1D_DAILY_4km_GEO_PML_OCx-19980123-fv6.0.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/SOURCE_DATA/MAPPED_4KM_DAILY/CHL/ESACCI-OC-L3S-CHLOR_A-MERGED-1

In [4]:
# Get the files for SST using the product's default dataset (ACSPO per
# product_defaults()). In the output below ACSPO is not present in the
# datasets source, so the call only reports that it was not found.
get_prod_files('SST')


✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
❌ ACSPO' not found in available sources.
⚠ No product structure found for dataset 'ACSPO'.


In [5]:
# Request SST again, this time naming the CORALSST dataset explicitly instead
# of relying on the product's default dataset
get_prod_files('SST', dataset='CORALSST')

✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
⚠ No 'OUTPUT' path found for 'CORALSST' – proceeding with SOURCE only.
✅ Found path for 'SST' → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST
📦 Found 32 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST


['/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980116.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980122.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980106.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980126.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980112.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980102.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE_DATA/MAPPED_5KM_DAILY/SST/coraltemp_v3.1_19980127.nc',
 '/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/CORALSST/V3.1/SOURCE

In [3]:
# Get the files for PPD, an OUTPUT-type product (per product_defaults()),
# resolved through its default dataset
get_prod_files('PPD')

✓ Using default input data directory: [laptop] → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/
✅ Found path for 'PPD' → /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/MAPPED_4KM/PPD/DAILY
📦 Found 1 .nc files in: /Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/MAPPED_4KM/PPD/DAILY


['/Users/kimberly.hyde/Documents/nadata/DATASETS_SOURCE/OCCCI/V6.0/OUTPUT/MAPPED_4KM/PPD/DAILY/D_20220101-OCCCI-V6.0-GLOBAL_MAPPED-PPD.nc']