# Explore Siphon Capabilities on NCAR/ISD THREDDS Data Servers

Climate Data Gateway:   https://tds.ucar.edu

RDA : https://thredds.rda.ucar.edu/thredds/catalog/catalog.html

Unidata:  http://thredds.ucar.edu/thredds/catalog.xml

ESGF Data Node:   https://esgf-data.ucar.edu/thredds/catalog/catalog.html

Constrain ESGF search to NCAR node: https://esgf-node.llnl.gov/search/cmip6/?institution_id=NCAR

Aggregation Subset case to try eventually:  https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/Global_0p5deg/catalog.html

CORDEX datasets on Glade:  /glade/campaign/collections/cdg/data/cordex/data


## Things to demonstrate:

* Point subset, NCSS
* Time range subset, NCSS
* BBox subset, NCSS
* Query time range, geo range, variable metadata, NCSS

* See if aggregation across files within a dataset is possible with NCSS or some other service
* See what the JupyterNotebook service does

* Show OPeNDAP

In [1]:
import json
import pprint
from datetime import datetime, timezone

# Exception handling
from urllib.error import HTTPError

import matplotlib.pyplot as plt
from siphon.catalog import TDSCatalog, SimpleService
from siphon.http_util import session_manager


## THREDDS Helper Functions

In [1]:
def print_catalog_children(catalog_URL):
    """Print a summary of a THREDDS catalog and return its children.

    Loads the catalog at ``catalog_URL`` and prints:

    * one header line with the number of datasets, the number of child
      catalog references, the name of the "latest" dataset (or
      ``"Not Defined"``) and the catalog URL;
    * one line per dataset with the keys of its ``access_urls`` mapping;
    * one line per child catalog with its index in the returned list, its
      name, its dataset count and its URL.

    Every child catalog reference is followed, so this issues one request
    per child catalog in addition to the request for the catalog itself.

    Parameters
    ----------
    catalog_URL : str
        URL of the THREDDS catalog to load.

    Returns
    -------
    tuple
        ``(datasets, child_cats, latest)`` where ``datasets`` is the list of
        dataset names, ``child_cats`` is the list of followed child
        catalogs (in the same order as printed), and ``latest`` is the
        catalog's latest dataset or ``None`` when the catalog does not
        expose one.
    """
    # Load the given catalog into memory
    catalog = TDSCatalog(catalog_URL)

    # Collect the dataset names and child catalog names for this catalog
    datasets = list(catalog.datasets)
    childrefs = list(catalog.catalog_refs)

    # Accessing `latest` raises AttributeError when it is not available;
    # treat that as "no latest dataset" rather than as a failure.
    try:
        latest = catalog.latest
        latest_name = latest.name
    except AttributeError:
        latest = None
        latest_name = "Not Defined"

    print(f"  Num Datasets = {len(datasets):6d}, Num Child Catalogs = {len(childrefs):6d}, Latest: {latest_name}, Catalog_URL: {catalog.catalog_url}")

    # Print the access-service names offered by each dataset
    for dataset_name in datasets:
        service_names = catalog.datasets[dataset_name].access_urls.keys()
        print(f"     '{dataset_name}':  {service_names}")

    # Follow each child catalog reference and print its name and metadata.
    # The printed index matches the position in the returned `child_cats`.
    child_cats = []
    for index_value, childref in enumerate(childrefs):
        child_cat = catalog.catalog_refs[childref].follow()
        print(f"child_cats[{index_value}]: '{childref}', num_datasets: {len(child_cat.datasets)},  child_url: {child_cat.catalog_url}")
        child_cats.append(child_cat)

    # Return useful items from catalog
    return datasets, child_cats, latest

In [4]:
# Explore the top-level RDA catalog for dataset ds083.2.
# The helper prints a summary of the catalog's datasets and child catalogs;
# its return value (datasets, child_cats, latest) is not captured here.
cat_url = 'https://thredds.rda.ucar.edu/thredds/catalog/catalog_ds083.2.xml'
print_catalog_children(cat_url)

      Latest Dataset:   Num Datasets =      0, Num Child Catalogs =      3, Latest: Not Defined, Catalog_URL: https://thredds.rda.ucar.edu/thredds/catalog/catalog_ds083.2.xml
child_cats[0]: 'ds083.2 Files', num_datasets: 2,  child_url: https://thredds.rda.ucar.edu/thredds/catalog/files/g/ds083.2/catalog.xml
child_cats[1]: 'ds083.2 Aggregation Oct 1999 to Dec 2007', num_datasets: 2,  child_url: https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/1/catalog.xml
child_cats[2]: 'ds083.2 Aggregation Dec 2007 to Current', num_datasets: 2,  child_url: https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/2/catalog.xml


In [7]:
# Explore the ds083.2 aggregation catalog covering Dec 2007 to current.
# Every child catalog is followed by the helper, so this cell issues one
# request per child catalog.
cat_url = 'https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/2/catalog.xml'
print_catalog_children(cat_url)

      Latest Dataset:   Num Datasets =      2, Num Child Catalogs =     17, Latest: ds083.2 Aggregation Dec 2007 to Current-2023, Catalog_URL: https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/2/catalog.xml
     'Full Collection Dataset':  dict_keys(['OpenDAP', 'DAP4', 'NetcdfSubset', 'CdmRemote', 'CdmrFeature', 'JupyterNotebook', 'WCS', 'WMS', 'ISO', 'NCML', 'UDDC'])
     'Latest Collection for ds083.2 Aggregation Dec 2007 to Current':  dict_keys(['OpenDAP', 'DAP4', 'NetcdfSubset', 'CdmRemote', 'CdmrFeature', 'JupyterNotebook', 'WCS', 'WMS', 'ISO', 'NCML', 'UDDC'])
child_cats[0]: 'ds083.2 Aggregation Dec 2007 to Current-2007', num_datasets: 103,  child_url: https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/2/ds083.2_Grib2-2007/catalog.xml
child_cats[1]: 'ds083.2 Aggregation Dec 2007 to Current-2008', num_datasets: 1465,  child_url: https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/2/ds083.2_Grib2-2008/catalog.xml
child_cats[2]: 'ds

In [10]:
# Re-open the "Dec 2007 to Current" aggregation catalog and pick its first dataset.
cat_url = 'https://thredds.rda.ucar.edu/thredds/catalog/aggregations/g/ds083.2/2/catalog.xml'
catalog = TDSCatalog(cat_url)
dataset = catalog.datasets[0]
# Open the dataset through its subset service; `ncss` is reused by later cells
# to read metadata, build queries and fetch data.
ncss = dataset.subset()
# Display the variable names the subset service offers (a long list).
ncss.variables

{'5-Wave_Geopotential_Height_Anomaly_isobaric',
 '5-Wave_Geopotential_Height_isobaric',
 'Absolute_vorticity_isobaric',
 'Apparent_temperature_height_above_ground',
 'Best_4_layer_Lifted_Index_surface',
 'Categorical_Freezing_Rain_surface',
 'Categorical_Ice_Pellets_surface',
 'Categorical_Rain_surface',
 'Categorical_Snow_surface',
 'Cloud_mixing_ratio_hybrid',
 'Cloud_mixing_ratio_isobaric',
 'Cloud_water_entire_atmosphere_single_layer',
 'Composite_reflectivity_entire_atmosphere',
 'Convective_available_potential_energy_pressure_difference_layer',
 'Convective_available_potential_energy_surface',
 'Convective_inhibition_pressure_difference_layer',
 'Convective_inhibition_surface',
 'Dewpoint_temperature_height_above_ground',
 'Field_Capacity_surface',
 'Frictional_Velocity_surface',
 'Geopotential_height_anomaly_isobaric',
 'Geopotential_height_cloud_ceiling',
 'Geopotential_height_highest_tropospheric_freezing',
 'Geopotential_height_isobaric',
 'Geopotential_height_maximum_wind',


In [None]:
# Keep the subset service's dataset description; `metadata` is reused by the next cell.
metadata = ncss.metadata
# Display the per-variable metadata.
metadata.variables
# A ton of GRIB metadata that looks confusing

In [16]:
# Time coverage reported by the subset service's metadata.
time_span = metadata.time_span
# A bare assignment displays nothing; end the cell with the value so the
# begin/end timestamps are shown as the cell output.
time_span

{'begin': '2007-12-06T12:00:00Z', 'end': '2023-03-13T18:00:00Z'}

In [22]:
# Build a point query against the subset service.
query = ncss.query()

# Subset to Boulder, Colorado
query.lonlat_point(lon=-105, lat=40)

# Choose surface temperature
query.variables('Temperature_surface')

# Requesting every available time step with query.all_times() resulted in a
# 504 timeout, so the request is limited to a one-year window instead.
# The bounds are built as explicit UTC datetimes: datetime.fromisoformat()
# only accepts a trailing 'Z' on Python 3.11 and later.
start_time = datetime(2021, 3, 13, 18, tzinfo=timezone.utc)
end_time = datetime(2022, 3, 13, 18, tzinfo=timezone.utc)
query.time_range(start_time, end_time)

# Ask for a NetCDF-4 response. As the last expression of the cell, this also
# displays the assembled query string.
query.accept('netcdf4')

var=Temperature_surface&time_start=2021-03-13T18%3A00%3A00%2B00%3A00&time_end=2022-03-13T18%3A00%3A00%2B00%3A00&longitude=-105&latitude=40&accept=netcdf4

In [23]:
# Send the query to the subset service; `nc` is used by the plotting cell below.
# NOTE(review): the recorded run of this cell failed with
# "Server Error (504: Gateway Time-out)", so `nc` was never assigned in that
# session. The one-year point request may be too large for the server —
# consider a shorter time range; TODO confirm what the server can handle.
nc = ncss.get_data(query)


HTTPError: Error accessing https://thredds.rda.ucar.edu/thredds/ncss/grid/aggregations/g/ds083.2/2/TP?var=Temperature_surface&time_start=2021-03-13T18%3A00%3A00%2B00%3A00&time_end=2022-03-13T18%3A00%3A00%2B00%3A00&longitude=-105&latitude=40&accept=netcdf4
Server Error (504: Gateway Time-out)

In [None]:
# Plot the surface temperature returned by the point query.
# Requires `nc` from the data-fetch cell, so it only works after that request succeeds.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))

# Convert to degrees Fahrenheit.
# NOTE(review): the 273.15 offset assumes the variable is in Kelvin — confirm
# against the variable's units attribute.
temp_f = 1.8 * (nc.variables['Temperature_surface'][:] - 273.15) + 32
ax.plot(temp_f, color='r')

# Label the figure so it can be understood without reading the code.
ax.set_title('Temperature_surface at lon=-105, lat=40 (Boulder, Colorado)')
ax.set_xlabel('Sample index within the requested time range')
ax.set_ylabel('Temperature (°F)');