In [None]:
import os
from pydap.client import open_url
import xarray as xr
import datetime
from datetime import timedelta, date
import netCDF4 as nc
from netCDF4 import Dataset
import os
import sys
sys.path.append(os.path.abspath(".."))
from utils.degree_day_equations import *
from utils.net_cdf_functions import *
from utils.processing_functions import *
#from utils.visualization_functions import *

import pandas as pd
#from visualization_functions import *
import numpy as np
data_path =  "../data/"

In [None]:

def fetch_ncss_data( start_date, n_days=None, bbox=None, variables=['tmin', 'tmax'], point = None, base_url = "https://thredds.climate.ncsu.edu/thredds/ncss/grid/prism/daily/combo",):
    """
    Fetch data from a THREDDS server using NCSS and collect it into an xarray Dataset.
    Parameters:
    - base_url: str, base URL of the THREDDS server
    - start_date: str, start date in the format 'YYYY-MM-DD'
    - n_days: int or None, number of days to fetch data for. If None, fetch all data to the present.
    - bbox: tuple or None, bounding box in the format (lon_min, lon_max, lat_min, lat_max)
    - variables: list or None, list of variables to fetch
    
    Returns:
    - xarray.Dataset containing the requested data
    """
    # Convert start_date to datetime
    start_date = datetime.strptime(start_date, '%Y-%m-%d')
    
    # Calculate end_date
    if n_days is None:
        end_date = datetime.now() - timedelta(days=2)
    else:
        end_date = start_date + timedelta(days=n_days)
    
    # Generate list of dates
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    
    # Initialize an empty list to store NCSS URLs
    ncss_urls = []
    
    # Loop through each date and construct the NCSS URL
    for date in dates:
        date_str = date.strftime('%Y-%m-%dT00:00:00Z')
        year = date.strftime('%Y')
        url = f"{base_url}/{year}/PRISM_combo_{date.strftime('%Y%m%d')}.nc"
        
        # Construct the NCSS URL
        var_params = "&".join([f"var={var}" for var in variables])
        if bbox:
            ncss_url = (
            f"{url}?{var_params}"
            f"&north={bbox[3]}&west={bbox[0]}&east={bbox[1]}&south={bbox[2]}"
            f"&horizStride=1&time_start={date_str}&time_end={date_str}&accept=netcdf4ext&addLatLon=true"
            )
        elif point:
            ncss_url = (
                f"{url}?{var_params}"
                f"&north={point[1]}&west={point[0]}&east={point[0]}&south={point[1]}"
                f"&horizStride=1&time_start={date_str}&time_end={date_str}&accept=netcdf4ext&addLatLon=true"
            )
        else:
            raise ValueError("Either bbox or point must be provided.")
            # Append the NCSS URL to the list

        ncss_urls.append(ncss_url)
    
    # Initialize an empty list to store datasets
    datasets = [None] * len(ncss_urls)
    
    # Use ThreadPoolExecutor to fetch data in parallel
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_index = {executor.submit(fetch_single_day_ncss, url): i for i, url in enumerate(ncss_urls)}
        for future in as_completed(future_to_index):
            index = future_to_index[future]
            try:
                ds = future.result()
                datasets[index] = ds
            except Exception as e:

                try:
                    #wait 5 seconds
                    time.sleep(3)
                    ds = future.result()
                    datasets.append(ds)
                except:
                   print(e)
                   print(f"Error fetching data for URL {ncss_urls[index]}: {e}")
    
    # Combine all datasets into a single xarray Dataset
    combined_ds = xr.concat(datasets, dim='t', join = 'override')
    
    return combined_ds

In [21]:
from siphon.catalog import TDSCatalog
from siphon.http_util import session_manager
import xarray as xr
test_dat = TDSCatalog('https://thredds.climate.ncsu.edu/thredds/catalog/prism/daily/combo/1982/catalog.html').datasets
ncss = test_dat.subset()
query = ncss.query()    
query.lonlat_point(-105, 40)
query.variables('tmax', 'tmin')
query.accept('netcdf4')
data = ncss.get_data(query)
#ds = xr.open_dataset(data)

  test_dat = TDSCatalog('https://thredds.climate.ncsu.edu/thredds/catalog/prism/daily/combo/1982/catalog.html').datasets


AttributeError: 'DatasetCollection' object has no attribute 'subset'

In [19]:
data

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    Conventions: CF-1.9
    history: Written by CFPointWriter
    title: Extracted data from TDS Feature Collection null
    geospatial_lat_min: 39.9995
    geospatial_lat_max: 40.0005
    geospatial_lon_min: -105.00049999955267
    geospatial_lon_max: -104.99949999955267
    featureType: timeSeries
    DSG_representation: Timeseries of station data in the indexed ragged array representation, H.2.5
    time_coverage_start: 1982-01-01T00:00:00Z
    time_coverage_end: 1982-01-01T00:00:00Z
    dimensions(sizes): obs(1), station(1), station_description_strlen(38), station_id_strlen(38)
    variables(dimensions): float64 latitude(station), float64 longitude(station), float64 stationAltitude(station), |S1 station_id(station, station_id_strlen), |S1 station_description(station, station_description_strlen), float64 tmin(obs), float64 tmax(obs), float64 time(obs), int32 stationIndex(obs)
    groups: 