In [None]:
from dask.distributed import Client, progress
import dask
import pandas as pd
import fsspec 
import numpy as np
import xarray as xr

In [None]:
# Set up the Dask client and an anonymous S3 filesystem handle.

from dask.distributed import get_client

# Best-effort cleanup so that re-running this cell does not leave an older
# client open: close the currently registered client, if there is one.
try:
    get_client().close()
except Exception:
    # Nothing to close (or closing failed); deliberately ignored so the
    # cell can still go on to create a fresh client below.
    pass

client = Client()  # no arguments: set up a local cluster on this machine

# Anonymous (unsigned) S3 access; `fs` is what later cells use for globbing.
fs = fsspec.filesystem('s3', anon=True)

# Kept as the final expression: a notebook only renders the last expression
# of a cell, so placing it mid-cell would display nothing.
client

In [None]:
# Build one glob pattern per month and per precipitation type.
#
# The dataset carries precipitation as two separate variables: large-scale
# precipitation (lsp) and convective precipitation (cp). Total precipitation
# is the sum of the two. Each month holds several files per variable, so only
# wildcard patterns are assembled here; they are expanded with fs.glob later.

# Pattern pieces: <base_url><YYYYMM><base_url2><variable tag><base_url3>
base_url = 's3://nsf-ncar-era5/e5.oper.fc.sfc.accumu/'
base_url2 = '/e5.oper.fc.sfc.accumu.128_'
base_url3 = '_*.nc'

# Variable tags as they appear in the file names
lsp_tag = '142'
cp_tag = '143'

center_coords = [47.5, -122.3] # latitude/longitude for Seattle, Wa

# +/- 5 degree box around the centre point.
# Latitude is listed upper bound first — presumably because the dataset stores
# latitude north-to-south; TODO confirm against the files.
lat_bounds = [center_coords[0] + 5, center_coords[0] - 5]
# Longitude is shifted by +360 — presumably because the dataset uses a
# 0..360 longitude convention; TODO confirm against the files.
lon_bounds = [center_coords[1]- 5 + 360, center_coords[1] + 5 + 360]

# period of interest: one timestamp per month start
pr = pd.date_range(start='1990-01',end='2020-12', freq='MS')

# 'YYYYMM' directory stamp for every month in the period
month_stamps = [month_start.strftime('%Y%m') for month_start in pr]

# One glob pattern per month for each precipitation type
lsp_file_list = [
    f"{base_url}{stamp}{base_url2}{lsp_tag}{base_url3}" for stamp in month_stamps
]
cp_file_list = [
    f"{base_url}{stamp}{base_url2}{cp_tag}{base_url3}" for stamp in month_stamps
]

In [None]:
# Expand every monthly glob pattern into concrete object paths.
# The 's3://' scheme is prepended to each path returned by fs.glob so the
# entries are full S3 URLs. Patterns are expanded in list order, so the
# resulting lists follow the order of the monthly pattern lists.
lsp_final_list = [
    's3://' + matched_path
    for pattern in lsp_file_list
    for matched_path in fs.glob(pattern)
]

cp_final_list = [
    's3://' + matched_path
    for pattern in cp_file_list
    for matched_path in fs.glob(pattern)
]

In [None]:
# Open every large-scale precipitation (lsp) file lazily, crop each one to the
# lat/lon box of interest, and join the pieces along 'forecast_initial_time'.


def _open_regional_subset(link):
    """Open one remote file lazily and return only the lat/lon box of interest.

    Parameters
    ----------
    link : str
        Full 's3://...' URL of a single NetCDF file.

    Returns
    -------
    xarray.Dataset
        The dataset restricted to `lat_bounds` / `lon_bounds`.
    """
    opened = xr.open_dataset(
        link,
        engine="h5netcdf",
        # NOTE(review): the pieces are concatenated along
        # 'forecast_initial_time' below; if the files have no 'time'
        # dimension this chunk request may have no effect — confirm.
        chunks={"time": 12},      # modest time chunks
        backend_kwargs={"storage_options": {"anon": True}},  # anonymous S3 access
    )
    return opened.sel(
        latitude=slice(lat_bounds[0], lat_bounds[1]),
        longitude=slice(lon_bounds[0], lon_bounds[1]),
    )


# Fail loudly instead of silently leaving `ds` undefined (or stale from an
# earlier run) when the glob step found nothing.
if not lsp_final_list:
    raise ValueError("lsp_final_list is empty: no files matched the glob patterns")

lsp_subsets = [_open_regional_subset(link) for link in lsp_final_list]

# Concatenate once over the whole list rather than pairwise inside a loop,
# which rebuilt the ever-growing dataset on every iteration.
if len(lsp_subsets) == 1:
    # A single file needs no concatenation; keep it exactly as opened.
    ds = lsp_subsets[0]
else:
    ds = xr.concat(lsp_subsets, dim='forecast_initial_time')


In [None]:

# Open only the first large-scale precipitation file (lazily) for inspection.
#
# `parallel` is deliberately not passed: it is an `open_mfdataset` option.
# `open_dataset` forwards unrecognised keywords to the backend's open
# function, and the h5netcdf backend does not accept `parallel`, so passing
# it raises a TypeError.
ds = xr.open_dataset(
    lsp_final_list[0],
    engine="h5netcdf",
    # NOTE(review): elsewhere in this notebook the files are concatenated along
    # 'forecast_initial_time'; if the files have no 'time' dimension this
    # chunk request may have no effect — confirm.
    chunks={"time": 12},      # modest time chunks
    backend_kwargs={"storage_options": {"anon": True}},  # anonymous S3 access
)

In [None]:
# Display the dataset cropped to the lat/lon box of interest.
# Each bounds list holds exactly [start, stop], so it unpacks straight into slice().
ds.sel(
    latitude=slice(*lat_bounds),
    longitude=slice(*lon_bounds),
)