In [2]:
import xarray as xr
import fsspec
import numpy as np
import s3fs
import hvplot.pandas
import hvplot.xarray

#### Start a Dask cluster
###### This is not required, but speeds up computations. Here we start a local cluster that just uses the cores available on the computer running the notebook server, but there are many other ways to set up Dask clusters that can scale larger than this.

In [None]:
import dask

# Optional: point Dask at a specific directory for its temporary files.
# NOTE(review): the absolute path below is machine-specific — adjust it for your system.
dask.config.set({'temporary_directory': '/wrds/dask-worker-space'})

In [None]:
from dask.distributed import Client
# Client() with no arguments; the notebook text above describes this as a local
# cluster using the cores of the machine running the notebook server.
client = Client()
# Leave the client as the last expression so the notebook displays it.
client

### Use this for single year
###### Make sure to change the year to the specific one you need

In [3]:
# Year to open, kept as a string because it is interpolated into the store URL.
year = "1979"

In [4]:
# Location of the Zarr store for the selected year.
url = f"s3://noaa-nws-aorc-v1-1-1km/{year}.zarr/"

In [5]:
%%time
ds_single = xr.open_zarr(fsspec.get_mapper(url, anon=True), consolidated=True)

CPU times: user 1.09 s, sys: 1.64 s, total: 2.73 s
Wall time: 1.94 s


#### Var to use:
###### APCP_surface, DLWRF_surface, DSWRF_surface, PRES_surface, SPFH_2maboveground, TMP_2maboveground, UGRD_10maboveground, VGRD_10maboveground

In [6]:
# Name of the data variable to work with (see the list of options above).
var = "APCP_surface"

###### Info on the variable selected

In [8]:
# Display the selected variable's summary (size, shape, chunking, dtype).
ds_single[var]

Unnamed: 0,Array,Chunk
Bytes,1.03 TiB,18.00 MiB
Shape,"(8016, 4201, 8401)","(144, 128, 256)"
Dask graph,60984 chunks in 2 graph layers,60984 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.03 TiB 18.00 MiB Shape (8016, 4201, 8401) (144, 128, 256) Dask graph 60984 chunks in 2 graph layers Data type float32 numpy.ndarray",8401  4201  8016,

Unnamed: 0,Array,Chunk
Bytes,1.03 TiB,18.00 MiB
Shape,"(8016, 4201, 8401)","(144, 128, 256)"
Dask graph,60984 chunks in 2 graph layers,60984 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# Dividing by 1e12 reports decimal terabytes, so the figure differs slightly
# from the binary TiB value shown in the array summary.
print(f'Variable size: {ds_single[var].nbytes / 1e12:.1f} TB')

Variable size: 1.1 TB


###### Grabbing a location based on lat and lon

In [10]:
%%time
da = ds_single[var].sel(longitude=-115.18, latitude=46.65, method='nearest').load()

CPU times: user 762 ms, sys: 217 ms, total: 978 ms
Wall time: 2.06 s


###### Plotting the time series at the selected location

In [11]:
%%time
#APCP of cords
da.hvplot(x='time', grid=True)

CPU times: user 15.2 ms, sys: 438 µs, total: 15.6 ms
Wall time: 15.3 ms


# Use this for multiple years

In [12]:
# Bucket root (including the s3:// scheme) under which the per-year stores are looked up.
url_multi_year = "s3://noaa-nws-aorc-v1-1-1km"

###### Adjust the years based on what you need - first number is the beginning year and second is the ending year + 1

In [13]:
# Years to include; the upper bound of range() is exclusive, so this covers 1979 through 2022.
dataset_years = [*range(1979, 2023)]


###### Build a list containing one S3 mapping per year, so that all the yearly stores can be opened together

In [14]:
# Build one S3 mapping per yearly Zarr store so they can all be opened together.
# Anonymous access is used, consistent with the single-year example earlier in
# this notebook, so no AWS credentials are required.
s3_out = s3fs.S3FileSystem(anon=True)
# url_multi_year already begins with "s3://", so it is used as-is; prepending the
# scheme a second time would produce a malformed "s3://s3://..." root.
fileset = [
    s3fs.S3Map(root=f"{url_multi_year}/{dataset_year}.zarr", s3=s3_out, check=False)
    for dataset_year in dataset_years
]


###### This can be used instead of the mapping above - it creates a plain Python list of URL strings for the same years

In [None]:
#fileset = [f"{url_multi_year}/{dataset_year}.zarr" for dataset_year in dataset_years]

In [15]:
%%time
ds_multi_year = xr.open_mfdataset(fileset, engine='zarr', consolidated=True )

CPU times: user 43.2 s, sys: 4.47 s, total: 47.7 s
Wall time: 1min 44s


#### Var to use:
###### APCP_surface, DLWRF_surface, DSWRF_surface, PRES_surface, SPFH_2maboveground, TMP_2maboveground, UGRD_10maboveground, VGRD_10maboveground

In [16]:
# Name of the data variable to use for the multi-year section (see the list of options above).
var = "APCP_surface"

###### Info on the variable selected

In [17]:
# Display the selected variable's summary (size, shape, chunking, dtype) across all years.
ds_multi_year[var]

Unnamed: 0,Array,Chunk
Bytes,49.43 TiB,18.00 MiB
Shape,"(384960, 4201, 8401)","(144, 128, 256)"
Dask graph,2917431 chunks in 89 graph layers,2917431 chunks in 89 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 49.43 TiB 18.00 MiB Shape (384960, 4201, 8401) (144, 128, 256) Dask graph 2917431 chunks in 89 graph layers Data type float32 numpy.ndarray",8401  4201  384960,

Unnamed: 0,Array,Chunk
Bytes,49.43 TiB,18.00 MiB
Shape,"(384960, 4201, 8401)","(144, 128, 256)"
Dask graph,2917431 chunks in 89 graph layers,2917431 chunks in 89 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [18]:
# Dividing by 1e12 reports decimal terabytes, so the figure differs slightly
# from the binary TiB value shown in the array summary.
print(f'Variable size: {ds_multi_year[var].nbytes / 1e12:.1f} TB')

Variable size: 54.3 TB


###### Grabbing a location based on lat and lon for all the years

In [19]:
%%time
da_multi_year = ds_multi_year[var].sel(longitude=-115.18, latitude=46.65, method='nearest').load()

CPU times: user 38.6 s, sys: 3.8 s, total: 42.4 s
Wall time: 1min 34s


###### Plotting the time series for all the years selected

In [20]:
%%time
#APCP of cords
da_multi_year.hvplot(x='time', grid=True)

CPU times: user 30 ms, sys: 2.14 ms, total: 32.2 ms
Wall time: 30.3 ms
