# HRRR Model Downloader Sample Notebook

This notebook provides examples of downloading, processing, and working with HRRR (High-Resolution Rapid Refresh) data for visualization and analysis.

### Imports and Setup

In [None]:
# NOTE: the `wgrib2` executable must be discoverable on PATH.
import os
import shutil

# Directory holding the pixi environment's executables. The path is relative,
# so it resolves against the notebook's current working directory.
_pixi_bin = '../.pixi/envs/data-download/bin'

# Append with the platform's separator, and only once, so that re-running this
# cell does not keep growing PATH with duplicate entries.
if _pixi_bin not in os.environ['PATH'].split(os.pathsep):
    os.environ['PATH'] += os.pathsep + _pixi_bin

# Prints the resolved location of wgrib2, or None when it cannot be found.
print(shutil.which('wgrib2'))
os.environ['PATH']

In [None]:
from herbie import Herbie, FastHerbie, wgrib2
import geopandas as gpd
import pandas as pd
import xarray as xr
import rioxarray as rxr
import geojson
import shapely
import dask.array as da
import glob
import xarray as xr
import numpy as np
import cfgrib

### Helper Functions

In [None]:
# Takes a dict mapping each variable to extract to its level / granularity text
# and downloads only the matching fields through the given FastHerbie object.
def download_parameters(parameters: dict, fh: "FastHerbie") -> list:
    """Download the subset of fields described by ``parameters``.

    Each ``{param: level}`` pair becomes a ``:param:level`` fragment; the
    fragments are OR-ed together into one regular expression anchored at the
    start of the string, which is printed and then passed to ``fh.download``.

    Args:
        parameters: Mapping of variable name (e.g. ``'TMP'``) to the level
            text that follows it (e.g. ``'surface'``). Values are inserted
            into the regex verbatim, so regex syntax in them is honoured.
        fh: Object exposing ``download(search_string)``; a ``FastHerbie``
            instance in this notebook.

    Returns:
        Whatever ``fh.download`` returns. The rest of this notebook iterates
        over it as a list of downloaded files.

    Raises:
        ValueError: If ``parameters`` is empty. An empty mapping would build
            the pattern ``^(?:)``, which matches every string instead of
            selecting a subset.
    """
    if not parameters:
        raise ValueError("parameters must contain at least one field to download")

    fields = [f":{param}:{level}" for param, level in parameters.items()]
    param_regex = fr"^(?:{'|'.join(fields)})"
    print("Search String: " + param_regex)
    return fh.download(param_regex)
    

In [None]:
# Read a GeoJSON file and report the bounding box of everything in it.
def parseGeoJson(geojson_path: str) -> tuple[float, float, float, float]:
    """Return the bounding box of the geometries in ``geojson_path``.

    Args:
        geojson_path: Path to a file readable by ``geopandas.read_file``.

    Returns:
        ``(min_lon, max_lon, min_lat, max_lat)``. Note this is a different
        ordering from ``total_bounds``, which is unpacked here as
        (min x, min y, max x, max y).
    """
    west, south, east, north = gpd.read_file(geojson_path).total_bounds
    return (west, east, south, north)

def limitGeographicRange(
    bounds: tuple[float, float, float, float],
    subsetFiles: list,
    name: str = 'skagit-basin',
) -> list:
    """Crop every file in ``subsetFiles`` to ``bounds`` with ``wgrib2.region``.

    Args:
        bounds: Region of interest, passed unchanged to ``wgrib2.region``.
            In this notebook it is the tuple produced by ``parseGeoJson``:
            (min_lon, max_lon, min_lat, max_lat).
        subsetFiles: Files to crop, processed in order.
        name: Label forwarded to ``wgrib2.region`` as its ``name`` argument.
            Defaults to ``'skagit-basin'``, the value that used to be
            hard-coded, so existing two-argument calls behave as before.

    Returns:
        A list with one ``wgrib2.region`` result per input file, in the same
        order as ``subsetFiles``.
    """
    return [wgrib2.region(f, bounds, name=name) for f in subsetFiles]
            

### Parameter selection and file download

In [None]:
%%time

model = 'hrrr' # Use HRRR Model
product = 'sfc' # Use 2D surface level fields; 3-km resolution; 1 hr intervals
date_range = pd.date_range(
    start="2020-03-01 00:00",
    end="2020-03-02 00:00",
    freq="1h"
)

# Parameter Names scoped from here: 
# https://www.nco.ncep.noaa.gov/pmb/products/hrrr/hrrr.t00z.wrfsfcf00.grib2.shtml
# See https://github.com/blaylockbk/Herbie/discussions/10
# for modifications to accumulated precip to get hourly amount
parameters = {
    'TMP' : 'surface', # Temperature - Hourly
    'RH' : '2 m above ground', # Relative Humidity - Hourly
    'WIND': '10 m above ground', # Wind Speed - 0-0 day max f 
    'APCP': 'surface:0-1 hour acc fcst', # Total Precip - 0-0 day acc f 
    'DSWRF': 'surface', # Downward Short-Wave Radiation Flux - Hourly
    'DLWRF': 'surface' # Downward Long-Wave Rad. Flux - Hourly
}

# Download files
fh = FastHerbie(date_range, model=model, product=product, fxx=range(0,2), save_dir="../data/weather_data/")
fh_files = download_parameters(parameters, fh)

### Create inventory files for wgrib2, then limit the files geographically using the boundaries file

In [None]:
# NOTE: the inventory files seem to be created automatically if they don't already exist

# for f in fh_files:
#     wgrib2.create_inventory_file(f)

In [None]:
%%time 

aoi_path = '../data/GIS/SkagitBoundary.json'
mask = gpd.read_file(aoi_path)
bounds = parseGeoJson(aoi_path)
geo_limited_files = limitGeographicRange(bounds, fh_files)

In [None]:
# Show the first three entries returned by limitGeographicRange as a quick sanity check.
geo_limited_files[:3]

### Load files, combine into xarray, and save as zarr

In [None]:
# Names removed from every dataset before merging.
dropVars = ["surface", "heightAboveGround", "valid_time", "step"]
# For the one-hour forecast step, also remove the other forecast variables so
# that only the accumulated precipitation is kept from those datasets.
# NOTE(review): the MergeError note further down mentions 'max_10si', which is
# not in this list ('si10' is) - confirm which name the wind field really gets.
dropVarsStep = dropVars + ["t", "r2", "si10", "sdswrf", "sdlwrf"]

datasets = []
for f in geo_limited_files:
    # NOTE: decode_timedelta is passed explicitly to silence the warning that
    # asks for it to be set to True, False, or a 'CFTimedeltaCoder' instance.
    unMergedDatasets = cfgrib.open_datasets(f, indexpath='', decode_timedelta=False)
    # NOTE(review): decode_timedelta=False is passed above, so confirm that
    # `step` is still comparable to a numpy timedelta here; if it is not,
    # every dataset takes the dropVars branch.
    mergedDataset = xr.merge(
        [
            ds.drop_vars(
                dropVarsStep if ds.step.values == np.timedelta64(1, 'h') else dropVars,
                errors="ignore",
            )
            for ds in unMergedDatasets
        ]
    )
    # Pull the data into memory before collecting the dataset.
    mergedDataset.load()
    datasets.append(mergedDataset)

# Datasets that contain total precipitation ('tp') are concatenated separately
# from the ones that do not, each along the 'time' dimension.
tp = [ds for ds in datasets if 'tp' in ds.variables]
tp_ds = xr.concat(tp, dim='time')

other = [ds for ds in datasets if 'tp' not in ds.variables]
other = xr.concat(other, dim='time')

# NOTE: compat='override' is used because combining otherwise failed with
# "MergeError: conflicting values for variable 'max_10si' on objects to be combined".
comb = xr.combine_by_coords([tp_ds, other], compat='override')

# Shift longitude by -360 so it is expressed in the correct space.
comb['longitude'] = comb['longitude'] - 360

In [None]:
# Test every (longitude, latitude) pair of the combined dataset against the
# geometry labelled 0 in the area-of-interest table.
skagit_mask = shapely.contains_xy(
    mask.geometry[0],
    comb['longitude'].values,
    comb['latitude'].values,
)
# NOTE(review): under default IPython settings only a cell's last expression is
# echoed, so this bare name probably displays nothing - confirm before relying on it.
skagit_mask

# Apply the mask to the combined dataset and display the result.
masked_data_set = comb.where(skagit_mask)
masked_data_set

In [None]:
# Destination store for the masked dataset; mode='w' is passed to to_zarr.
zarr_path = "../data/weather_data/hrrr.zarr"
masked_data_set.to_zarr(store=zarr_path, mode="w")