# Download NWM Retrospective Data

TEEHR provides access to gridded (forcing variables) and point-based (chrtout) NWM retrospective data for several NWM versions.
- `retrospective_points.py` supports CHRTOUT variables for NWM versions 2.0, 2.1, and 3.0.
- `retrospective_grids.py` supports FORCING variables for NWM versions 2.1 and 3.0. Mean values are computed for the given polygons
using a pre-calculated weights file (see: `teehr/utilities/generate_weights.py`).

## Retrospective points example

In [None]:
# Import the required packages.

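# Make the local TEEHR source tree importable. The sys.path tweak below is only
# needed when TEEHR is not installed in the current environment.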
import sys
import os
sys.path.insert(0, "../../src")

import teehr.loading.nwm.retrospective_points as nwm_retro
from pathlib import Path
from datetime import datetime

from dask.distributed import Client

## Set Variables 
Set variables to specify what to download and where to save the files.
When setting up a study, you will need to specify these variables as needed for your study.
This will likely involve generating a larger list of `location_ids` and specifying your study directory.

The `CHUNK_BY` variable (passed as the `chunk_by` argument) defines how the data is accessed and saved to files. Options include: `day`, `week`, `month`, `year`, `location_id` (points only), and `None` (single chunk).

In [None]:
# What to download: NWM version, variable, time window, and locations.
NWM_VERSION = "nwm20"
VARIABLE_NAME = "streamflow"
START_DATE = datetime(year=2000, month=1, day=1)
END_DATE = datetime(year=2000, month=1, day=2, hour=23)
LOCATION_IDS = [7086109, 7040481]
CHUNK_BY = "day"

# Where to save the files: <home>/temp/nwm20_retrospective
OUTPUT_ROOT = Path.home() / "temp"
OUTPUT_DIR = OUTPUT_ROOT / "nwm20_retrospective"

In [None]:
# Start a dask cluster if you want to use it.
# os.cpu_count() returns None when the CPU count cannot be determined, so fall
# back to 1 before subtracting. One CPU is left free, but at least one worker
# is always requested.
n_workers = max((os.cpu_count() or 1) - 1, 1)
client = Client(n_workers=n_workers)
client

In [None]:
%%time
# Download the requested point data and save it as parquet under OUTPUT_DIR,
# using the settings defined in the "Set Variables" cell above.
nwm_retro.nwm_retro_to_parquet(
    # What to fetch
    nwm_version=NWM_VERSION,
    variable_name=VARIABLE_NAME,
    location_ids=LOCATION_IDS,
    # Time window
    start_date=START_DATE,
    end_date=END_DATE,
    # Where and how to save
    output_parquet_dir=OUTPUT_DIR,
    chunk_by=CHUNK_BY,
)

## Retrospective grids example

In [None]:
import teehr.loading.nwm.retrospective_grids as nwm_retro

In [None]:
# What to download: NWM version, forcing variable, and time window.
NWM_VERSION = "nwm30"
VARIABLE_NAME = "RAINRATE"
START_DATE = datetime(year=2000, month=1, day=1)
END_DATE = datetime(year=2000, month=1, day=2, hour=23)
CHUNK_BY = "day"

# Pre-computed pixel weights for given polygons (see: generate_weights.py).
# NOTE: this is a relative path, so it is resolved against the current
# working directory.
ZONAL_WEIGHTS = Path("tests") / "data" / "nwm22" / "onehuc10_weights.parquet"

# Where to save the files: <home>/temp/nwm30_retrospective
OUTPUT_ROOT = Path.home() / "temp"
OUTPUT_DIR = OUTPUT_ROOT / "nwm30_retrospective"

In [None]:
# Compute mean values of the gridded forcing variable for the polygons in the
# pre-calculated weights file and save the result as parquet under OUTPUT_DIR.
nwm_retro.nwm_retro_grids_to_parquet(
    nwm_version=NWM_VERSION,
    variable_name=VARIABLE_NAME,
    zonal_weights_filepath=ZONAL_WEIGHTS,
    start_date=START_DATE,
    end_date=END_DATE,
    output_parquet_dir=OUTPUT_DIR,
    # NOTE(review): presumably replaces any existing output files - confirm.
    overwrite_output=True,
    # Use the CHUNK_BY value set in the cell above (it was previously defined
    # but unused here); set CHUNK_BY = None to process everything in one chunk.
    chunk_by=CHUNK_BY,
)