# Data Pipeline 2026
This Jupyter notebook contains the pipelines to run in 2026 to fetch the 2025 data and update the Water Rights Visualizer tool.


In [1]:
%reload_ext autoreload
%autoreload 2

import sys
import os
from os.path import join
sys.path.append("../")

from pipelines.gee.gee_aws_pipeline import GEEAWSDataPipeline
from pipelines.prism.prism_aws_pipeline import PrismAWSDataPipeline
from pipelines.gridmet.gridmet_pipeline import GridMETPipeline

# Specify the path to the Google Drive client secret and key files
# To generate, follow these instructions: https://docs.iterative.ai/PyDrive2/quickstart/
current_dir = os.path.dirname(os.getcwd())
secret_path = join(current_dir, "pipelines/client_secret.json")
key_path = join(current_dir, "pipelines/google_drive_key.txt")

  import pkg_resources


In [None]:
# The Google Earth Engine project ID is shown in Google Cloud Platform (https://console.cloud.google.com/) when selecting the project name.
# This project must have the Google Earth Engine API enabled to work correctly.
# To use a different project without editing this notebook, set the GEE_PROJECT_ID
# environment variable; when it is unset or empty, the default project below is used.
# (`os` is imported in the first cell.)
GEE_PROJECT_ID = os.environ.get("GEE_PROJECT_ID") or "zippy-pad-465521-e2"

In [None]:
import ee

# Authenticate this notebook session with Google Earth Engine (using the "notebook"
# auth mode), then initialize the Earth Engine client against the project whose ID
# is stored in GEE_PROJECT_ID. Run the authentication step before initializing.
ee.Authenticate(auth_mode="notebook")
ee.Initialize(project=GEE_PROJECT_ID)

In [None]:
# Build the Google Earth Engine -> AWS pipeline for the OpenET ensemble product.
# The three bands correspond to ET, ET_MIN, and ET_MAX.
pipeline = GEEAWSDataPipeline(
    # Earth Engine product selection
    project=GEE_PROJECT_ID,
    product="OpenET/ENSEMBLE/CONUS/GRIDMET/MONTHLY/v2_0",
    product_prefix="OPENET_ENSEMBLE",
    bands=["et_ensemble_mad", "et_ensemble_mad_min", "et_ensemble_mad_max"],
    # Google Drive settings (credential paths are defined in the first cell)
    gdrive_folder="OPENET_EXPORTS",
    gdrive_client_secrets_filename=secret_path,
    gdrive_key_filename=key_path,
    # Local folder name passed as the temporary working location
    temp_local_folder="temp_data",
    # AWS destination
    aws_bucket="ose-dev-inputs",
    aws_region="us-west-2",
    aws_profile="ose-nmw",
)

In [None]:
# Initiate export of ET, ET_MIN, and ET_MAX tiles for the full year of 2025.
# NOTE(review): the end date passed is 2026-01-01; presumably it is treated as exclusive
# so that the range covers all of 2025 — confirm against generate_tiles_for_date_range.
pipeline.generate_tiles_for_date_range("2025-01-01", "2026-01-01")

In [None]:
# NOTE: Only run this after all tiles have been generated. Tile generation may take
# anywhere from a couple of hours to several days to complete.
# You can check the status of the jobs in the Google Cloud Console here:
# https://console.cloud.google.com/earth-engine/tasks?project=zippy-pad-465521-e2
# (the project ID in this URL is hard-coded; adjust it if GEE_PROJECT_ID is changed).
# While waiting, you can run the non-Google Earth Engine pipelines in the cells below.
# delete_from_local=True is passed to the transfer; presumably this removes the local
# copies once they have been transferred — confirm against transfer_gdrive_to_aws.
pipeline.transfer_gdrive_to_aws(delete_from_local=True)

In [None]:
# PRISM data is downloaded straight from the PRISM website rather than through
# Google Earth Engine, so this pipeline needs no Earth Engine project or Drive settings.
prism_pipeline = PrismAWSDataPipeline(
    # Local working directories
    raw_dir="prism_data",
    monthly_dir="prism_data_monthly",
    output_dir="prism_tiles",
    # NOTE(review): presumably permits provisional PRISM releases — confirm in the pipeline
    allow_provisional=True,
    # AWS destination
    aws_bucket="ose-dev-inputs",
    aws_region="us-west-2",
    aws_profile="ose-nmw",
)

In [None]:
# Fetch, format, and process the PRISM data for 2025 and store it locally (should only take a few minutes).
# upload=False is passed here; presumably this skips the AWS upload, which is performed
# separately via upload_local_folder_to_aws — confirm against process_year.
prism_pipeline.process_year(2025, upload=False)

In [None]:
# Upload the local PRISM data to AWS (might take a few hours depending on network speed).
# Run this only after process_year has finished, so the local data is complete.
prism_pipeline.upload_local_folder_to_aws()

In [None]:
# GridMET data is downloaded straight from the GridMET website rather than through
# Google Earth Engine. Only the "pet" band is requested.
gridmet_climate_engine_pipeline = GridMETPipeline(
    aws_bucket="ose-dev-inputs",
    aws_region="us-west-2",
    aws_profile="ose-nmw",
    bands=["pet"],
)

# Fetch the 2025 data first, then upload it to AWS.
gridmet_climate_engine_pipeline.fetch_year(2025)
gridmet_climate_engine_pipeline.upload_to_aws(delete_on_success=True)

In [None]:
# After all data has been uploaded to AWS (including the Google Earth Engine ET data), run this to update the tool's manifest.
# NOTE(review): S3ManifestTracker is not imported in any cell visible in this notebook; unless it
# is imported elsewhere, this cell raises NameError on a fresh kernel. Add the import to the
# first (imports) cell — the module path is not shown here, so confirm it against the project.
tracker = S3ManifestTracker(
    config_path="../variables.yaml",
    bucket_name="ose-dev-inputs",
    output_path="S3_filenames_dynamic.csv",
    profile_name="ose-nmw",
)

# This will replace the current manifest with the new data.
# The output_path given here differs from the one passed to the constructor;
# presumably the per-call value takes precedence — confirm against update_manifest.
tracker.update_manifest(output_path="../water_rights_visualizer/S3_filenames.csv")

# Final Steps:
After all of the cells above have been run and the manifest has been updated, the next step is to update the tool to use the new data.

1. Open `variables.yaml` and update the end date of each of the following data product IDs to "2026-01-01":
    - openet_ensemble_et
    - openet_ensemble_et_min
    - openet_ensemble_et_max
    - idaho_epscor_gridmet_eto
    - oregon_state_prism_ppt
2. Open `client/src/utils/constants.tsx` and update the `DATA_END_YEAR` variable to 2025.
3. Update `et_tool_data_docs.pdf` to reflect the new data end date (and any new data caveats). To do this, open `water_rights_visualizer/et_tool_data_docs.docx` in Microsoft Word, make the updates, and save it as a PDF in the same directory.
4. Commit changes to git, merge to main, and deploy to production.
