From 9801177a088c7d1c39120611244ebca908ea6eaa Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Sun, 31 May 2026 20:50:38 -0400
Subject: [PATCH] refactor(errors): unify request errors under a
 DataRetrievalError taxonomy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An HTTP failure used to surface as a different exception type depending on which
module made the request:

  - legacy query() (wqp, nwis, ngwmn, nldi): ValueError, or NoSitesError(Exception)
  - waterdata: RateLimited / ServiceUnavailable (RuntimeError),
    RequestTooLarge (ValueError), ChunkInterrupted (RuntimeError)
  - nadp / streamstats: bare httpx.HTTPStatusError

so no single `except` clause could catch "any dataretrieval request failure."

Add a `DataRetrievalError` base and root the existing exceptions on it, so a
caller can `except dataretrieval.DataRetrievalError` for any request failure.
Each subclass also keeps the built-in it has historically raised (BadRequestError
is a ValueError; RateLimited is a RuntimeError), so existing `except ValueError` /
`except RuntimeError` handlers keep working unchanged.

The taxonomy lives in a new, dependency-free `dataretrieval/exceptions.py` — the
single home for it (cf. requests.exceptions, botocore.exceptions) — rather than
buried in the pandas/httpx-heavy utils.py:

  - exceptions.py: DataRetrievalError + the legacy query() types (BadRequestError,
    NotFoundError, RequestTooLargeError, ServiceUnavailableError; all ValueError)
    + NoSitesError + the Water Data transport types (RateLimited, ServiceUnavailable,
    RequestTooLarge). Explicit __all__, re-exported from dataretrieval/__init__.py
    so the top-level export is intentional, not an accident of `import *`.
  - utils.py keeps the behavior: query() and a consolidated _raise_for_status()
    status->exception mapper; it imports the types it raises.
  - chunking.py keeps the chunker-specific resumable types (ChunkInterrupted and
    its subclasses, which carry a ChunkedCall resume handle) and imports the
    transport types from exceptions.

The old import paths still resolve (utils.NoSitesError,
waterdata.chunking.RateLimited, ...) via re-import, so nothing downstream breaks.

Out of scope (follow-ups): nadp/streamstats still raise httpx.HTTPStatusError;
nldi's manual non-200 ValueError isn't rooted; waterdata.utils._raise_for_non_200's
catch-all for non-retryable 4xx stays a bare RuntimeError (a deliberate
fatal/non-resumable signal the chunker relies on).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 dataretrieval/__init__.py           |   1 +
 dataretrieval/exceptions.py         | 142 ++++++++++++++++++++++++++++
 dataretrieval/utils.py              |  69 ++++++++------
 dataretrieval/waterdata/chunking.py |  71 ++------------
 dataretrieval/waterdata/utils.py    |   3 +-
 tests/utils_test.py                 |  70 +++++++++++++-
 6 files changed, 258 insertions(+), 98 deletions(-)
 create mode 100644 dataretrieval/exceptions.py

diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py
index ca302ce0..960de546 100644
--- a/dataretrieval/__init__.py
+++ b/dataretrieval/__init__.py
@@ -5,6 +5,7 @@
 except PackageNotFoundError:
     __version__ = "version-unknown"
 
+from dataretrieval.exceptions import *
 from dataretrieval.nadp import *
 from dataretrieval.nwis import *
 from dataretrieval.samples import *
diff --git a/dataretrieval/exceptions.py b/dataretrieval/exceptions.py
new file mode 100644
index 00000000..de9d3db6
--- /dev/null
+++ b/dataretrieval/exceptions.py
@@ -0,0 +1,142 @@
+"""Exception taxonomy for ``dataretrieval``.
+
+A failed request from any service module (``nwis``, ``wqp``, ``waterdata``,
+``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can
+handle any request failure with a single ``except dataretrieval.DataRetrievalError``.
+
+This module deliberately has no third-party dependencies, so any module can
+import it without pulling in pandas/httpx.
+"""
+
+from __future__ import annotations
+
+__all__ = [
+    "DataRetrievalError",
+    "BadRequestError",
+    "NotFoundError",
+    "RequestTooLargeError",
+    "ServiceUnavailableError",
+    "NoSitesError",
+    "RateLimited",
+    "ServiceUnavailable",
+    "RequestTooLarge",
+]
+
+
+class DataRetrievalError(Exception):
+    """Base class for errors raised when a request to a USGS or EPA web
+    service fails.
+
+    Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...)
+    raises a subclass of this when a request fails, so a caller can handle any
+    request failure uniformly::
+
+        try:
+            df, md = dataretrieval.wqp.get_results(...)
+        except dataretrieval.DataRetrievalError:
+            ...
+
+    Subclasses also inherit from the built-in exception this package has
+    historically raised for the same condition (e.g. :class:`BadRequestError`
+    is also a :class:`ValueError`, :class:`RateLimited` is also a
+    :class:`RuntimeError`), so existing ``except ValueError`` / ``except
+    RuntimeError`` handlers keep working unchanged.
+    """
+
+
+# Legacy ``query()`` path: HTTP status families mapped to ValueError-compatible
+# types (the type that path has always raised).
+class BadRequestError(DataRetrievalError, ValueError):
+    """The service rejected the request parameters (HTTP 400)."""
+
+
+class NotFoundError(DataRetrievalError, ValueError):
+    """The requested resource was not found; often an empty query (HTTP 404)."""
+
+
+class RequestTooLargeError(DataRetrievalError, ValueError):
+    """The request URL was too long for the service (HTTP 414, or rejected
+    client-side before it was sent)."""
+
+
+class ServiceUnavailableError(DataRetrievalError, ValueError):
+    """The service is down or returned a server error (HTTP 5xx)."""
+
+
+class NoSitesError(DataRetrievalError):
+    """The selection criteria matched no sites/data."""
+
+    def __init__(self, url):
+        self.url = url
+
+    def __str__(self):
+        return (
+            "No sites/data found using the selection criteria specified in "
+            f"url: {self.url}"
+        )
+
+
+# Water Data API transport errors: retryable HTTP status families, surfaced as
+# RuntimeError-compatible types the chunker detects via ``isinstance`` and wraps
+# as resumable interruptions.
+class _RetryableTransportError(DataRetrievalError, RuntimeError):
+    """
+    Base for typed HTTP transport failures the chunker recognizes as
+    transient.
+
+    Raised by :func:`dataretrieval.waterdata.utils._raise_for_non_200`
+    and walked by :func:`dataretrieval.waterdata.chunking._classify_chunk_error`.
+    One subclass per recoverable HTTP status family (429 → :class:`RateLimited`,
+    5xx → :class:`ServiceUnavailable`); ``ChunkedCall`` wraps them as resumable
+    :class:`~dataretrieval.waterdata.chunking.ChunkInterrupted` subclasses.
+
+    Parameters
+    ----------
+    message : str
+        Human-readable error message.
+    retry_after : float, optional
+        Seconds to wait before retrying, parsed from the
+        ``Retry-After`` response header.
+
+    Attributes
+    ----------
+    retry_after : float or None
+        Seconds to wait before retrying, parsed from the
+        ``Retry-After`` response header. ``None`` when the header was
+        absent or unparseable.
+    """
+
+    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+class RateLimited(_RetryableTransportError):
+    """
+    A USGS Water Data API request was rejected with HTTP 429.
+
+    Exposed as a typed exception so callers (notably the multi-value
+    chunker) can detect rate-limit failures via ``isinstance`` instead
+    of string-matching error messages.
+    """
+
+
+class ServiceUnavailable(_RetryableTransportError):
+    """
+    A USGS Water Data API request was rejected with HTTP 5xx.
+
+    Surfaced as a typed exception (parallel to :class:`RateLimited`)
+    so ``ChunkedCall`` can treat transient server failures as
+    resumable interruptions rather than fatal programmer errors.
+    """
+
+
+class RequestTooLarge(DataRetrievalError, ValueError):
+    """
+    No chunking plan fits the URL byte limit.
+
+    Raised when even the smallest reducible plan (every list axis at
+    singleton chunks and the filter at one clause per sub-request)
+    still exceeds the server's byte limit. Shrink the input lists,
+    simplify the filter, or split the call manually.
+    """
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 7bb03a69..532742f2 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -10,6 +10,13 @@
 
 import dataretrieval
 from dataretrieval.codes import tz
+from dataretrieval.exceptions import (
+    BadRequestError,
+    NoSitesError,
+    NotFoundError,
+    RequestTooLargeError,
+    ServiceUnavailableError,
+)
 
 HTTPX_DEFAULTS = {
     "follow_redirects": True,
@@ -270,14 +277,42 @@ def __repr__(self) -> str:
                         data_list.append(data)  # append results to list"""
 
 
-def _url_too_long_error(detail: str) -> ValueError:
-    return ValueError(
+def _url_too_long_error(detail: str) -> RequestTooLargeError:
+    return RequestTooLargeError(
         "Request URL too long. Modify your query to use fewer sites. "
         f"{detail}. Pseudo-code example of how to split your query: "
         f"\n {_URL_TOO_LONG_EXAMPLE}"
     )
 
 
+def _raise_for_status(response: httpx.Response) -> None:
+    """Raise a typed :class:`DataRetrievalError` for an unsuccessful response.
+
+    Centralizes the HTTP-status-to-exception mapping for the shared
+    :func:`query` path so every legacy service module (``wqp``, ``nwis``,
+    ``ngwmn``, ``nldi``) surfaces request failures the same way. A successful
+    response returns ``None``. The raised types are also :class:`ValueError`
+    subclasses, preserving this module's historical contract.
+    """
+    status = response.status_code
+    if status == 400:
+        raise BadRequestError(
+            f"Bad Request, check that your parameters are correct. URL: {response.url}"
+        )
+    elif status == 404:
+        raise NotFoundError(
+            "Page Not Found Error. May be the result of an empty query. "
+            f"URL: {response.url}"
+        )
+    elif status == 414:
+        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
+    elif 500 <= status < 600:
+        raise ServiceUnavailableError(
+            f"Service Unavailable: {status} {response.reason_phrase}. "
+            f"The service at {response.url} may be down or experiencing issues."
+        )
+
+
 def query(url, payload, delimiter=",", ssl_check=True):
     """Send a query.
 
@@ -321,37 +356,9 @@ def query(url, payload, delimiter=",", ssl_check=True):
     except httpx.InvalidURL as exc:
         raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc
 
-    if response.status_code == 400:
-        raise ValueError(
-            f"Bad Request, check that your parameters are correct. URL: {response.url}"
-        )
-    elif response.status_code == 404:
-        raise ValueError(
-            "Page Not Found Error. May be the result of an empty query. "
-            + f"URL: {response.url}"
-        )
-    elif response.status_code == 414:
-        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
-    elif 500 <= response.status_code < 600:
-        raise ValueError(
-            f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
-            + f"The service at {response.url} may be down or experiencing issues."
-        )
+    _raise_for_status(response)
 
     if response.text.startswith("No sites/data"):
         raise NoSitesError(response.url)
 
     return response
-
-
-class NoSitesError(Exception):
-    """Custom error class used when selection criteria returns no sites/data."""
-
-    def __init__(self, url):
-        self.url = url
-
-    def __str__(self):
-        return (
-            "No sites/data found using the selection criteria specified in "
-            f"url: {self.url}"
-        )
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py
index ab079070..ffb4fad9 100644
--- a/dataretrieval/waterdata/chunking.py
+++ b/dataretrieval/waterdata/chunking.py
@@ -66,6 +66,12 @@
 import pandas as pd
 from anyio.from_thread import start_blocking_portal
 
+from dataretrieval.exceptions import (
+    DataRetrievalError,
+    RateLimited,
+    RequestTooLarge,
+    ServiceUnavailable,
+)
 from dataretrieval.utils import HTTPX_DEFAULTS
 
 from . import _progress
@@ -383,70 +389,7 @@ def _passthrough_result(
     return frame, response
 
 
-class _RetryableTransportError(RuntimeError):
-    """
-    Base for typed HTTP transport failures the chunker recognizes as
-    transient.
-
-    Raised by :func:`dataretrieval.waterdata.utils._raise_for_non_200`
-    and walked by :func:`_classify_chunk_error`. One subclass per
-    recoverable HTTP status family (429 → :class:`RateLimited`,
-    5xx → :class:`ServiceUnavailable`); ``ChunkedCall`` wraps them as
-    resumable :class:`ChunkInterrupted` subclasses.
-
-    Parameters
-    ----------
-    message : str
-        Human-readable error message.
-    retry_after : float, optional
-        Seconds to wait before retrying, parsed from the
-        ``Retry-After`` response header.
-
-    Attributes
-    ----------
-    retry_after : float or None
-        Seconds to wait before retrying, parsed from the
-        ``Retry-After`` response header. ``None`` when the header was
-        absent or unparseable.
-    """
-
-    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
-        super().__init__(message)
-        self.retry_after = retry_after
-
-
-class RateLimited(_RetryableTransportError):
-    """
-    A USGS Water Data API request was rejected with HTTP 429.
-
-    Exposed as a typed exception so callers (notably the multi-value
-    chunker) can detect rate-limit failures via ``isinstance`` instead
-    of string-matching error messages.
-    """
-
-
-class ServiceUnavailable(_RetryableTransportError):
-    """
-    A USGS Water Data API request was rejected with HTTP 5xx.
-
-    Surfaced as a typed exception (parallel to :class:`RateLimited`)
-    so ``ChunkedCall`` can treat transient server failures as
-    resumable interruptions rather than fatal programmer errors.
-    """
-
-
-class RequestTooLarge(ValueError):
-    """
-    No chunking plan fits the URL byte limit.
-
-    Raised when even the smallest reducible plan (every list axis at
-    singleton chunks and the filter at one clause per sub-request)
-    still exceeds the server's byte limit. Shrink the input lists,
-    simplify the filter, or split the call manually.
-    """
-
-
-class ChunkInterrupted(RuntimeError):
+class ChunkInterrupted(DataRetrievalError, RuntimeError):
     """
     Base class for mid-stream chunk failures whose completed work is
     preserved and resumable.
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index dd622efd..35491c88 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -26,12 +26,11 @@
 from anyio.from_thread import start_blocking_portal
 
 from dataretrieval import __version__
+from dataretrieval.exceptions import RateLimited, ServiceUnavailable
 from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata
 from dataretrieval.waterdata import _progress, chunking
 from dataretrieval.waterdata.chunking import (
     _QUOTA_HEADER,
-    RateLimited,
-    ServiceUnavailable,
     _safe_elapsed,
     get_active_client,
 )
diff --git a/tests/utils_test.py b/tests/utils_test.py
index c25e1084..167acc35 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import pytest
 
-from dataretrieval import nwis, utils
+from dataretrieval import exceptions, nwis, utils
 
 
 class Test_query:
@@ -42,6 +42,74 @@ def test_header(self):
         assert "user-agent" in response.request.headers
 
 
+class Test_error_taxonomy:
+    """The unified request-error hierarchy.
+
+    Every module's request failures are catchable as ``DataRetrievalError``,
+    while remaining backward-compatible with the built-in type each path
+    historically raised (``ValueError`` for the legacy ``query`` path,
+    ``RuntimeError`` for the waterdata retryable types).
+    """
+
+    @pytest.mark.parametrize(
+        "status, exc_name, match",
+        [
+            (400, "BadRequestError", "Bad Request"),
+            (404, "NotFoundError", "Page Not Found"),
+            (414, "RequestTooLargeError", "Request URL too long"),
+            (503, "ServiceUnavailableError", "Service Unavailable: 503"),
+        ],
+    )
+    def test_query_maps_status_to_typed_error(
+        self, httpx_mock, status, exc_name, match
+    ):
+        """``query`` maps each HTTP status family to a typed error that is both
+        a ``DataRetrievalError`` (new, unified) and a ``ValueError`` (the type
+        this path has always raised), with the message preserved."""
+        exc_cls = getattr(exceptions, exc_name)
+        url = "https://example.invalid/x"
+        httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=status)
+        with pytest.raises(exc_cls, match=match) as excinfo:
+            utils.query(url, {"a": "1"})
+        assert isinstance(excinfo.value, exceptions.DataRetrievalError)
+        assert isinstance(excinfo.value, ValueError)  # backward compatibility
+
+    def test_query_failure_catchable_as_base(self, httpx_mock):
+        """A bare ``except DataRetrievalError`` catches a legacy query failure."""
+        url = "https://example.invalid/y"
+        httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=400)
+        with pytest.raises(exceptions.DataRetrievalError):
+            utils.query(url, {"a": "1"})
+
+    def test_no_sites_error_is_data_retrieval_error(self):
+        """``NoSitesError`` joins the root (was a bare ``Exception``)."""
+        assert issubclass(exceptions.NoSitesError, exceptions.DataRetrievalError)
+        assert not issubclass(exceptions.NoSitesError, ValueError)  # unchanged
+
+    def test_waterdata_exceptions_share_the_root(self):
+        """waterdata's typed exceptions are ``DataRetrievalError`` too, so one
+        ``except`` clause spans the legacy and waterdata subsystems — while
+        keeping their historical ``RuntimeError``/``ValueError`` bases."""
+        from dataretrieval.waterdata.chunking import (
+            ChunkInterrupted,
+            RateLimited,
+            RequestTooLarge,
+            ServiceUnavailable,
+        )
+
+        for cls in (RateLimited, ServiceUnavailable, RequestTooLarge, ChunkInterrupted):
+            assert issubclass(cls, exceptions.DataRetrievalError)
+        assert issubclass(RateLimited, RuntimeError)
+        assert issubclass(ServiceUnavailable, RuntimeError)
+        assert issubclass(RequestTooLarge, ValueError)
+
+    def test_base_exported_at_top_level(self):
+        """Users can write ``except dataretrieval.DataRetrievalError``."""
+        import dataretrieval
+
+        assert dataretrieval.DataRetrievalError is exceptions.DataRetrievalError
+
+
 class Test_BaseMetadata:
     """Tests of BaseMetadata"""