Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dataretrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
except PackageNotFoundError:
__version__ = "version-unknown"

from dataretrieval.exceptions import *
from dataretrieval.nadp import *
from dataretrieval.nwis import *
from dataretrieval.samples import *
Expand Down
142 changes: 142 additions & 0 deletions dataretrieval/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Exception taxonomy for ``dataretrieval``.

A failed request from any service module (``nwis``, ``wqp``, ``waterdata``,
``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can
handle any request failure with a single ``except dataretrieval.DataRetrievalError``.

This module deliberately has no third-party dependencies, so any module can
import it without pulling in pandas/httpx.
"""

from __future__ import annotations

__all__ = [
    # Base class of the whole hierarchy.
    "DataRetrievalError",
    # ValueError-compatible errors for the legacy ``query()`` path.
    "BadRequestError",
    "NotFoundError",
    "RequestTooLargeError",
    "ServiceUnavailableError",
    # Raised when the selection criteria matched no sites/data.
    "NoSitesError",
    # Water Data API transport and chunking errors.
    "RateLimited",
    "ServiceUnavailable",
    "RequestTooLarge",
]


class DataRetrievalError(Exception):
    """Root of the ``dataretrieval`` exception hierarchy.

    Raised (through a subclass) whenever a request to a USGS or EPA web
    service fails. Because every service module (``nwis``, ``wqp``,
    ``waterdata``, ``nldi``, ...) raises a subclass of this type, a single
    handler covers any request failure::

        try:
            df, md = dataretrieval.wqp.get_results(...)
        except dataretrieval.DataRetrievalError:
            ...

    Each subclass additionally derives from the built-in exception this
    package historically raised for the same condition -- for example
    :class:`BadRequestError` is also a :class:`ValueError` and
    :class:`RateLimited` is also a :class:`RuntimeError` -- so pre-existing
    ``except ValueError`` / ``except RuntimeError`` handlers continue to
    work unchanged.
    """


# Legacy ``query()`` path: HTTP status families mapped to ValueError-compatible
# types (the type that path has always raised).
class BadRequestError(DataRetrievalError, ValueError):
    """Raised when the service rejects the request parameters (HTTP 400)."""


class NotFoundError(DataRetrievalError, ValueError):
    """Raised for HTTP 404: resource not found, often due to an empty query."""


class RequestTooLargeError(DataRetrievalError, ValueError):
    """Raised when the request URL is too long for the service.

    Covers both an HTTP 414 response and a URL that was rejected
    client-side before it was sent.
    """


class ServiceUnavailableError(DataRetrievalError, ValueError):
    """Raised when the service is down or answers with a server error (HTTP 5xx)."""


class NoSitesError(DataRetrievalError):
    """The selection criteria matched no sites/data.

    Parameters
    ----------
    url
        URL of the request that returned no sites/data; it is echoed in the
        error message.

    Attributes
    ----------
    url
        The URL passed to the constructor.
    """

    def __init__(self, url) -> None:
        # Forward ``url`` to the base initializer so ``args`` is populated
        # even when the exception is built as ``NoSitesError(url=...)``.
        # ``copy`` and ``pickle`` rebuild exceptions from ``args`` and would
        # otherwise fail with a missing-argument ``TypeError``.
        super().__init__(url)
        self.url = url

    def __str__(self) -> str:
        return (
            "No sites/data found using the selection criteria specified in "
            f"url: {self.url}"
        )


# Water Data API transport errors: retryable HTTP status families, surfaced as
# RuntimeError-compatible types the chunker detects via ``isinstance`` and wraps
# as resumable interruptions.
class _RetryableTransportError(DataRetrievalError, RuntimeError):
    """
    Common base for typed HTTP transport failures that the chunker treats
    as transient.

    Instances are raised by
    :func:`dataretrieval.waterdata.utils._raise_for_non_200` and walked by
    :func:`dataretrieval.waterdata.chunking._classify_chunk_error`. There is
    one subclass per recoverable HTTP status family (429 →
    :class:`RateLimited`, 5xx → :class:`ServiceUnavailable`), and
    ``ChunkedCall`` wraps them as resumable
    :class:`~dataretrieval.waterdata.chunking.ChunkInterrupted` subclasses.

    Parameters
    ----------
    message : str
        Human-readable error message.
    retry_after : float, optional
        Number of seconds to wait before retrying, as parsed from the
        ``Retry-After`` response header.

    Attributes
    ----------
    retry_after : float or None
        Number of seconds to wait before retrying, as parsed from the
        ``Retry-After`` response header; ``None`` when the header was
        absent or unparseable.
    """

    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
        self.retry_after = retry_after
        super().__init__(message)


class RateLimited(_RetryableTransportError):
    """
    Raised when a USGS Water Data API request is rejected with HTTP 429.

    Having a dedicated type lets callers -- notably the multi-value
    chunker -- recognize rate-limit failures with ``isinstance`` rather
    than by string-matching error messages.
    """


class ServiceUnavailable(_RetryableTransportError):
    """
    Raised when a USGS Water Data API request is rejected with HTTP 5xx.

    The typed counterpart of :class:`RateLimited` for server-side
    failures: it lets ``ChunkedCall`` handle transient server errors as
    resumable interruptions instead of fatal programmer errors.
    """


class RequestTooLarge(DataRetrievalError, ValueError):
    """
    Raised when no chunking plan fits within the URL byte limit.

    This happens when even the smallest reducible plan -- every list axis
    at singleton chunks and the filter at one clause per sub-request --
    still exceeds the server's byte limit. To recover, shrink the input
    lists, simplify the filter, or split the call manually.
    """
69 changes: 38 additions & 31 deletions dataretrieval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@

import dataretrieval
from dataretrieval.codes import tz
from dataretrieval.exceptions import (
BadRequestError,
NoSitesError,
NotFoundError,
RequestTooLargeError,
ServiceUnavailableError,
)

HTTPX_DEFAULTS = {
"follow_redirects": True,
Expand Down Expand Up @@ -270,14 +277,42 @@ def __repr__(self) -> str:
data_list.append(data) # append results to list"""


def _url_too_long_error(detail: str) -> ValueError:
return ValueError(
def _url_too_long_error(detail: str) -> RequestTooLargeError:
    """Build (without raising) the error reported for an over-long request URL.

    Parameters
    ----------
    detail : str
        Context describing how the URL was rejected; embedded in the message.

    Returns
    -------
    RequestTooLargeError
        Exception instance for the caller to ``raise``.
    """
    message = (
        "Request URL too long. Modify your query to use fewer sites. "
        f"{detail}. Pseudo-code example of how to split your query: "
        f"\n {_URL_TOO_LONG_EXAMPLE}"
    )
    return RequestTooLargeError(message)


def _raise_for_status(response: httpx.Response) -> None:
    """Translate an unsuccessful HTTP response into a typed exception.

    Single home for the status-code-to-exception mapping used by the shared
    :func:`query` path, so that every legacy service module (``wqp``,
    ``nwis``, ``ngwmn``, ``nldi``) reports request failures identically.
    Every type raised here is also a :class:`ValueError`, which keeps this
    module's historical contract intact.

    Parameters
    ----------
    response : httpx.Response
        The response to inspect.

    Raises
    ------
    BadRequestError
        On HTTP 400.
    NotFoundError
        On HTTP 404.
    RequestTooLargeError
        On HTTP 414.
    ServiceUnavailableError
        On any HTTP 5xx status.

    Notes
    -----
    Any other status code returns ``None`` without raising.
    """
    code = response.status_code

    # Every branch raises, so independent guard clauses are sufficient.
    if code == 400:
        raise BadRequestError(
            f"Bad Request, check that your parameters are correct. URL: {response.url}"
        )
    if code == 404:
        raise NotFoundError(
            "Page Not Found Error. May be the result of an empty query. "
            f"URL: {response.url}"
        )
    if code == 414:
        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
    if 500 <= code < 600:
        raise ServiceUnavailableError(
            f"Service Unavailable: {code} {response.reason_phrase}. "
            f"The service at {response.url} may be down or experiencing issues."
        )


def query(url, payload, delimiter=",", ssl_check=True):
"""Send a query.

Expand Down Expand Up @@ -321,37 +356,9 @@ def query(url, payload, delimiter=",", ssl_check=True):
except httpx.InvalidURL as exc:
raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc

if response.status_code == 400:
raise ValueError(
f"Bad Request, check that your parameters are correct. URL: {response.url}"
)
elif response.status_code == 404:
raise ValueError(
"Page Not Found Error. May be the result of an empty query. "
+ f"URL: {response.url}"
)
elif response.status_code == 414:
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
elif 500 <= response.status_code < 600:
raise ValueError(
f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
+ f"The service at {response.url} may be down or experiencing issues."
)
_raise_for_status(response)

if response.text.startswith("No sites/data"):
raise NoSitesError(response.url)

return response


class NoSitesError(Exception):
    """Custom error class used when selection criteria returns no sites/data.

    Parameters
    ----------
    url
        URL of the request that returned no sites/data; it is echoed in the
        error message.

    Attributes
    ----------
    url
        The URL passed to the constructor.
    """

    def __init__(self, url) -> None:
        # Forward ``url`` to ``Exception.__init__`` so ``args`` is populated
        # even when the exception is built as ``NoSitesError(url=...)``.
        # ``copy`` and ``pickle`` rebuild exceptions from ``args`` and would
        # otherwise fail with a missing-argument ``TypeError``.
        super().__init__(url)
        self.url = url

    def __str__(self) -> str:
        return (
            "No sites/data found using the selection criteria specified in "
            f"url: {self.url}"
        )
71 changes: 7 additions & 64 deletions dataretrieval/waterdata/chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@
import pandas as pd
from anyio.from_thread import start_blocking_portal

from dataretrieval.exceptions import (
DataRetrievalError,
RateLimited,
RequestTooLarge,
ServiceUnavailable,
)
from dataretrieval.utils import HTTPX_DEFAULTS

from . import _progress
Expand Down Expand Up @@ -383,70 +389,7 @@ def _passthrough_result(
return frame, response


class _RetryableTransportError(RuntimeError):
    """
    Common base for typed HTTP transport failures that the chunker treats
    as transient.

    Instances are raised by
    :func:`dataretrieval.waterdata.utils._raise_for_non_200` and walked by
    :func:`_classify_chunk_error`. There is one subclass per recoverable
    HTTP status family (429 → :class:`RateLimited`,
    5xx → :class:`ServiceUnavailable`), and ``ChunkedCall`` wraps them as
    resumable :class:`ChunkInterrupted` subclasses.

    Parameters
    ----------
    message : str
        Human-readable error message.
    retry_after : float, optional
        Number of seconds to wait before retrying, as parsed from the
        ``Retry-After`` response header.

    Attributes
    ----------
    retry_after : float or None
        Number of seconds to wait before retrying, as parsed from the
        ``Retry-After`` response header; ``None`` when the header was
        absent or unparseable.
    """

    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
        self.retry_after = retry_after
        super().__init__(message)


class RateLimited(_RetryableTransportError):
    """
    Raised when a USGS Water Data API request is rejected with HTTP 429.

    Having a dedicated type lets callers -- notably the multi-value
    chunker -- recognize rate-limit failures with ``isinstance`` rather
    than by string-matching error messages.
    """


class ServiceUnavailable(_RetryableTransportError):
    """
    Raised when a USGS Water Data API request is rejected with HTTP 5xx.

    The typed counterpart of :class:`RateLimited` for server-side
    failures: it lets ``ChunkedCall`` handle transient server errors as
    resumable interruptions instead of fatal programmer errors.
    """


class RequestTooLarge(ValueError):
    """
    Raised when no chunking plan fits within the URL byte limit.

    This happens when even the smallest reducible plan -- every list axis
    at singleton chunks and the filter at one clause per sub-request --
    still exceeds the server's byte limit. To recover, shrink the input
    lists, simplify the filter, or split the call manually.
    """


class ChunkInterrupted(RuntimeError):
class ChunkInterrupted(DataRetrievalError, RuntimeError):
"""
Base class for mid-stream chunk failures whose completed work is
preserved and resumable.
Expand Down
3 changes: 1 addition & 2 deletions dataretrieval/waterdata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@
from anyio.from_thread import start_blocking_portal

from dataretrieval import __version__
from dataretrieval.exceptions import RateLimited, ServiceUnavailable
from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata
from dataretrieval.waterdata import _progress, chunking
from dataretrieval.waterdata.chunking import (
_QUOTA_HEADER,
RateLimited,
ServiceUnavailable,
_safe_elapsed,
get_active_client,
)
Expand Down
Loading