From 283230cf044af8b064b81546bfedb79256585cb7 Mon Sep 17 00:00:00 2001 From: Kan Fu Date: Mon, 3 Nov 2025 16:39:56 -0800 Subject: [PATCH 1/2] feat: add data availability api, test case and documentation --- .../Discover_Data_Availability.md | 37 ++++++++++ doc/source/Code_Examples/index.md | 1 + src/onc/modules/_OncDiscovery.py | 4 ++ src/onc/modules/_OncService.py | 1 + src/onc/onc.py | 72 +++++++++++++++++++ .../test_data_availability.py | 50 +++++++++++++ 6 files changed, 165 insertions(+) create mode 100644 doc/source/Code_Examples/Discover_Data_Availability.md create mode 100644 tests/discover_data_availability/test_data_availability.py diff --git a/doc/source/Code_Examples/Discover_Data_Availability.md b/doc/source/Code_Examples/Discover_Data_Availability.md new file mode 100644 index 0000000..bef6df2 --- /dev/null +++ b/doc/source/Code_Examples/Discover_Data_Availability.md @@ -0,0 +1,37 @@ +# Discover Data Availability for Data Products + +```python +# Get the token from your Oceans 3.0 profile page +from onc import ONC + +onc = ONC("YOUR_TOKEN") +``` + +## [/dataAvailability/dataproducts](https://data.oceannetworks.ca/OpenAPI#get-/dataAvailability/dataproducts) + +### Get data availability from a specific location and a device category + +Return which data products are available with _deviceCategoryCode_ "**BPR**" at location Barkley Upper Slope ( +_locationCode_:"**NCBC**"). + +```python + +params = { + "deviceCategoryCode": "BPR", + "locationCode": "NCBC", +} +onc.getDataAvailability(params) +``` + +### Get data availability from a specific device with a specific extension + +Return which data products are available with _deviceCode_ "**BPR_BC**" and extension "**raw**" + +```python + +params = { + "deviceCode": "BPR_BC", + "extension": "raw", +} +onc.getDataAvailability(params) +``` \ No newline at end of file diff --git a/doc/source/Code_Examples/index.md b/doc/source/Code_Examples/index.md index 5deceb0..bdc8917 100644 --- a/doc/source/Code_Examples/index.md +++ b/doc/source/Code_Examples/index.md @@ -12,6 +12,7 @@ Discover_Deployments.ipynb Discover_Device_Categories.ipynb Discover_Properties.ipynb Discover_Data_Products.ipynb +Discover_Data_Availability.ipynb Download_Data_Products.ipynb Request_Real_Time_Data.ipynb Download_Archived_Files.ipynb diff --git a/src/onc/modules/_OncDiscovery.py b/src/onc/modules/_OncDiscovery.py index 297d3df..8c9280c 100644 --- a/src/onc/modules/_OncDiscovery.py +++ b/src/onc/modules/_OncDiscovery.py @@ -46,6 +46,10 @@ def getProperties(self, filters: dict): def getDataProducts(self, filters: dict): filters = filters or {} return self._discoveryRequest(filters, service="dataProducts") + + def getDataAvailability(self, filters: dict): + filters = filters or {} + return self._discoveryRequest(filters, service="dataAvailability/dataproducts") def _sanitizeBooleans(self, data: list): """ diff --git a/src/onc/modules/_OncService.py b/src/onc/modules/_OncService.py index a1d3c78..df85537 100644 --- a/src/onc/modules/_OncService.py +++ b/src/onc/modules/_OncService.py @@ -104,6 +104,7 @@ def _serviceUrl(self, service: str): "archivefile", "scalardata", "rawdata", + "dataAvailability/dataproducts", ]: return f"{self._config('baseUrl')}api/{service}" diff --git a/src/onc/onc.py b/src/onc/onc.py index facdc41..e1dcb69 100644 --- a/src/onc/onc.py +++ b/src/onc/onc.py @@ -794,6 +794,78 @@ def getDataProducts(self, filters: dict | None = None): ] """ # noqa: E501 return self.discovery.getDataProducts(filters) + + def getDataAvailability(self, filters: dict | None = None): + """ + Return information about which data products are available at a given time. + + The API endpoint is ``/dataAvailability/dataproducts``. + + See https://data.oceannetworks.ca/OpenAPI#get-/dataAvailability/dataproducts + for usage and available filters. + + Parameters + ---------- + filters : dict, optional + Query string parameters in the API request. + + Supported parameters are: + + - locationCode + - deviceCategoryCode + - deviceCode + - propertyCode + - dateFrom + - dateTo + - dataProductCode + - extension + - minimumCoverage + - maximumCoverage + - getLatest + - groupBy + - mergeGaps + - includeEmptyDays + - rowLimit + + Returns + ------- + list of dict + API response. + + Examples + -------- + >>> params = { + ... "locationCode": "NCBC", + ... "deviceCategoryCode": "BPR", + ... "dateFrom": "2019-11-23", + ... "dateTo": "2019-11-30", + ... } # doctest: +SKIP + >>> onc.getDataAvailability(params) # doctest: +SKIP + { + "availableDataProducts": [ + { + "averageFileCoverage": 0.875, + "dataProductCode": "LF", + "dateFrom": "2019-11-23T00:00:00.000Z", + "dateTo": "2019-12-01T00:00:00.000Z", + "deviceCode": "BPR-Folger-59", + "extension": "txt", + "fileCount": 7, + "maxFileCoverage": 1.0, + "maxFileCoverageDate": "2019-11-23T00:00:00.000Z", + "minFileCoverage": 0.0, + "minFileCoverageDate": "2019-11-30T00:00:00.000Z", + "totalFileSize": 8707618, + "totalUncompressedFileSize": 33868912, + } + ], + "messages": [], + "next": None, + "queryUrl": "https://data.oceannetworks.ca/api/dataAvailability/dataproducts?locationCode=NCBC&deviceCategoryCode=BPR&dateFrom=2019-11-23&dateTo=2019-11-30&token=ONC_TOKEN", + } + + """ # noqa: E501 + return self.discovery.getDataAvailability(filters) # Delivery methods diff --git a/tests/discover_data_availability/test_data_availability.py b/tests/discover_data_availability/test_data_availability.py new file mode 100644 index 0000000..1a4a201 --- /dev/null +++ b/tests/discover_data_availability/test_data_availability.py @@ -0,0 +1,50 @@ +import pytest +import requests + + +@pytest.fixture +def params(): + return { + "locationCode": "NCBC", + "deviceCategoryCode": "BPR", + "dateFrom": "2019-11-23", + "dateTo": "2019-11-30", + } + + +def test_invalid_param_value(requester, params): + params_invalid_param_value = params | {"locationCode": "INVALID"} + with pytest.raises(requests.HTTPError, match=r"API Error 127"): + requester.getDataAvailability(params_invalid_param_value) + + +def test_valid_params(requester, params, util): + data = requester.getDataAvailability(params) + + expected_keys = { + "availableDataProducts": list, + "messages": list, + "next": None, + "queryUrl": str, + } + + expected_keys_available_data_products = { + "averageFileCoverage": float, + "dataProductCode": str, + "dateFrom": str, + "dateTo": str, + "deviceCode": str, + "extension": str, + "fileCount": int, + "maxFileCoverage": float, + "maxFileCoverageDate": str, + "minFileCoverage": float, + "minFileCoverageDate": str, + "totalFileSize": int, + "totalUncompressedFileSize": int, + } + + util.assert_dict_key_types(data, expected_keys) + util.assert_dict_key_types( + data["availableDataProducts"][0], expected_keys_available_data_products + ) From c286c9fa0405e9cb0263a1acf45fce3a84b2825e Mon Sep 17 00:00:00 2001 From: Kan Fu Date: Wed, 5 Nov 2025 10:55:39 -0800 Subject: [PATCH 2/2] refactor: remove legacy "method" query parameter to path parameter --- src/onc/modules/_DataProductFile.py | 3 +-- src/onc/modules/_MultiPage.py | 36 ++++++++++++++--------------- src/onc/modules/_OncArchive.py | 22 ++++++++---------- src/onc/modules/_OncDelivery.py | 9 +++----- src/onc/modules/_OncDiscovery.py | 7 +++--- src/onc/modules/_OncRealTime.py | 13 ++++------- src/onc/modules/_OncService.py | 15 ++++++++---- src/onc/onc.py | 14 +++++------ 8 files changed, 58 insertions(+), 61 deletions(-) diff --git a/src/onc/modules/_DataProductFile.py b/src/onc/modules/_DataProductFile.py index 0eba625..b3d1e23 100644 --- a/src/onc/modules/_DataProductFile.py +++ b/src/onc/modules/_DataProductFile.py @@ -23,14 +23,13 @@ def __init__(self, dpRunId: int, index: str, baseUrl: str, token: str): self._retries = 0 self._status = 202 self._downloaded = False - self._baseUrl = f"{baseUrl}api/dataProductDelivery" + self._baseUrl = f"{baseUrl}api/dataProductDelivery/download" self._filePath = "" self._fileSize = 0 self._runningTime = 0 self._downloadingTime = 0 self._filters = { - "method": "download", "token": token, "dpRunId": dpRunId, "index": index, diff --git a/src/onc/modules/_MultiPage.py b/src/onc/modules/_MultiPage.py index 1443234..6369773 100644 --- a/src/onc/modules/_MultiPage.py +++ b/src/onc/modules/_MultiPage.py @@ -23,7 +23,7 @@ def getAllPages(self, service: str, url: str, filters: dict): """ # pop archivefiles extension extension = None - if service == "archivefiles" and "extension" in filters: + if service.startswith("archivefile") and "extension" in filters: extension = filters["extension"] del filters["extension"] @@ -84,7 +84,7 @@ def _doPageRequest( @param extension: Only provide for archivefiles filtering Returns a tuple (jsonResponse, duration) """ - if service == "archivefiles": + if service.startswith("archivefile"): response, duration = self.parent()._doRequest(url, filters, getTime=True) response = self.parent()._filterByExtension(response, extension) else: @@ -97,7 +97,7 @@ def _catenateData(self, response: object, nextResponse: object, service: str): Concatenates the data results from nextResponse into response Compatible with the row structure of different services """ - if service == "scalardata": + if service.startswith("scalardata"): keys = response["sensorData"][0]["data"].keys() for sensorData in response["sensorData"]: @@ -111,11 +111,11 @@ def _catenateData(self, response: object, nextResponse: object, service: str): for key in keys: sensorData["data"][key] += nextSensor["data"][key] - elif service == "rawdata": + elif service.startswith("rawdata"): for key in response["data"]: response["data"][key] += nextResponse["data"][key] - elif service == "archivefiles": + elif service.startswith("archivefile"): response["files"] += nextResponse["files"] def _estimatePages(self, response: object, service: str): @@ -154,13 +154,13 @@ def _rowCount(self, response, service: str): """ Returns the number of records in the response """ - if service == "scalardata": + if service.startswith("scalardata"): return len(response["sensorData"][0]["data"]["sampleTimes"]) - elif service == "rawdata": + elif service.startswith("rawdata"): return len(response["data"]["times"]) - elif service == "archivefiles": + elif service.startswith("archivefile"): return len(response["files"]) return 0 @@ -171,15 +171,16 @@ def _responseTimespan(self, response, service: str): Returns a timedelta object """ # grab the first and last sample times - if service in ["scalardata", "rawdata"]: - if service == "scalardata": - first = response["sensorData"][0]["data"]["sampleTimes"][0] - last = response["sensorData"][0]["data"]["sampleTimes"][-1] - elif service == "rawdata": - first = response["data"]["times"][0] - last = response["data"]["times"][-1] - - elif service == "archivefiles": + + if service.startswith("scalardata"): + first = response["sensorData"][0]["data"]["sampleTimes"][0] + last = response["sensorData"][0]["data"]["sampleTimes"][-1] + + elif service.startswith("rawdata"): + first = response["data"]["times"][0] + last = response["data"]["times"][-1] + + elif service.startswith("archivefile"): row0 = response["files"][0] if isinstance(row0, str): regExp = r"\d{8}T\d{6}\.\d{3}Z" @@ -198,7 +199,6 @@ def _responseTimespan(self, response, service: str): last = response["files"][-1]["dateFrom"] # compute the timedelta - # print(first, last) dateFirst = dateutil.parser.parse(first) dateLast = dateutil.parser.parse(last) return dateLast - dateFirst diff --git a/src/onc/modules/_OncArchive.py b/src/onc/modules/_OncArchive.py index ecc6e09..9129a99 100644 --- a/src/onc/modules/_OncArchive.py +++ b/src/onc/modules/_OncArchive.py @@ -23,7 +23,7 @@ def getArchivefileByLocation(self, filters: dict, allPages: bool): The filenames obtained can be used to download files using the ``downloadArchivefile`` method. """ # noqa: E501 - return self._getList(filters, by="location", allPages=allPages) + return self._getList(filters, service="archivefile/location", allPages=allPages) def getArchivefileByDevice(self, filters: dict, allPages: bool): """ @@ -31,7 +31,7 @@ def getArchivefileByDevice(self, filters: dict, allPages: bool): The filenames obtained can be used to download files using the ``downloadArchivefile`` method. """ # noqa: E501 - return self._getList(filters, by="device", allPages=allPages) + return self._getList(filters, service="archivefile/device", allPages=allPages) def getArchivefile(self, filters: dict, allPages: bool): return self._delegateByFilters( @@ -49,16 +49,15 @@ def getArchivefileUrl(self, filename: str) -> str: """ Return an archivefile absolute download URL for a filename """ - url = self._serviceUrl("archivefile") + url = self._serviceUrl("archivefile/download") token = self._config("token") - return f"{url}/download?filename={filename}&token={token}" + return f"{url}?filename={filename}&token={token}" def downloadArchivefile(self, filename: str = "", overwrite: bool = False): - url = self._serviceUrl("archivefiles") + url = self._serviceUrl("archivefile/download") filters = { "token": self._config("token"), - "method": "getFile", "filename": filename, } @@ -151,15 +150,14 @@ def downloadDirectArchivefile( "stats": {"totalSize": size, "downloadTime": time, "fileCount": successes}, } - def _getList(self, filters: dict, by: str = "location", allPages: bool = False): + def _getList( + self, filters: dict, service: str = "location", allPages: bool = False + ): """ Wraps archivefiles getArchivefileByLocation and getArchivefileByDevice methods """ - url = self._serviceUrl("archivefiles") + url = self._serviceUrl(service) filters["token"] = self._config("token") - filters["method"] = ( - "getListByLocation" if by == "location" else "getListByDevice" - ) # parse and remove the artificial parameter extension extension = None @@ -169,7 +167,7 @@ def _getList(self, filters: dict, by: str = "location", allPages: bool = False): if allPages: mp = _MultiPage(self) - result = mp.getAllPages("archivefiles", url, filters2) + result = mp.getAllPages(service, url, filters2) else: if "extension" in filters2: del filters2["extension"] diff --git a/src/onc/modules/_OncDelivery.py b/src/onc/modules/_OncDelivery.py index bc210b2..616ce24 100644 --- a/src/onc/modules/_OncDelivery.py +++ b/src/onc/modules/_OncDelivery.py @@ -65,9 +65,8 @@ def requestDataProduct(self, filters: dict): """ Data product request """ - filters["method"] = "request" filters["token"] = self._config("token") - url = "{:s}api/dataProductDelivery".format(self._config("baseUrl")) + url = f"{self._config('baseUrl')}api/dataProductDelivery/request" response = self._doRequest(url, filters) self._estimatePollPeriod(response) @@ -93,7 +92,7 @@ def runDataProduct(self, dpRequestId: int, waitComplete: bool): print( f"To cancel the running data product, run 'onc.cancelDataProduct({dpRequestId})'" # noqa: E501 ) - url = f"{self._config('baseUrl')}api/dataProductDelivery" + url = f"{self._config('baseUrl')}api/dataProductDelivery/run" runResult = {"runIds": [], "fileCount": 0, "runTime": 0, "requestCount": 0} start = time() @@ -101,7 +100,6 @@ def runDataProduct(self, dpRequestId: int, waitComplete: bool): response = requests.get( url, { - "method": "run", "token": self._config("token"), "dpRequestId": dpRequestId, }, @@ -283,9 +281,8 @@ def _countFilesInProduct(self, runId: int): Given a runId, polls the "download" method to count files. Uses HTTP HEAD to avoid downloading the files. """ - url = f"{self._config('baseUrl')}api/dataProductDelivery" + url = f"{self._config('baseUrl')}api/dataProductDelivery/download" filters = { - "method": "download", "token": self._config("token"), "dpRunId": runId, "index": 1, diff --git a/src/onc/modules/_OncDiscovery.py b/src/onc/modules/_OncDiscovery.py index 8c9280c..8ae32f8 100644 --- a/src/onc/modules/_OncDiscovery.py +++ b/src/onc/modules/_OncDiscovery.py @@ -10,9 +10,8 @@ class _OncDiscovery(_OncService): def __init__(self, parent: object): super().__init__(parent) - def _discoveryRequest(self, filters: dict, service: str, method: str = "get"): + def _discoveryRequest(self, filters: dict, service: str): url = self._serviceUrl(service) - filters["method"] = method filters["token"] = self._config("token") result = self._doRequest(url, filters) @@ -25,7 +24,7 @@ def getLocations(self, filters: dict): def getLocationHierarchy(self, filters: dict): filters = filters or {} - return self._discoveryRequest(filters, service="locations", method="getTree") + return self._discoveryRequest(filters, service="locations/tree") def getDeployments(self, filters: dict): filters = filters or {} @@ -46,7 +45,7 @@ def getProperties(self, filters: dict): def getDataProducts(self, filters: dict): filters = filters or {} return self._discoveryRequest(filters, service="dataProducts") - + def getDataAvailability(self, filters: dict): filters = filters or {} return self._discoveryRequest(filters, service="dataAvailability/dataproducts") diff --git a/src/onc/modules/_OncRealTime.py b/src/onc/modules/_OncRealTime.py index cad4386..5ec55cb 100644 --- a/src/onc/modules/_OncRealTime.py +++ b/src/onc/modules/_OncRealTime.py @@ -19,7 +19,7 @@ def getScalardataByLocation(self, filters: dict, allPages: bool): See https://wiki.oceannetworks.ca/display/O2A/scalardata+service for usage and available filters """ - return self._getDirectAllPages(filters, "scalardata", "getByLocation", allPages) + return self._getDirectAllPages(filters, "scalardata/location", allPages) def getScalardataByDevice(self, filters: dict, allPages: bool): """ @@ -28,7 +28,7 @@ def getScalardataByDevice(self, filters: dict, allPages: bool): See https://wiki.oceannetworks.ca/display/O2A/scalardata+service for usage and available filters. """ - return self._getDirectAllPages(filters, "scalardata", "getByDevice", allPages) + return self._getDirectAllPages(filters, "scalardata/device", allPages) def getScalardata(self, filters: dict, allPages: bool): return self._delegateByFilters( @@ -45,7 +45,7 @@ def getRawdataByLocation(self, filters: dict, allPages: bool): See https://wiki.oceannetworks.ca/display/O2A/rawdata+service for usage and available filters. """ - return self._getDirectAllPages(filters, "rawdata", "getByLocation", allPages) + return self._getDirectAllPages(filters, "rawdata/location", allPages) def getRawdataByDevice(self, filters: dict, allPages: bool): """ @@ -54,7 +54,7 @@ def getRawdataByDevice(self, filters: dict, allPages: bool): See https://wiki.oceannetworks.ca/display/O2A/rawdata+service for usage and available filters. """ - return self._getDirectAllPages(filters, "rawdata", "getByDevice", allPages) + return self._getDirectAllPages(filters, "rawdata/device", allPages) def getRawdata(self, filters: dict, allPages: bool): return self._delegateByFilters( @@ -68,9 +68,7 @@ def getSensorCategoryCodes(self, filters: dict): updated_filters = filters | {"returnOptions": "excludeScalarData"} return self.getScalardata(updated_filters, False)["sensorData"] - def _getDirectAllPages( - self, filters: dict, service: str, method: str, allPages: bool - ) -> Any: + def _getDirectAllPages(self, filters: dict, service: str, allPages: bool) -> Any: """ Keeps downloading all scalar or raw data pages until finished. @@ -83,7 +81,6 @@ def _getDirectAllPages( # prepare filters for first page request filters = filters or {} url = self._serviceUrl(service) - filters["method"] = method filters["token"] = self._config("token") # if sensorCategoryCodes is an array, join it into a comma-separated string diff --git a/src/onc/modules/_OncService.py b/src/onc/modules/_OncService.py index df85537..d1b382c 100644 --- a/src/onc/modules/_OncService.py +++ b/src/onc/modules/_OncService.py @@ -95,16 +95,23 @@ def _serviceUrl(self, service: str): """ if service in [ "locations", + "locations/tree", "deployments", "devices", "deviceCategories", "properties", "dataProducts", - "archivefiles", - "archivefile", - "scalardata", - "rawdata", "dataAvailability/dataproducts", + "archivefile/device", + "archivefile/location", + "archivefile/download", + "scalardata/location", + "scalardata/device", + "rawdata/location", + "rawdata/device", + "dataProductDelivery/request", + "dataProductDelivery/run", + "dataProductDelivery/download", ]: return f"{self._config('baseUrl')}api/{service}" diff --git a/src/onc/onc.py b/src/onc/onc.py index e1dcb69..2176214 100644 --- a/src/onc/onc.py +++ b/src/onc/onc.py @@ -794,7 +794,7 @@ def getDataProducts(self, filters: dict | None = None): ] """ # noqa: E501 return self.discovery.getDataProducts(filters) - + def getDataAvailability(self, filters: dict | None = None): """ Return information about which data products are available at a given time. @@ -807,7 +807,7 @@ def getDataAvailability(self, filters: dict | None = None): Parameters ---------- filters : dict, optional - Query string parameters in the API request. + Query string parameters in the API request. Supported parameters are: @@ -823,14 +823,14 @@ def getDataAvailability(self, filters: dict | None = None): - maximumCoverage - getLatest - groupBy - - mergeGaps - - includeEmptyDays - - rowLimit + - mergeGaps + - includeEmptyDays + - rowLimit Returns ------- list of dict - API response. + API response. Examples -------- @@ -863,7 +863,7 @@ def getDataAvailability(self, filters: dict | None = None): "next": None, "queryUrl": "https://data.oceannetworks.ca/api/dataAvailability/dataproducts?locationCode=NCBC&deviceCategoryCode=BPR&dateFrom=2019-11-23&dateTo=2019-11-30&token=ONC_TOKEN", } - + """ # noqa: E501 return self.discovery.getDataAvailability(filters)