Skip to content

Commit

Permalink
fix: fmt_dcor did not properly retry failed connections
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Nov 3, 2023
1 parent 348058c commit 72b65c4
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 19 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
0.55.2
- fix: fmt_dcor did not properly retry failed connections
- enh: minor optimizations for fmt_http and fmt_s3
0.55.1
- fix: disable instance cache fsspec.HTTPFileSystem (fmt_http)
0.55.0
Expand Down
30 changes: 16 additions & 14 deletions dclab/rtdc_dataset/fmt_dcor/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(self, url, api_key="", cert_path=None, dcserv_api_version=2):
self.api_key = api_key
#: ckanext-dc_serve dcserv API version
self.dcserv_api_version = dcserv_api_version
#: create a session
self.session = requests.Session()
self._cache = {}

@classmethod
Expand All @@ -55,7 +57,7 @@ def add_api_key(cls, api_key):
APIHandler.api_keys.append(api_key)

def _get(self, query, feat=None, trace=None, event=None, api_key="",
retries=3):
retries=13):
# "version=2" introduced in dclab 0.54.3
# (supported since ckanext.dc_serve 0.13.2)
qstr = f"&version={self.dcserv_api_version}&query={query}"
Expand All @@ -68,24 +70,24 @@ def _get(self, query, feat=None, trace=None, event=None, api_key="",
apicall = self.url + qstr
fail_reasons = []
for _ in range(retries):
req = requests.get(apicall,
headers={"Authorization": api_key},
verify=self.verify,
timeout=9.1,
)
try:
# try-except both requests and json conversion
req = self.session.get(apicall,
headers={"Authorization": api_key},
verify=self.verify,
timeout=1,
)
jreq = req.json()
except json.decoder.JSONDecodeError:
fail_reasons.append("invalid json")
time.sleep(0.1) # wait a bit, maybe the server is overloaded
continue
except requests.urllib3.exceptions.ConnectionError:
except requests.urllib3.exceptions.ConnectionError: # requests
fail_reasons.append("connection problem")
time.sleep(5) # wait a bit, maybe the server is overloaded
continue
except requests.urllib3.exceptions.ReadTimeoutError:
except (requests.urllib3.exceptions.ReadTimeoutError,
requests.exceptions.ConnectTimeout): # requests
fail_reasons.append("timeout")
time.sleep(5) # wait a bit, maybe the server is overloaded
except json.decoder.JSONDecodeError: # json
fail_reasons.append("invalid json")
time.sleep(1) # wait a bit, maybe the server is overloaded
continue
else:
break
else:
Expand Down
12 changes: 9 additions & 3 deletions dclab/rtdc_dataset/fmt_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


try:
from fsspec.implementations.http import HTTPFileSystem
import fsspec
import requests
except ModuleNotFoundError:
FSSPEC_AVAILABLE = False
Expand Down Expand Up @@ -52,8 +52,10 @@ def __init__(self,
raise ModuleNotFoundError(
"Package `fsspec[http]` required for http format!")

HTTPFileSystem.cachable = False
self._fs = HTTPFileSystem()
self._fs = fsspec.filesystem("http",
skip_instance_cache=True,
use_listings_cache=False,
)
self._fhttp = self._fs.open(url,
block_size=2**18,
cache_type="readahead")
Expand All @@ -65,6 +67,10 @@ def __init__(self,
# Override self.path with the actual HTTP URL
self.path = url

def close(self):
super(RTDC_HTTP, self).close()
self._fhttp.close()


class HTTPBasin(Basin):
basin_format = "http"
Expand Down
4 changes: 4 additions & 0 deletions dclab/rtdc_dataset/fmt_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ def __init__(self,
# Override self.path with the actual S3 URL
self.path = url

def close(self):
super(RTDC_S3, self).close()
self._f3d.close()


class S3Basin(Basin):
basin_format = "s3"
Expand Down
6 changes: 4 additions & 2 deletions tests/test_rtdc_fmt_dcor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


pytest.importorskip("requests")
pytest.importorskip("s3fs")
pytest.importorskip("fsspec")


with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
Expand Down Expand Up @@ -118,7 +118,9 @@ def test_dcor_cache_trace():
def test_dcor_data():
# reticulocytes.rtdc contains contour data
with dclab.new_dataset("13247dd0-3d8b-711d-a410-468b4de6fb7a") as ds:
assert np.allclose(ds["circ"][0], 0.7309052348136902, rtol=0, atol=1e-5)
assert np.allclose(ds["circ"][0],
0.7309052348136902,
rtol=0, atol=1e-5)
assert np.allclose(ds["area_um"][391], 37.5122, rtol=0, atol=1e-5)
assert np.all(ds["contour"][24][22] == np.array([87, 61]))
assert np.median(ds["image"][1]) == 58
Expand Down

0 comments on commit 72b65c4

Please sign in to comment.