Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SHELL=/bin/bash

test: lint install
coverage run --source=$$(python setup.py --name) -m unittest discover
coverage run --source=$$(python setup.py --name) -m unittest discover -v

lint:
./setup.py flake8
Expand Down
29 changes: 15 additions & 14 deletions hca/dss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from io import open

import requests
from requests.packages.urllib3.exceptions import ProtocolError, DecodeError, ReadTimeoutError

from ..util import SwaggerClient
from ..util.exceptions import SwaggerAPIException
Expand Down Expand Up @@ -37,24 +38,24 @@ def download(self, bundle_uuid, replica, version="", dest_name=""):
file_uuid = file_["uuid"]
filename = file_.get("name", file_uuid)

logger.info("File %s: Retrieving...", filename)
response = self.get_file._request(dict(uuid=file_uuid, replica=replica), stream=True)
logger.info("Retrieving file %s", filename)

file_path = os.path.join(dest_name, filename)

retries = self.get_session().adapters["https://"].max_retries
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely sure I agree with this, but I don't know what's better.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is retrieving the retry policy that the request logic ought to apply, and applying it manually. Arguably all this machinery might be better off in the client class, but we can move it there later. Do you have an issue with the policies being the same, or...?

# When streaming response data, requests/urllib3 does not obey its usual retry logic, so we reenact it here.
try:
if response.ok:
file_path = os.path.join(dest_name, filename)
with self.get_file.stream(uuid=file_uuid, replica=replica) as fh, open(file_path, "wb") as dest_fh:
logger.info("%s", "File {}: GET SUCCEEDED. Writing to disk.".format(filename))
with open(file_path, "wb") as fh:
for chunk in response.iter_content(chunk_size=1024*1024):
if chunk:
fh.write(chunk)
while True:
chunk = fh.raw.read(1024*1024)
if len(chunk) == 0:
break
dest_fh.write(chunk)
logger.info("%s", "File {}: GET SUCCEEDED. Stored at {}.".format(filename, file_path))

else:
logger.error("%s", "File {}: GET FAILED.".format(filename))
logger.error("%s", "Response: {}".format(response.text))
finally:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think you still want the finally clause.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR switches from using requests.Response.iter_content (the questionable streaming read wrapper in requests) to the Swagger client context manager, which incorporates the cleanup logic:

def stream(self, **kwargs):
self._context_manager_response = self._request(kwargs, stream=True)
return self
def __enter__(self, **kwargs):
assert self._context_manager_response is not None
return self._context_manager_response
def __exit__(self, exc_type, exc_val, exc_tb):
self._context_manager_response.close()
self._context_manager_response = None

With this logic in place, the finally clause should no longer be necessary.

response.close()
except (ProtocolError, DecodeError, ReadTimeoutError) as e:
logger.error(e)
retries = retries.increment(method="GET", error=e)
return {}

def upload(self, src_dir, replica, staging_bucket, timeout_seconds=1200):
Expand Down