Skip to content

Commit

Permalink
feat: add convenience method get_s3_attributes_for_artifact
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Mar 7, 2024
1 parent 20d44b1 commit 3bb70e8
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
0.9.1
- feat: add convenience method `get_s3_attributes_for_artifact`
0.9.0
- feat: introduce `testing.make_dataset_via_s3` and
`testing.make_resource_via_s3` which upload resources via S3
Expand Down
75 changes: 75 additions & 0 deletions dcor_shared/s3cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,81 @@ def create_presigned_url(
filename=filename)


def get_s3_attributes_for_artifact(
        resource_id: str,
        artifact: Literal["condensed", "preview", "resource"] = "resource"):
    """Return selected attributes of an artifact in the S3 object store

    The attributes are extracted from the response of a `head_object`
    request for the bucket and object name that belong to the artifact.

    Parameters
    ----------
    resource_id: str
        Identifier of the resource; forwarded to
        `get_s3_bucket_object_for_artifact`
    artifact: str
        Which artifact of the resource to look up ("condensed",
        "preview", or "resource")

    Returns
    -------
    meta: dict
        Metadata dictionary with the keys "etag", "server", "size",
        and "success". A key is omitted if the corresponding value
        could not be found in the response. "size" is always an
        integer and "success" is always a boolean.

    Notes
    -----
    Errors raised by `head_object` (presumably for missing objects;
    verify against the S3 client documentation) are not caught here.
    """
    bucket_name, object_name = get_s3_bucket_object_for_artifact(
        resource_id=resource_id, artifact=artifact)
    s3_client, _, _ = s3.get_s3()
    attr_info = s3_client.head_object(Bucket=bucket_name, Key=object_name)
    # Example output from MinIO::
    #
    # {'AcceptRanges': 'bytes',
    #  'ContentLength': 904729,
    #  'ContentType': 'application/octet-stream',
    #  'ETag': '"108d47e80f3e5f35110493b1fdcd30d5"',
    #  'LastModified': datetime.datetime(2024, 3, 7, 8, 15,
    #                                    tzinfo=tzutc()),
    #  'Metadata': {},
    #  'ResponseMetadata': {
    #      'HTTPHeaders': {
    #          'accept-ranges': 'bytes',
    #          'content-length': '904729',
    #          'content-type': 'application/octet-stream',
    #          'date': 'Thu, 07 Mar 2024 08:15:02 GMT',
    #          'etag': '"108d47e80f3e5f35110493b1fdcd30d5"',
    #          'last-modified': 'Thu, 07 Mar 2024 '
    #                           '08:15:00 GMT',
    #          'server': 'MinIO',
    #          'strict-transport-security': 'max-age=31536000; '
    #                                       'includeSubDomains',
    #          'vary': 'Origin, Accept-Encoding',
    #          'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6e...',
    #          'x-amz-request-id': '17BA6D680CB67A2C',
    #          'x-amz-tagging-count': '1',
    #          'x-content-type-options': 'nosniff',
    #          'x-xss-protection': '1; mode=block'},
    #      'HTTPStatusCode': 200,
    #      'HostId': 'dd9025bab4ad464b049177c95eb6ebf3...',
    #      'RequestId': '17BA6D680CB67A2C',
    #      'RetryAttempts': 0}
    #  }
    response_meta = attr_info.get("ResponseMetadata", {})
    http_headers = response_meta.get("HTTPHeaders", {})

    meta = {}

    # Prefer the parsed top-level value; fall back to the raw HTTP header.
    etag = attr_info.get("ETag")
    if etag is None:
        etag = http_headers.get("etag")
    if etag is not None:
        meta["etag"] = etag

    server = http_headers.get("server", "unknown")
    if server is not None:
        meta["server"] = server

    size = attr_info.get("ContentLength")
    if size is None:
        size = http_headers.get("content-length")
    if size is not None:
        # The HTTP header value is a string (e.g. '904729'); convert it
        # so that "size" is an integer no matter which source was used.
        meta["size"] = int(size)

    # A missing status code is treated like a failed request.
    meta["success"] = response_meta.get("HTTPStatusCode", 404) == 200

    return meta


def get_s3_bucket_object_for_artifact(
resource_id: str,
artifact: Literal["condensed", "preview", "resource"] = "resource"):
Expand Down
19 changes: 19 additions & 0 deletions tests/test_s3cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,25 @@ def test_create_presigned_url(enqueue_job_mock, tmp_path):
"490efdf5d9bb4cd4b2a6bcf2fe54d4dc201c38530140bcb168980bf8bf846c73"


@pytest.mark.ckan_config('ckan.plugins', 'dcor_schemas')
@pytest.mark.usefixtures('clean_db', 'with_request_context')
@mock.patch('ckan.plugins.toolkit.enqueue_job',
            side_effect=synchronous_enqueue_job)
def test_get_s3_attributes_for_artifact(enqueue_job_mock):
    """Attributes of an uploaded resource can be fetched from S3"""
    resource_id, _, _, _ = setup_s3_resource_on_ckan()

    # The resource must be known to CKAN before we query S3
    shown = helpers.call_action("resource_show", id=resource_id)
    assert shown["id"] == resource_id, "sanity check"

    # Fetch the attributes of the default artifact ("resource")
    attributes = s3cc.get_s3_attributes_for_artifact(resource_id)
    assert attributes["size"] == 904729
    # The remaining entries only need to be present and truthy
    for key in ("success", "etag", "server"):
        assert attributes[key]


@pytest.mark.ckan_config('ckan.plugins', 'dcor_schemas')
@pytest.mark.usefixtures('clean_db', 'with_request_context')
@mock.patch('ckan.plugins.toolkit.enqueue_job',
Expand Down

0 comments on commit 3bb70e8

Please sign in to comment.