From 9c2aeca6bc6dbedb785185accc3829b553e9b6eb Mon Sep 17 00:00:00 2001 From: Gang Li Date: Mon, 29 Nov 2021 15:27:08 +0800 Subject: [PATCH] Feat: Add list_folder_content function for s3 client This function lists just the contents of the specified folder without a full scan of the S3 content, which is useful for generating the listing index --- charon/storage.py | 32 ++++++++++++++++++++++++++++- tests/test_s3client.py | 46 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/charon/storage.py b/charon/storage.py index fd623af0..22dd832c 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -344,11 +344,41 @@ def get_files(self, bucket_name: str, prefix=None, suffix=None) -> Tuple[List[st files = [i.key for i in objs] return (files, True) - def read_file_content(self, bucket_name=None, key=None): + def read_file_content(self, bucket_name, key): bucket = self.get_bucket(bucket_name) fileObject = bucket.Object(key) return str(fileObject.get()['Body'].read(), 'utf-8') + def list_folder_content(self, bucket_name, folder) -> List[str]: + bucket = self.get_bucket(bucket_name) + try: + if not folder or folder.strip() == "/" or folder.strip() == "": + result = bucket.meta.client.list_objects( + Bucket=bucket.name, + Delimiter='/' + ) + else: + prefix = folder if folder.endswith("/") else folder+"/" + result = bucket.meta.client.list_objects( + Bucket=bucket.name, + Prefix=prefix, + Delimiter='/' + ) + except (ClientError, HTTPClientError) as e: + logger.error("ERROR: Can not get contents of %s from bucket" + " %s due to error: %s ", folder, + bucket_name, e) + return [] + + contents = [] + folders = result.get("CommonPrefixes") + if folders: + contents.extend([f.get("Prefix") for f in folders]) + files = result.get("Contents") + if files: + contents.extend([f.get("Key") for f in files]) + return contents + def get_bucket(self, bucket_name: str): return self.client.Bucket(bucket_name) diff --git a/tests/test_s3client.py 
b/tests/test_s3client.py index b73b52a8..e38e8ca2 100644 --- a/tests/test_s3client.py +++ b/tests/test_s3client.py @@ -95,6 +95,52 @@ def test_get_files(self): self.assertNotIn("org/x/y/1.0/x-y-1.0.pom", files) self.assertNotIn("org/x/y/1.0/x-y-1.0.jar", files) + def test_list_folder_content(self): + bucket = self.mock_s3.Bucket(MY_BUCKET) + bucket.put_object( + Key="index.html", Body="test content html" + ) + bucket.put_object( + Key="org/index.html", Body="test content html" + ) + bucket.put_object( + Key="org/foo/bar/1.0/foo-bar-1.0.pom", Body="test content pom" + ) + bucket.put_object( + Key="org/foo/bar/1.0/foo-bar-1.0.jar", Body="test content jar" + ) + bucket.put_object(Key="org/x/y/1.0/x-y-1.0.pom", Body="test content pom") + bucket.put_object(Key="org/x/y/1.0/x-y-1.0.jar", Body="test content jar") + + contents = self.s3_client.list_folder_content(MY_BUCKET, "/") + self.assertEqual(2, len(contents)) + self.assertIn("index.html", contents) + self.assertIn("org/", contents) + + contents = self.s3_client.list_folder_content(MY_BUCKET, "org") + self.assertEqual(3, len(contents)) + self.assertIn("org/foo/", contents) + self.assertIn("org/x/", contents) + self.assertIn("org/index.html", contents) + + contents = self.s3_client.list_folder_content(MY_BUCKET, "org/foo") + self.assertEqual(1, len(contents)) + self.assertIn("org/foo/bar/", contents) + + contents = self.s3_client.list_folder_content(MY_BUCKET, "org/foo/bar") + self.assertEqual(1, len(contents)) + self.assertIn("org/foo/bar/1.0/", contents) + + contents = self.s3_client.list_folder_content(MY_BUCKET, "org/foo/bar/1.0") + self.assertEqual(2, len(contents)) + self.assertIn("org/foo/bar/1.0/foo-bar-1.0.pom", contents) + self.assertIn("org/foo/bar/1.0/foo-bar-1.0.jar", contents) + + contents = self.s3_client.list_folder_content(MY_BUCKET, "org/x/y/1.0") + self.assertEqual(2, len(contents)) + self.assertIn("org/x/y/1.0/x-y-1.0.pom", contents) + self.assertIn("org/x/y/1.0/x-y-1.0.jar", contents) + def 
test_upload_and_delete_files(self): (temp_root, root, all_files) = self.__prepare_files()