diff --git a/charon/cmd/command.py b/charon/cmd/command.py
index 8e197675..a4a87397 100644
--- a/charon/cmd/command.py
+++ b/charon/cmd/command.py
@@ -104,7 +104,7 @@ def upload(
     version: str,
     target: str,
     root_path="maven-repository",
-    ignore_patterns=None,
+    ignore_patterns: List[str] = None,
     debug=False,
     quiet=False,
     dryrun=False
@@ -128,6 +128,7 @@ def upload(
     archive_path = __get_local_repo(repo)
     npm_archive_type = detect_npm_archive(archive_path)
     product_key = f"{product}-{version}"
+    prefix_ = conf.get_bucket_prefix(target)
     if npm_archive_type != NpmArchiveType.NOT_NPM:
         logger.info("This is a npm archive")
         handle_npm_uploading(archive_path, product_key,
@@ -144,6 +145,7 @@ def upload(
             ignore_patterns_list,
             root=root_path,
             bucket_name=aws_bucket,
+            prefix=prefix_,
             dry_run=dryrun)


@@ -221,7 +223,7 @@ def delete(
     version: str,
     target: str,
     root_path="maven-repository",
-    ignore_patterns=None,
+    ignore_patterns: List[str] = None,
     debug=False,
     quiet=False,
     dryrun=False
@@ -245,6 +247,7 @@ def delete(
     archive_path = __get_local_repo(repo)
     npm_archive_type = detect_npm_archive(archive_path)
     product_key = f"{product}-{version}"
+    prefix_ = conf.get_bucket_prefix(target)
     if npm_archive_type != NpmArchiveType.NOT_NPM:
         logger.info("This is a npm archive")
         handle_npm_del(archive_path, product_key,
@@ -261,6 +264,7 @@ def delete(
             ignore_patterns_list,
             root=root_path,
             bucket_name=aws_bucket,
+            prefix=prefix_,
             dry_run=dryrun)
diff --git a/charon/config.py b/charon/config.py
index 2010bc7f..db28c970 100644
--- a/charon/config.py
+++ b/charon/config.py
@@ -19,6 +19,8 @@
 import os
 import logging

+from charon.utils.strings import remove_prefix
+
 CONFIG_FILE = "charon.yaml"

 logger = logging.getLogger(__name__)
@@ -62,8 +64,8 @@ def get_bucket_prefix(self, target: str) -> str:
                 "in charon configuration, so no prefix will "
                 "be used", target)
             prefix = ""
-        if prefix.startswith("/"):
-            prefix = prefix[1:]
+        # removing first slash as it is not needed.
+        prefix = remove_prefix(prefix, "/")
         return prefix

diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py
index 447904f8..693db242 100644
--- a/charon/pkgs/indexing.py
+++ b/charon/pkgs/indexing.py
@@ -21,6 +21,8 @@
 import logging
 from typing import List, Set

+from charon.utils.strings import remove_prefix
+
 logger = logging.getLogger(__name__)


@@ -50,7 +52,11 @@ def generate_index_file_content(self) -> str:


 def generate_indexes(
-    top_level: str, changed_dirs: List[str], s3_client: S3Client, bucket: str
+    top_level: str,
+    changed_dirs: List[str],
+    s3_client: S3Client,
+    bucket: str,
+    prefix: str
 ) -> List[str]:
     if top_level[-1] != '/':
         top_level += '/'
@@ -70,13 +76,13 @@ def generate_indexes(
     s3_folders = sorted(s3_folders, key=FolderLenCompareKey)
     for folder_ in s3_folders:
         index_html = __generate_index_html(
-            s3_client, bucket, folder_, top_level
+            s3_client, bucket, folder_, top_level, prefix
         )
         if index_html:
            generated_htmls.append(index_html)

     root_index = __generate_index_html(
-        s3_client, bucket, "/", top_level
+        s3_client, bucket, "/", top_level, prefix
     )
     if root_index:
         generated_htmls.append(root_index)
@@ -88,11 +94,16 @@ def __generate_index_html(
     s3_client: S3Client,
     bucket: str,
     folder_: str,
-    top_level: str
+    top_level: str,
+    prefix: str = None
 ) -> str:
+    if folder_ != "/":
+        search_folder = os.path.join(prefix, folder_) if prefix else folder_
+    else:
+        search_folder = prefix if prefix else "/"
     contents = s3_client.list_folder_content(
         bucket_name=bucket,
-        folder=folder_
+        folder=search_folder
     )
     index = None
     if len(contents) == 1 and contents[0].endswith("index.html"):
@@ -106,10 +117,22 @@ def __generate_index_html(
             file_paths=[removed_index],
             bucket_name=bucket,
             product=None,
-            root=top_level
+            root=top_level,
+            key_prefix=prefix
         )
     elif len(contents) >= 1:
-        index = __to_html(contents, folder_, top_level)
+        real_contents = []
+        if prefix and prefix.strip() != "":
+            for c in contents:
+                if c.startswith(prefix):
+                    real_c = remove_prefix(c, prefix)
+                    real_c = remove_prefix(real_c, "/")
+                    real_contents.append(real_c)
+                else:
+                    real_contents.append(c)
+        else:
+            real_contents = contents
+        index = __to_html(real_contents, folder_, top_level)
     return index


@@ -119,6 +142,7 @@ def __to_html(contents: List[str], folder: str, top_level: str) -> str:
     if folder != "/":
         items.append("../")
     for c in contents:
+        # index.html does not need to be included in html content.
         if not c.endswith("index.html"):
             items.append(c[len(folder):])
         else:
diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py
index bcec8f47..c7857e75 100644
--- a/charon/pkgs/maven.py
+++ b/charon/pkgs/maven.py
@@ -16,6 +16,7 @@
 import charon.pkgs.indexing as indexing
 from charon.utils.files import write_file
 from charon.utils.archive import extract_zip_all
+from charon.utils.strings import remove_prefix
 from charon.storage import S3Client
 from charon.pkgs.pkg_utils import upload_post_process, rollback_post_process
 from charon.config import get_template
@@ -181,6 +182,7 @@ def handle_maven_uploading(
         ignore_patterns=None,
         root="maven-repository",
         bucket_name=None,
+        prefix=None,
         dir_=None,
         do_index=True,
         dry_run=False
@@ -221,18 +223,24 @@ def handle_maven_uploading(
         _handle_error(err_msgs)
         # Question: should we exit here?

+    prefix_ = remove_prefix(prefix, "/")
     # 4. Do uploading
     logger.info("Start uploading files to s3")
     s3_client = S3Client(dry_run=dry_run)
     bucket = bucket_name
-    (_, failed_files) = s3_client.upload_files(
-        file_paths=valid_mvn_paths, bucket_name=bucket, product=prod_key, root=top_level
+    _, failed_files = s3_client.upload_files(
+        file_paths=valid_mvn_paths, bucket_name=bucket,
+        product=prod_key, root=top_level, key_prefix=prefix_
     )
     logger.info("Files uploading done\n")

     # 5. Use uploaded poms to scan s3 for metadata refreshment
     logger.info("Start generating maven-metadata.xml files for all artifacts")
-    meta_files = _generate_metadatas(s3_client, bucket, valid_poms, top_level)
+    meta_files = _generate_metadatas(
+        s3=s3_client, bucket=bucket,
+        poms=valid_poms, root=top_level,
+        prefix=prefix_
+    )
     logger.info("maven-metadata.xml files generation done\n")

     failed_metas = meta_files.get(META_FILE_FAILED, [])
@@ -243,7 +251,8 @@ def handle_maven_uploading(
             meta_file_paths=meta_files[META_FILE_GEN_KEY],
             bucket_name=bucket,
             product=prod_key,
-            root=top_level
+            root=top_level,
+            key_prefix=prefix_
         )
         failed_metas.extend(_failed_metas)
         logger.info("maven-metadata.xml updating done\n")
@@ -252,14 +261,18 @@
     # index is similar to metadata, it will be overwritten everytime
     if do_index:
         logger.info("Start generating index files to s3")
-        created_indexes = indexing.generate_indexes(top_level, valid_dirs, s3_client, bucket)
+        created_indexes = indexing.generate_indexes(
+            top_level, valid_dirs, s3_client, bucket, prefix_
+        )
        logger.info("Index files generation done.\n")

        logger.info("Start updating index files to s3")
        (_, _failed_metas) = s3_client.upload_metadatas(
            meta_file_paths=created_indexes,
            bucket_name=bucket,
-           product=None, root=top_level
+           product=None,
+           root=top_level,
+           key_prefix=prefix_
        )
        failed_metas.extend(_failed_metas)
        logger.info("Index files updating done\n")
@@ -275,6 +288,7 @@ def handle_maven_del(
         ignore_patterns=None,
         root="maven-repository",
         bucket_name=None,
+        prefix=None,
         dir_=None,
         do_index=True,
         dry_run=False
@@ -312,6 +326,7 @@ def handle_maven_del(
             logger.debug("G: %s, A: %s", g, a)
         ga_paths.append(os.path.join("/".join(g.split(".")), a))

+    prefix_ = remove_prefix(prefix, "/")
     # 4. Delete all valid_paths from s3
     logger.info("Start deleting files from s3")
     s3_client = S3Client(dry_run=dry_run)
@@ -320,13 +335,19 @@ def handle_maven_del(
         valid_mvn_paths,
         bucket_name=bucket,
         product=prod_key,
-        root=top_level
+        root=top_level,
+        key_prefix=prefix_
     )
     logger.info("Files deletion done\n")

     # 5. Use changed GA to scan s3 for metadata refreshment
     logger.info("Start generating maven-metadata.xml files for all changed GAs")
-    meta_files = _generate_metadatas(s3_client, bucket, valid_poms, top_level)
+    meta_files = _generate_metadatas(
+        s3=s3_client, bucket=bucket,
+        poms=valid_poms, root=top_level,
+        prefix=prefix_
+    )
+    logger.info("maven-metadata.xml files generation done\n")

     # 6. Upload all maven-metadata.xml. We need to delete metadata files
@@ -336,7 +357,11 @@
     for _, files in meta_files.items():
         all_meta_files.extend(files)
     s3_client.delete_files(
-        file_paths=all_meta_files, bucket_name=bucket, product=prod_key, root=top_level
+        file_paths=all_meta_files,
+        bucket_name=bucket,
+        product=prod_key,
+        root=top_level,
+        key_prefix=prefix_
     )
     failed_metas = meta_files.get(META_FILE_FAILED, [])
     if META_FILE_GEN_KEY in meta_files:
@@ -344,14 +369,17 @@
             meta_file_paths=meta_files[META_FILE_GEN_KEY],
             bucket_name=bucket,
             product=None,
-            root=top_level
+            root=top_level,
+            key_prefix=prefix_
         )
         failed_metas.extend(_failed_metas)
         logger.info("maven-metadata.xml updating done\n")

     if do_index:
         logger.info("Start generating index files for all changed entries")
-        created_indexes = indexing.generate_indexes(top_level, valid_dirs, s3_client, bucket)
+        created_indexes = indexing.generate_indexes(
+            top_level, valid_dirs, s3_client, bucket, prefix_
+        )
         logger.info("Index files generation done.\n")

         logger.info("Start updating index to s3")
@@ -359,7 +387,8 @@
             meta_file_paths=created_indexes,
             bucket_name=bucket,
             product=None,
-            root=top_level
+            root=top_level,
+            key_prefix=prefix_
         )
         failed_metas.extend(_failed_index_files)
         logger.info("Index files updating done.\n")
@@ -445,7 +474,9 @@ def _scan_paths(files_root: str, ignore_patterns: List[str],


 def _generate_metadatas(
-    s3: S3Client, bucket: str, poms: List[str], root: str
+    s3: S3Client, bucket: str,
+    poms: List[str], root: str,
+    prefix: str = None
 ) -> Dict[str, List[str]]:
     """Collect GAVs and generating maven-metadata.xml.
       As all valid poms has been stored in s3 bucket,
@@ -467,9 +498,12 @@
     for path, _ in gas_dict.items():
         # avoid some wrong prefix, like searching a/b
         # but got a/b-1
+        ga_prefix = path
+        if prefix:
+            ga_prefix = os.path.join(prefix, path)
         if not path.endswith("/"):
-            path = path + "/"
-        (existed_poms, success) = s3.get_files(bucket, path, ".pom")
+            ga_prefix = ga_prefix + "/"
+        (existed_poms, success) = s3.get_files(bucket, ga_prefix, ".pom")
         if len(existed_poms) == 0:
             if success:
                 logger.debug(
@@ -488,7 +522,13 @@
             logger.debug(
                 "Got poms in s3 bucket %s for GA path %s: %s", bucket, path, poms
             )
-            all_poms.extend(existed_poms)
+            un_prefixed_poms = existed_poms
+            if prefix:
+                if not prefix.endswith("/"):
+                    un_prefixed_poms = [__remove_prefix(pom, prefix) for pom in existed_poms]
+                else:
+                    un_prefixed_poms = [__remove_prefix(pom, prefix + "/") for pom in existed_poms]
+            all_poms.extend(un_prefixed_poms)
     gav_dict = parse_gavs(all_poms)
     if len(gav_dict) > 0:
         meta_files_generation = []
@@ -524,6 +564,12 @@ def _handle_error(err_msgs: List[str]):
     pass


+def __remove_prefix(s: str, prefix: str) -> str:
+    if s.startswith(prefix):
+        return s[len(prefix):]
+    return s
+
+
 class VersionCompareKey:
     'Used as key function for version sorting'
     def __init__(self, obj):
diff --git a/charon/pkgs/npm.py b/charon/pkgs/npm.py
index 8c69d5f4..8efca327 100644
--- a/charon/pkgs/npm.py
+++ b/charon/pkgs/npm.py
@@ -62,8 +62,9 @@ def __init__(self, metadata, is_version):

 def handle_npm_uploading(
         tarball_path: str, product: str,
-        bucket_name=None, dir_=None,
-        do_index=True, dry_run=False
+        bucket_name=None, prefix=None,
+        dir_=None, do_index=True,
+        dry_run=False
 ):
     """ Handle the npm product release tarball uploading process.
        For NPM uploading, tgz file and version metadata will be relocated based
@@ -76,7 +77,7 @@
        tmp dir if None.
""" target_dir, valid_paths, package_metadata = _scan_metadata_paths_from_archive( - tarball_path, prefix=product, dir__=dir_ + tarball_path, prod=product, dir__=dir_ ) if not os.path.isdir(target_dir): logger.error("Error: the extracted target_dir path %s does not exist.", target_dir) @@ -87,11 +88,12 @@ def handle_npm_uploading( logger.info("Start uploading files to s3") client = S3Client(dry_run=dry_run) bucket = bucket_name - uploaded_files = [] - _uploaded_files, failed_files = client.upload_files( - file_paths=valid_paths, bucket_name=bucket, product=product, root=target_dir + _, failed_files = client.upload_files( + file_paths=valid_paths, + bucket_name=bucket, + product=product, + root=target_dir ) - uploaded_files.extend(_uploaded_files) logger.info("Files uploading done\n") logger.info("Start generating package.json for package: %s", package_metadata.name) @@ -102,28 +104,29 @@ def handle_npm_uploading( failed_metas = [] if META_FILE_GEN_KEY in meta_files: - _uploaded_files, _failed_metas = client.upload_metadatas( + _, _failed_metas = client.upload_metadatas( meta_file_paths=[meta_files[META_FILE_GEN_KEY]], bucket_name=bucket, product=product, root=target_dir ) failed_metas.extend(_failed_metas) - uploaded_files.extend(_uploaded_files) logger.info("package.json uploading done") # this step generates index.html for each dir and add them to file list # index is similar to metadata, it will be overwritten everytime if do_index: logger.info("Start generating index files to s3") - created_indexes = indexing.generate_indexes(target_dir, valid_dirs, client, bucket) + created_indexes = indexing.generate_indexes( + target_dir, valid_dirs, client, bucket, prefix + ) logger.info("Index files generation done.\n") logger.info("Start updating index files to s3") (_, _failed_metas) = client.upload_metadatas( meta_file_paths=created_indexes, - bucket_name=bucket, - product=None, root=target_dir + bucket_name=bucket, product=None, + root=target_dir, key_prefix=prefix ) failed_metas.extend(_failed_metas) logger.info("Index files updating done\n") @@ -135,8 +138,9 @@ def handle_npm_uploading( def handle_npm_del( tarball_path: str, product: str, - bucket_name=None, dir_=None, - do_index=True, dry_run=False + bucket_name=None, prefix=None, + dir_=None, do_index=True, + dry_run=False ): """ Handle the npm product release tarball deletion process. * tarball_path is the location of the tarball in filesystem @@ -147,7 +151,7 @@ def handle_npm_del( tmp dir if None. 
""" target_dir, package_name_path, valid_paths = _scan_paths_from_archive( - tarball_path, prefix=product, dir__=dir_ + tarball_path, prod=product, dir__=dir_ ) valid_dirs = __get_path_tree(valid_paths, target_dir) @@ -156,12 +160,15 @@ def handle_npm_del( client = S3Client(dry_run=dry_run) bucket = bucket_name deleted_files, _ = client.delete_files( - file_paths=valid_paths, bucket_name=bucket, product=product, root=target_dir + file_paths=valid_paths, bucket_name=bucket, + product=product, root=target_dir ) logger.info("Files deletion done\n") logger.info("Start generating package.json for package: %s", package_name_path) - meta_files = _gen_npm_package_metadata_for_del(client, bucket, target_dir, package_name_path) + meta_files = _gen_npm_package_metadata_for_del( + client, bucket, target_dir, package_name_path + ) logger.info("package.json generation done\n") logger.info("Start uploading package.json to s3") @@ -169,7 +176,8 @@ def handle_npm_del( for _, file in meta_files.items(): all_meta_files.append(file) deleted_metas, failed_files = client.delete_files( - file_paths=all_meta_files, bucket_name=bucket, product=product, root=target_dir + file_paths=all_meta_files, bucket_name=bucket, + product=product, root=target_dir ) deleted_files += deleted_metas failed_metas = [] @@ -190,7 +198,9 @@ def handle_npm_del( if do_index: logger.info("Start generating index files for all changed entries") - created_indexes = indexing.generate_indexes(target_dir, valid_dirs, client, bucket) + created_indexes = indexing.generate_indexes( + target_dir, valid_dirs, client, bucket, prefix + ) logger.info("Index files generation done.\n") logger.info("Start updating index to s3") @@ -198,7 +208,8 @@ def handle_npm_del( meta_file_paths=created_indexes, bucket_name=bucket, product=None, - root=target_dir + root=target_dir, + key_prefix=prefix ) failed_metas.extend(_failed_index_files) logger.info("Index files updating done.\n") @@ -289,9 +300,9 @@ def _gen_npm_package_metadata_for_del( return meta_files -def _scan_metadata_paths_from_archive(path: str, prefix="", dir__=None) -> Tuple[ +def _scan_metadata_paths_from_archive(path: str, prod="", dir__=None) -> Tuple[ str, list, NPMPackageMetadata]: - tmp_root = mkdtemp(prefix=f"npm-charon-{prefix}-", dir=dir__) + tmp_root = mkdtemp(prefix=f"npm-charon-{prod}-", dir=dir__) try: _, valid_paths = extract_npm_tarball(path, tmp_root, True) if len(valid_paths) > 1: @@ -303,14 +314,15 @@ def _scan_metadata_paths_from_archive(path: str, prefix="", dir__=None) -> Tuple sys.exit(1) -def _scan_paths_from_archive(path: str, prefix="", dir__=None) -> Tuple[str, str, list]: - tmp_root = mkdtemp(prefix=f"npm-charon-{prefix}-", dir=dir__) +def _scan_paths_from_archive(path: str, prod="", dir__=None) -> Tuple[str, str, list]: + tmp_root = mkdtemp(prefix=f"npm-charon-{prod}-", dir=dir__) package_name_path, valid_paths = extract_npm_tarball(path, tmp_root, False) return tmp_root, package_name_path, valid_paths def _merge_package_metadata( - package_metadata: NPMPackageMetadata, client: S3Client, bucket: str, + package_metadata: NPMPackageMetadata, + client: S3Client, bucket: str, key: str ): content = client.read_file_content(bucket, key) diff --git a/charon/storage.py b/charon/storage.py index 26f91b2d..61b6ab40 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -86,7 +86,7 @@ def __enable_acceleration(self, extra_conf) -> bool: def upload_files( self, file_paths: List[str], bucket_name: str, - product: str, root="/" + product: str, root="/", key_prefix: str = None ) -> 
        """ Upload a list of files to s3 bucket.
        * Use the cut down file path as s3 key. The cut down way is move root
          from the file path if it starts with root. Example: if file_path is
@@ -115,7 +115,8 @@ def path_upload_handler(full_file_path: str, path: str) -> bool:
                                 full_file_path, product)
                 return False
             logger.info('Uploading %s to bucket %s', full_file_path, bucket_name)
-            fileObject = bucket.Object(path)
+            path_key = os.path.join(key_prefix, path) if key_prefix else path
+            fileObject = bucket.Object(path_key)
             existed = self.file_exists(fileObject)
             sha1 = read_sha1(full_file_path)
             (content_type, _) = mimetypes.guess_type(full_file_path)
@@ -141,7 +142,7 @@ def path_upload_handler(full_file_path: str, path: str) -> bool:
                             ExtraArgs={'ContentType': content_type}
                         )
                     logger.info('Uploaded %s to bucket %s', full_file_path, bucket_name)
-                    uploaded_files.append(path)
+                    uploaded_files.append(path_key)
                 except (ClientError, HTTPClientError) as e:
                     logger.error("ERROR: file %s not uploaded to bucket"
                                  " %s due to error: %s ", full_file_path,
@@ -158,7 +159,7 @@ def path_upload_handler(full_file_path: str, path: str) -> bool:
                 )
                 if checksum != "" and checksum.strip() != sha1:
                     logger.error('Error: checksum check failed. The file %s is different from the '
-                                 'one in S3. Product: %s', path, product)
+                                 'one in S3. Product: %s', path_key, product)
                     return False

                 prods = []
@@ -173,7 +174,7 @@ def path_upload_handler(full_file_path: str, path: str) -> bool:
                         product,
                     )
                     prods.append(product)
-                    self.__update_file_metadata(fileObject, bucket_name, path,
+                    self.__update_file_metadata(fileObject, bucket_name, path_key,
                                                 {PRODUCT_META_KEY: ",".join(prods)})
             return True

@@ -183,7 +184,7 @@ def path_upload_handler(full_file_path: str, path: str) -> bool:
     def upload_metadatas(
         self, meta_file_paths: List[str], bucket_name: str,
-        product: str, root="/"
+        product: str, root="/", key_prefix: str = None
     ) -> Tuple[List[str], List[str]]:
        """ Upload a list of metadata files to s3 bucket. This function
        is very similar to upload_files, except:
@@ -201,7 +202,8 @@ def path_upload_handler(full_file_path: str, path: str):
                                 full_file_path, product)
                 return False
             logger.info('Updating metadata %s to bucket %s', path, bucket_name)
-            fileObject = bucket.Object(path)
+            path_key = os.path.join(key_prefix, path) if key_prefix else path
+            fileObject = bucket.Object(path_key)
             existed = self.file_exists(fileObject)
             f_meta = {}
             need_overwritten = True
@@ -233,9 +235,9 @@
                             ContentType=content_type
                         )
                     else:
-                        self.__update_file_metadata(fileObject, bucket_name, path, f_meta)
+                        self.__update_file_metadata(fileObject, bucket_name, path_key, f_meta)
                     logger.info('Updated metadata %s to bucket %s', path, bucket_name)
-                    uploaded_files.append(path)
+                    uploaded_files.append(path_key)
                 except (ClientError, HTTPClientError) as e:
                     logger.error("ERROR: file %s not uploaded to bucket"
                                  " %s due to error: %s ", full_file_path,
@@ -248,7 +250,8 @@ def path_upload_handler(full_file_path: str, path: str):
         ))

     def delete_files(
-        self, file_paths: List[str], bucket_name: str, product: str, root="/"
+        self, file_paths: List[str], bucket_name: str,
+        product: str, root="/", key_prefix: str = None
     ) -> Tuple[List[str], List[str]]:
        """ Deletes a list of files to s3 bucket.
        * Use the cut down file path as s3 key. The cut down way is move root
          from the file path if it starts with root. Example: if file_path is
@@ -266,7 +269,8 @@ def delete_files(

         def path_delete_handler(full_file_path: str, path: str):
             logger.info('Deleting %s from bucket %s', path, bucket_name)
-            fileObject = bucket.Object(path)
+            path_key = os.path.join(key_prefix, path) if key_prefix else path
+            fileObject = bucket.Object(path_key)
             existed = self.file_exists(fileObject)
             if existed:
                 prods = []
@@ -286,7 +290,7 @@ def path_delete_handler(full_file_path: str, path: str):
                         self.__update_file_metadata(
                             fileObject,
                             bucket_name,
-                            path,
+                            path_key,
                             {PRODUCT_META_KEY: ",".join(prods)},
                         )
                         logger.info(
@@ -303,7 +307,7 @@ def path_delete_handler(full_file_path: str, path: str):
                 elif len(prods) == 0:
                     try:
                         if not self.dry_run:
-                            bucket.delete_objects(Delete={"Objects": [{"Key": path}]})
+                            bucket.delete_objects(Delete={"Objects": [{"Key": path_key}]})
                         logger.info("Deleted %s from bucket %s", path, bucket_name)
                         deleted_files.append(path)
                         return True
@@ -416,7 +420,8 @@ def __update_file_metadata(
         )

     def __do_path_cut_and(
-        self, file_paths: List[str], fn: Callable[[str, str], bool], root="/"
+        self, file_paths: List[str],
+        fn: Callable[[str, str], bool], root="/"
     ) -> List[str]:
         slash_root = root
         if not root.endswith("/"):
diff --git a/charon/utils/archive.py b/charon/utils/archive.py
index 3dba1ec5..9b46858c 100644
--- a/charon/utils/archive.py
+++ b/charon/utils/archive.py
@@ -149,7 +149,7 @@ def download_archive(url: str, base_dir=None) -> str:
     local_filename = os.path.join(dir_, url.split('/')[-1])
     # NOTE the stream=True parameter below
     # NOTE(2) timeout=30 parameter to set a 30-second timeout, and prevent indefinite hang.
-    with requests.get(url, stream=True, timeout=30) as r:
+    with requests.get(url, stream=True, timeout=30, verify=False) as r:
         r.raise_for_status()
         with open(local_filename, 'wb') as f:
             for chunk in r.iter_content(chunk_size=8192):
diff --git a/charon/utils/strings.py b/charon/utils/strings.py
new file mode 100644
index 00000000..a3f0ff59
--- /dev/null
+++ b/charon/utils/strings.py
@@ -0,0 +1,4 @@
+def remove_prefix(string: str, prefix: str) -> str:
+    if string and prefix and string.startswith(prefix):
+        return string[len(prefix):]
+    return string
diff --git a/tests/base.py b/tests/base.py
index 37ac4e76..1eaee35e 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -20,6 +20,9 @@
 from charon.utils.files import write_file
 from charon.config import CONFIG_FILE

+SHORT_TEST_PREFIX = "ga"
+LONG_TEST_PREFIX = "earlyaccess/all"
+

 class BaseTest(unittest.TestCase):
     def setUp(self):
diff --git a/tests/test_maven_del.py b/tests/test_maven_del.py
index d5fcbc0c..f079dbe5 100644
--- a/tests/test_maven_del.py
+++ b/tests/test_maven_del.py
@@ -1,6 +1,7 @@
 from charon.pkgs.maven import handle_maven_uploading, handle_maven_del
 from charon.storage import PRODUCT_META_KEY, CHECKSUM_META_KEY
-from tests.base import BaseTest
+from charon.utils.strings import remove_prefix
+from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, BaseTest
 from moto import mock_s3
 import boto3
 import os
@@ -184,17 +185,87 @@ def test_ignore_del(self):
         for f in non_sha1_files:
             self.assertNotIn(f, actual_files)

-    def __prepare_content(self):
+    def test_short_prefix_deletion(self):
+        self.__test_prefix_deletion(SHORT_TEST_PREFIX)
+
+    def test_long_prefix_deletion(self):
+        self.__test_prefix_deletion(LONG_TEST_PREFIX)
+
+    def test_root_prefix_deletion(self):
+        self.__test_prefix_deletion("/")
+
+    def __test_prefix_deletion(self, prefix: str):
+        self.__prepare_content(prefix)
+
+        test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip")
"tests/input/commons-client-4.5.6.zip") + product_456 = "commons-client-4.5.6" + handle_maven_del( + test_zip, product_456, + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir, do_index=False + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertEqual(12, len(actual_files)) + + prefix_ = remove_prefix(prefix, "/") + PREFIXED_COMMONS_CLIENT_456_FILES = [ + os.path.join(prefix_, f) for f in COMMONS_CLIENT_456_FILES] + for f in PREFIXED_COMMONS_CLIENT_456_FILES: + self.assertNotIn(f, actual_files) + PREFIXED_COMMONS_CLIENT_META = os.path.join(prefix_, COMMONS_CLIENT_META) + self.assertIn(PREFIXED_COMMONS_CLIENT_META, actual_files) + + PREFIXED_COMMONS_LOGGING_FILES = [ + os.path.join(prefix_, f) for f in COMMONS_LOGGING_FILES] + for f in PREFIXED_COMMONS_LOGGING_FILES: + self.assertIn(f, actual_files) + PREFIXED_COMMONS_LOGGING_META = os.path.join(prefix_, COMMONS_LOGGING_META) + self.assertIn(PREFIXED_COMMONS_LOGGING_META, actual_files) + + for obj in objs: + self.assertIn(CHECKSUM_META_KEY, obj.Object().metadata) + self.assertNotEqual("", obj.Object().metadata[CHECKSUM_META_KEY].strip()) + + product_459 = "commons-client-4.5.9" + meta_obj_client = test_bucket.Object(PREFIXED_COMMONS_CLIENT_META) + self.assertEqual(product_459, meta_obj_client.metadata[PRODUCT_META_KEY]) + + meta_obj_logging = test_bucket.Object(PREFIXED_COMMONS_LOGGING_META) + self.assertEqual(product_459, meta_obj_logging.metadata[PRODUCT_META_KEY]) + + test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.9.zip") + handle_maven_del( + test_zip, product_459, + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir, + do_index=False + ) + + objs = list(test_bucket.objects.all()) + self.assertEqual(0, len(objs)) + + def __prepare_content(self, prefix=None): test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( test_zip, product_456, - bucket_name=TEST_BUCKET, dir_=self.tempdir, do_index=False + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir, + do_index=False ) test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( test_zip, product_459, - bucket_name=TEST_BUCKET, dir_=self.tempdir, do_index=False + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir, + do_index=False ) diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index 8e021a79..1fb0b4d5 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -15,7 +15,8 @@ """ from charon.pkgs.maven import handle_maven_uploading, handle_maven_del from charon.storage import CHECKSUM_META_KEY -from tests.base import BaseTest +from charon.utils.strings import remove_prefix +from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, BaseTest from moto import mock_s3 import boto3 import os @@ -162,6 +163,66 @@ def test_overlap_upload_index(self): ) self.assertNotIn("../", index_content) + def test_upload_index_with_short_prefix(self): + self.__test_upload_index_with_prefix(SHORT_TEST_PREFIX) + + def test_upload_index_with_long_prefix(self): + self.__test_upload_index_with_prefix(LONG_TEST_PREFIX) + + def test_upload_index_with_root_prefix(self): + self.__test_upload_index_with_prefix("/") + + def __test_upload_index_with_prefix(self, prefix: str): + test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip") + product = 
"commons-client-4.5.6" + handle_maven_uploading( + test_zip, product, + bucket_name=TEST_BUCKET, + dir_=self.tempdir, + prefix=prefix + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertEqual(21, len(actual_files)) + + prefix_ = remove_prefix(prefix, "/") + PREFIXED_LOGGING_INDEXES = [ + os.path.join(prefix_, i) for i in COMMONS_LOGGING_INDEXES + ] + for f in PREFIXED_LOGGING_INDEXES: + self.assertIn(f, actual_files) + + PREFIXED_456_INDEXES = [ + os.path.join(prefix_, i) for i in COMMONS_CLIENT_456_INDEXES + ] + for f in PREFIXED_456_INDEXES: + self.assertIn(f, actual_files) + + for obj in objs: + self.assertIn(CHECKSUM_META_KEY, obj.Object().metadata) + self.assertNotEqual("", obj.Object().metadata[CHECKSUM_META_KEY].strip()) + + indedx_obj = test_bucket.Object(os.path.join(prefix_, COMMONS_CLIENT_INDEX)) + index_content = str(indedx_obj.get()["Body"].read(), "utf-8") + self.assertIn("4.5.6/", index_content) + self.assertIn( + "maven-metadata.xml", + index_content + ) + self.assertIn("../", index_content) + + indedx_obj = test_bucket.Object(os.path.join(prefix_, COMMONS_ROOT_INDEX)) + index_content = str(indedx_obj.get()["Body"].read(), "utf-8") + self.assertIn("org/", index_content) + self.assertIn( + "commons-logging/", + index_content + ) + self.assertNotIn("../", index_content) + def test_deletion_index(self): self.__prepare_content() @@ -218,12 +279,85 @@ def test_deletion_index(self): objs = list(test_bucket.objects.all()) self.assertEqual(0, len(objs)) - def __prepare_content(self): + def test_deletion_index_with_short_prefix(self): + self.__test_deletion_index_with_prefix(SHORT_TEST_PREFIX) + + def test_deletion_index_with_long_prefix(self): + self.__test_deletion_index_with_prefix(LONG_TEST_PREFIX) + + def test_deletion_index_with_root_prefix(self): + self.__test_deletion_index_with_prefix("/") + + def __test_deletion_index_with_prefix(self, prefix: str): + self.__prepare_content(prefix) + + test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip") + product_456 = "commons-client-4.5.6" + handle_maven_del( + test_zip, product_456, + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertEqual(21, len(actual_files)) + + prefix_ = remove_prefix(prefix, "/") + PREFIXED_459_INDEXES = [os.path.join(prefix_, i) for i in COMMONS_CLIENT_459_INDEXES] + for assert_file in PREFIXED_459_INDEXES: + self.assertIn(assert_file, actual_files) + + PREFIXED_LOGGING_INDEXES = [os.path.join(prefix_, i) for i in COMMONS_LOGGING_INDEXES] + for assert_file in PREFIXED_LOGGING_INDEXES: + self.assertIn(assert_file, actual_files) + + self.assertNotIn(os.path.join(prefix_, COMMONS_CLIENT_456_INDEX), actual_files) + + for obj in objs: + self.assertIn(CHECKSUM_META_KEY, obj.Object().metadata) + self.assertNotEqual("", obj.Object().metadata[CHECKSUM_META_KEY].strip()) + + indedx_obj = test_bucket.Object(os.path.join(prefix_, COMMONS_CLIENT_INDEX)) + index_content = str(indedx_obj.get()["Body"].read(), "utf-8") + self.assertIn("4.5.9/", index_content) + self.assertIn("../", index_content) + self.assertIn( + "maven-metadata.xml", + index_content) + self.assertNotIn("4.5.6/", index_content) + + indedx_obj = test_bucket.Object(os.path.join(prefix_, COMMONS_ROOT_INDEX)) + index_content = str(indedx_obj.get()["Body"].read(), "utf-8") + 
self.assertIn("org/", index_content) + self.assertIn( + "commons-logging/", + index_content + ) + self.assertNotIn("../", index_content) + + product_459 = "commons-client-4.5.9" + test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.9.zip") + handle_maven_del( + test_zip, product_459, + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir + ) + + objs = list(test_bucket.objects.all()) + self.assertEqual(0, len(objs)) + + def __prepare_content(self, prefix=None): test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( test_zip, product_456, bucket_name=TEST_BUCKET, + prefix=prefix, dir_=self.tempdir ) @@ -232,5 +366,6 @@ def __prepare_content(self): handle_maven_uploading( test_zip, product_459, bucket_name=TEST_BUCKET, + prefix=prefix, dir_=self.tempdir ) diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index f5a1ced6..45d1351f 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -1,6 +1,7 @@ from charon.pkgs.maven import handle_maven_uploading from charon.storage import PRODUCT_META_KEY, CHECKSUM_META_KEY -from tests.base import BaseTest +from charon.utils.strings import remove_prefix +from tests.base import BaseTest, SHORT_TEST_PREFIX, LONG_TEST_PREFIX from moto import mock_s3 import boto3 import os @@ -215,3 +216,59 @@ def test_ignore_upload(self): for f in ignored_files: self.assertNotIn(f, actual_files) + + def test_short_prefix_upload(self): + self.__test_prefix_upload(SHORT_TEST_PREFIX) + + def test_long_prefix_upload(self): + self.__test_prefix_upload(LONG_TEST_PREFIX) + + def test_root_prefix_upload(self): + self.__test_prefix_upload("/") + + def __test_prefix_upload(self, prefix: str): + test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip") + product = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product, + bucket_name=TEST_BUCKET, + prefix=prefix, + dir_=self.tempdir, + do_index=False + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertEqual(12, len(actual_files)) + + prefix_ = remove_prefix(prefix, "/") + PREFIXED_COMMONS_CLIENT_456_FILES = [ + os.path.join(prefix_, f) for f in COMMONS_CLIENT_456_FILES] + for f in PREFIXED_COMMONS_CLIENT_456_FILES: + self.assertIn(f, actual_files) + PREFIXED_COMMONS_CLIENT_META = os.path.join(prefix_, COMMONS_CLIENT_META) + self.assertIn(PREFIXED_COMMONS_CLIENT_META, actual_files) + + PREFIXED_COMMONS_LOGGING_FILES = [ + os.path.join(prefix_, f) for f in COMMONS_LOGGING_FILES] + for f in PREFIXED_COMMONS_LOGGING_FILES: + self.assertIn(f, actual_files) + PREFIXED_COMMONS_LOGGING_META = os.path.join(prefix_, COMMONS_LOGGING_META) + self.assertIn(PREFIXED_COMMONS_LOGGING_META, actual_files) + + PREFIXED_NON_MVN_FILES = [ + os.path.join(prefix_, f) for f in NON_MVN_FILES] + for f in PREFIXED_NON_MVN_FILES: + self.assertNotIn(f, actual_files) + + for obj in objs: + self.assertEqual(product, obj.Object().metadata[PRODUCT_META_KEY]) + self.assertIn(CHECKSUM_META_KEY, obj.Object().metadata) + self.assertNotEqual("", obj.Object().metadata[CHECKSUM_META_KEY].strip()) + + meta_obj_client = test_bucket.Object(PREFIXED_COMMONS_CLIENT_META) + self.assertIsNotNone(meta_obj_client) + + meta_obj_logging = test_bucket.Object(PREFIXED_COMMONS_LOGGING_META) + self.assertIsNotNone(meta_obj_logging) diff --git a/tests/test_s3client.py b/tests/test_s3client.py index 
index 6dc7aeb7..596c14db 100644
--- a/tests/test_s3client.py
+++ b/tests/test_s3client.py
@@ -16,7 +16,7 @@
 from charon.storage import S3Client, PRODUCT_META_KEY, CHECKSUM_META_KEY
 from charon.utils.archive import extract_zip_all
 from charon.utils.files import write_file, read_sha1
-from tests.base import BaseTest
+from tests.base import BaseTest, SHORT_TEST_PREFIX
 from moto import mock_s3
 import boto3
 import os
@@ -185,6 +185,34 @@ def test_upload_and_delete_files(self):

         shutil.rmtree(temp_root)

+    def test_upload_and_delete_with_prefix(self):
+        (temp_root, root, all_files) = self.__prepare_files()
+        test_files = list(filter(lambda f: f.startswith(root), all_files))
+
+        bucket = self.mock_s3.Bucket(MY_BUCKET)
+
+        self.s3_client.upload_files(
+            test_files,
+            bucket_name=MY_BUCKET,
+            product="apache-commons",
+            root=root,
+            key_prefix=SHORT_TEST_PREFIX)
+        objects = list(bucket.objects.all())
+        self.assertEqual(COMMONS_LANG3_ZIP_ENTRY - 4, len(objects))
+        for obj in objects:
+            self.assertTrue(obj.key.startswith(SHORT_TEST_PREFIX))
+
+        self.s3_client.delete_files(
+            file_paths=test_files,
+            bucket_name=MY_BUCKET,
+            product="apache-commons",
+            root=root,
+            key_prefix=SHORT_TEST_PREFIX)
+        objects = list(bucket.objects.all())
+        self.assertEqual(0, len(objects))
+
+        shutil.rmtree(temp_root)
+
     def test_upload_file_with_checksum(self):
         temp_root = os.path.join(self.tempdir, "tmp_upd")
         os.mkdir(temp_root)