From 29594e57b7d9b31df6757af896b5db8eaeb6415b Mon Sep 17 00:00:00 2001 From: yma Date: Mon, 13 Oct 2025 15:50:03 +0800 Subject: [PATCH 1/6] Feat: Accept multiple maven zips with non-RADAS signing way --- charon/cmd/cmd_upload.py | 65 ++++++++------- charon/cmd/internal.py | 8 ++ charon/pkgs/maven.py | 140 ++++++++++++++++++++++++++++++++- charon/utils/archive.py | 13 +++ tests/test_archive.py | 33 +++++++- tests/test_extract_tarballs.py | 31 ++++++++ tests/test_maven_upload.py | 62 +++++++++++++++ 7 files changed, 320 insertions(+), 32 deletions(-) create mode 100644 tests/test_extract_tarballs.py diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py index a867df01..dcc24a88 100644 --- a/charon/cmd/cmd_upload.py +++ b/charon/cmd/cmd_upload.py @@ -16,12 +16,12 @@ from typing import List from charon.config import get_config -from charon.utils.archive import detect_npm_archive, NpmArchiveType +from charon.utils.archive import detect_npm_archives, NpmArchiveType from charon.pkgs.maven import handle_maven_uploading from charon.pkgs.npm import handle_npm_uploading from charon.cmd.internal import ( _decide_mode, _validate_prod_key, - _get_local_repo, _get_targets, + _get_local_repos, _get_targets, _get_ignore_patterns, _safe_delete ) from click import command, option, argument @@ -35,8 +35,10 @@ @argument( - "repo", + "repos", type=str, + nargs='+', # This allows multiple arguments for zip urls + required=True ) @option( "--product", @@ -138,7 +140,7 @@ @option("--dryrun", "-n", is_flag=True, default=False) @command() def upload( - repo: str, + repos: List[str], product: str, version: str, targets: List[str], @@ -152,9 +154,9 @@ def upload( quiet=False, dryrun=False ): - """Upload all files from a released product REPO to Ronda - Service. The REPO points to a product released tarball which - is hosted in a remote url or a local path. + """Upload all files from released product REPOs to Ronda + Service. The REPOs point to a product released tarballs which + are hosted in remote urls or local paths. """ tmp_dir = work_dir try: @@ -173,8 +175,8 @@ def upload( logger.error("No AWS profile specified!") sys.exit(1) - archive_path = _get_local_repo(repo) - npm_archive_type = detect_npm_archive(archive_path) + archive_paths = _get_local_repos(repos) + archive_types = detect_npm_archives(archive_paths) product_key = f"{product}-{version}" manifest_bucket_name = conf.get_manifest_bucket() targets_ = _get_targets(targets, conf) @@ -185,23 +187,10 @@ def upload( " are set correctly.", targets_ ) sys.exit(1) - if npm_archive_type != NpmArchiveType.NOT_NPM: - logger.info("This is a npm archive") - tmp_dir, succeeded = handle_npm_uploading( - archive_path, - product_key, - targets=targets_, - aws_profile=aws_profile, - dir_=work_dir, - gen_sign=contain_signature, - cf_enable=conf.is_aws_cf_enable(), - key=sign_key, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - else: + + maven_count = archive_types.count(NpmArchiveType.NOT_NPM) + npm_count = len(archive_types) - maven_count + if maven_count == len(archive_types): ignore_patterns_list = None if ignore_patterns: ignore_patterns_list = ignore_patterns @@ -209,7 +198,7 @@ def upload( ignore_patterns_list = _get_ignore_patterns(conf) logger.info("This is a maven archive") tmp_dir, succeeded = handle_maven_uploading( - archive_path, + archive_paths, product_key, ignore_patterns_list, root=root_path, @@ -225,6 +214,28 @@ def upload( ) if not succeeded: sys.exit(1) + elif npm_count == len(archive_types) and len(archive_types) == 1: + logger.info("This is a npm archive") + tmp_dir, succeeded = handle_npm_uploading( + archive_paths[0], + product_key, + targets=targets_, + aws_profile=aws_profile, + dir_=work_dir, + gen_sign=contain_signature, + cf_enable=conf.is_aws_cf_enable(), + key=sign_key, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + elif npm_count == len(archive_types) and len(archive_types) > 1: + logger.error("Doesn't support multiple upload for npm") + sys.exit(1) + else: + logger.error("Upload types are not consistent") + sys.exit(1) except Exception: print(traceback.format_exc()) sys.exit(2) # distinguish between exception and bad config or bad state diff --git a/charon/cmd/internal.py b/charon/cmd/internal.py index e7e7d14a..89d4ea1b 100644 --- a/charon/cmd/internal.py +++ b/charon/cmd/internal.py @@ -75,6 +75,14 @@ def _get_local_repo(url: str) -> str: return archive_path +def _get_local_repos(urls: list) -> list: + archive_paths = [] + for url in urls: + archive_path = _get_local_repo(url) + archive_paths.append(archive_path) + return archive_paths + + def _validate_prod_key(product: str, version: str) -> bool: if not product or product.strip() == "": logger.error("Error: product can not be empty!") diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 9f50f35b..b183c474 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -32,11 +32,12 @@ META_FILE_FAILED, MAVEN_METADATA_TEMPLATE, ARCHETYPE_CATALOG_TEMPLATE, ARCHETYPE_CATALOG_FILENAME, PACKAGE_TYPE_MAVEN) -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union from jinja2 import Template from datetime import datetime from zipfile import ZipFile, BadZipFile from tempfile import mkdtemp +from shutil import rmtree, copy2 from defusedxml import ElementTree import os @@ -261,7 +262,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) - def handle_maven_uploading( - repo: str, + repos: Union[str, List[str]], prod_key: str, ignore_patterns=None, root="maven-repository", @@ -294,8 +295,10 @@ def handle_maven_uploading( """ if targets is None: targets = [] - # 1. extract tarball - tmp_root = _extract_tarball(repo, prod_key, dir__=dir_) + if isinstance(repos, str): + repos = [repos] + # 1. extract tarballs + tmp_root = _extract_tarballs(repos, root, prod_key, dir__=dir_) # 2. scan for paths and filter out the ignored paths, # and also collect poms for later metadata generation @@ -673,6 +676,135 @@ def _extract_tarball(repo: str, prefix="", dir__=None) -> str: sys.exit(1) +def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str: + """ Extract multiple zip archives to a temporary directory. + * repos are the list of repo paths to extract + * root is a prefix in the tarball to identify which path is + the beginning of the maven GAV path + * prefix is the prefix for temporary directory name + * dir__ is the directory where temporary directories will be created. + + Returns the path to the merged temporary directory containing all extracted files + """ + # Create final merge directory + final_tmp_root = mkdtemp(prefix=f"charon-{prefix}-final-", dir=dir__) + + total_copied = 0 + total_overwritten = 0 + total_processed = 0 + + # Collect all extracted directories first + extracted_dirs = [] + + for repo in repos: + if os.path.exists(repo): + try: + logger.info("Extracting tarball %s", repo) + repo_zip = ZipFile(repo) + tmp_root = mkdtemp(prefix=f"charon-{prefix}-", dir=dir__) + extract_zip_all(repo_zip, tmp_root) + extracted_dirs.append(tmp_root) + + except BadZipFile as e: + logger.error("Tarball extraction error: %s", e) + sys.exit(1) + else: + logger.error("Error: archive %s does not exist", repo) + sys.exit(1) + + # Merge all extracted directories + if extracted_dirs: + # Get top-level directory names for merged from all repos + top_level_merged_name_dirs = [] + for extracted_dir in extracted_dirs: + for item in os.listdir(extracted_dir): + item_path = os.path.join(extracted_dir, item) + # Check the root maven-repository subdirectory existence + maven_repo_path = os.path.join(item_path, root) + if os.path.isdir(item_path) and os.path.exists(maven_repo_path): + top_level_merged_name_dirs.append(item) + break + + # Create merged directory name + merged_dir_name = ( + "_".join(top_level_merged_name_dirs) if top_level_merged_name_dirs else "merged" + ) + merged_dest_dir = os.path.join(final_tmp_root, merged_dir_name) + + # Merge content from all extracted directories + for extracted_dir in extracted_dirs: + copied, overwritten, processed = _merge_directories_with_rename( + extracted_dir, merged_dest_dir, root + ) + total_copied += copied + total_overwritten += overwritten + total_processed += processed + + # Clean up temporary extraction directory + rmtree(extracted_dir) + + logger.info( + "All zips merged! Total copied: %s, Total overwritten: %s, Total processed: %s", + total_copied, + total_overwritten, + total_processed, + ) + return final_tmp_root + + +def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str): + """ Recursively copy files from src_dir to dest_dir, overwriting existing files. + * src_dir is the source directory to copy from + * dest_dir is the destination directory to copy to. + + Returns Tuple of (copied_count, overwritten_count, processed_count) + """ + copied_count = 0 + overwritten_count = 0 + processed_count = 0 + + # Find the actual content directory + content_root = src_dir + for item in os.listdir(src_dir): + item_path = os.path.join(src_dir, item) + # Check the root maven-repository subdirectory existence + maven_repo_path = os.path.join(item_path, root) + if os.path.isdir(item_path) and os.path.exists(maven_repo_path): + content_root = item_path + break + + # pylint: disable=unused-variable + for root_dir, dirs, files in os.walk(content_root): + # Calculate relative path from content root + rel_path = os.path.relpath(root_dir, content_root) + dest_root = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir + + # Create destination directory if it doesn't exist + os.makedirs(dest_root, exist_ok=True) + + # Copy all files, overwriting existing ones + for file in files: + src_file = os.path.join(root_dir, file) + dest_file = os.path.join(dest_root, file) + if os.path.exists(dest_file): + overwritten_count += 1 + logger.debug("Overwritten: %s -> %s", src_file, dest_file) + else: + copied_count += 1 + logger.debug("Copied: %s -> %s", src_file, dest_file) + + processed_count += 1 + copy2(src_file, dest_file) + + logger.info( + "One zip merged! Files copied: %s, Files overwritten: %s, Total files processed: %s", + copied_count, + overwritten_count, + processed_count, + ) + return copied_count, overwritten_count, processed_count + + def _scan_paths(files_root: str, ignore_patterns: List[str], root: str) -> Tuple[str, List[str], List[str], List[str]]: # 2. scan for paths and filter out the ignored paths, diff --git a/charon/utils/archive.py b/charon/utils/archive.py index 4a1f256c..058fa17e 100644 --- a/charon/utils/archive.py +++ b/charon/utils/archive.py @@ -182,6 +182,19 @@ def detect_npm_archive(repo): return NpmArchiveType.NOT_NPM +def detect_npm_archives(repos): + """Detects, if the archives need to have npm workflow. + :parameter repos list of repository directories + :return list of NpmArchiveType values + """ + results = [] + for repo in repos: + result = detect_npm_archive(repo) + results.append(result) + + return results + + def download_archive(url: str, base_dir=None) -> str: dir_ = base_dir if not dir_ or not os.path.isdir(dir_): diff --git a/tests/test_archive.py b/tests/test_archive.py index 0e2ac09a..22cf48fd 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,5 +1,5 @@ from tests.base import BaseTest -from charon.utils.archive import NpmArchiveType, detect_npm_archive +from charon.utils.archive import NpmArchiveType, detect_npm_archive, detect_npm_archives import os from tests.constants import INPUTS @@ -12,5 +12,36 @@ def test_detect_package(self): npm_tarball = os.path.join(INPUTS, "code-frame-7.14.5.tgz") self.assertEqual(NpmArchiveType.TAR_FILE, detect_npm_archive(npm_tarball)) + def test_detect_packages(self): + mvn_tarballs = [ + os.path.join(INPUTS, "commons-client-4.5.6.zip"), + os.path.join(INPUTS, "commons-client-4.5.9.zip") + ] + archive_types = detect_npm_archives(mvn_tarballs) + self.assertEqual(2, archive_types.count(NpmArchiveType.NOT_NPM)) + + npm_tarball = [ + os.path.join(INPUTS, "code-frame-7.14.5.tgz") + ] + archive_types = detect_npm_archives(npm_tarball) + self.assertEqual(1, archive_types.count(NpmArchiveType.TAR_FILE)) + + npm_tarballs = [ + os.path.join(INPUTS, "code-frame-7.14.5.tgz"), + os.path.join(INPUTS, "code-frame-7.15.8.tgz") + ] + archive_types = detect_npm_archives(npm_tarballs) + self.assertEqual(2, archive_types.count(NpmArchiveType.TAR_FILE)) + + tarballs = [ + os.path.join(INPUTS, "commons-client-4.5.6.zip"), + os.path.join(INPUTS, "commons-client-4.5.9.zip"), + os.path.join(INPUTS, "code-frame-7.14.5.tgz"), + os.path.join(INPUTS, "code-frame-7.15.8.tgz") + ] + archive_types = detect_npm_archives(tarballs) + self.assertEqual(2, archive_types.count(NpmArchiveType.NOT_NPM)) + self.assertEqual(2, archive_types.count(NpmArchiveType.TAR_FILE)) + def test_download_archive(self): pass diff --git a/tests/test_extract_tarballs.py b/tests/test_extract_tarballs.py new file mode 100644 index 00000000..22190bfc --- /dev/null +++ b/tests/test_extract_tarballs.py @@ -0,0 +1,31 @@ +from tests.base import BaseTest +from charon.pkgs.maven import _extract_tarballs +import os + +from tests.constants import INPUTS + + +class ArchiveTest(BaseTest): + def test_extract_tarballs(self): + mvn_tarballs = [ + os.path.join(INPUTS, "commons-client-4.5.6.zip"), + os.path.join(INPUTS, "commons-client-4.5.9.zip"), + ] + final_merged_path = _extract_tarballs(mvn_tarballs, "maven-repository") + expected_dir = os.path.join( + final_merged_path, "commons-client-4.5.6_commons-client-4.5.9", "maven-repository" + ) + self.assertTrue(os.path.exists(expected_dir)) + + expected_files = [ + "org/apache/httpcomponents/httpclient/4.5.9/httpclient-4.5.9.jar", + "org/apache/httpcomponents/httpclient/4.5.9/httpclient-4.5.9.pom", + "org/apache/httpcomponents/httpclient/4.5.6/httpclient-4.5.6.jar", + "org/apache/httpcomponents/httpclient/4.5.6/httpclient-4.5.6.pom", + ] + for expected_file in expected_files: + file_path = os.path.join(expected_dir, expected_file) + self.assertTrue(os.path.exists(file_path)) + + def test_download_archive(self): + pass diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index 629a9e3f..6f40a8ca 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -110,6 +110,68 @@ def test_overlap_upload(self): self.assertIn("httpclient", cat_content) self.assertIn("org.apache.httpcomponents", cat_content) + def test_multi_zips_upload(self): + mvn_tarballs = [ + os.path.join(INPUTS, "commons-client-4.5.6.zip"), + os.path.join(INPUTS, "commons-client-4.5.9.zip") + ] + product_45 = "commons-client-4.5" + + handle_maven_uploading( + mvn_tarballs, product_45, + targets=[('', TEST_BUCKET, '', '')], + dir_=self.tempdir, do_index=False + ) + + objs = list(self.test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + # need to double mvn num because of .prodinfo files + self.assertEqual( + COMMONS_CLIENT_MVN_NUM * 2 + COMMONS_CLIENT_META_NUM, + len(actual_files) + ) + + filesets = [ + COMMONS_CLIENT_METAS, COMMONS_CLIENT_456_FILES, + COMMONS_CLIENT_459_FILES, + ARCHETYPE_CATALOG_FILES + ] + for fileset in filesets: + for f in fileset: + self.assertIn(f, actual_files) + + product_mix = [product_45] + for f in COMMONS_LOGGING_FILES: + self.assertIn(f, actual_files) + self.check_product(f, product_mix) + for f in COMMONS_LOGGING_METAS: + self.assertIn(f, actual_files) + + meta_obj_client = self.test_bucket.Object(COMMONS_CLIENT_METAS[0]) + meta_content_client = str(meta_obj_client.get()["Body"].read(), "utf-8") + self.assertIn( + "org.apache.httpcomponents", meta_content_client + ) + self.assertIn("httpclient", meta_content_client) + self.assertIn("4.5.9", meta_content_client) + self.assertIn("4.5.9", meta_content_client) + self.assertIn("4.5.6", meta_content_client) + self.assertIn("4.5.9", meta_content_client) + + meta_obj_logging = self.test_bucket.Object(COMMONS_LOGGING_METAS[0]) + meta_content_logging = str(meta_obj_logging.get()["Body"].read(), "utf-8") + self.assertIn("commons-logging", meta_content_logging) + self.assertIn("commons-logging", meta_content_logging) + self.assertIn("1.2", meta_content_logging) + self.assertIn("1.2", meta_content_logging) + self.assertIn("1.2", meta_content_logging) + + catalog = self.test_bucket.Object(ARCHETYPE_CATALOG) + cat_content = str(catalog.get()["Body"].read(), "utf-8") + self.assertIn("4.5.9", cat_content) + self.assertIn("httpclient", cat_content) + self.assertIn("org.apache.httpcomponents", cat_content) + def test_ignore_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" From 6d4dc56a59101ca38a3bb52de57badb6fc586741 Mon Sep 17 00:00:00 2001 From: yma Date: Tue, 14 Oct 2025 14:15:14 +0800 Subject: [PATCH 2/6] Fix TypeError for argument multi nargs value defination --- charon/cmd/cmd_upload.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py index dcc24a88..4cd0d8dd 100644 --- a/charon/cmd/cmd_upload.py +++ b/charon/cmd/cmd_upload.py @@ -37,8 +37,7 @@ @argument( "repos", type=str, - nargs='+', # This allows multiple arguments for zip urls - required=True + nargs=-1 # This allows multiple arguments for zip urls ) @option( "--product", From f3a2c55d9233895f39d330aae3120fde3a7e17cc Mon Sep 17 00:00:00 2001 From: yma Date: Fri, 17 Oct 2025 11:54:43 +0800 Subject: [PATCH 3/6] Fix note, err log, repos param list type, merged dir name length, typo issues --- charon/cmd/cmd_upload.py | 1 + charon/pkgs/maven.py | 24 +++++------------------- tests/test_cf_maven_ops.py | 4 ++-- tests/test_cf_reindex.py | 2 +- tests/test_extract_tarballs.py | 5 +---- tests/test_manifest_del.py | 2 +- tests/test_manifest_upload.py | 2 +- tests/test_maven_del.py | 4 ++-- tests/test_maven_del_multi_tgts.py | 4 ++-- tests/test_maven_index.py | 14 +++++++------- tests/test_maven_index_multi_tgts.py | 12 ++++++------ tests/test_maven_sign.py | 6 +++--- tests/test_maven_upload.py | 8 ++++---- tests/test_maven_upload_multi_tgts.py | 8 ++++---- tests/test_pkgs_dryrun.py | 6 +++--- 15 files changed, 43 insertions(+), 59 deletions(-) diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py index 4cd0d8dd..f1e4df3f 100644 --- a/charon/cmd/cmd_upload.py +++ b/charon/cmd/cmd_upload.py @@ -156,6 +156,7 @@ def upload( """Upload all files from released product REPOs to Ronda Service. The REPOs point to a product released tarballs which are hosted in remote urls or local paths. + Notes: It does not support multiple repos for NPM archives """ tmp_dir = work_dir try: diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index b183c474..90724050 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -32,7 +32,7 @@ META_FILE_FAILED, MAVEN_METADATA_TEMPLATE, ARCHETYPE_CATALOG_TEMPLATE, ARCHETYPE_CATALOG_FILENAME, PACKAGE_TYPE_MAVEN) -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Tuple from jinja2 import Template from datetime import datetime from zipfile import ZipFile, BadZipFile @@ -262,7 +262,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) - def handle_maven_uploading( - repos: Union[str, List[str]], + repos: List[str], prod_key: str, ignore_patterns=None, root="maven-repository", @@ -295,8 +295,7 @@ def handle_maven_uploading( """ if targets is None: targets = [] - if isinstance(repos, str): - repos = [repos] + # 1. extract tarballs tmp_root = _extract_tarballs(repos, root, prod_key, dir__=dir_) @@ -706,7 +705,7 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str extracted_dirs.append(tmp_root) except BadZipFile as e: - logger.error("Tarball extraction error: %s", e) + logger.error("Tarball extraction error for repo %s: %s", repo, e) sys.exit(1) else: logger.error("Error: archive %s does not exist", repo) @@ -714,21 +713,8 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str # Merge all extracted directories if extracted_dirs: - # Get top-level directory names for merged from all repos - top_level_merged_name_dirs = [] - for extracted_dir in extracted_dirs: - for item in os.listdir(extracted_dir): - item_path = os.path.join(extracted_dir, item) - # Check the root maven-repository subdirectory existence - maven_repo_path = os.path.join(item_path, root) - if os.path.isdir(item_path) and os.path.exists(maven_repo_path): - top_level_merged_name_dirs.append(item) - break - # Create merged directory name - merged_dir_name = ( - "_".join(top_level_merged_name_dirs) if top_level_merged_name_dirs else "merged" - ) + merged_dir_name = "merged_repositories" merged_dest_dir = os.path.join(final_tmp_root, merged_dir_name) # Merge content from all extracted directories diff --git a/tests/test_cf_maven_ops.py b/tests/test_cf_maven_ops.py index b8cb03c1..ca5ac361 100644 --- a/tests/test_cf_maven_ops.py +++ b/tests/test_cf_maven_ops.py @@ -31,7 +31,7 @@ def test_cf_after_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], dir_=self.tempdir, do_index=True, @@ -52,7 +52,7 @@ def test_cf_after_del(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], dir_=self.tempdir, do_index=True diff --git a/tests/test_cf_reindex.py b/tests/test_cf_reindex.py index 944a86f2..941793fd 100644 --- a/tests/test_cf_reindex.py +++ b/tests/test_cf_reindex.py @@ -40,7 +40,7 @@ def test_cf_maven_after_reindex(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], dir_=self.tempdir ) diff --git a/tests/test_extract_tarballs.py b/tests/test_extract_tarballs.py index 22190bfc..53a96f63 100644 --- a/tests/test_extract_tarballs.py +++ b/tests/test_extract_tarballs.py @@ -13,7 +13,7 @@ def test_extract_tarballs(self): ] final_merged_path = _extract_tarballs(mvn_tarballs, "maven-repository") expected_dir = os.path.join( - final_merged_path, "commons-client-4.5.6_commons-client-4.5.9", "maven-repository" + final_merged_path, "merged_repositories", "maven-repository" ) self.assertTrue(os.path.exists(expected_dir)) @@ -26,6 +26,3 @@ def test_extract_tarballs(self): for expected_file in expected_files: file_path = os.path.join(expected_dir, expected_file) self.assertTrue(os.path.exists(file_path)) - - def test_download_archive(self): - pass diff --git a/tests/test_manifest_del.py b/tests/test_manifest_del.py index 7a81be3c..c47c7602 100644 --- a/tests/test_manifest_del.py +++ b/tests/test_manifest_del.py @@ -77,7 +77,7 @@ def __prepare_maven_content(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[(TEST_TARGET, TEST_BUCKET, '', '')], dir_=self.tempdir, do_index=False, diff --git a/tests/test_manifest_upload.py b/tests/test_manifest_upload.py index c7e801b2..520f0679 100644 --- a/tests/test_manifest_upload.py +++ b/tests/test_manifest_upload.py @@ -36,7 +36,7 @@ def test_maven_manifest_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[(TEST_TARGET, TEST_BUCKET, '', '')], dir_=self.tempdir, do_index=False, diff --git a/tests/test_maven_del.py b/tests/test_maven_del.py index 5b565adc..86425724 100644 --- a/tests/test_maven_del.py +++ b/tests/test_maven_del.py @@ -190,7 +190,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, prefix, '')], dir_=self.tempdir, do_index=False @@ -199,7 +199,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=[('', TEST_BUCKET, prefix, '')], dir_=self.tempdir, do_index=False diff --git a/tests/test_maven_del_multi_tgts.py b/tests/test_maven_del_multi_tgts.py index 26fa11cc..2a7d042f 100644 --- a/tests/test_maven_del_multi_tgts.py +++ b/tests/test_maven_del_multi_tgts.py @@ -259,7 +259,7 @@ def __prepare_content(self, prefix=None): product_456 = "commons-client-4.5.6" targets_ = [('', TEST_BUCKET, prefix, ''), ('', TEST_BUCKET_2, prefix, '')] handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=targets_, dir_=self.tempdir, do_index=False @@ -268,7 +268,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=targets_, dir_=self.tempdir, do_index=False diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index a5cd1ed2..33533337 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -37,7 +37,7 @@ def test_uploading_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir ) @@ -79,7 +79,7 @@ def test_overlap_upload_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir ) @@ -87,7 +87,7 @@ def test_overlap_upload_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir ) @@ -130,7 +130,7 @@ def test_re_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir ) @@ -221,7 +221,7 @@ def __test_upload_index_with_prefix(self, prefix: str): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, prefix, '')], dir_=self.tempdir ) @@ -403,7 +403,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, prefix, '')], dir_=self.tempdir ) @@ -411,7 +411,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=[('', TEST_BUCKET, prefix, '')], dir_=self.tempdir ) diff --git a/tests/test_maven_index_multi_tgts.py b/tests/test_maven_index_multi_tgts.py index cc9d0718..44f921bf 100644 --- a/tests/test_maven_index_multi_tgts.py +++ b/tests/test_maven_index_multi_tgts.py @@ -46,7 +46,7 @@ def test_uploading_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=targets_, dir_=self.tempdir ) @@ -106,7 +106,7 @@ def test_overlap_upload_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=targets_, dir_=self.tempdir ) @@ -114,7 +114,7 @@ def test_overlap_upload_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=targets_, dir_=self.tempdir ) @@ -194,7 +194,7 @@ def __test_upload_index_with_prefix(self, prefix: str): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=targets_, dir_=self.tempdir ) @@ -417,7 +417,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=targets_, dir_=self.tempdir ) @@ -425,7 +425,7 @@ def __prepare_content(self, prefix=None): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=targets_, dir_=self.tempdir ) diff --git a/tests/test_maven_sign.py b/tests/test_maven_sign.py index f60ee54d..834326bf 100644 --- a/tests/test_maven_sign.py +++ b/tests/test_maven_sign.py @@ -32,7 +32,7 @@ def test_uploading_sign(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, gen_sign=True, @@ -63,7 +63,7 @@ def test_overlap_upload_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, gen_sign=True, @@ -73,7 +73,7 @@ def test_overlap_upload_index(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, gen_sign=True, diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index 6f40a8ca..fefa74ea 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -47,7 +47,7 @@ def test_overlap_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, do_index=False ) @@ -55,7 +55,7 @@ def test_overlap_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, do_index=False ) @@ -176,7 +176,7 @@ def test_ignore_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, [".*.sha1"], + [test_zip], product_456, [".*.sha1"], targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, do_index=False ) @@ -205,7 +205,7 @@ def __test_prefix_upload(self, prefix: str): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, prefix, '')], dir_=self.tempdir, do_index=False diff --git a/tests/test_maven_upload_multi_tgts.py b/tests/test_maven_upload_multi_tgts.py index 35aa49d4..f6eb289e 100644 --- a/tests/test_maven_upload_multi_tgts.py +++ b/tests/test_maven_upload_multi_tgts.py @@ -68,7 +68,7 @@ def test_overlap_upload(self): ('', TEST_BUCKET, '', ''), ('', TEST_BUCKET_2, '', '') ] handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=targets_, dir_=self.tempdir, do_index=False ) @@ -76,7 +76,7 @@ def test_overlap_upload(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=targets_, dir_=self.tempdir, do_index=False ) @@ -186,7 +186,7 @@ def test_ignore_upload(self): ('', TEST_BUCKET, '', ''), ('', TEST_BUCKET_2, '', '') ] handle_maven_uploading( - test_zip, product_456, [".*.sha1"], + [test_zip], product_456, [".*.sha1"], targets=targets_, dir_=self.tempdir, do_index=False ) @@ -221,7 +221,7 @@ def __test_prefix_upload(self, targets: List[Tuple[str, str, str, str]]): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=targets, dir_=self.tempdir, do_index=False diff --git a/tests/test_pkgs_dryrun.py b/tests/test_pkgs_dryrun.py index 46061734..c49ad14d 100644 --- a/tests/test_pkgs_dryrun.py +++ b/tests/test_pkgs_dryrun.py @@ -30,7 +30,7 @@ def test_maven_upload_dry_run(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product, + [test_zip], product, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir, dry_run=True @@ -90,7 +90,7 @@ def __prepare_maven_content(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, + [test_zip], product_456, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir ) @@ -98,7 +98,7 @@ def __prepare_maven_content(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.9.zip") product_459 = "commons-client-4.5.9" handle_maven_uploading( - test_zip, product_459, + [test_zip], product_459, targets=[('', TEST_BUCKET, '', '')], dir_=self.tempdir ) From 45d3210bfa84849701fd3edb7e977f78c90e8e8d Mon Sep 17 00:00:00 2001 From: yma Date: Wed, 22 Oct 2025 10:27:35 +0800 Subject: [PATCH 4/6] Fix files duplicated logic for the merge overlapping case --- charon/pkgs/maven.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 90724050..6ac406c5 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -689,7 +689,7 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str final_tmp_root = mkdtemp(prefix=f"charon-{prefix}-final-", dir=dir__) total_copied = 0 - total_overwritten = 0 + total_duplicated = 0 total_processed = 0 # Collect all extracted directories first @@ -719,20 +719,20 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str # Merge content from all extracted directories for extracted_dir in extracted_dirs: - copied, overwritten, processed = _merge_directories_with_rename( + copied, duplicated, processed = _merge_directories_with_rename( extracted_dir, merged_dest_dir, root ) total_copied += copied - total_overwritten += overwritten + total_duplicated += duplicated total_processed += processed # Clean up temporary extraction directory rmtree(extracted_dir) logger.info( - "All zips merged! Total copied: %s, Total overwritten: %s, Total processed: %s", + "All zips merged! Total copied: %s, Total duplicated: %s, Total processed: %s", total_copied, - total_overwritten, + total_duplicated, total_processed, ) return final_tmp_root @@ -743,10 +743,10 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str): * src_dir is the source directory to copy from * dest_dir is the destination directory to copy to. - Returns Tuple of (copied_count, overwritten_count, processed_count) + Returns Tuple of (copied_count, duplicated_count, processed_count) """ copied_count = 0 - overwritten_count = 0 + duplicated_count = 0 processed_count = 0 # Find the actual content directory @@ -768,27 +768,27 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str): # Create destination directory if it doesn't exist os.makedirs(dest_root, exist_ok=True) - # Copy all files, overwriting existing ones + # Copy all files, skip existing ones for file in files: src_file = os.path.join(root_dir, file) dest_file = os.path.join(dest_root, file) if os.path.exists(dest_file): - overwritten_count += 1 - logger.debug("Overwritten: %s -> %s", src_file, dest_file) + duplicated_count += 1 + logger.debug("Duplicated: %s, skipped", dest_file) else: copied_count += 1 + copy2(src_file, dest_file) logger.debug("Copied: %s -> %s", src_file, dest_file) processed_count += 1 - copy2(src_file, dest_file) logger.info( - "One zip merged! Files copied: %s, Files overwritten: %s, Total files processed: %s", + "One zip merged! Files copied: %s, Files duplicated: %s, Total files processed: %s", copied_count, - overwritten_count, + duplicated_count, processed_count, ) - return copied_count, overwritten_count, processed_count + return copied_count, duplicated_count, processed_count def _scan_paths(files_root: str, ignore_patterns: List[str], From cd7ed7a10cde676307e45ee7e8cbae0e8fe92131 Mon Sep 17 00:00:00 2001 From: yma Date: Wed, 22 Oct 2025 15:03:01 +0800 Subject: [PATCH 5/6] Add archetype catalog files merged logic for merged zips --- charon/pkgs/maven.py | 89 +++++++++++++++++++++++++++++++++++--- tests/test_maven_upload.py | 1 + 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 6ac406c5..1cf23041 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -690,6 +690,7 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str total_copied = 0 total_duplicated = 0 + total_merged = 0 total_processed = 0 # Collect all extracted directories first @@ -719,20 +720,23 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str # Merge content from all extracted directories for extracted_dir in extracted_dirs: - copied, duplicated, processed = _merge_directories_with_rename( + copied, duplicated, merged, processed = _merge_directories_with_rename( extracted_dir, merged_dest_dir, root ) total_copied += copied total_duplicated += duplicated + total_merged += merged total_processed += processed # Clean up temporary extraction directory rmtree(extracted_dir) logger.info( - "All zips merged! Total copied: %s, Total duplicated: %s, Total processed: %s", + "All zips merged! Total copied: %s, Total duplicated: %s, " + "Total merged: %s, Total processed: %s", total_copied, total_duplicated, + total_merged, total_processed, ) return final_tmp_root @@ -743,10 +747,11 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str): * src_dir is the source directory to copy from * dest_dir is the destination directory to copy to. - Returns Tuple of (copied_count, duplicated_count, processed_count) + Returns Tuple of (copied_count, duplicated_count, merged_count, processed_count) """ copied_count = 0 duplicated_count = 0 + merged_count = 0 processed_count = 0 # Find the actual content directory @@ -772,23 +777,95 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str): for file in files: src_file = os.path.join(root_dir, file) dest_file = os.path.join(dest_root, file) + + if file == ARCHETYPE_CATALOG_FILENAME: + _handle_archetype_catalog_merge(src_file, dest_file) + merged_count += 1 + logger.debug("Merged archetype catalog: %s -> %s", src_file, dest_file) if os.path.exists(dest_file): duplicated_count += 1 logger.debug("Duplicated: %s, skipped", dest_file) else: - copied_count += 1 copy2(src_file, dest_file) + copied_count += 1 logger.debug("Copied: %s -> %s", src_file, dest_file) processed_count += 1 logger.info( - "One zip merged! Files copied: %s, Files duplicated: %s, Total files processed: %s", + "One zip merged! Files copied: %s, Files duplicated: %s, " + "Files merged: %s, Total files processed: %s", copied_count, duplicated_count, + merged_count, processed_count, ) - return copied_count, duplicated_count, processed_count + return copied_count, duplicated_count, merged_count, processed_count + + +def _handle_archetype_catalog_merge(src_catalog: str, dest_catalog: str): + """ + Handle merging of archetype-catalog.xml files during directory merge. + + Args: + src_catalog: Source archetype-catalog.xml file path + dest_catalog: Destination archetype-catalog.xml file path + """ + try: + with open(src_catalog, "rb") as sf: + src_archetypes = _parse_archetypes(sf.read()) + except ElementTree.ParseError as e: + logger.warning("Failed to read source archetype catalog %s: %s", src_catalog, e) + return + + if len(src_archetypes) < 1: + logger.warning( + "No archetypes found in source archetype-catalog.xml: %s, " + "even though the file exists! Skipping.", + src_catalog + ) + return + + # Copy directly if dest_catalog doesn't exist + if not os.path.exists(dest_catalog): + copy2(src_catalog, dest_catalog) + return + + try: + with open(dest_catalog, "rb") as df: + dest_archetypes = _parse_archetypes(df.read()) + except ElementTree.ParseError as e: + logger.warning("Failed to read dest archetype catalog %s: %s", dest_catalog, e) + return + + if len(dest_archetypes) < 1: + logger.warning( + "No archetypes found in dest archetype-catalog.xml: %s, " + "even though the file exists! Copy directly from the src_catalog, %s.", + dest_catalog, src_catalog + ) + copy2(src_catalog, dest_catalog) + return + + else: + original_dest_size = len(dest_archetypes) + for sa in src_archetypes: + if sa not in dest_archetypes: + dest_archetypes.append(sa) + else: + logger.debug("DUPLICATE ARCHETYPE: %s", sa) + + if len(dest_archetypes) != original_dest_size: + with open(dest_catalog, 'wb'): + content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content() + try: + overwrite_file(dest_catalog, content) + except FileNotFoundError as e: + logger.error( + "Error: Can not create file %s because of some missing folders", + dest_catalog, + ) + raise e def _scan_paths(files_root: str, ignore_patterns: List[str], diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index fefa74ea..ab36c76f 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -168,6 +168,7 @@ def test_multi_zips_upload(self): catalog = self.test_bucket.Object(ARCHETYPE_CATALOG) cat_content = str(catalog.get()["Body"].read(), "utf-8") + self.assertIn("4.5.6", cat_content) self.assertIn("4.5.9", cat_content) self.assertIn("httpclient", cat_content) self.assertIn("org.apache.httpcomponents", cat_content) From 56c5a33b09ef47bf8c1aed2c13279c4ea5c4a187 Mon Sep 17 00:00:00 2001 From: yma Date: Thu, 23 Oct 2025 09:27:07 +0800 Subject: [PATCH 6/6] Fix unnecessary file open during archetype catalog merge --- charon/pkgs/maven.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 1cf23041..25f8bc4f 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -856,16 +856,12 @@ def _handle_archetype_catalog_merge(src_catalog: str, dest_catalog: str): logger.debug("DUPLICATE ARCHETYPE: %s", sa) if len(dest_archetypes) != original_dest_size: - with open(dest_catalog, 'wb'): - content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content() - try: - overwrite_file(dest_catalog, content) - except FileNotFoundError as e: - logger.error( - "Error: Can not create file %s because of some missing folders", - dest_catalog, - ) - raise e + content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content() + try: + overwrite_file(dest_catalog, content) + except Exception as e: + logger.error("Failed to merge archetype catalog: %s", dest_catalog) + raise e def _scan_paths(files_root: str, ignore_patterns: List[str],