diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 2ac68a4b..5a138111 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -14,6 +14,7 @@ limitations under the License. """ from charon.utils.files import HashType +from charon.utils.strings import trail_path_with_root import charon.pkgs.indexing as indexing from charon.utils.files import overwrite_file, digest, write_manifest from charon.utils.archive import extract_zip_all @@ -254,7 +255,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) - def handle_maven_uploading( repo: str, prod_key: str, - ignore_patterns=None, + ignore_patterns: List[str] = None, root="maven-repository", targets: List[Tuple[str, str, str]] = None, aws_profile=None, @@ -284,6 +285,7 @@ def handle_maven_uploading( # 2. scan for paths and filter out the ignored paths, # and also collect poms for later metadata generation + # ignore_dirs = get_config().get_ignore_dirs() (top_level, valid_mvn_paths, valid_poms, @@ -447,6 +449,7 @@ def handle_maven_del( # 2. scan for paths and filter out the ignored paths, # and also collect poms for later metadata generation + # ignore_dirs = get_config().get_ignore_dirs() (top_level, valid_mvn_paths, valid_poms, @@ -595,10 +598,26 @@ def _scan_paths(files_root: str, ignore_patterns: List[str], # and also collect poms for later metadata generation logger.info("Scan %s to collect files", files_root) top_level = root - valid_mvn_paths, non_mvn_paths, ignored_paths, valid_poms, valid_dirs = [], [], [], [], [] + valid_mvn_paths, non_mvn_paths, files_ignored, dirs_ignored, \ + valid_poms, valid_dirs = [], [], [], [], [], [] changed_dirs = set() top_found = False for root_dir, dirs, names in os.walk(files_root): + ignored_current_dir = False + if ignore_patterns: + for ignored in ignore_patterns: + checking_dir = root_dir + if top_found and top_level in root_dir: + checking_dir = trail_path_with_root(root_dir, top_level) + if re.match(ignored, checking_dir): + logger.debug("Found ignored directory %s, " + "all files will be ignored in this dir", + root_dir) + ignored_current_dir = True + dirs_ignored.append(root_dir) + break + if ignored_current_dir: + continue for directory in dirs: changed_dirs.add(os.path.join(root_dir, directory)) if not top_found: @@ -615,7 +634,7 @@ def _scan_paths(files_root: str, ignore_patterns: List[str], # Let's wait to do the regex / pom examination until we # know we're inside a valid root directory. if _is_ignored(name, ignore_patterns): - ignored_paths.append(path) + files_ignored.append(path) continue valid_mvn_paths.append(path) @@ -644,10 +663,15 @@ def _scan_paths(files_root: str, ignore_patterns: List[str], valid_dirs.append(c) logger.info("Files scanning done.\n") - if ignore_patterns and len(ignore_patterns) > 0: + if dirs_ignored and len(dirs_ignored) > 0: + logger.info( + "Ignored all files in these directories: \n%s\n", + "\n".join(dirs_ignored) + ) + if files_ignored and len(files_ignored) > 0: logger.info( "Ignored paths with ignore_patterns %s as below:\n%s\n", - ignore_patterns, "\n".join(ignored_paths) + ignore_patterns, "\n".join(files_ignored) ) return (top_level, valid_mvn_paths, valid_poms, valid_dirs) @@ -994,7 +1018,7 @@ def __hash_decorate_metadata(path: str, metadata: str) -> List[str]: def _is_ignored(filename: str, ignore_patterns: List[str]) -> bool: for ignored_name in STANDARD_GENERATED_IGNORES: if ignored_name in filename: - logger.info("Ignoring standard generated Maven path: %s", filename) + logger.debug("Ignoring standard generated Maven path: %s", filename) return True if ignore_patterns: diff --git a/charon/storage.py b/charon/storage.py index 0cefed71..3cdbfd26 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -18,7 +18,7 @@ from boto3_type_annotations.s3.service_resource import Object from charon.utils.files import read_sha1 from charon.constants import PROD_INFO_SUFFIX, MANIFEST_SUFFIX - +from charon.utils.strings import trail_path_with_root from boto3 import session from botocore.errorfactory import ClientError from botocore.exceptions import HTTPClientError @@ -748,17 +748,12 @@ def __do_path_cut_and( path_handler: Callable[[str, str, int, int, List[str], asyncio.Semaphore], Awaitable[bool]], root="/" ) -> List[str]: - slash_root = root - if not root.endswith("/"): - slash_root = slash_root + "/" failed_paths = [] index = 1 file_paths_count = len(file_paths) tasks = [] for full_path in file_paths: - path = full_path - if path.startswith(slash_root): - path = path[len(slash_root):] + path = trail_path_with_root(full_path, root) tasks.append( asyncio.ensure_future( path_handler(full_path, path, index, file_paths_count, failed_paths) diff --git a/charon/utils/strings.py b/charon/utils/strings.py index a3f0ff59..f46ed6f8 100644 --- a/charon/utils/strings.py +++ b/charon/utils/strings.py @@ -2,3 +2,10 @@ def remove_prefix(string: str, prefix: str) -> str: if string and prefix and string.startswith(prefix): return string[len(prefix):] return string + + +def trail_path_with_root(path: str, root: str) -> str: + slash_root = root + if not slash_root.endswith("/"): + slash_root = slash_root + "/" + return remove_prefix(path, slash_root) diff --git a/tests/base.py b/tests/base.py index 39995023..98c8a630 100644 --- a/tests/base.py +++ b/tests/base.py @@ -40,8 +40,8 @@ def setUp(self): self.__prepare_template(config_base) default_config_content = """ ignore_patterns: - - ".*^(redhat).*" - - ".*snapshot.*" + - .*^(redhat).* + - .*snapshot.* targets: ga: diff --git a/tests/commons.py b/tests/commons.py index 1fd2ead4..dca0e95b 100644 --- a/tests/commons.py +++ b/tests/commons.py @@ -85,6 +85,11 @@ "commons-logging/commons-logging/index.html", "commons-logging/commons-logging/1.2/index.html", ] +IGNORED_META_FOLDER_ITEMS = [ + ".index/org/foo/bar/foo-bar.pom", + ".nexus/org/foo/bar/foo-bar.pom", + ".meta/org/foo/bar/foo-bar.pom" +] COMMONS_CLIENT_INDEX = "org/apache/httpcomponents/httpclient/index.html" COMMONS_CLIENT_456_INDEX = "org/apache/httpcomponents/httpclient/4.5.6/index.html" COMMONS_LOGGING_INDEX = "commons-logging/commons-logging/index.html" diff --git a/tests/input/commons-client-4.5.6.zip b/tests/input/commons-client-4.5.6.zip index 7ce27974..d083e95a 100644 Binary files a/tests/input/commons-client-4.5.6.zip and b/tests/input/commons-client-4.5.6.zip differ diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index e55cfde6..4147ad32 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -26,6 +26,8 @@ from moto import mock_s3 import os +SHOULD_IGNORED = [r"^\.nexus/.*", r"^\.index/.*", r"^\.meta/.*"] + @mock_s3 class MavenFileIndexTest(PackageBaseTest): @@ -35,6 +37,7 @@ def test_uploading_index(self): product = "commons-client-4.5.6" handle_maven_uploading( test_zip, product, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, None)], dir_=self.tempdir ) @@ -79,6 +82,7 @@ def test_overlap_upload_index(self): product_456 = "commons-client-4.5.6" handle_maven_uploading( test_zip, product_456, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, None)], dir_=self.tempdir ) @@ -139,6 +143,7 @@ def __test_upload_index_with_prefix(self, prefix: str): product = "commons-client-4.5.6" handle_maven_uploading( test_zip, product, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, prefix)], dir_=self.tempdir ) @@ -191,6 +196,7 @@ def test_deletion_index(self): product_456 = "commons-client-4.5.6" handle_maven_del( test_zip, product_456, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, None)], dir_=self.tempdir ) @@ -262,6 +268,7 @@ def __test_deletion_index_with_prefix(self, prefix: str): product_456 = "commons-client-4.5.6" handle_maven_del( test_zip, product_456, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, prefix)], dir_=self.tempdir ) @@ -321,6 +328,7 @@ def __prepare_content(self, prefix=None): product_456 = "commons-client-4.5.6" handle_maven_uploading( test_zip, product_456, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, prefix)], dir_=self.tempdir ) @@ -329,6 +337,7 @@ def __prepare_content(self, prefix=None): product_459 = "commons-client-4.5.9" handle_maven_uploading( test_zip, product_459, + SHOULD_IGNORED, targets=[(None, TEST_BUCKET, prefix)], dir_=self.tempdir ) diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index c8424a10..830e7337 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -21,7 +21,7 @@ COMMONS_CLIENT_METAS, COMMONS_LOGGING_FILES, COMMONS_LOGGING_METAS, NON_MVN_FILES, ARCHETYPE_CATALOG, ARCHETYPE_CATALOG_FILES, COMMONS_CLIENT_456_MVN_NUM, COMMONS_CLIENT_MVN_NUM, - COMMONS_CLIENT_META_NUM + COMMONS_CLIENT_META_NUM, IGNORED_META_FOLDER_ITEMS ) from moto import mock_s3 import os @@ -62,7 +62,9 @@ def test_overlap_upload(self): actual_files = [obj.key for obj in objs] # need to double mvn num because of .prodinfo files self.assertEqual( - COMMONS_CLIENT_MVN_NUM * 2 + COMMONS_CLIENT_META_NUM, + COMMONS_CLIENT_MVN_NUM * 2 + + COMMONS_CLIENT_META_NUM + + len(IGNORED_META_FOLDER_ITEMS) * 2, len(actual_files) ) @@ -112,7 +114,7 @@ def test_ignore_upload(self): test_zip = os.path.join(os.getcwd(), "tests/input/commons-client-4.5.6.zip") product_456 = "commons-client-4.5.6" handle_maven_uploading( - test_zip, product_456, [".*.sha1"], + test_zip, product_456, [".*.sha1", r"^\.nexus/.*", r"^\.index/.*", r"^\.meta/.*"], targets=[(None, TEST_BUCKET, None)], dir_=self.tempdir, do_index=False ) @@ -151,7 +153,9 @@ def __test_prefix_upload(self, prefix: str): actual_files = [obj.key for obj in objs] # need to double mvn num because of .prodinfo files self.assertEqual( - COMMONS_CLIENT_456_MVN_NUM * 2 + COMMONS_CLIENT_META_NUM, + COMMONS_CLIENT_456_MVN_NUM * 2 + + COMMONS_CLIENT_META_NUM + + len(IGNORED_META_FOLDER_ITEMS) * 2, len(actual_files) ) diff --git a/tests/test_pkgs_dryrun.py b/tests/test_pkgs_dryrun.py index e75b0258..d8fe2a95 100644 --- a/tests/test_pkgs_dryrun.py +++ b/tests/test_pkgs_dryrun.py @@ -51,7 +51,7 @@ def test_maven_delete_dry_run(self): test_bucket = self.mock_s3.Bucket(TEST_BUCKET) objs = list(test_bucket.objects.all()) - self.assertEqual(50, len(objs)) + self.assertEqual(68, len(objs)) def test_npm_upload_dry_run(self): test_tgz = os.path.join(os.getcwd(), "tests/input/code-frame-7.14.5.tgz")