From 00126e6d01ea18b9eb2a7cc439bf159ced7892f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 21:22:55 +0000 Subject: [PATCH 01/31] Chore(deps): Bump jinja2 from 3.1.2 to 3.1.3 Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.2 to 3.1.3. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.2...3.1.3) --- updated-dependencies: - dependency-name: jinja2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 563abc26..10c75966 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ setuptools-rust==1.7.0 -Jinja2==3.1.2 +Jinja2==3.1.3 boto3==1.28.46 botocore==1.31.46 click==8.1.7 From 4010f82be4af553903541c0d65fca50e37209b5a Mon Sep 17 00:00:00 2001 From: Gang Li Date: Tue, 30 Jan 2024 20:29:32 +0800 Subject: [PATCH 02/31] Upgrade moto version to 3.0.7 --- tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index af22ba64..ff6f91ae 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -5,5 +5,5 @@ pytest-cov pytest-html flake8 requests-mock -moto==3.0.2.dev12 +moto==3.0.7 python-gnupg==0.5.0 From 23c94f05ccb079f820a3816d636053af370ffb2a Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 31 Jan 2024 21:44:13 +0800 Subject: [PATCH 03/31] Disable two linters in gh action --- .github/workflows/linters.yaml | 54 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml index 86293445..92c165f1 100644 --- a/.github/workflows/linters.yaml +++ b/.github/workflows/linters.yaml @@ -29,16 +29,16 @@ jobs: - name: Run flake8 on python${{ matrix.python-version }} run: python -m tox -e flake8 - markdownlint: - name: Markdownlint - runs-on: ubuntu-latest + # markdownlint: + # name: Markdownlint + # runs-on: ubuntu-latest - steps: - - name: Check out repo - uses: actions/checkout@v2 + # steps: + # - name: Check out repo + # uses: actions/checkout@v2 - - name: Run markdownlint - uses: containerbuildsystem/actions/markdownlint@master + # - name: Run markdownlint + # uses: containerbuildsystem/actions/markdownlint@master pylint: name: Pylint analyzer for Python ${{ matrix.python-version }} @@ -91,22 +91,22 @@ jobs: # - name: Run mypy on python${{ matrix.python-version }} # run: python -m tox -e mypy - bandit: - name: Bandit analyzer for Python ${{ matrix.python-version }} - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [ "3.8" ] - - steps: - - uses: actions/checkout@v1 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools tox - - - name: Run bandit analyzer on python${{ matrix.python-version }} - run: python -m tox -e bandit + # bandit: + # name: Bandit analyzer for Python ${{ matrix.python-version }} + # runs-on: ubuntu-latest + + # strategy: + # matrix: + # python-version: [ "3.8" ] + + # steps: + # - uses: actions/checkout@v1 + # - uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + # - name: Install dependencies + # run: | + # python -m pip install --upgrade pip 
setuptools tox + + # - name: Run bandit analyzer on python${{ matrix.python-version }} + # run: python -m tox -e bandit From 1f7e4f0613a0db9992c9b67ad31be19e7479c823 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Tue, 6 Feb 2024 20:22:00 +0800 Subject: [PATCH 04/31] Split commands into separate files --- charon/__init__.py | 6 - charon/cmd/__init__.py | 16 ++ charon/cmd/cmd_delete.py | 191 +++++++++++++++++ charon/cmd/cmd_upload.py | 214 +++++++++++++++++++ charon/cmd/command.py | 450 --------------------------------------- charon/cmd/internal.py | 100 +++++++++ setup.py | 2 +- 7 files changed, 522 insertions(+), 457 deletions(-) create mode 100644 charon/cmd/cmd_delete.py create mode 100644 charon/cmd/cmd_upload.py delete mode 100644 charon/cmd/command.py create mode 100644 charon/cmd/internal.py diff --git a/charon/__init__.py b/charon/__init__.py index ca82d84e..9eefcae0 100644 --- a/charon/__init__.py +++ b/charon/__init__.py @@ -13,9 +13,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - -from charon.cmd.command import cli, upload, delete - -# init group command -cli.add_command(upload) -cli.add_command(delete) diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py index 9eefcae0..a9834e1a 100644 --- a/charon/cmd/__init__.py +++ b/charon/cmd/__init__.py @@ -13,3 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. """ +from click import group +from charon.cmd.cmd_upload import upload +from charon.cmd.cmd_delete import delete + + +@group() +def cli(): + """Charon is a tool to synchronize several types of + artifacts repository data to Red Hat Ronda + service (maven.repository.redhat.com). + """ + + +# init group command +cli.add_command(upload) +cli.add_command(delete) diff --git a/charon/cmd/cmd_delete.py b/charon/cmd/cmd_delete.py new file mode 100644 index 00000000..fd2bba5f --- /dev/null +++ b/charon/cmd/cmd_delete.py @@ -0,0 +1,191 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import List + +from charon.config import get_config +from charon.utils.archive import detect_npm_archive, NpmArchiveType +from charon.pkgs.maven import handle_maven_del +from charon.pkgs.npm import handle_npm_del +from charon.cmd.internal import ( + _decide_mode, _validate_prod_key, + _get_local_repo, _get_buckets, + _get_ignore_patterns, _safe_delete +) +from click import command, option, argument + +import traceback +import logging +import os +import sys + +logger = logging.getLogger(__name__) + + +@argument( + "repo", + type=str, +) +@option( + "--product", + "-p", + help=""" + The product key, will combine with version to decide + the metadata of the files in tarball. + """, + nargs=1, + required=True, + multiple=False, +) +@option( + "--version", + "-v", + help=""" + The product version, will combine with product to decide + the metadata of the files in tarball. 
+ """, + required=True, + multiple=False, +) +@option( + "--target", + "-t", + 'targets', + help=""" + The target to do the deletion, which will decide which s3 bucket + and what root path where all files will be deleted from. + Can accept more than one target. + """, + required=True, + multiple=True, +) +@option( + "--root_path", + "-r", + default="maven-repository", + help="""The root path in the tarball before the real maven paths, + will be trailing off before uploading + """, +) +@option( + "--ignore_patterns", + "-i", + multiple=True, + help=""" + The regex patterns list to filter out the files which should + not be allowed to upload to S3. Can accept more than one pattern. + """, +) +@option( + "--work_dir", + "-w", + help=""" + The temporary working directory into which archives should + be extracted, when needed. + """, +) +@option( + "--debug", + "-D", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@option("--dryrun", "-n", is_flag=True, default=False) +@command() +def delete( + repo: str, + product: str, + version: str, + targets: List[str], + root_path="maven-repository", + ignore_patterns: List[str] = None, + work_dir: str = None, + debug=False, + quiet=False, + dryrun=False +): + """Roll back all files in a released product REPO from + Ronda Service. The REPO points to a product released + tarball which is hosted in a remote url or a local path. + """ + tmp_dir = work_dir + try: + _decide_mode(product, version, is_quiet=quiet, is_debug=debug) + if dryrun: + logger.info("Running in dry-run mode," + "no files will be deleted.") + if not _validate_prod_key(product, version): + return + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + archive_path = _get_local_repo(repo) + npm_archive_type = detect_npm_archive(archive_path) + product_key = f"{product}-{version}" + manifest_bucket_name = conf.get_manifest_bucket() + buckets = _get_buckets(targets, conf) + if npm_archive_type != NpmArchiveType.NOT_NPM: + logger.info("This is a npm archive") + tmp_dir, succeeded = handle_npm_del( + archive_path, + product_key, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + else: + ignore_patterns_list = None + if ignore_patterns: + ignore_patterns_list = ignore_patterns + else: + ignore_patterns_list = _get_ignore_patterns(conf) + logger.info("This is a maven archive") + tmp_dir, succeeded = handle_maven_del( + archive_path, + product_key, + ignore_patterns_list, + root=root_path, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + except Exception: + print(traceback.format_exc()) + sys.exit(2) # distinguish between exception and bad config or bad state + finally: + if not debug: + _safe_delete(tmp_dir) diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py new file mode 100644 index 00000000..71c30295 --- /dev/null +++ b/charon/cmd/cmd_upload.py @@ -0,0 +1,214 @@ +""" +Copyright (C) 2022 Red Hat, Inc. 
(https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import List + +from charon.config import get_config +from charon.utils.archive import detect_npm_archive, NpmArchiveType +from charon.pkgs.maven import handle_maven_uploading +from charon.pkgs.npm import handle_npm_uploading +from charon.cmd.internal import ( + _decide_mode, _validate_prod_key, + _get_local_repo, _get_buckets, + _get_ignore_patterns, _safe_delete +) +from click import command, option, argument + +import traceback +import logging +import os +import sys + +logger = logging.getLogger(__name__) + + +@argument( + "repo", + type=str, +) +@option( + "--product", + "-p", + help=""" + The product key, will combine with version to decide + the metadata of the files in tarball. + """, + nargs=1, + required=True, + multiple=False, +) +@option( + "--version", + "-v", + help=""" + The product version, will combine with key to decide + the metadata of the files in tarball. + """, + required=True, + multiple=False, +) +@option( + "--target", + "-t", + 'targets', + help=""" + The target to do the uploading, which will decide which s3 bucket + and what root path where all files will be uploaded to. + Can accept more than one target. + """, + required=True, + multiple=True, +) +@option( + "--root_path", + "-r", + default="maven-repository", + help=""" + The root path in the tarball before the real maven paths, + will be trailing off before uploading. + """, +) +@option( + "--ignore_patterns", + "-i", + multiple=True, + help=""" + The regex patterns list to filter out the files which should + not be allowed to upload to S3. Can accept more than one pattern. + """, +) +@option( + "--work_dir", + "-w", + help=""" + The temporary working directory into which archives should + be extracted, when needed. + """, +) +@option( + "--contain_signature", + "-s", + is_flag=True, + help=""" + Toggle signature generation and upload feature in charon. + """ +) +@option( + "--sign_key", + "-k", + help=""" + rpm-sign key to be used, will replace {{ key }} in default configuration for signature. + Does noting if detach_signature_command does not contain {{ key }} field. + """, +) +@option( + "--debug", + "-D", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@option("--dryrun", "-n", is_flag=True, default=False) +@command() +def upload( + repo: str, + product: str, + version: str, + targets: List[str], + root_path="maven-repository", + ignore_patterns: List[str] = None, + work_dir: str = None, + contain_signature: bool = False, + sign_key: str = "redhatdevel", + debug=False, + quiet=False, + dryrun=False +): + """Upload all files from a released product REPO to Ronda + Service. The REPO points to a product released tarball which + is hosted in a remote url or a local path. 
+ """ + tmp_dir = work_dir + try: + _decide_mode(product, version, is_quiet=quiet, is_debug=debug) + if dryrun: + logger.info("Running in dry-run mode," + "no files will be uploaded.") + if not _validate_prod_key(product, version): + return + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + archive_path = _get_local_repo(repo) + npm_archive_type = detect_npm_archive(archive_path) + product_key = f"{product}-{version}" + manifest_bucket_name = conf.get_manifest_bucket() + buckets = _get_buckets(targets, conf) + if npm_archive_type != NpmArchiveType.NOT_NPM: + logger.info("This is a npm archive") + tmp_dir, succeeded = handle_npm_uploading( + archive_path, + product_key, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + gen_sign=contain_signature, + key=sign_key, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + else: + ignore_patterns_list = None + if ignore_patterns: + ignore_patterns_list = ignore_patterns + else: + ignore_patterns_list = _get_ignore_patterns(conf) + logger.info("This is a maven archive") + tmp_dir, succeeded = handle_maven_uploading( + archive_path, + product_key, + ignore_patterns_list, + root=root_path, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + gen_sign=contain_signature, + key=sign_key, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + except Exception: + print(traceback.format_exc()) + sys.exit(2) # distinguish between exception and bad config or bad state + finally: + if not debug: + _safe_delete(tmp_dir) diff --git a/charon/cmd/command.py b/charon/cmd/command.py deleted file mode 100644 index 2ef88aed..00000000 --- a/charon/cmd/command.py +++ /dev/null @@ -1,450 +0,0 @@ -""" -Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from typing import List, Tuple - -from charon.config import CharonConfig, get_config -from charon.constants import DEFAULT_REGISTRY -from charon.utils.logs import set_logging -from charon.utils.archive import detect_npm_archive, download_archive, NpmArchiveType -from charon.pkgs.maven import handle_maven_uploading, handle_maven_del -from charon.pkgs.npm import handle_npm_uploading, handle_npm_del -from click import command, option, argument, group -from json import loads -from shutil import rmtree - -import traceback -import logging -import os -import sys - -logger = logging.getLogger(__name__) - - -@argument( - "repo", - type=str, -) -@option( - "--product", - "-p", - help=""" - The product key, will combine with version to decide - the metadata of the files in tarball. - """, - nargs=1, - required=True, - multiple=False, -) -@option( - "--version", - "-v", - help=""" - The product version, will combine with key to decide - the metadata of the files in tarball. 
- """, - required=True, - multiple=False, -) -@option( - "--target", - "-t", - 'targets', - help=""" - The target to do the uploading, which will decide which s3 bucket - and what root path where all files will be uploaded to. - Can accept more than one target. - """, - required=True, - multiple=True, -) -@option( - "--root_path", - "-r", - default="maven-repository", - help=""" - The root path in the tarball before the real maven paths, - will be trailing off before uploading. - """, -) -@option( - "--ignore_patterns", - "-i", - multiple=True, - help=""" - The regex patterns list to filter out the files which should - not be allowed to upload to S3. Can accept more than one pattern. - """, -) -@option( - "--work_dir", - "-w", - help=""" - The temporary working directory into which archives should - be extracted, when needed. - """, -) -@option( - "--contain_signature", - "-s", - is_flag=True, - help=""" - Toggle signature generation and upload feature in charon. - """ -) -@option( - "--sign_key", - "-k", - help=""" - rpm-sign key to be used, will replace {{ key }} in default configuration for signature. - Does noting if detach_signature_command does not contain {{ key }} field. - """, -) -@option( - "--debug", - "-D", - help="Debug mode, will print all debug logs for problem tracking.", - is_flag=True, - default=False -) -@option( - "--quiet", - "-q", - help="Quiet mode, will shrink most of the logs except warning and errors.", - is_flag=True, - default=False -) -@option("--dryrun", "-n", is_flag=True, default=False) -@command() -def upload( - repo: str, - product: str, - version: str, - targets: List[str], - root_path="maven-repository", - ignore_patterns: List[str] = None, - work_dir: str = None, - contain_signature: bool = False, - sign_key: str = "redhatdevel", - debug=False, - quiet=False, - dryrun=False -): - """Upload all files from a released product REPO to Ronda - Service. The REPO points to a product released tarball which - is hosted in a remote url or a local path. 
- """ - tmp_dir = work_dir - try: - __decide_mode(product, version, is_quiet=quiet, is_debug=debug) - if dryrun: - logger.info("Running in dry-run mode," - "no files will be uploaded.") - if not __validate_prod_key(product, version): - return - conf = get_config() - if not conf: - sys.exit(1) - - aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() - if not aws_profile: - logger.error("No AWS profile specified!") - sys.exit(1) - - archive_path = __get_local_repo(repo) - npm_archive_type = detect_npm_archive(archive_path) - product_key = f"{product}-{version}" - manifest_bucket_name = conf.get_manifest_bucket() - buckets = __get_buckets(targets, conf) - if npm_archive_type != NpmArchiveType.NOT_NPM: - logger.info("This is a npm archive") - tmp_dir, succeeded = handle_npm_uploading( - archive_path, - product_key, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - gen_sign=contain_signature, - key=sign_key, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - else: - ignore_patterns_list = None - if ignore_patterns: - ignore_patterns_list = ignore_patterns - else: - ignore_patterns_list = __get_ignore_patterns(conf) - logger.info("This is a maven archive") - tmp_dir, succeeded = handle_maven_uploading( - archive_path, - product_key, - ignore_patterns_list, - root=root_path, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - gen_sign=contain_signature, - key=sign_key, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - except Exception: - print(traceback.format_exc()) - sys.exit(2) # distinguish between exception and bad config or bad state - finally: - if not debug: - __safe_delete(tmp_dir) - - -@argument( - "repo", - type=str, -) -@option( - "--product", - "-p", - help=""" - The product key, will combine with version to decide - the metadata of the files in tarball. - """, - nargs=1, - required=True, - multiple=False, -) -@option( - "--version", - "-v", - help=""" - The product version, will combine with product to decide - the metadata of the files in tarball. - """, - required=True, - multiple=False, -) -@option( - "--target", - "-t", - 'targets', - help=""" - The target to do the deletion, which will decide which s3 bucket - and what root path where all files will be deleted from. - Can accept more than one target. - """, - required=True, - multiple=True, -) -@option( - "--root_path", - "-r", - default="maven-repository", - help="""The root path in the tarball before the real maven paths, - will be trailing off before uploading - """, -) -@option( - "--ignore_patterns", - "-i", - multiple=True, - help=""" - The regex patterns list to filter out the files which should - not be allowed to upload to S3. Can accept more than one pattern. - """, -) -@option( - "--work_dir", - "-w", - help=""" - The temporary working directory into which archives should - be extracted, when needed. 
- """, -) -@option( - "--debug", - "-D", - help="Debug mode, will print all debug logs for problem tracking.", - is_flag=True, - default=False -) -@option( - "--quiet", - "-q", - help="Quiet mode, will shrink most of the logs except warning and errors.", - is_flag=True, - default=False -) -@option("--dryrun", "-n", is_flag=True, default=False) -@command() -def delete( - repo: str, - product: str, - version: str, - targets: List[str], - root_path="maven-repository", - ignore_patterns: List[str] = None, - work_dir: str = None, - debug=False, - quiet=False, - dryrun=False -): - """Roll back all files in a released product REPO from - Ronda Service. The REPO points to a product released - tarball which is hosted in a remote url or a local path. - """ - tmp_dir = work_dir - try: - __decide_mode(product, version, is_quiet=quiet, is_debug=debug) - if dryrun: - logger.info("Running in dry-run mode," - "no files will be deleted.") - if not __validate_prod_key(product, version): - return - conf = get_config() - if not conf: - sys.exit(1) - - aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() - if not aws_profile: - logger.error("No AWS profile specified!") - sys.exit(1) - - archive_path = __get_local_repo(repo) - npm_archive_type = detect_npm_archive(archive_path) - product_key = f"{product}-{version}" - manifest_bucket_name = conf.get_manifest_bucket() - buckets = __get_buckets(targets, conf) - if npm_archive_type != NpmArchiveType.NOT_NPM: - logger.info("This is a npm archive") - tmp_dir, succeeded = handle_npm_del( - archive_path, - product_key, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - else: - ignore_patterns_list = None - if ignore_patterns: - ignore_patterns_list = ignore_patterns - else: - ignore_patterns_list = __get_ignore_patterns(conf) - logger.info("This is a maven archive") - tmp_dir, succeeded = handle_maven_del( - archive_path, - product_key, - ignore_patterns_list, - root=root_path, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - except Exception: - print(traceback.format_exc()) - sys.exit(2) # distinguish between exception and bad config or bad state - finally: - if not debug: - __safe_delete(tmp_dir) - - -def __get_buckets(targets: List[str], conf: CharonConfig) -> List[Tuple[str, str, str, str]]: - buckets = [] - for target in targets: - for bucket in conf.get_target(target): - aws_bucket = bucket.get('bucket') - prefix = bucket.get('prefix', '') - registry = bucket.get('registry', DEFAULT_REGISTRY) - buckets.append((target, aws_bucket, prefix, registry)) - return buckets - - -def __safe_delete(tmp_dir: str): - if tmp_dir and os.path.exists(tmp_dir): - logger.info("Cleaning up work directory: %s", tmp_dir) - try: - rmtree(tmp_dir) - except Exception as e: - logger.error("Failed to clear work directory. %s", e) - - -def __get_ignore_patterns(conf: CharonConfig) -> List[str]: - ignore_patterns = os.getenv("CHARON_IGNORE_PATTERNS") - if ignore_patterns: - try: - return loads(ignore_patterns) - except (ValueError, TypeError): - logger.warning("Warning: ignore_patterns %s specified in " - "system environment, but not a valid json " - "style array. 
Will skip it.", ignore_patterns) - if conf: - return conf.get_ignore_patterns() - return None - - -def __get_local_repo(url: str) -> str: - archive_path = url - if url.startswith("http://") or url.startswith("https://"): - logger.info("Start downloading tarball %s", url) - archive_path = download_archive(url) - logger.info("Tarball downloaded at: %s", archive_path) - return archive_path - - -def __validate_prod_key(product: str, version: str) -> bool: - if not product or product.strip() == "": - logger.error("Error: product can not be empty!") - return False - if not version or version.strip() == "": - logger.error("Error: version can not be empty!") - return False - if "," in product: - logger.error("Error: there are invalid characters in product!") - return False - if "," in version: - logger.error("Error: there are invalid characters in version!") - return False - return True - - -def __decide_mode(product: str, version: str, is_quiet: bool, is_debug: bool): - if is_quiet: - logger.info("Quiet mode enabled, " - "will only give warning and error logs.") - set_logging(product, version, level=logging.WARNING) - elif is_debug: - logger.info("Debug mode enabled, " - "will give all debug logs for tracing.") - set_logging(product, version, level=logging.DEBUG) - else: - set_logging(product, version, level=logging.INFO) - - -@group() -def cli(): - """Charon is a tool to synchronize several types of - artifacts repository data to Red Hat Ronda - service (maven.repository.redhat.com). - """ diff --git a/charon/cmd/internal.py b/charon/cmd/internal.py new file mode 100644 index 00000000..cb76559a --- /dev/null +++ b/charon/cmd/internal.py @@ -0,0 +1,100 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import List, Tuple + +from charon.config import CharonConfig +from charon.constants import DEFAULT_REGISTRY +from charon.utils.logs import set_logging +from charon.utils.archive import download_archive +from json import loads +from shutil import rmtree + +import logging +import os + +logger = logging.getLogger(__name__) + + +def _get_buckets(targets: List[str], conf: CharonConfig) -> List[Tuple[str, str, str, str]]: + buckets = [] + for target in targets: + for bucket in conf.get_target(target): + aws_bucket = bucket.get('bucket') + prefix = bucket.get('prefix', '') + registry = bucket.get('registry', DEFAULT_REGISTRY) + buckets.append((target, aws_bucket, prefix, registry)) + return buckets + + +def _safe_delete(tmp_dir: str): + if tmp_dir and os.path.exists(tmp_dir): + logger.info("Cleaning up work directory: %s", tmp_dir) + try: + rmtree(tmp_dir) + except Exception as e: + logger.error("Failed to clear work directory. 
%s", e) + + +def _get_ignore_patterns(conf: CharonConfig) -> List[str]: + ignore_patterns = os.getenv("CHARON_IGNORE_PATTERNS") + if ignore_patterns: + try: + return loads(ignore_patterns) + except (ValueError, TypeError): + logger.warning("Warning: ignore_patterns %s specified in " + "system environment, but not a valid json " + "style array. Will skip it.", ignore_patterns) + if conf: + return conf.get_ignore_patterns() + return None + + +def _get_local_repo(url: str) -> str: + archive_path = url + if url.startswith("http://") or url.startswith("https://"): + logger.info("Start downloading tarball %s", url) + archive_path = download_archive(url) + logger.info("Tarball downloaded at: %s", archive_path) + return archive_path + + +def _validate_prod_key(product: str, version: str) -> bool: + if not product or product.strip() == "": + logger.error("Error: product can not be empty!") + return False + if not version or version.strip() == "": + logger.error("Error: version can not be empty!") + return False + if "," in product: + logger.error("Error: there are invalid characters in product!") + return False + if "," in version: + logger.error("Error: there are invalid characters in version!") + return False + return True + + +def _decide_mode(product: str, version: str, is_quiet: bool, is_debug: bool): + if is_quiet: + logger.info("Quiet mode enabled, " + "will only give warning and error logs.") + set_logging(product, version, level=logging.WARNING) + elif is_debug: + logger.info("Debug mode enabled, " + "will give all debug logs for tracing.") + set_logging(product, version, level=logging.DEBUG) + else: + set_logging(product, version, level=logging.INFO) diff --git a/setup.py b/setup.py index da42d21f..ae737355 100755 --- a/setup.py +++ b/setup.py @@ -48,6 +48,6 @@ package_data={'charon': ['schemas/*.json']}, test_suite="tests", entry_points={ - "console_scripts": ["charon = charon:cli"], + "console_scripts": ["charon = charon.cmd:cli"], }, ) From b711bf8775844c6bd32f0b33b96cef079c98a136 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Mon, 19 Feb 2024 16:36:38 +0800 Subject: [PATCH 05/31] Add content digest util method --- charon/utils/files.py | 31 ++++++++++++++++++++++--------- tests/test_util.py | 10 +++++++++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/charon/utils/files.py b/charon/utils/files.py index ffe08bef..f15f77c4 100644 --- a/charon/utils/files.py +++ b/charon/utils/files.py @@ -58,9 +58,30 @@ def read_sha1(file: str) -> str: def digest(file: str, hash_type=HashType.SHA1) -> str: + hash_obj = _hash_object(hash_type) + # BUF_SIZE is totally arbitrary, change for your app! BUF_SIZE = 65536 # lets read stuff in 64kb chunks! 
+    with open(file, "rb") as f:
+        while True:
+            data = f.read(BUF_SIZE)
+            if not data:
+                break
+            hash_obj.update(data)
+
+    return hash_obj.hexdigest()
+
+
+def digest_content(content: str, hash_type=HashType.SHA1) -> str:
+    """This function will calculate the hash value for the string content with the specified
+       hash type
+    """
+    hash_obj = _hash_object(hash_type)
+    hash_obj.update(content.encode('utf-8'))
+    return hash_obj.hexdigest()
+
+
+def _hash_object(hash_type: HashType):
     hash_obj = None
     if hash_type == HashType.SHA1:
         hash_obj = hashlib.sha1()
@@ -70,15 +91,7 @@ def digest(file: str, hash_type=HashType.SHA1) -> str:
         hash_obj = hashlib.md5()
     else:
         raise Exception("Error: Unknown hash type for digesting.")
-
-    with open(file, "rb") as f:
-        while True:
-            data = f.read(BUF_SIZE)
-            if not data:
-                break
-            hash_obj.update(data)
-
-    return hash_obj.hexdigest()
+    return hash_obj
 
 
 def write_manifest(paths: List[str], root: str, product_key: str) -> Tuple[str, str]:
diff --git a/tests/test_util.py b/tests/test_util.py
index 35c9deff..7105491d 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
-from charon.utils.files import digest, read_sha1, HashType
+from charon.utils.files import digest, digest_content, read_sha1, HashType
 
 import os
 import unittest
@@ -29,6 +29,14 @@ def test_digest(self):
             digest(test_file, HashType.SHA256),
         )
 
+    def test_digest_content(self):
+        test_content = "test common content"
+        self.assertEqual("8c7b70f25fb88bc6a0372f70f6805132e90e2029", digest_content(test_content))
+        self.assertEqual(
+            "1a1c26da1f6830614ed0388bb30d9e849e05bba5de4031e2a2fa6b48032f5354",
+            digest_content(test_content, HashType.SHA256),
+        )
+
     def test_read_sha1(self):
         test_file = os.path.join(INPUTS, "commons-lang3.zip")
         # read the real sha1 hash

From e7cf9ef2319220434f56a5d7a2571335cc72acbf Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Fri, 9 Feb 2024 18:02:40 +0800
Subject: [PATCH 06/31] Add command for re-index of folder

---
 .gitignore                |   3 +
 README.md                 |  10 ++++
 charon/cmd/__init__.py    |   2 +
 charon/cmd/cmd_index.py   | 120 ++++++++++++++++++++++++++++++++++++++
 charon/pkgs/indexing.py   |  77 +++++++++++++++++++++---
 charon/storage.py         |  93 +++++++++++++++++++++++++++--
 tests/test_maven_index.py |  82 +++++++++++++++++++++++++-
 tests/test_npm_index.py   | 106 +++++++++++++++++++++++++++++++++
 tests/test_s3client.py    |  73 +++++++++++++++++++++++
 9 files changed, 553 insertions(+), 13 deletions(-)
 create mode 100644 charon/cmd/cmd_index.py

diff --git a/.gitignore b/.gitignore
index 8ca90496..b32671f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@ package/
 # Unit test
 __pytest_reports
 htmlcov
+
+# Generated when running locally
+*.log
diff --git a/README.md b/README.md
index cdd1608d..887166c0 100644
--- a/README.md
+++ b/README.md
@@ -96,3 +96,13 @@ This command will delete some paths from repo in S3.
   but not delete the artifacts themselves.
 * During or after the paths' deletion, regenerate the metadata files
   and index files for both types.
+
+### charon-index: refresh the index.html for the specified path
+
+```bash
+usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet]
+```
+
+This command will refresh the index.html for the specified path.
+
+* Note that if the path is a NPM metadata path which contains package.json, this refreshment will not work because this type of folder will display the package.json instead of the index.html in http requests.
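For quick orientation, here is a hypothetical invocation of the `index` command this patch introduces. The target name `prod-maven` and the path are illustrative placeholders, not values defined by this patch; targets come from your own Charon configuration:

```bash
# Rebuild the index.html files for one maven folder in the target's bucket.
# --dryrun only logs what would be regenerated; nothing is uploaded to S3.
charon index "org/apache/httpcomponents/" --target prod-maven --dryrun --debug
```

The path argument is resolved against the bucket and prefix configured for the target, and the folder listing is rebuilt from the current S3 content.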
diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py
index a9834e1a..9a3084d0 100644
--- a/charon/cmd/__init__.py
+++ b/charon/cmd/__init__.py
@@ -16,6 +16,7 @@
 from click import group
 from charon.cmd.cmd_upload import upload
 from charon.cmd.cmd_delete import delete
+from charon.cmd.cmd_index import index
 
 
 @group()
@@ -29,3 +30,4 @@ def cli():
 # init group command
 cli.add_command(upload)
 cli.add_command(delete)
+cli.add_command(index)
diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py
new file mode 100644
index 00000000..281ed876
--- /dev/null
+++ b/charon/cmd/cmd_index.py
@@ -0,0 +1,120 @@
+"""
+Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from charon.config import get_config
+from charon.cmd.internal import _decide_mode
+from charon.pkgs.indexing import re_index
+from charon.constants import PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM
+from click import command, option, argument
+
+import traceback
+import logging
+import os
+import sys
+
+logger = logging.getLogger(__name__)
+
+
+@argument(
+    "path",
+    type=str,
+)
+@option(
+    "--target",
+    "-t",
+    help="""
+    The target to do the index refreshing, which will decide
+    which s3 bucket and what root path the index files will
+    be refreshed in.
+    """,
+    required=True
+)
+@option(
+    "--debug",
+    "-D",
+    help="Debug mode, will print all debug logs for problem tracking.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--quiet",
+    "-q",
+    help="Quiet mode, will shrink most of the logs except warning and errors.",
+    is_flag=True,
+    default=False
+)
+@option("--dryrun", "-n", is_flag=True, default=False)
+@command()
+def index(
+    path: str,
+    target: str,
+    debug: bool = False,
+    quiet: bool = False,
+    dryrun: bool = False
+):
+    """This command will re-generate the index.html files for the
+    specified path.
+    """
+    _decide_mode(
+        "index-{}".format(target), path.replace("/", "_"),
+        is_quiet=quiet, is_debug=debug
+    )
+    try:
+        conf = get_config()
+        if not conf:
+            sys.exit(1)
+
+        aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
+        if not aws_profile:
+            logger.error("No AWS profile specified!")
+            sys.exit(1)
+
+        tgt = conf.get_target(target)
+        if not tgt:
+            # log is recorded in get_target
+            sys.exit(1)
+
+        aws_bucket = None
+        prefix = None
+        for b in conf.get_target(target):
+            aws_bucket = b.get('bucket')
+            prefix = b.get('prefix', '')
+
+        package_type = None
+        if "maven" in aws_bucket:
+            logger.info(
+                "The target is a maven repository. Will refresh the index as maven package type"
+            )
+            package_type = PACKAGE_TYPE_MAVEN
+        elif "npm" in aws_bucket:
+            package_type = PACKAGE_TYPE_NPM
+            logger.info(
+                "The target is a npm repository. Will refresh the index as npm package type"
+            )
+        else:
+            logger.error(
+                "The target is not supported. Only maven or npm targets are supported."
+ ) + sys.exit(1) + + if not aws_bucket: + logger.error("No bucket specified!") + sys.exit(1) + + re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun) + except Exception: + print(traceback.format_exc()) + sys.exit(2) # distinguish between exception and bad config or bad state diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index f478e0a5..b342c071 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -17,6 +17,7 @@ from charon.storage import S3Client from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE, PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX) +from charon.utils.files import digest_content from jinja2 import Template import os import logging @@ -149,6 +150,17 @@ def __generate_index_html( def __to_html(package_type: str, contents: List[str], folder: str, top_level: str) -> str: + html_content = __to_html_content(package_type, contents, folder) + html_path = os.path.join(top_level, folder, "index.html") + if folder == "/": + html_path = os.path.join(top_level, "index.html") + os.makedirs(os.path.dirname(html_path), exist_ok=True) + with open(html_path, 'w', encoding='utf-8') as html: + html.write(html_content) + return html_path + + +def __to_html_content(package_type: str, contents: List[str], folder: str) -> str: items = [] if folder != "/": items.append("../") @@ -160,13 +172,7 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st items.extend(contents) items = __sort_index_items(items) index = IndexedHTML(title=folder, header=folder, items=items) - html_path = os.path.join(top_level, folder, "index.html") - if folder == "/": - html_path = os.path.join(top_level, "index.html") - os.makedirs(os.path.dirname(html_path), exist_ok=True) - with open(html_path, 'w', encoding='utf-8') as html: - html.write(index.generate_index_file_content(package_type)) - return html_path + return index.generate_index_file_content(package_type) def __sort_index_items(items): @@ -250,3 +256,60 @@ def __compare(self, other) -> int: return -1 else: return 0 + + +def re_index( + bucket: str, + prefix: str, + path: str, + package_type: str, + aws_profile: str = None, + dry_run: bool = False +): + """Refresh the index.html for the specified folder in the bucket. + """ + s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) + s3_folder = os.path.join(prefix, path) + if path.strip() == "" or path.strip() == "/": + s3_folder = prefix + items: List[str] = s3_client.list_folder_content(bucket, s3_folder) + contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)] + if PACKAGE_TYPE_NPM == package_type: + if any([True if "package.json" in c else False for c in contents]): + logger.warn( + "The path %s contains NPM package.json which will work as " + "package metadata for indexing. 
This indexing is ignored.", + path + ) + return + + if len(contents) >= 1: + real_contents = [] + if prefix and prefix.strip() != "": + for c in contents: + if c.strip() != "": + if c.startswith(prefix): + real_c = remove_prefix(c, prefix) + real_c = remove_prefix(real_c, "/") + real_contents.append(real_c) + else: + real_contents.append(c) + else: + real_contents = contents + logger.debug(real_contents) + index_content = __to_html_content(package_type, real_contents, path) + if not dry_run: + index_path = os.path.join(path, "index.html") + if path == "/": + index_path = "index.html" + s3_client.simple_delete_file(index_path, (bucket, prefix)) + s3_client.simple_upload_file( + index_path, index_content, (bucket, prefix), + "text/html", digest_content(index_content) + ) + else: + logger.warning( + "The path %s does not contain any contents in bucket %s. " + "Will not do any re-indexing", + path, bucket + ) diff --git a/charon/storage.py b/charon/storage.py index 45963c42..f07b1c50 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -530,10 +530,11 @@ def delete_files( self, file_paths: List[str], target: Tuple[str, str], product: Optional[str], root="/" ) -> List[str]: - """ Deletes a list of files to s3 bucket. * Use the cut down file path as s3 key. The cut - down way is move root from the file path if it starts with root. Example: if file_path is - /tmp/maven-repo/org/apache/.... and root is /tmp/maven-repo Then the key will be - org/apache/..... + """ Deletes a list of files to s3 bucket. + * Use the cut down file path as s3 key. The cut + down way is move root from the file path if it starts with root. + Example: if file_path is /tmp/maven-repo/org/apache/.... and + root is /tmp/maven-repo Then the key will be org/apache/..... * The removing will happen with conditions of product checking. First the deletion will remove The product from the file metadata "rh-products". After the metadata removing, if there still are extra products left in that metadata, the file will not @@ -637,6 +638,90 @@ async def path_delete_handler( return failed_files + def simple_delete_file( + self, file_path: str, target: Tuple[str, str] + ): + """ Deletes file in s3 bucket, regardless of any extra + information like product and version info. + * Warning: this will directly delete the files even if + it has lots of product info, so please be careful to use. + If you want to delete product artifact files, please use + delete_files + """ + bucket = target[0] + prefix = target[1] + bucket_obj = self.__get_bucket(bucket) + path_key = os.path.join(prefix, file_path) + file_object = bucket_obj.Object(path_key) + existed = False + try: + existed = self.__file_exists(file_object) + if existed: + bucket_obj.delete_objects(Delete={"Objects": [{"Key": path_key}]}) + else: + logger.warning( + 'Warning: File %s does not exist in S3 bucket %s, will ignore its deleting', + file_path, bucket + ) + except (ClientError, HTTPClientError) as e: + logger.error( + "Error: file existence check failed due to error: %s", e + ) + + def simple_upload_file( + self, file_path: str, file_content: str, + target: Tuple[str, str], + mime_type: str = None, + check_sum_sha1: str = None + ): + """ Uploads file to s3 bucket, regardless of any extra + information like product and version info. + * Warning: this will directly delete the files even if + it has lots of product info, so please be careful to use. 
+ If you want to upload product artifact files, please use + upload_files + """ + bucket = target[0] + prefix = target[1] + bucket_obj = self.__get_bucket(bucket) + path_key = os.path.join(prefix, file_path) + file_object = bucket_obj.Object(path_key) + existed = False + logger.debug( + 'Uploading %s to bucket %s', path_key, bucket + ) + existed = False + try: + existed = self.__file_exists(file_object) + except (ClientError, HTTPClientError) as e: + logger.error( + "Error: file existence check failed due to error: %s", e + ) + return + + content_type = mime_type + if not content_type: + content_type = DEFAULT_MIME_TYPE + if not existed: + f_meta = {} + if check_sum_sha1 and check_sum_sha1.strip() != "": + f_meta[CHECKSUM_META_KEY] = check_sum_sha1 + try: + if not self.__dry_run: + file_object.put( + Body=file_content, + Metadata=f_meta, + ContentType=content_type + ) + logger.debug('Uploaded %s to bucket %s', file_path, bucket) + except (ClientError, HTTPClientError) as e: + logger.error( + "ERROR: file %s not uploaded to bucket %s due to error: %s ", + file_path, bucket, e + ) + else: + raise FileExistsError("Error: file %s already exists, upload is forbiden.") + def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str): if not manifest_bucket_name: logger.warning( diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index d5647ecd..7468310d 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -15,6 +15,7 @@ """ from charon.constants import PROD_INFO_SUFFIX from charon.pkgs.maven import handle_maven_uploading, handle_maven_del +from charon.pkgs.indexing import re_index from charon.storage import CHECKSUM_META_KEY from charon.utils.strings import remove_prefix from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, PackageBaseTest @@ -45,8 +46,6 @@ def test_uploading_index(self): objs = list(test_bucket.objects.all()) actual_files = [obj.key for obj in objs] - self.assertEqual(41, len(actual_files)) - for f in COMMONS_LOGGING_INDEXES: self.assertIn(f, actual_files) @@ -127,6 +126,85 @@ def test_overlap_upload_index(self): self.assertNotIn("../", index_content) self.assertNotIn(PROD_INFO_SUFFIX, index_content) + def test_re_index(self): + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product, + buckets=[('', TEST_BUCKET, '', '')], + dir_=self.tempdir + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + + for f in COMMONS_CLIENT_456_INDEXES: + self.assertIn(f, actual_files) + + self.check_content(objs, [product]) + + indedx_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) + index_content = str(indedx_obj.get()["Body"].read(), "utf-8") + self.assertIn('../', index_content) + self.assertIn('4.5.6/', index_content) + self.assertIn( + '' + 'maven-metadata.xml', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.md5', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha1', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha256', + index_content + ) + self.assertNotIn("4.5.7/", index_content) + + # insert new in commons-client + commons_client_root = "org/apache/httpcomponents/httpclient/" + commons_client_457_test = commons_client_root + "4.5.7/httpclient-4.5.7.txt" + self.mock_s3.Bucket(TEST_BUCKET).put_object( + Key=commons_client_457_test, + Body="Just a test content" + ) + re_index(TEST_BUCKET, "", 
commons_client_root, "maven") + indedx_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) + index_content = str(indedx_obj.get()["Body"].read(), "utf-8") + self.assertIn('../', index_content) + self.assertIn('4.5.6/', index_content) + self.assertIn( + '' + 'maven-metadata.xml', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.md5', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha1', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha256', + index_content + ) + self.assertIn("4.5.7/", index_content) + self.assertNotIn(PROD_INFO_SUFFIX, index_content) + def test_upload_index_with_short_prefix(self): self.__test_upload_index_with_prefix(SHORT_TEST_PREFIX) diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py index fa0ebc3a..02dc64e0 100644 --- a/tests/test_npm_index.py +++ b/tests/test_npm_index.py @@ -15,6 +15,7 @@ """ from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY from charon.pkgs.npm import handle_npm_uploading, handle_npm_del +from charon.pkgs.indexing import re_index from charon.storage import CHECKSUM_META_KEY from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, PackageBaseTest from tests.commons import ( @@ -182,3 +183,108 @@ def __prepare_content(self, prefix: str = None): buckets=[('', TEST_BUCKET, prefix, DEFAULT_REGISTRY)], dir_=self.tempdir ) + + def test_re_index(self): + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + prefix = SHORT_TEST_PREFIX + + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, SHORT_TEST_PREFIX, DEFAULT_REGISTRY)], + dir_=self.tempdir, + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + + prefixed_7158_indexes = [ + os.path.join(prefix, f) for f in CODE_FRAME_7_15_8_INDEXES + ] + prefixed_namespace_babel_index = os.path.join(prefix, NAMESPACE_BABEL_INDEX) + prefixed_root_index = os.path.join(prefix, COMMONS_ROOT_INDEX) + + for assert_file in prefixed_7158_indexes: + self.assertNotIn(assert_file, actual_files) + + # test package path + index_obj = test_bucket.Object(prefixed_namespace_babel_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('code-frame/', + index_content) + test_file_path = os.path.join(prefix, "@babel/test/test-file.txt") + self.assertNotIn( + '' + 'test/test-file.txt', index_content + ) + # Add entry and re-index package path + test_bucket.put_object( + Key=test_file_path, Body="test content" + ) + re_index(TEST_BUCKET, prefix, "@babel/", "npm") + index_obj = test_bucket.Object(prefixed_namespace_babel_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn( + 'code-frame/', index_content + ) + self.assertIn( + 'test/', index_content + ) + self.assertIn( + '../', index_content + ) + self.assertNotIn(PROD_INFO_SUFFIX, index_content) + + # test root path + index_obj = test_bucket.Object(prefixed_root_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('@babel/', index_content) + test_file_path = os.path.join(prefix, "test/test-file.txt") + self.assertNotIn( + '' + 'test/test-file.txt', index_content + ) + # Add entry and re-index root + test_bucket.put_object( + Key=test_file_path, Body="test content" + ) + re_index(TEST_BUCKET, prefix, "/", "npm") + index_obj = test_bucket.Object(prefixed_root_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('@babel/', 
index_content) + self.assertIn( + '' + 'test/', index_content + ) + self.assertNotIn('../', index_content) + self.assertNotIn(PROD_INFO_SUFFIX, index_content) + + # Test metadata path + metadata_path = "@babel/code-frame/" + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertIn( + os.path.join(prefix, metadata_path, "package.json"), + actual_files + ) + self.assertNotIn( + os.path.join(prefix, metadata_path, "index.html"), + actual_files + ) + # Add entry and re-index metadata path + test_file_path = os.path.join(prefix, metadata_path, "test/test-file.txt") + test_bucket.put_object( + Key=test_file_path, Body="test content" + ) + re_index(TEST_BUCKET, prefix, metadata_path, "npm") + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertIn( + os.path.join(prefix, metadata_path, "package.json"), + actual_files + ) + self.assertNotIn( + os.path.join(prefix, metadata_path, "index.html"), + actual_files + ) diff --git a/tests/test_s3client.py b/tests/test_s3client.py index 1c78db2b..b33e68d7 100644 --- a/tests/test_s3client.py +++ b/tests/test_s3client.py @@ -390,6 +390,79 @@ def test_exists_override_failing(self): file_obj = bucket.Object(path) self.assertEqual(sha1, file_obj.metadata[CHECKSUM_META_KEY]) + def test_simple_upload_file(self): + (temp_root, _, all_files) = self.__prepare_files() + for file_path in all_files: + file_key = file_path[len(temp_root) + 1:] + file_content = open(file_path, "rb").read() + sha1 = read_sha1(file_path) + self.s3_client.simple_upload_file( + file_path=file_key, + file_content=file_content, + check_sum_sha1=sha1, + target=(MY_BUCKET, '') + ) + bucket = self.mock_s3.Bucket(MY_BUCKET) + + objects = list(bucket.objects.all()) + self.assertEqual(len(all_files), len(objects)) + file_path = all_files[0] + file_key = file_path[len(temp_root) + 1:] + file_content = open(file_path, "rb").read() + sha1 = read_sha1(file_path) + obj = bucket.Object(file_key) + self.assertEqual(sha1, obj.metadata[CHECKSUM_META_KEY]) + self.assertEqual(file_key, obj.key) + self.assertEqual( + str(file_content, sys.getdefaultencoding()), + str(obj.get()["Body"].read(), sys.getdefaultencoding()) + ) + + # test upload exists + self.assertRaises( + FileExistsError, + self.s3_client.simple_upload_file, + file_path=file_key, + file_content="file_content", + check_sum_sha1=sha1, + target=(MY_BUCKET, '') + ) + + shutil.rmtree(temp_root) + + def test_simple_delete_file(self): + # prepare files + (temp_root, _, all_files) = self.__prepare_files() + for file_path in all_files: + file_key = file_path[len(temp_root) + 1:] + file_content = open(file_path, "rb").read() + sha1 = read_sha1(file_path) + self.s3_client.simple_upload_file( + file_path=file_key, + file_content=file_content, + check_sum_sha1=sha1, + target=(MY_BUCKET, '') + ) + bucket = self.mock_s3.Bucket(MY_BUCKET) + + objects = list(bucket.objects.all()) + self.assertEqual(len(all_files), len(objects)) + + # test delete file start + file_key = all_files[0][len(temp_root) + 1:] + objects = list(bucket.objects.all()) + self.assertIn(file_key, [o.key for o in objects]) + self.s3_client.simple_delete_file( + file_path=file_key, + target=(MY_BUCKET, "") + ) + + objects = list(bucket.objects.all()) + self.assertEqual(len(all_files) - 1, len(objects)) + self.assertNotIn(file_key, [o.key for o in objects]) + + shutil.rmtree(temp_root) + def __prepare_files(self): test_zip = zipfile.ZipFile( os.path.join(INPUTS, "commons-lang3.zip") From 
7abdd298fd794fd4ad9014e6a897759b6199bd06 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Fri, 23 Feb 2024 19:36:47 +0800 Subject: [PATCH 07/31] Fix a wrong logger typo --- charon/pkgs/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index b342c071..2c840b47 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -276,7 +276,7 @@ def re_index( contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)] if PACKAGE_TYPE_NPM == package_type: if any([True if "package.json" in c else False for c in contents]): - logger.warn( + logger.warning( "The path %s contains NPM package.json which will work as " "package metadata for indexing. This indexing is ignored.", path From f004f479301f422ef8d503313614ce9cc9b552c9 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Mon, 19 Feb 2024 16:41:15 +0800 Subject: [PATCH 08/31] Add command of checksum validation by using http way --- README.md | 8 ++ charon/cmd/__init__.py | 2 + charon/cmd/cmd_checksum.py | 154 ++++++++++++++++++++ charon/pkgs/checksum_http.py | 262 +++++++++++++++++++++++++++++++++++ requirements.txt | 3 + 5 files changed, 429 insertions(+) create mode 100644 charon/cmd/cmd_checksum.py create mode 100644 charon/pkgs/checksum_http.py diff --git a/README.md b/README.md index 887166c0..af0f6c89 100644 --- a/README.md +++ b/README.md @@ -106,3 +106,11 @@ usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet] This command will refresh the index.html for the specified path. * Note that if the path is a NPM metadata path which contains package.json, this refreshment will not work because this type of folder will display the package.json instead of the index.html in http request. + +### charon-validate: validate the checksum of files in specified path in a maven repository + +```bash +usage: charon validate $path [-t, --target] [-f, --report_file_path] [-i, --includes] [-r, --recursive] [-D, --debug] [-q, --quiet] +``` + +This command will validate the checksum of the specified path for the maven repository. It will calculate the sha1 checksum of all artifact files in the specified path and compare with the companied .sha1 files of the artifacts, then record all mismatched artifacts in the report file. If some artifact files misses the companied .sha1 files, they will also be recorded. diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py index 9a3084d0..9ff53846 100644 --- a/charon/cmd/__init__.py +++ b/charon/cmd/__init__.py @@ -17,6 +17,7 @@ from charon.cmd.cmd_upload import upload from charon.cmd.cmd_delete import delete from charon.cmd.cmd_index import index +from charon.cmd.cmd_checksum import validate @group() @@ -31,3 +32,4 @@ def cli(): cli.add_command(upload) cli.add_command(delete) cli.add_command(index) +cli.add_command(validate) diff --git a/charon/cmd/cmd_checksum.py b/charon/cmd/cmd_checksum.py new file mode 100644 index 00000000..b06c01ce --- /dev/null +++ b/charon/cmd/cmd_checksum.py @@ -0,0 +1,154 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from typing import List
+
+from charon.config import get_config
+from charon.pkgs.checksum_http import handle_checksum_validation_http
+from charon.cmd.internal import _decide_mode
+from click import command, option, argument
+
+import traceback
+import logging
+import os
+import sys
+
+logger = logging.getLogger(__name__)
+
+
+@argument(
+    "path",
+    type=str
+)
+@option(
+    "--debug",
+    "-D",
+    "debug",
+    help="Debug mode, will print all debug logs for problem tracking.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--quiet",
+    "-q",
+    "quiet",
+    help="Quiet mode, will suppress most of the logs except warnings and errors.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--skip",
+    "-k",
+    "skips",
+    multiple=True,
+    help="""
+    Paths to be skipped. This is used for recursive mode when $PATH has sub-folders.
+    """
+)
+@option(
+    "--recursive",
+    "-r",
+    "recursive",
+    help="""
+    Decide whether to do the validation recursively in the specified path.
+    Warning: if the path is a high-level one which contains lots of sub-paths
+    (e.g. org/ or com/), setting this flag will make the validation take a
+    very long time.
+    """,
+    is_flag=True,
+    default=False
+)
+@option(
+    "--report-file-path",
+    "-f",
+    "report_file_path",
+    help="""
+    The path where the final report files will be generated
+    """
+)
+@option(
+    "--includes",
+    "-i",
+    "includes",
+    help="""
+    The comma-separated file suffixes for all files that need
+    validation, e.g. ".jar,.pom,.xml". If not specified, the
+    default file types will be used
+    """
+)
+@option(
+    "--target",
+    "-t",
+    "target",
+    help="""
+    The target to do the validation, which will decide which s3 bucket
+    and what root path the validation will run against.
+    """,
+    required=True
+)
+@command()
+def validate(
+    path: str,
+    target: str,
+    includes: List[str],
+    report_file_path: str,
+    skips: List[str],
+    recursive: bool = False,
+    quiet: bool = False,
+    debug: bool = False
+):
+    """This command will validate the checksums of the specified path for the
+    maven repository. It will calculate the sha1 checksum of all artifact
+    files in the specified path, compare them with the accompanying .sha1
+    files of the artifacts, and then record all mismatched artifacts in the
+    report file. Artifact files that are missing their accompanying .sha1
+    files will also be recorded.
+ """ + _decide_mode( + "checksum-{}".format(target), path.replace("/", "_"), + is_quiet=quiet, is_debug=debug + ) + try: + conf = get_config() + if not conf: + sys.exit(1) + + aws_bucket = "" + root_path = "" + t = conf.get_target(target) + if not t: + sys.exit(1) + for b in t: + aws_bucket = b.get('bucket') + prefix = b.get('prefix', '') + + # NOTE: This is a liitle hacky, which constrain the configuration of + # of target should define the bucket to contain "prod-maven" + # or "stage-maven" to decide that the bucket is for maven repo + # in our defined aws env for production or stage + if "prod-maven" not in aws_bucket and "stage-maven" not in aws_bucket: + logger.error("The target %s is not a maven repository.", target) + sys.exit(1) + + root_path = os.path.join(prefix, path) + skip_paths = [os.path.join(prefix, p) for p in skips if p != "" and p != "/"] + if path == "/": + root_path = prefix + handle_checksum_validation_http( + aws_bucket, root_path, includes, report_file_path, recursive, skip_paths + ) + except Exception: + print(traceback.format_exc()) + sys.exit(2) diff --git a/charon/pkgs/checksum_http.py b/charon/pkgs/checksum_http.py new file mode 100644 index 00000000..a3099f37 --- /dev/null +++ b/charon/pkgs/checksum_http.py @@ -0,0 +1,262 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from charon.utils.files import digest +from typing import Tuple, List, Dict +from bs4 import BeautifulSoup +import tempfile +import os +import logging +import requests +import shutil + +logger = logging.getLogger(__name__) + +DEFAULT_ARTIFACT_TYPES = ['.pom', '.jar', '.war', '.ear', '.zip', '.tar', '.gz', '.xml'] + + +def handle_checksum_validation_http( + bucket: str, + path: str, + includes: str, + report_file_path: str, + recursive: bool = False, + skips: List[str] = None +): + """ Handle the checksum check for maven artifacts. + * target contains bucket name and prefix for the bucket, which will + be used to store artifacts with the prefix. See target definition + in Charon configuration for details. + * path is the root path where to start the validation in the bucket. + * includes are the file suffixes which will decide the types of files + to do the validation. + * recursive decide if to validate the path recursively, default false. + Becareful to set true because it will be very time-consuming to do the + recursive validation as it will recursively scan all sub paths in + the path. + + This will generate a file contains all artifacts which mismatched with its + checksum files. Will use sha1 to do the validation. 
+ """ + local_dir = tempfile.mkdtemp() + results = ([], [], []) + try: + if not os.path.exists(local_dir): + os.makedirs(local_dir) + root_url = _decide_root_url(bucket) + logger.debug("Root url is %s", root_url) + _collect_invalid_files( + root_url, path, includes, local_dir, recursive, skips, results + ) + finally: + shutil.rmtree(local_dir) + if results and any([ + results[0] and len(results[0]) > 0, + results[1] and len(results[1]) > 0, + results[2] and len(results[2]) > 0 + ]): + _gen_report(report_file_path, results) + + +def _collect_invalid_files( + root_url: str, + path: str, + includes: str, + work_dir: str, + recursive: bool, + skips: List[str], + results: Tuple[List[str], List[str], List[Dict[str, str]]] +): + if skips and path in skips: + logger.info("Path %s is in skips list, will not check it", path) + return + logger.info("Validating path %s", path) + + try: + folder_url = os.path.join(root_url, path) + items = _list_folder_content(folder_url, path) + sub_folders = [item for item in items if item.endswith("/")] + files = [item for item in items if not item.endswith("/")] + if path+"/" in sub_folders: + sub_folders.remove(path+"/") + logger.debug("Folders in path %s: %s", path, sub_folders) + logger.debug("Files in path %s: %s", path, files) + include_types = DEFAULT_ARTIFACT_TYPES + if includes and includes.strip() != "": + include_types = includes.split(",") + for f in files: + if any(f.endswith(filetype) for filetype in include_types): + _do_validation(root_url, f, work_dir, results) + except Exception as e: + logger.error("Error happened during checking path %s: %s", path, e) + if recursive: + for folder in sub_folders: + _collect_invalid_files(root_url, folder, includes, work_dir, recursive, skips, results) + + +def _do_validation( + root_url: str, file: str, work_dir: str, + results: Tuple[List[str], List[str], List[Dict[str, str]]] +): + mismatch_files = results[0] + missing_checksum_files = results[1] + error_files = results[2] + item_path = file + checksum_file_url = os.path.join(root_url, item_path + ".sha1") + checksum = None + if not _remote_file_exists(checksum_file_url): + logger.info("Missing checksum file for file %s", item_path) + missing_checksum_files.append(item_path) + else: + local_path = os.path.join(work_dir, item_path) + try: + # At first we want to get checksum from s3 metadata for files, but found it + # does not match with the file itself after checking. 
So here we download + # the file itself and do digesting directly + _download_file(root_url, item_path, work_dir) + checksum = digest(local_path) + except Exception as e: + logger.error("Validation failed for file %s: %s", item_path, e) + error_files.append({"path": item_path, "error": str(e)}) + finally: + if os.path.exists(local_path): + os.remove(local_path) + if checksum and checksum.strip() != "": + remote_checksum = _read_remote_file_content(checksum_file_url) + if remote_checksum is None: + logger.info("Missing checksum file for file %s", item_path) + missing_checksum_files.append(item_path) + elif checksum.strip().lower() != remote_checksum.strip().lower(): + logger.info("""Found mismatched file %s, file checksum %s, + remote checksum: %s""", item_path, checksum, remote_checksum) + mismatch_files.append(item_path) + + +def _gen_report( + report_file_path: str, + content: Tuple[List[str], List[str], List[Dict[str, str]]] +): + """Generate a report file.""" + work_dir = report_file_path + if work_dir and work_dir.strip() != "": + if not os.path.isdir(work_dir): + tmp_dir = tempfile.gettempdir() + work_dir = os.path.join(tmp_dir, work_dir) + if not os.path.isdir(work_dir): + os.makedirs(work_dir) + logger.debug("Created %s as report file directory.", work_dir) + else: + work_dir = tempfile.mkdtemp() + logger.debug("""The report file path is empty. + Created temp dir %s as report file path.""", work_dir) + + def _check_and_remove_file(file_name: str): + if os.path.isfile(file_name): + os.remove(file_name) + + def _write_one_col_file(items: List[str], file_name: str): + if items and len(items) > 0: + _check_and_remove_file(file_name) + with open(file_name, "w") as f: + for i in items: + f.write(i + "\n") + logger.info("The report file %s is generated.", file_name) + + _write_one_col_file(content[0], os.path.join(work_dir, "mismatched_files.csv")) + _write_one_col_file(content[1], os.path.join(work_dir, "missing_checksum_files.csv")) + + if content[2] and len(content[2]) > 0: + error_file = os.path.join(work_dir, "error_files.csv") + _check_and_remove_file(error_file) + with open(error_file, "w") as f: + f.write("path,error\n") + for d in content[2]: + f.write("{path},{error}\n".format(path=d["path"], error=d["error"])) + logger.info("The report file %s is generated.", error_file) + + +def _remote_file_exists(file_url: str) -> bool: + with requests.head(file_url) as r: + if r.status_code == 200: + return True + return False + + +def _download_file(root_url: str, file_path: str, work_dir: str): + file_url = os.path.join(root_url, file_path) + logger.debug("Start downloading file %s", file_url) + local_filename = os.path.join(work_dir, file_path) + local_dir = os.path.dirname(local_filename) + if not os.path.exists(local_dir): + logger.debug("Creating dir %s", local_dir) + os.makedirs(local_dir) + # NOTE the stream=True parameter below + try: + with requests.get(file_url, stream=True) as r: + if r.status_code == 200: + with open(local_filename, 'wb') as f: + # shutil.copyfileobj(r.raw, f) + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + logger.debug("Downloaded file %s to %s", file_path, local_filename) + except Exception as e: + logger.error("Download file %s failed: %s", file_path, e) + raise e + return local_filename + + +def _list_folder_content(folder_url: str, folder_path: str) -> List[str]: + try: + with requests.get(folder_url) as r: + if r.status_code == 200: + contentType = r.headers.get('Content-Type') + if contentType and "text/html" in contentType: + 
+                    pageContent = r.text
+                    return _parseContent(pageContent, folder_path)
+            else:
+                logger.warning("%s is not a folder!", folder_url)
+    except Exception as e:
+        logger.error("Can not list folder %s. The error is %s", folder_url, e)
+    return []
+
+
+def _parseContent(pageContent: str, parent: str) -> List[str]:
+    items = []
+    soup = BeautifulSoup(pageContent, "html.parser")
+    contents = soup.find("ul", id="contents").find_all("a")
+    for c in contents:
+        item = c["href"]
+        if not item or item.strip() == '../':
+            continue
+        items.append(os.path.join(parent, item))
+    return items
+
+
+def _read_remote_file_content(remote_file_url: str) -> str:
+    try:
+        with requests.get(remote_file_url) as r:
+            if r.status_code == 200:
+                return r.text.strip() if r.text else ""
+    except Exception as e:
+        logger.error("Can not read file %s. The error is %s", remote_file_url, e)
+    return None
+
+
+def _decide_root_url(bucket: str) -> str:
+    if bucket.strip().startswith("prod-maven"):
+        return "https://maven.repository.redhat.com"
+    if bucket.strip().startswith("stage-maven"):
+        return "https://maven.stage.repository.redhat.com"
+    return None
diff --git a/requirements.txt b/requirements.txt
index 10c75966..783d24e7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,6 @@ PyYAML==6.0.1
 defusedxml==0.7.1
 subresource-integrity==0.2
 jsonschema==4.19.0
+beautifulsoup4==4.11.1
+requests==2.31.0
+urllib3==1.26.15
\ No newline at end of file

From 33592deda43df779766157f33b5d1624ce9691d9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 28 Feb 2024 09:39:05 +0000
Subject: [PATCH 09/31] Chore(deps): Bump urllib3 from 1.26.15 to 1.26.18

Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.15 to 1.26.18.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/1.26.15...1.26.18)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 783d24e7..6ae31725 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,4 @@ subresource-integrity==0.2
 jsonschema==4.19.0
 beautifulsoup4==4.11.1
 requests==2.31.0
-urllib3==1.26.15
\ No newline at end of file
+urllib3==1.26.18
\ No newline at end of file

From 54cf5bda519243579d50f5008ad4d647c49cd915 Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Wed, 28 Feb 2024 20:05:14 +0800
Subject: [PATCH 10/31] Fix a bug for re-index

Found that a prefix of "/" in the target caused some parts of the path
to go missing during re-index. Fixed here.

---
 charon/pkgs/indexing.py | 13 +++++++------
 charon/storage.py       |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py
index 2c840b47..0ab43057 100644
--- a/charon/pkgs/indexing.py
+++ b/charon/pkgs/indexing.py
@@ -269,7 +269,8 @@ def re_index(
     """Refresh the index.html for the specified folder in the bucket.
""" s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) - s3_folder = os.path.join(prefix, path) + real_prefix = prefix if prefix.strip() != "/" else "" + s3_folder = os.path.join(real_prefix, path) if path.strip() == "" or path.strip() == "/": s3_folder = prefix items: List[str] = s3_client.list_folder_content(bucket, s3_folder) @@ -285,11 +286,11 @@ def re_index( if len(contents) >= 1: real_contents = [] - if prefix and prefix.strip() != "": + if real_prefix and real_prefix.strip() != "": for c in contents: if c.strip() != "": - if c.startswith(prefix): - real_c = remove_prefix(c, prefix) + if c.startswith(real_prefix): + real_c = remove_prefix(c, real_prefix) real_c = remove_prefix(real_c, "/") real_contents.append(real_c) else: @@ -302,9 +303,9 @@ def re_index( index_path = os.path.join(path, "index.html") if path == "/": index_path = "index.html" - s3_client.simple_delete_file(index_path, (bucket, prefix)) + s3_client.simple_delete_file(index_path, (bucket, real_prefix)) s3_client.simple_upload_file( - index_path, index_content, (bucket, prefix), + index_path, index_content, (bucket, real_prefix), "text/html", digest_content(index_content) ) else: diff --git a/charon/storage.py b/charon/storage.py index f07b1c50..68b73a32 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -713,7 +713,7 @@ def simple_upload_file( Metadata=f_meta, ContentType=content_type ) - logger.debug('Uploaded %s to bucket %s', file_path, bucket) + logger.debug('Uploaded %s to bucket %s', path_key, bucket) except (ClientError, HTTPClientError) as e: logger.error( "ERROR: file %s not uploaded to bucket %s due to error: %s ", From b58e6855acc406332d1b5539a7e78a314c13aa03 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 29 Feb 2024 09:27:50 +0800 Subject: [PATCH 11/31] Some chore fix * Add new requires in spec file * Remove duplicated requirements * Little pydoc fix --- charon.spec | 2 +- charon/storage.py | 2 +- requirements.txt | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/charon.spec b/charon.spec index dfe39b55..d365774a 100644 --- a/charon.spec +++ b/charon.spec @@ -50,7 +50,7 @@ Requires: python%{python3_pkgversion}-importlib-metadata Requires: python%{python3_pkgversion}-zipp Requires: python%{python3_pkgversion}-attrs Requires: python%{python3_pkgversion}-pyrsistent - +Requires: python%{python3_pkgversion}-beautifulsoup4 %description Simple Python tool with command line interface for charon init, diff --git a/charon/storage.py b/charon/storage.py index 68b73a32..6c2fcfde 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -676,7 +676,7 @@ def simple_upload_file( ): """ Uploads file to s3 bucket, regardless of any extra information like product and version info. - * Warning: this will directly delete the files even if + * Warning: this will directly overwrite the files even if it has lots of product info, so please be careful to use. 
        If you want to upload product artifact files, please use upload_files

diff --git a/requirements.txt b/requirements.txt
index 6ae31725..e2c3b87e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,5 +9,4 @@ defusedxml==0.7.1
 subresource-integrity==0.2
 jsonschema==4.19.0
 beautifulsoup4==4.11.1
-requests==2.31.0
 urllib3==1.26.18
\ No newline at end of file

From 41d2b3277806d5a014537e7209eb3b198aae368e Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Wed, 20 Mar 2024 15:12:36 +0800
Subject: [PATCH 12/31] Use HTMLParser instead of bs4 in checksum validation

---
 charon.spec                  |  1 -
 charon/pkgs/checksum_http.py | 30 ++++++++++++++++++------------
 requirements.txt             |  1 -
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/charon.spec b/charon.spec
index d365774a..ffb77655 100644
--- a/charon.spec
+++ b/charon.spec
@@ -50,7 +50,6 @@ Requires: python%{python3_pkgversion}-importlib-metadata
 Requires: python%{python3_pkgversion}-zipp
 Requires: python%{python3_pkgversion}-attrs
 Requires: python%{python3_pkgversion}-pyrsistent
-Requires: python%{python3_pkgversion}-beautifulsoup4

 %description
 Simple Python tool with command line interface for charon init,
diff --git a/charon/pkgs/checksum_http.py b/charon/pkgs/checksum_http.py
index a3099f37..515bf5a3 100644
--- a/charon/pkgs/checksum_http.py
+++ b/charon/pkgs/checksum_http.py
@@ -15,7 +15,7 @@
 """
 from charon.utils.files import digest
 from typing import Tuple, List, Dict
-from bs4 import BeautifulSoup
+from html.parser import HTMLParser
 import tempfile
 import os
 import logging
@@ -224,7 +224,9 @@ def _list_folder_content(folder_url: str, folder_path: str) -> List[str]:
                 contentType = r.headers.get('Content-Type')
                 if contentType and "text/html" in contentType:
                     pageContent = r.text
-                    return _parseContent(pageContent, folder_path)
+                    p = _IndexParser()
+                    p.feed(pageContent)
+                    return p.get_content(folder_path)
             else:
                 logger.warning("%s is not a folder!", folder_url)
     except Exception as e:
@@ -232,16 +234,20 @@ def _list_folder_content(folder_url: str, folder_path: str) -> List[str]:
     return []


-def _parseContent(pageContent: str, parent: str) -> List[str]:
-    items = []
-    soup = BeautifulSoup(pageContent, "html.parser")
-    contents = soup.find("ul", id="contents").find_all("a")
-    for c in contents:
-        item = c["href"]
-        if not item or item.strip() == '../':
-            continue
-        items.append(os.path.join(parent, item))
-    return items
+class _IndexParser(HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.reset()
+        self.__content = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag == "a":
+            for name, link in attrs:
+                if name == "href" and link.strip() not in ['../', '']:
+                    self.__content.append(link)
+
+    def get_content(self, parent):
+        return [os.path.join(parent, i) for i in self.__content]


 def _read_remote_file_content(remote_file_url: str) -> str:
diff --git a/requirements.txt b/requirements.txt
index e2c3b87e..cc669871 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,5 +8,4 @@ PyYAML==6.0.1
 defusedxml==0.7.1
 subresource-integrity==0.2
 jsonschema==4.19.0
-beautifulsoup4==4.11.1
 urllib3==1.26.18
\ No newline at end of file

From 0fbf7eab1670f4c69d8f747bf07e7e10ee550e99 Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Wed, 20 Mar 2024 18:05:39 +0800
Subject: [PATCH 13/31] Mark sample files

The files in config/ and template/ are sample files, so they have been
renamed to .sample, and READMEs have been added to show how to use them.
This makes it clear that they are not source code files.
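(For orientation — an illustrative sketch, not part of this patch: a charon.yaml derived from the sample might look roughly like the snippet below. The key names follow charon/config.py and charon/cmd/internal.py; the target name, bucket, prefix, domain, and profile values are placeholders.)

```yaml
ignore_patterns:
  - ".*snapshot.*"

targets:
  ga:                                          # target name used on the command line
    - bucket: "prod-maven-ga"                  # S3 bucket backing this target
      prefix: "ga"                             # root path inside the bucket
      domain: "maven.repository.redhat.com"    # CloudFront domain (optional)

manifest_bucket: "manifest"
aws_profile: "prod"
aws_cf_enable: true                            # CloudFront invalidation after upload/delete
```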
---
 config/README                                               | 6 ++++++
 config/{aws-credentials => aws-credentials.sample}          | 0
 config/{charon.yaml => charon.yaml.sample}                  | 0
 template/README                                             | 3 +++
 ...etype-catalog.xml.j2 => archetype-catalog.xml.j2.sample} | 0
 template/{index.html.j2 => index.html.j2.sample}            | 0
 .../{maven-metadata.xml.j2 => maven-metadata.xml.j2.sample} | 0
 7 files changed, 9 insertions(+)
 create mode 100644 config/README
 rename config/{aws-credentials => aws-credentials.sample} (100%)
 rename config/{charon.yaml => charon.yaml.sample} (100%)
 create mode 100644 template/README
 rename template/{archetype-catalog.xml.j2 => archetype-catalog.xml.j2.sample} (100%)
 rename template/{index.html.j2 => index.html.j2.sample} (100%)
 rename template/{maven-metadata.xml.j2 => maven-metadata.xml.j2.sample} (100%)

diff --git a/config/README b/config/README
new file mode 100644
index 00000000..239129e1
--- /dev/null
+++ b/config/README
@@ -0,0 +1,6 @@
+# Sample config files

+There are two sample config files here:

+* [aws-credentials.sample](./aws-credentials.sample): sample aws credentials file. It is the same as the official aws credentials config file.
+* [charon.yaml.sample](./charon.yaml.sample): sample charon config file. You can put it into $HOME/.charon/charon.yaml and change the content according to your requirements.
diff --git a/config/aws-credentials b/config/aws-credentials.sample
similarity index 100%
rename from config/aws-credentials
rename to config/aws-credentials.sample
diff --git a/config/charon.yaml b/config/charon.yaml.sample
similarity index 100%
rename from config/charon.yaml
rename to config/charon.yaml.sample
diff --git a/template/README b/template/README
new file mode 100644
index 00000000..46e47f9a
--- /dev/null
+++ b/template/README
@@ -0,0 +1,3 @@
+# Sample template files to generate metadata

+These template files are used to generate the related metadata. You can make changes based on the sample templates and then put them into $HOME/.charon/template/ to replace the default templates defined in the charon sources.
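(Likewise illustrative, not the shipped template: judging from the `<ul id="contents">` markup that the index parser and the indexing tests expect, a minimal index.html.j2 compatible with that shape could look like the sketch below — the `index_id` and `items` variable names are assumptions.)

```html
<!DOCTYPE html>
<html>
  <head><title>Index of {{ index_id }}</title></head>
  <body>
    <header><h1>Index of {{ index_id }}</h1></header>
    <ul id="contents">
      {%- for item in items %}
      <li><a href="{{ item }}" title="{{ item }}">{{ item }}</a></li>
      {%- endfor %}
    </ul>
  </body>
</html>
```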
diff --git a/template/archetype-catalog.xml.j2 b/template/archetype-catalog.xml.j2.sample
similarity index 100%
rename from template/archetype-catalog.xml.j2
rename to template/archetype-catalog.xml.j2.sample
diff --git a/template/index.html.j2 b/template/index.html.j2.sample
similarity index 100%
rename from template/index.html.j2
rename to template/index.html.j2.sample
diff --git a/template/maven-metadata.xml.j2 b/template/maven-metadata.xml.j2.sample
similarity index 100%
rename from template/maven-metadata.xml.j2
rename to template/maven-metadata.xml.j2.sample

From 2468c2065adbd6bad692f8e4735dd53dbe5027d9 Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Thu, 14 Mar 2024 15:31:27 +0800
Subject: [PATCH 14/31] Add support for CloudFront invalidating

---
 charon.spec                           |   7 ++
 charon/cache.py                       | 134 ++++++++++++++++++++++
 charon/cmd/cmd_delete.py              |   2 +
 charon/cmd/cmd_index.py               |  45 ++++-----
 charon/cmd/cmd_upload.py              |   2 +
 charon/cmd/internal.py                |   3 +-
 charon/config.py                      |   4 +
 charon/pkgs/indexing.py               |  21 ++--
 charon/pkgs/maven.py                  |  46 ++++++++-
 charon/pkgs/npm.py                    |  43 ++++++++-
 charon/pkgs/pkg_utils.py              |  61 +++++++---
 setup.py                              |   2 +-
 tests/base.py                         |  55 ++++++---
 tests/constants.py                    |  30 ++++++
 tests/requirements.txt                |   4 +-
 tests/test_cf_maven_ops.py            |  73 ++++++++++++
 tests/test_cf_npm_ops.py              |  71 ++++++++++++
 tests/test_cf_reindex.py              |  77 +++++++++++++
 tests/test_cfclient.py                |  65 +++++++++++
 tests/test_manifest_del.py            |   4 +-
 tests/test_manifest_upload.py         |   4 +-
 tests/test_maven_del.py               |   4 +-
 tests/test_maven_del_multi_tgts.py    |   4 +-
 tests/test_maven_index.py             |  10 +-
 tests/test_maven_index_multi_tgts.py  |   4 +-
 tests/test_maven_sign.py              |   4 +-
 tests/test_maven_upload.py            |   4 +-
 tests/test_maven_upload_multi_tgts.py |   4 +-
 tests/test_npm_del.py                 |   4 +-
 tests/test_npm_del_multi_tgts.py      |   4 +-
 tests/test_npm_dist_gen.py            |   4 +-
 tests/test_npm_index.py               |  22 ++++-
 tests/test_npm_index_multi_tgts.py    |   4 +-
 tests/test_npm_meta.py                |   4 +-
 tests/test_npm_upload.py              |   4 +-
 tests/test_npm_upload_multi_tgts.py   |   4 +-
 tests/test_pkgs_dryrun.py             |   4 +-
 tests/test_s3client.py                |   4 +-
 38 files changed, 737 insertions(+), 108 deletions(-)
 create mode 100644 charon/cache.py
 create mode 100644 tests/test_cf_maven_ops.py
 create mode 100644 tests/test_cf_npm_ops.py
 create mode 100644 tests/test_cf_reindex.py
 create mode 100644 tests/test_cfclient.py

diff --git a/charon.spec b/charon.spec
index ffb77655..249a5527 100644
--- a/charon.spec
+++ b/charon.spec
@@ -80,6 +80,13 @@ export LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8

 %changelog
+* Mon Mar 25 2024 Gang Li
+- 1.3.0 release
+- Add validate command: validate the checksum for maven artifacts
+- Add index command: support re-indexing of the specified folder
+- Add CF invalidating feature: invalidate generated metadata files (maven-metadata*/package.json/index.html) after product uploading/deleting in CloudFront
+- Add CF invalidating feature: add command to do CF invalidating and checking
+
 * Mon Sep 18 2023 Harsh Modi
 - 1.2.2 release
 - hot fix for "dist_tags" derived issue
diff --git a/charon/cache.py b/charon/cache.py
new file mode 100644
index 00000000..b289f8ca
--- /dev/null
+++ b/charon/cache.py
@@ -0,0 +1,134 @@
+from boto3 import session
+from botocore.exceptions import ClientError
+from typing import Dict, List
+import os
+import logging
+import uuid
+
+logger = logging.getLogger(__name__)
+
+ENDPOINT_ENV = "aws_endpoint_url"
+
+DEFAULT_BUCKET_TO_DOMAIN = {
+    "prod-maven-ga": "maven.repository.redhat.com",
+    "prod-maven-ea": "maven.repository.redhat.com",
"maven.repository.redhat.com", + "stage-maven-ga": "maven.strage.repository.redhat.com", + "stage-maven-ea": "maven.strage.repository.redhat.com", + "prod-npm": "npm.repository.redhat.com", + "stage-npm": "npm.stage.repository.redhat.com" +} + + +class CFClient(object): + """The CFClient is a wrapper of the original boto3 clouldfrong client, + which will provide CloudFront functions to be used in the charon. + """ + + def __init__( + self, + aws_profile=None, + extra_conf=None + ) -> None: + self.__client = self.__init_aws_client(aws_profile, extra_conf) + + def __init_aws_client( + self, aws_profile=None, extra_conf=None + ): + if aws_profile: + logger.debug("Using aws profile: %s", aws_profile) + cf_session = session.Session(profile_name=aws_profile) + else: + cf_session = session.Session() + endpoint_url = self.__get_endpoint(extra_conf) + return cf_session.client( + 'cloudfront', + endpoint_url=endpoint_url + ) + + def __get_endpoint(self, extra_conf) -> str: + endpoint_url = os.getenv(ENDPOINT_ENV) + if not endpoint_url or not endpoint_url.strip(): + if isinstance(extra_conf, Dict): + endpoint_url = extra_conf.get(ENDPOINT_ENV, None) + if endpoint_url: + logger.info("Using endpoint url for aws client: %s", endpoint_url) + else: + logger.debug("No user-specified endpoint url is used.") + return endpoint_url + + def invalidate_paths(self, distr_id: str, paths: List[str]) -> Dict[str, str]: + """Send a invalidating requests for the paths in distribution to CloudFront. + This will invalidate the paths in the distribution to enforce the refreshment + from backend S3 bucket for these paths. For details see: + https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html + * The distr_id is the id for the distribution. This id can be get through + get_dist_id_by_domain(domain) function + * Can specify the invalidating paths through paths param. + """ + caller_ref = str(uuid.uuid4()) + logger.debug("[CloudFront] Creating invalidation for paths: %s", paths) + try: + response = self.__client.create_invalidation( + DistributionId=distr_id, + InvalidationBatch={ + 'CallerReference': caller_ref, + 'Paths': { + 'Quantity': len(paths), + 'Items': paths + } + } + ) + if response: + invalidation = response.get('Invalidation', {}) + return { + 'Id': invalidation.get('Id', None), + 'Status': invalidation.get('Status', None) + } + except Exception as err: + logger.error( + "[CloudFront] Error occurred while creating invalidation, error: %s", err + ) + + def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict: + try: + response = self.__client.get_invalidation( + DistributionId=distr_id, + Id=invalidation_id + ) + if response: + invalidation = response.get('Invalidation', {}) + return { + 'Id': invalidation.get('Id', None), + 'CreateTime': invalidation.get('CreateTime', None), + 'Status': invalidation.get('Status', None) + } + except Exception as err: + logger.error( + "[CloudFront] Error occurred while check invalidation of id %s, " + "error: %s", invalidation_id, err + ) + + def get_dist_id_by_domain(self, domain: str) -> str: + """Get distribution id by a domain name. 
+        The id can be used to send invalidation requests through the
+        #invalidate_paths function
+        * Domains are Ronda domains, like "maven.repository.redhat.com"
+          or "npm.repository.redhat.com"
+        """
+        try:
+            response = self.__client.list_distributions()
+            if response:
+                dist_list_items = response.get("DistributionList", {}).get("Items", [])
+                for distr in dist_list_items:
+                    aliases_items = distr.get('Aliases', {}).get('Items', [])
+                    if aliases_items and domain in aliases_items:
+                        return distr['Id']
+            logger.error("[CloudFront]: Distribution not found for domain %s", domain)
+        except ClientError as err:
+            logger.error(
+                "[CloudFront]: Error occurred while getting distribution for domain %s: %s",
+                domain, err
+            )
+        return None
+
+    def get_domain_by_bucket(self, bucket: str) -> str:
+        return DEFAULT_BUCKET_TO_DOMAIN.get(bucket, None)
diff --git a/charon/cmd/cmd_delete.py b/charon/cmd/cmd_delete.py
index fd2bba5f..dda57d2e 100644
--- a/charon/cmd/cmd_delete.py
+++ b/charon/cmd/cmd_delete.py
@@ -158,6 +158,7 @@ def delete(
             buckets=buckets,
             aws_profile=aws_profile,
             dir_=work_dir,
+            cf_enable=conf.is_aws_cf_enable(),
             dry_run=dryrun,
             manifest_bucket_name=manifest_bucket_name
         )
@@ -178,6 +179,7 @@ def delete(
             buckets=buckets,
             aws_profile=aws_profile,
             dir_=work_dir,
+            cf_enable=conf.is_aws_cf_enable(),
             dry_run=dryrun,
             manifest_bucket_name=manifest_bucket_name
         )
diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py
index 281ed876..e9a3e18c 100644
--- a/charon/cmd/cmd_index.py
+++ b/charon/cmd/cmd_index.py
@@ -87,34 +87,31 @@ def index(
         # log is recorded get_target
         sys.exit(1)

-    aws_bucket = None
-    prefix = None
-    for b in conf.get_target(target):
+    for b in tgt:
         aws_bucket = b.get('bucket')
-        prefix = b.get('prefix', '')

-    package_type = None
-    if "maven" in aws_bucket:
-        logger.info(
-            "The target is a maven repository. Will refresh the index as maven package type"
-        )
-        package_type = PACKAGE_TYPE_MAVEN
-    elif "npm" in aws_bucket:
-        package_type = PACKAGE_TYPE_NPM
-        logger.info(
-            "The target is a npm repository. Will refresh the index as npm package type"
-        )
-    else:
-        logger.error(
-            "The target is not supported. Only maven or npm target is supported."
-        )
-        sys.exit(1)
+        package_type = None
+        if "maven" in aws_bucket:
+            logger.info(
+                "The target is a maven repository. Will refresh the index as maven package type"
+            )
+            package_type = PACKAGE_TYPE_MAVEN
+        elif "npm" in aws_bucket:
+            package_type = PACKAGE_TYPE_NPM
+            logger.info(
+                "The target is a npm repository. Will refresh the index as npm package type"
+            )
+        else:
+            logger.error(
+                "The target %s is not supported. Only maven or npm target is supported.",
+                target
+            )

-    if not aws_bucket:
-        logger.error("No bucket specified!")
-        sys.exit(1)
+        if not aws_bucket:
+            logger.error("No bucket specified for target %s!", target)
+        else:
+            re_index(b, path, package_type, aws_profile, dry_run=dryrun)

-    re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun)
     except Exception:
         print(traceback.format_exc())
         sys.exit(2)  # distinguish between exception and bad config or bad state
diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py
index 71c30295..2fe19901 100644
--- a/charon/cmd/cmd_upload.py
+++ b/charon/cmd/cmd_upload.py
@@ -178,6 +178,7 @@ def upload(
             aws_profile=aws_profile,
             dir_=work_dir,
             gen_sign=contain_signature,
+            cf_enable=conf.is_aws_cf_enable(),
             key=sign_key,
             dry_run=dryrun,
             manifest_bucket_name=manifest_bucket_name
@@ -200,6 +201,7 @@ def upload(
             aws_profile=aws_profile,
             dir_=work_dir,
             gen_sign=contain_signature,
+            cf_enable=conf.is_aws_cf_enable(),
             key=sign_key,
             dry_run=dryrun,
             manifest_bucket_name=manifest_bucket_name
diff --git a/charon/cmd/internal.py b/charon/cmd/internal.py
index cb76559a..edc87c05 100644
--- a/charon/cmd/internal.py
+++ b/charon/cmd/internal.py
@@ -35,7 +35,8 @@ def _get_buckets(targets: List[str], conf: CharonConfig) -> List[Tuple[str, str,
         aws_bucket = bucket.get('bucket')
         prefix = bucket.get('prefix', '')
         registry = bucket.get('registry', DEFAULT_REGISTRY)
-        buckets.append((target, aws_bucket, prefix, registry))
+        cf_domain = bucket.get('domain', None)
+        buckets.append((target, aws_bucket, prefix, registry, cf_domain))
     return buckets
diff --git a/charon/config.py b/charon/config.py
index 8f128617..f9b6403c 100644
--- a/charon/config.py
+++ b/charon/config.py
@@ -38,6 +38,7 @@ def __init__(self, data: Dict):
         self.__manifest_bucket: str = data.get("manifest_bucket", None)
         self.__ignore_signature_suffix: Dict = data.get("ignore_signature_suffix", None)
         self.__signature_command: str = data.get("detach_signature_command", None)
+        self.__aws_cf_enable: bool = data.get("aws_cf_enable", False)

     def get_ignore_patterns(self) -> List[str]:
         return self.__ignore_patterns
@@ -63,6 +64,9 @@ def get_ignore_signature_suffix(self, package_type: str) -> List[str]:
     def get_detach_signature_command(self) -> str:
         return self.__signature_command

+    def is_aws_cf_enable(self) -> bool:
+        return self.__aws_cf_enable
+

 def get_config() -> Optional[CharonConfig]:
     config_file_path = os.path.join(os.getenv("HOME"), ".charon", CONFIG_FILE)
diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py
index 0ab43057..bd9192ab 100644
--- a/charon/pkgs/indexing.py
+++ b/charon/pkgs/indexing.py
@@ -15,13 +15,15 @@
 """
 from charon.config import get_template
 from charon.storage import S3Client
+from charon.cache import CFClient
+from charon.pkgs.pkg_utils import invalidate_cf_paths
 from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
                               PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM,
                               PROD_INFO_SUFFIX)
 from charon.utils.files import digest_content
 from jinja2 import Template
 import os
 import logging
-from typing import List, Set
+from typing import List, Set, Tuple

 from charon.utils.strings import remove_prefix
@@ -259,21 +261,23 @@ def __compare(self, other) -> int:


 def re_index(
-    bucket: str,
-    prefix: str,
+    bucket: Tuple[str, str, str, str, str],
     path: str,
     package_type: str,
     aws_profile: str = None,
+    cf_enable: bool = False,
     dry_run: bool = False
 ):
     """Refresh the index.html for the specified folder in the bucket.
""" + bucket_name = bucket[1] + prefix = bucket[2] s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) real_prefix = prefix if prefix.strip() != "/" else "" s3_folder = os.path.join(real_prefix, path) if path.strip() == "" or path.strip() == "/": s3_folder = prefix - items: List[str] = s3_client.list_folder_content(bucket, s3_folder) + items: List[str] = s3_client.list_folder_content(bucket_name, s3_folder) contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)] if PACKAGE_TYPE_NPM == package_type: if any([True if "package.json" in c else False for c in contents]): @@ -303,14 +307,17 @@ def re_index( index_path = os.path.join(path, "index.html") if path == "/": index_path = "index.html" - s3_client.simple_delete_file(index_path, (bucket, real_prefix)) + s3_client.simple_delete_file(index_path, (bucket_name, real_prefix)) s3_client.simple_upload_file( - index_path, index_content, (bucket, real_prefix), + index_path, index_content, (bucket_name, real_prefix), "text/html", digest_content(index_content) ) + if cf_enable: + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, [index_path]) else: logger.warning( "The path %s does not contain any contents in bucket %s. " "Will not do any re-indexing", - path, bucket + path, bucket_name ) diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 9fd57422..257bb2b9 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -20,7 +20,12 @@ from charon.utils.archive import extract_zip_all from charon.utils.strings import remove_prefix from charon.storage import S3Client -from charon.pkgs.pkg_utils import upload_post_process, rollback_post_process +from charon.cache import CFClient +from charon.pkgs.pkg_utils import ( + upload_post_process, + rollback_post_process, + invalidate_cf_paths +) from charon.config import CharonConfig, get_template, get_config from charon.constants import (META_FILE_GEN_KEY, META_FILE_DEL_KEY, META_FILE_FAILED, MAVEN_METADATA_TEMPLATE, @@ -257,11 +262,12 @@ def handle_maven_uploading( prod_key: str, ignore_patterns=None, root="maven-repository", - buckets: List[Tuple[str, str, str, str]] = None, + buckets: List[Tuple[str, str, str, str, str]] = None, aws_profile=None, dir_=None, do_index=True, gen_sign=False, + cf_enable=False, key=None, dry_run=False, manifest_bucket_name=None @@ -322,6 +328,9 @@ def handle_maven_uploading( succeeded = True generated_signs = [] for bucket in buckets: + # prepare cf invalidate files + cf_invalidate_paths = [] + # 5. Do manifest uploading if not manifest_bucket_name: logger.warning( @@ -360,6 +369,9 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("maven-metadata.xml updating done in bucket %s\n", bucket_name) + # Add maven-metadata.xml to CF invalidate paths + if cf_enable: + cf_invalidate_paths.extend(meta_files.get(META_FILE_GEN_KEY, [])) # 8. Determine refreshment of archetype-catalog.xml if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")): @@ -386,6 +398,9 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("archetype-catalog.xml updating done in bucket %s\n", bucket_name) + # Add archtype-catalog to invalidate paths + if cf_enable: + cf_invalidate_paths.extend(archetype_files) # 10. 
Generate signature file if contain_signature is set to True if gen_sign: @@ -436,9 +451,17 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("Index files updating done\n") + # Add index files to Cf invalidate paths + if cf_enable: + cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypass indexing") + # Finally do the CF invalidating for metadata files + if cf_enable and len(cf_invalidate_paths) > 0: + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level) + upload_post_process(failed_files, failed_metas, prod_key, bucket_name) succeeded = succeeded and len(failed_files) <= 0 and len(failed_metas) <= 0 @@ -450,10 +473,11 @@ def handle_maven_del( prod_key: str, ignore_patterns=None, root="maven-repository", - buckets: List[Tuple[str, str, str, str]] = None, + buckets: List[Tuple[str, str, str, str, str]] = None, aws_profile=None, dir_=None, do_index=True, + cf_enable=False, dry_run=False, manifest_bucket_name=None ) -> Tuple[str, bool]: @@ -487,6 +511,9 @@ def handle_maven_del( logger.debug("Valid poms: %s", valid_poms) succeeded = True for bucket in buckets: + # prepare cf invalidation paths + cf_invalidate_paths = [] + prefix = remove_prefix(bucket[2], "/") s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) bucket_name = bucket[1] @@ -544,6 +571,11 @@ def handle_maven_del( if len(_failed_metas) > 0: failed_metas.extend(_failed_metas) logger.info("maven-metadata.xml updating done\n") + if cf_enable: + logger.debug( + "Extending invalidate_paths with %s:", all_meta_files + ) + cf_invalidate_paths.extend(all_meta_files) # 7. Determine refreshment of archetype-catalog.xml if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")): @@ -578,6 +610,8 @@ def handle_maven_del( if len(_failed_metas) > 0: failed_metas.extend(_failed_metas) logger.info("archetype-catalog.xml updating done\n") + if cf_enable: + cf_invalidate_paths.extend(archetype_files) if do_index: logger.info("Start generating index files for all changed entries") @@ -596,9 +630,15 @@ def handle_maven_del( if len(_failed_index_files) > 0: failed_metas.extend(_failed_index_files) logger.info("Index files updating done.\n") + if cf_enable: + cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypassing indexing") + if cf_enable and len(cf_invalidate_paths): + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level) + rollback_post_process(failed_files, failed_metas, prod_key, bucket_name) succeeded = succeeded and len(failed_files) == 0 and len(failed_metas) == 0 diff --git a/charon/pkgs/npm.py b/charon/pkgs/npm.py index 684e8457..ad418ff1 100644 --- a/charon/pkgs/npm.py +++ b/charon/pkgs/npm.py @@ -28,8 +28,13 @@ from charon.config import CharonConfig, get_config from charon.constants import META_FILE_GEN_KEY, META_FILE_DEL_KEY, PACKAGE_TYPE_NPM from charon.storage import S3Client +from charon.cache import CFClient from charon.utils.archive import extract_npm_tarball -from charon.pkgs.pkg_utils import upload_post_process, rollback_post_process +from charon.pkgs.pkg_utils import ( + upload_post_process, + rollback_post_process, + invalidate_cf_paths +) from charon.utils.strings import remove_prefix from charon.utils.files import write_manifest from charon.utils.map import del_none, replace_field @@ -78,6 +83,7 @@ def handle_npm_uploading( dir_=None, do_index=True, gen_sign=False, + cf_enable=False, key=None, dry_run=False, 
manifest_bucket_name=None @@ -96,9 +102,13 @@ def handle_npm_uploading( Returns the directory used for archive processing and if uploading is successful """ + client = S3Client(aws_profile=aws_profile, dry_run=dry_run) generated_signs = [] for bucket in buckets: + # prepare cf invalidate files + cf_invalidate_paths = [] + bucket_name = bucket[1] prefix = remove_prefix(bucket[2], "/") registry = bucket[3] @@ -159,6 +169,13 @@ def handle_npm_uploading( client, bucket_name, target_dir, package_metadata, prefix ) logger.info("package.json generation done\n") + if cf_enable: + meta_f = meta_files.get(META_FILE_GEN_KEY, []) + logger.debug("Add invalidating metafiles: %s", meta_f) + if isinstance(meta_f, str): + cf_invalidate_paths.append(meta_f) + elif isinstance(meta_f, list): + cf_invalidate_paths.extend(meta_f) if META_FILE_GEN_KEY in meta_files: _failed_metas = client.upload_metadatas( @@ -218,9 +235,16 @@ def handle_npm_uploading( ) failed_metas.extend(_failed_metas) logger.info("Index files updating done\n") + if cf_enable: + cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypass indexing\n") + # Do CloudFront invalidating for generated metadata + if cf_enable and len(cf_invalidate_paths): + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, target_dir) + upload_post_process(failed_files, failed_metas, product, bucket_name) succeeded = succeeded and len(failed_files) == 0 and len(failed_metas) == 0 @@ -234,6 +258,7 @@ def handle_npm_del( aws_profile=None, dir_=None, do_index=True, + cf_enable=False, dry_run=False, manifest_bucket_name=None ) -> Tuple[str, str]: @@ -258,6 +283,9 @@ def handle_npm_del( client = S3Client(aws_profile=aws_profile, dry_run=dry_run) succeeded = True for bucket in buckets: + # prepare cf invalidate files + cf_invalidate_paths = [] + bucket_name = bucket[1] prefix = remove_prefix(bucket[2], "/") logger.info("Start deleting files from s3 bucket %s", bucket_name) @@ -309,6 +337,9 @@ def handle_npm_del( ) failed_metas.extend(_failed_metas) logger.info("package.json uploading done") + if cf_enable and len(all_meta_files): + logger.debug("Add meta files to cf invalidate list: %s", all_meta_files) + cf_invalidate_paths.extend(all_meta_files) if do_index: logger.info( @@ -329,9 +360,17 @@ def handle_npm_del( ) failed_metas.extend(_failed_index_files) logger.info("Index files updating done.\n") + if cf_enable and len(created_indexes): + logger.debug("Add index files to cf invalidate list: %s", created_indexes) + cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypassing indexing\n") + # Do CloudFront invalidating for generated metadata + if cf_enable and len(cf_invalidate_paths): + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, target_dir) + rollback_post_process(failed_files, failed_metas, product, bucket_name) succeeded = succeeded and len(failed_files) <= 0 and len(failed_metas) <= 0 @@ -476,7 +515,7 @@ def _scan_for_version(path: str): logger.error('Error: Failed to parse json!') -def _is_latest_version(source_version: str, versions: list()): +def _is_latest_version(source_version: str, versions: List[str]): for v in versions: if compare(source_version, v) <= 0: return False diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py index 20ffc71b..a206f697 100644 --- a/charon/pkgs/pkg_utils.py +++ b/charon/pkgs/pkg_utils.py @@ -1,5 +1,7 @@ -from typing import List +from typing import List, Tuple +from charon.cache 
import CFClient
 import logging
+import os

 logger = logging.getLogger(__name__)
@@ -44,15 +46,52 @@ def __post_process(
                     product_key, operation, bucket)
     else:
         total = len(failed_files) + len(failed_metas)
-        logger.error("%d file(s) occur errors/warnings in bucket %s, "
-                     "please see errors.log for details.\n",
-                     bucket, total)
-        logger.error("Product release %s is %s Ronda service in bucket %s, "
-                     "but has some failures as below:",
-                     product_key, operation, bucket)
+        logger.error(
+            "%d file(s) occur errors/warnings in bucket %s, "
+            "please see errors.log for details.\n",
+            total, bucket
+        )
+        logger.error(
+            "Product release %s is %s Ronda service in bucket %s, "
+            "but has some failures as below:",
+            product_key, operation, bucket
+        )
         if len(failed_files) > 0:
-            logger.error("Failed files: \n%s\n",
-                         failed_files)
+            logger.error("Failed files: \n%s\n", failed_files)
         if len(failed_metas) > 0:
-            logger.error("Failed metadata files: \n%s\n",
-                         failed_metas)
+            logger.error("Failed metadata files: \n%s\n", failed_metas)
+
+
+def invalidate_cf_paths(
+    cf_client: CFClient,
+    bucket: Tuple[str, str, str, str, str],
+    invalidate_paths: List[str],
+    root="/"
+):
+    logger.info("Invalidating CF cache for %s", bucket[1])
+    bucket_name = bucket[1]
+    prefix = bucket[2]
+    prefix = "/" + prefix if not prefix.startswith("/") else prefix
+    domain = bucket[4]
+    slash_root = root
+    if not root.endswith("/"):
+        slash_root = slash_root + "/"
+    final_paths = []
+    for full_path in invalidate_paths:
+        path = full_path
+        if path.startswith(slash_root):
+            path = path[len(slash_root):]
+        if prefix:
+            path = os.path.join(prefix, path)
+        final_paths.append(path)
+    logger.debug("Invalidating paths: %s", final_paths)
+    if not domain:
+        domain = cf_client.get_domain_by_bucket(bucket_name)
+    distr_id = cf_client.get_dist_id_by_domain(domain)
+    if distr_id:
+        result = cf_client.invalidate_paths(distr_id, final_paths)
+        if result:
+            logger.info(
+                "The CF invalidating request for metadata/indexing is sent, "
+                "request id %s, status is %s", result['Id'], result['Status']
+            )
diff --git a/setup.py b/setup.py
index ae737355..98d6be4d 100755
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 """
 from setuptools import setup, find_packages

-version = "1.2.2"
+version = "1.3.0"

 # f = open('README.md')
 # long_description = f.read().strip()
diff --git a/tests/base.py b/tests/base.py
index 49cd2f1e..11ff9af3 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -25,9 +25,12 @@
 from charon.pkgs.pkg_utils import is_metadata
 from charon.storage import PRODUCT_META_KEY, CHECKSUM_META_KEY
 from tests.commons import TEST_BUCKET, TEST_MANIFEST_BUCKET
-from moto import mock_s3
+from tests.constants import HERE, TEST_DS_CONFIG
+from moto import mock_aws
+import logging

-from tests.constants import HERE
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("charon").setLevel(logging.DEBUG)

 SHORT_TEST_PREFIX = "ga"
 LONG_TEST_PREFIX = "earlyaccess/all"
@@ -38,7 +41,21 @@ def setUp(self):
         self.change_home()
         config_base = self.get_config_base()
         self.__prepare_template(config_base)
-        default_config_content = """
+        config_content = self.get_config_content()
+        self.prepare_config(config_base, config_content)
+
+    def tearDown(self):
+        shutil.rmtree(self.tempdir, ignore_errors=True)
+        os.environ = self.old_environ
+
+    def change_home(self):
+        self.old_environ = os.environ.copy()
+        self.tempdir = tempfile.mkdtemp(prefix='charon-test-')
+        # Configure environment and copy templates
+        os.environ['HOME'] = self.tempdir
+
+    def get_config_content(self):
+        
return """ ignore_patterns: - ".*^(redhat).*" - ".*snapshot.*" @@ -69,17 +86,6 @@ def setUp(self): aws_profile: "test" manifest_bucket: "manifest" """ - self.prepare_config(config_base, default_config_content) - - def tearDown(self): - shutil.rmtree(self.tempdir, ignore_errors=True) - os.environ = self.old_environ - - def change_home(self): - self.old_environ = os.environ.copy() - self.tempdir = tempfile.mkdtemp(prefix='charon-test-') - # Configure environment and copy templates - os.environ['HOME'] = self.tempdir def __prepare_template(self, config_base): template_path = os.path.join(config_base, 'template') @@ -101,7 +107,7 @@ def get_config_base(self) -> str: return os.path.join(self.get_temp_dir(), '.charon') -@mock_s3 +@mock_aws class PackageBaseTest(BaseTest): def setUp(self): super().setUp() @@ -158,3 +164,22 @@ def check_content(self, objs: List, products: List[str], msg=None): self.assertEqual(sha1_checksum, sha1_file_content, msg=msg) self.assertIn(CHECKSUM_META_KEY, file_obj.metadata, msg=msg) self.assertNotEqual("", file_obj.metadata[CHECKSUM_META_KEY].strip(), msg=msg) + + +@mock_aws +class CFBasedTest(PackageBaseTest): + def setUp(self): + super().setUp() + # mock_cf is used to generate expected content + self.mock_cf = self.__prepare_cf() + response = self.mock_cf.create_distribution(DistributionConfig=TEST_DS_CONFIG) + self.test_dist_id = response.get('Distribution').get('Id') + + def tearDown(self): + super().tearDown() + # The IfMatch-value is ignored - any value is considered valid. + # Calling this function without a value is invalid, per AWS’ behaviour + self.mock_cf.delete_distribution(Id=self.test_dist_id, IfMatch='..') + + def __prepare_cf(self): + return boto3.client('cloudfront') diff --git a/tests/constants.py b/tests/constants.py index 2e6d111f..0e202deb 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -17,3 +17,33 @@ HERE = os.path.dirname(__file__) INPUTS = os.path.join(HERE, 'input') + +TEST_DS_CONFIG = { + 'CallerReference': 'test', + "Aliases": { + "Quantity": 1, + "Items": [ + "maven.repository.redhat.com", + "npm.registry.redhat.com" + ] + }, + "Origins": { + "Quantity": 1, + "Items": [ + { + "Id": "prod-maven-ga", + "DomainName": "prod-maven-ga.s3.us-east-1.amazonaws.com", + "OriginPath": "", + "CustomHeaders": { + "Quantity": 0 + }, + } + ] + }, + "DefaultCacheBehavior": { + "TargetOriginId": "prod-maven-ga", + "ViewerProtocolPolicy": "allow-all", + }, + "Comment": "", + "Enabled": True + } diff --git a/tests/requirements.txt b/tests/requirements.txt index ff6f91ae..4acad34d 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,9 +1,9 @@ flexmock>=0.10.6 -responses>=0.9.0,<0.10.8 +responses>=0.9.0 pytest<=7.1.3 pytest-cov pytest-html flake8 requests-mock -moto==3.0.7 +moto==5.0.3 python-gnupg==0.5.0 diff --git a/tests/test_cf_maven_ops.py b/tests/test_cf_maven_ops.py new file mode 100644 index 00000000..46bb5780 --- /dev/null +++ b/tests/test_cf_maven_ops.py @@ -0,0 +1,73 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" +from charon.pkgs.maven import handle_maven_uploading, handle_maven_del +from tests.base import CFBasedTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +from moto import mock_aws +import os + + +@mock_aws +class CFInMavenOPSTest(CFBasedTest): + def test_cf_after_upload(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir, + do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) + + def test_cf_after_del(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product_456 = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product_456, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir, + do_index=True + ) + + product_456 = "commons-client-4.5.6" + handle_maven_del( + test_zip, product_456, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir, do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) diff --git a/tests/test_cf_npm_ops.py b/tests/test_cf_npm_ops.py new file mode 100644 index 00000000..8b1c11b9 --- /dev/null +++ b/tests/test_cf_npm_ops.py @@ -0,0 +1,71 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from charon.pkgs.npm import handle_npm_uploading, handle_npm_del +from charon.constants import DEFAULT_REGISTRY +from tests.base import CFBasedTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +from moto import mock_aws +import os + + +@mock_aws +class CFInNPMOPSTest(CFBasedTest): + def test_cf_after_upload(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, "/", DEFAULT_REGISTRY, "npm.registry.redhat.com")], + dir_=self.tempdir, do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) + + def test_cf_after_del(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '/', DEFAULT_REGISTRY, 'npm.registry.redhat.com')], + dir_=self.tempdir, do_index=True + ) + + handle_npm_del( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '/', DEFAULT_REGISTRY, 'npm.registry.redhat.com')], + dir_=self.tempdir, do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) diff --git a/tests/test_cf_reindex.py b/tests/test_cf_reindex.py new file mode 100644 index 00000000..c8fc400a --- /dev/null +++ b/tests/test_cf_reindex.py @@ -0,0 +1,77 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from charon.pkgs.maven import handle_maven_uploading +from charon.pkgs.npm import handle_npm_uploading +from charon.pkgs.indexing import re_index +from charon.constants import DEFAULT_REGISTRY +from tests.base import CFBasedTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +from moto import mock_aws +import os + + +@mock_aws +class CFReIndexTest(CFBasedTest): + def test_cf_maven_after_reindex(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product_456 = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product_456, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir + ) + + re_index( + (TEST_BUCKET, TEST_BUCKET, "ga", "", "maven.repository.redhat.com"), + "org/apache/httpcomponents/httpclient/", "maven", + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) + + def test_cf_npm_after_reindex(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '/', DEFAULT_REGISTRY, 'npm.registry.redhat.com')], + dir_=self.tempdir, do_index=True + ) + + re_index( + (TEST_BUCKET, TEST_BUCKET, "", "", "npm.registry.redhat.com"), + "@babel/", "npm", + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) diff --git a/tests/test_cfclient.py b/tests/test_cfclient.py new file mode 100644 index 00000000..610c454b --- /dev/null +++ b/tests/test_cfclient.py @@ -0,0 +1,65 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from tests.base import BaseTest +from tests.constants import TEST_DS_CONFIG +from charon.cache import CFClient +from moto import mock_aws +import boto3 +import pytest + + +@mock_aws +class CFClientTest(BaseTest): + def setUp(self): + super().setUp() + # mock_cf is used to generate expected content + self.mock_cf = self.__prepare_cf() + response = self.mock_cf.create_distribution(DistributionConfig=TEST_DS_CONFIG) + self.test_dist_id = response.get('Distribution').get('Id') + # cf_client is the client we will test + self.cf_client = CFClient() + + def tearDown(self): + self.mock_cf.delete_distribution(Id=self.test_dist_id, IfMatch=".") + super().tearDown() + + def __prepare_cf(self): + return boto3.client('cloudfront') + + def test_get_distribution_id(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + self.assertIsNotNone(dist_id) + dist_id = self.cf_client.get_dist_id_by_domain("notexists.redhat.com") + self.assertIsNone(dist_id) + + def test_invalidate_paths(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + result = self.cf_client.invalidate_paths(dist_id, ["/*"]) + self.assertIsNotNone(result['Id']) + self.assertEqual('completed', str.lower(result['Status'])) + status = self.cf_client.invalidate_paths("noexists_id", ["/*"]) + self.assertIsNone(status) + + @pytest.mark.skip(reason=""" + Because current moto 5.0.3 has not implemented the get_invalidation(), + this test will fail. Will enable it when the it is implemented in future moto + """) + def test_check_invalidation(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + result = self.cf_client.invalidate_paths(dist_id, ["/*"]) + invalidation = self.cf_client.check_invalidation(dist_id, result['Id']) + self.assertIsNotNone(invalidation['Id']) + self.assertEqual('completed', str.lower(result['Status'])) diff --git a/tests/test_manifest_del.py b/tests/test_manifest_del.py index fc5ff35c..b5d42255 100644 --- a/tests/test_manifest_del.py +++ b/tests/test_manifest_del.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.maven import handle_maven_uploading, handle_maven_del from charon.pkgs.npm import handle_npm_uploading, handle_npm_del @@ -28,7 +28,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class ManifestDeleteTest(PackageBaseTest): def test_maven_manifest_delete(self): diff --git a/tests/test_manifest_upload.py b/tests/test_manifest_upload.py index e6aa43e9..8a76de8d 100644 --- a/tests/test_manifest_upload.py +++ b/tests/test_manifest_upload.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.maven import handle_maven_uploading from charon.pkgs.npm import handle_npm_uploading @@ -29,7 +29,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class ManifestUploadTest(PackageBaseTest): def test_maven_manifest_upload(self): diff --git a/tests/test_maven_del.py b/tests/test_maven_del.py index c26e6d4a..9ce85eaa 100644 --- a/tests/test_maven_del.py +++ b/tests/test_maven_del.py @@ -24,13 +24,13 @@ ARCHETYPE_CATALOG, ARCHETYPE_CATALOG_FILES, COMMONS_CLIENT_459_MVN_NUM, COMMONS_CLIENT_META_NUM ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenDeleteTest(PackageBaseTest): def test_maven_deletion(self): self.__test_prefix_deletion("") diff --git a/tests/test_maven_del_multi_tgts.py b/tests/test_maven_del_multi_tgts.py index 
ffc60954..c3c93713 100644 --- a/tests/test_maven_del_multi_tgts.py +++ b/tests/test_maven_del_multi_tgts.py @@ -24,13 +24,13 @@ ARCHETYPE_CATALOG, ARCHETYPE_CATALOG_FILES, COMMONS_CLIENT_459_MVN_NUM, COMMONS_CLIENT_META_NUM, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenDeleteMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index 7468310d..445b8e92 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -24,13 +24,13 @@ COMMONS_LOGGING_INDEXES, COMMONS_CLIENT_INDEX, COMMONS_CLIENT_456_INDEX, COMMONS_LOGGING_INDEX, COMMONS_ROOT_INDEX ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenFileIndexTest(PackageBaseTest): def test_uploading_index(self): @@ -177,7 +177,11 @@ def test_re_index(self): Key=commons_client_457_test, Body="Just a test content" ) - re_index(TEST_BUCKET, "", commons_client_root, "maven") + re_index( + (TEST_BUCKET, TEST_BUCKET, "", "", None), + commons_client_root, "maven", + cf_enable=True + ) indedx_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) index_content = str(indedx_obj.get()["Body"].read(), "utf-8") self.assertIn('../', index_content) diff --git a/tests/test_maven_index_multi_tgts.py b/tests/test_maven_index_multi_tgts.py index a02707f2..ddd7bb12 100644 --- a/tests/test_maven_index_multi_tgts.py +++ b/tests/test_maven_index_multi_tgts.py @@ -23,13 +23,13 @@ COMMONS_LOGGING_INDEXES, COMMONS_CLIENT_INDEX, COMMONS_CLIENT_456_INDEX, COMMONS_LOGGING_INDEX, COMMONS_ROOT_INDEX, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenFileIndexMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_maven_sign.py b/tests/test_maven_sign.py index 41cab15e..52df5690 100644 --- a/tests/test_maven_sign.py +++ b/tests/test_maven_sign.py @@ -19,13 +19,13 @@ TEST_BUCKET, COMMONS_CLIENT_456_SIGNS, COMMONS_LOGGING_SIGNS, COMMONS_CLIENT_456_INDEX, COMMONS_CLIENT_459_SIGNS ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenFileSignTest(PackageBaseTest): def test_uploading_sign(self): diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index 431475a8..c47d1695 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -23,13 +23,13 @@ COMMONS_CLIENT_456_MVN_NUM, COMMONS_CLIENT_MVN_NUM, COMMONS_CLIENT_META_NUM ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenUploadTest(PackageBaseTest): def test_fresh_upload(self): self.__test_prefix_upload("") diff --git a/tests/test_maven_upload_multi_tgts.py b/tests/test_maven_upload_multi_tgts.py index ffb41d20..921e8a9d 100644 --- a/tests/test_maven_upload_multi_tgts.py +++ b/tests/test_maven_upload_multi_tgts.py @@ -24,13 +24,13 @@ COMMONS_CLIENT_456_MVN_NUM, COMMONS_CLIENT_MVN_NUM, COMMONS_CLIENT_META_NUM, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenUploadMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_del.py b/tests/test_npm_del.py index 8d6a6df0..5f734b26 100644 --- a/tests/test_npm_del.py +++ 
b/tests/test_npm_del.py @@ -14,7 +14,7 @@ limitations under the License. """ import os -from moto import mock_s3 +from moto import mock_aws from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY from charon.pkgs.npm import handle_npm_uploading, handle_npm_del from charon.storage import CHECKSUM_META_KEY @@ -23,7 +23,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMDeleteTest(PackageBaseTest): def test_npm_deletion(self): self.__test_prefix() diff --git a/tests/test_npm_del_multi_tgts.py b/tests/test_npm_del_multi_tgts.py index ac1e2c32..a6401db6 100644 --- a/tests/test_npm_del_multi_tgts.py +++ b/tests/test_npm_del_multi_tgts.py @@ -14,7 +14,7 @@ limitations under the License. """ import os -from moto import mock_s3 +from moto import mock_aws from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY from charon.pkgs.npm import handle_npm_uploading, handle_npm_del from charon.storage import CHECKSUM_META_KEY @@ -23,7 +23,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMDeleteMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_dist_gen.py b/tests/test_npm_dist_gen.py index 438cc094..7fbf58c0 100644 --- a/tests/test_npm_dist_gen.py +++ b/tests/test_npm_dist_gen.py @@ -15,7 +15,7 @@ """ import os import subresource_integrity -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading from charon.utils.files import digest, HashType from tests.base import PackageBaseTest @@ -26,7 +26,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMUploadTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py index 02dc64e0..f6745c3c 100644 --- a/tests/test_npm_index.py +++ b/tests/test_npm_index.py @@ -22,7 +22,7 @@ TEST_BUCKET, CODE_FRAME_7_14_5_INDEXES, CODE_FRAME_7_15_8_INDEXES, COMMONS_ROOT_INDEX ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS @@ -30,7 +30,7 @@ NAMESPACE_BABEL_INDEX = "@babel/index.html" -@mock_s3 +@mock_aws class NpmFileIndexTest(PackageBaseTest): def test_uploading_index(self): self.__test_upload_prefix() @@ -222,7 +222,11 @@ def test_re_index(self): test_bucket.put_object( Key=test_file_path, Body="test content" ) - re_index(TEST_BUCKET, prefix, "@babel/", "npm") + re_index( + (TEST_BUCKET, TEST_BUCKET, prefix, "", None), + "@babel/", "npm", + cf_enable=True + ) index_obj = test_bucket.Object(prefixed_namespace_babel_index) index_content = str(index_obj.get()["Body"].read(), "utf-8") self.assertIn( @@ -249,7 +253,11 @@ def test_re_index(self): test_bucket.put_object( Key=test_file_path, Body="test content" ) - re_index(TEST_BUCKET, prefix, "/", "npm") + re_index( + (TEST_BUCKET, TEST_BUCKET, prefix, "", None), + "/", "npm", + cf_enable=True + ) index_obj = test_bucket.Object(prefixed_root_index) index_content = str(index_obj.get()["Body"].read(), "utf-8") self.assertIn('@babel/', index_content) @@ -277,7 +285,11 @@ def test_re_index(self): test_bucket.put_object( Key=test_file_path, Body="test content" ) - re_index(TEST_BUCKET, prefix, metadata_path, "npm") + re_index( + (TEST_BUCKET, TEST_BUCKET, prefix, "", None), + metadata_path, "npm", + cf_enable=True + ) objs = list(test_bucket.objects.all()) actual_files = [obj.key for obj in objs] self.assertIn( diff --git a/tests/test_npm_index_multi_tgts.py b/tests/test_npm_index_multi_tgts.py index ef653303..acb882a4 100644 --- a/tests/test_npm_index_multi_tgts.py +++ 
b/tests/test_npm_index_multi_tgts.py @@ -22,7 +22,7 @@ CODE_FRAME_7_15_8_INDEXES, COMMONS_ROOT_INDEX, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS @@ -30,7 +30,7 @@ NAMESPACE_BABEL_INDEX = "@babel/index.html" -@mock_s3 +@mock_aws class NpmFileIndexMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_meta.py b/tests/test_npm_meta.py index a0627e2b..6d112efd 100644 --- a/tests/test_npm_meta.py +++ b/tests/test_npm_meta.py @@ -16,7 +16,7 @@ import os import boto3 -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading, read_package_metadata_from_content from charon.storage import S3Client @@ -27,7 +27,7 @@ MY_BUCKET = "npm_bucket" -@mock_s3 +@mock_aws class NPMMetadataOnS3Test(BaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_upload.py b/tests/test_npm_upload.py index 3438ad61..53767301 100644 --- a/tests/test_npm_upload.py +++ b/tests/test_npm_upload.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading from charon.pkgs.pkg_utils import is_metadata @@ -29,7 +29,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMUploadTest(PackageBaseTest): def test_npm_upload(self): diff --git a/tests/test_npm_upload_multi_tgts.py b/tests/test_npm_upload_multi_tgts.py index 82a265f7..242937a7 100644 --- a/tests/test_npm_upload_multi_tgts.py +++ b/tests/test_npm_upload_multi_tgts.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading from charon.pkgs.pkg_utils import is_metadata @@ -29,7 +29,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMUploadMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_pkgs_dryrun.py b/tests/test_pkgs_dryrun.py index 7f2b004e..3b82d1b4 100644 --- a/tests/test_pkgs_dryrun.py +++ b/tests/test_pkgs_dryrun.py @@ -18,13 +18,13 @@ from charon.constants import DEFAULT_REGISTRY from tests.base import PackageBaseTest from tests.commons import TEST_BUCKET -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class PkgsDryRunTest(PackageBaseTest): def test_maven_upload_dry_run(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") diff --git a/tests/test_s3client.py b/tests/test_s3client.py index b33e68d7..48063daa 100644 --- a/tests/test_s3client.py +++ b/tests/test_s3client.py @@ -19,7 +19,7 @@ from charon.utils.files import overwrite_file, read_sha1 from charon.constants import PROD_INFO_SUFFIX from tests.base import BaseTest, SHORT_TEST_PREFIX -from moto import mock_s3 +from moto import mock_aws import boto3 import os import sys @@ -35,7 +35,7 @@ COMMONS_LANG3_ZIP_MVN_ENTRY = 26 -@mock_s3 +@mock_aws class S3ClientTest(BaseTest): def setUp(self): super().setUp() From aa2e110989428107cd1554ce6a7c58afcf86e253 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Tue, 26 Mar 2024 13:53:18 +0800 Subject: [PATCH 15/31] Add new command to clear CF cache --- .gitignore | 1 + charon/cache.py | 2 +- charon/cmd/__init__.py | 2 + charon/cmd/cmd_cache.py | 126 +++++++++++++++++++++++++++++++++++++ charon/schemas/charon.json | 4 ++ 5 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 charon/cmd/cmd_cache.py diff --git a/.gitignore b/.gitignore index b32671f8..f4df6301 100644 --- a/.gitignore +++ b/.gitignore 
@@ -11,6 +11,7 @@ coverage
 .vscode
 package/
 .local
+local
 .DS_Store
 
 # Unit test
diff --git a/charon/cache.py b/charon/cache.py
index b289f8ca..271c5478 100644
--- a/charon/cache.py
+++ b/charon/cache.py
@@ -51,7 +51,7 @@ def __get_endpoint(self, extra_conf) -> str:
         if isinstance(extra_conf, Dict):
             endpoint_url = extra_conf.get(ENDPOINT_ENV, None)
         if endpoint_url:
-            logger.info("Using endpoint url for aws client: %s", endpoint_url)
+            logger.info("Using endpoint url for aws CF client: %s", endpoint_url)
         else:
             logger.debug("No user-specified endpoint url is used.")
         return endpoint_url
diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py
index 9ff53846..7a22f570 100644
--- a/charon/cmd/__init__.py
+++ b/charon/cmd/__init__.py
@@ -18,6 +18,7 @@
 from charon.cmd.cmd_delete import delete
 from charon.cmd.cmd_index import index
 from charon.cmd.cmd_checksum import validate
+from charon.cmd.cmd_cache import clear_cf
 
 
 @group()
@@ -33,3 +34,4 @@ def cli():
 cli.add_command(delete)
 cli.add_command(index)
 cli.add_command(validate)
+cli.add_command(clear_cf)
diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py
new file mode 100644
index 00000000..e6cd61a4
--- /dev/null
+++ b/charon/cmd/cmd_cache.py
@@ -0,0 +1,126 @@
+"""
+Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from charon.config import get_config
+from charon.cmd.internal import _decide_mode, _get_buckets
+from charon.cache import CFClient
+from charon.pkgs.pkg_utils import invalidate_cf_paths
+from click import command, option
+from typing import List
+
+import traceback
+import logging
+import sys
+import os
+
+logger = logging.getLogger(__name__)
+
+
+@option(
+    "--target",
+    "-t",
+    "target",
+    help="""
+    The target to do the uploading, which will decide which s3 bucket
+    and what root path where all files will be uploaded to.
+    Can accept more than one target.
+    """,
+    required=True
+)
+@option(
+    "--path",
+    "-p",
+    "paths",
+    help="""
+    The paths which will be invalidated in CF. The paths can use the format that CF defines
+    in https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html
+    """,
+    multiple=True
+)
+@option(
+    "--path-file",
+    "-f",
+    "path_file",
+    help="""
+    The file which contains the paths to be invalidated in CF. Paths in this file follow the
+    CF format too, and each path should be on a single line.
+    """
+)
+@option(
+    "--debug",
+    "-D",
+    "debug",
+    help="Debug mode, will print all debug logs for problem tracking.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--quiet",
+    "-q",
+    "quiet",
+    help="Quiet mode, will shrink most of the logs except warning and errors.",
+    is_flag=True,
+    default=False
+)
+@command()
+def clear_cf(
+    target: str,
+    paths: List[str],
+    path_file: str,
+    quiet: bool = False,
+    debug: bool = False
+):
+    """This command will do invalidating on AWS CloudFront for the specified paths.
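+
+    A minimal invocation sketch via click's test runner (the "ga" target
+    name and the wildcard paths below are purely illustrative, not real
+    targets of this repo):
+
+        from click.testing import CliRunner
+        from charon.cmd.cmd_cache import clear_cf
+
+        runner = CliRunner()
+        # invalidate two hypothetical wildcard paths against the "ga" target
+        result = runner.invoke(
+            clear_cf, ["-t", "ga", "-p", "/ga/org/foo/*", "-p", "/ga/com/bar/*"]
+        )
+        print(result.output)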
+ """ + _decide_mode( + f"cfclear-{target}", "", + is_quiet=quiet, is_debug=debug + ) + if not paths and not path_file: + logger.error( + "No path specified, please specify at least one path " + "through --path or --path-file.") + sys.exit(1) + + work_paths = [] + if paths: + work_paths.extend(paths) + + if path_file: + with open(path_file, "r", encoding="utf-8") as f: + for line in f.readlines(): + work_paths.append(str(line).strip()) + + try: + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + buckets = _get_buckets([target], conf) + + for b in buckets: + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths( + cf_client, b, work_paths + ) + except Exception: + print(traceback.format_exc()) + sys.exit(2) diff --git a/charon/schemas/charon.json b/charon/schemas/charon.json index bf745f9a..d7850fad 100644 --- a/charon/schemas/charon.json +++ b/charon/schemas/charon.json @@ -66,6 +66,10 @@ "type": "string", "description": "aws profile to use with S3" }, + "aws_cf_enable": { + "type": "boolean", + "description": "enable aws cloudfront support" + }, "manifest_bucket": { "type": "string", "description": "which bucket to use for storing manifests" From b09b10efae7d414505506c6af46874709c6d2b4d Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 27 Mar 2024 11:23:50 +0800 Subject: [PATCH 16/31] Refine some logging --- charon/cache.py | 9 +++++--- charon/storage.py | 55 +++++++++++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/charon/cache.py b/charon/cache.py index 271c5478..a5daf5fe 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -35,7 +35,7 @@ def __init_aws_client( self, aws_profile=None, extra_conf=None ): if aws_profile: - logger.debug("Using aws profile: %s", aws_profile) + logger.debug("[CloudFront] Using aws profile: %s", aws_profile) cf_session = session.Session(profile_name=aws_profile) else: cf_session = session.Session() @@ -51,9 +51,12 @@ def __get_endpoint(self, extra_conf) -> str: if isinstance(extra_conf, Dict): endpoint_url = extra_conf.get(ENDPOINT_ENV, None) if endpoint_url: - logger.info("Using endpoint url for aws CF client: %s", endpoint_url) + logger.info( + "[CloudFront] Using endpoint url for aws CF client: %s", + endpoint_url + ) else: - logger.debug("No user-specified endpoint url is used.") + logger.debug("[CloudFront] No user-specified endpoint url is used.") return endpoint_url def invalidate_paths(self, distr_id: str, paths: List[str]) -> Dict[str, str]: diff --git a/charon/storage.py b/charon/storage.py index 6c2fcfde..34ae1274 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -65,14 +65,14 @@ def __init_aws_client( self, aws_profile=None, extra_conf=None ): if aws_profile: - logger.debug("Using aws profile: %s", aws_profile) + logger.debug("[S3] Using aws profile: %s", aws_profile) s3_session = session.Session(profile_name=aws_profile) else: s3_session = session.Session() endpoint_url = self.__get_endpoint(extra_conf) config = None if self.__enable_acceleration(extra_conf): - logger.info("S3 acceleration config enabled, " + logger.info("[S3] S3 acceleration config enabled, " "will enable s3 use_accelerate_endpoint config") config = Config(s3={"use_accelerate_endpoint": True}) return s3_session.resource( @@ -87,9 +87,12 @@ def __get_endpoint(self, extra_conf) -> str: if isinstance(extra_conf, Dict): endpoint_url = 
extra_conf.get(ENDPOINT_ENV, None)
         if endpoint_url:
-            logger.info("Using endpoint url for aws client: %s", endpoint_url)
+            logger.info(
+                "[S3] Using endpoint url for aws S3 client: %s",
+                endpoint_url
+            )
         else:
-            logger.debug("No user-specified endpoint url is used.")
+            logger.debug("[S3] No user-specified endpoint url is used.")
         return endpoint_url
 
     def __enable_acceleration(self, extra_conf) -> bool:
@@ -140,14 +143,14 @@ async def path_upload_handler(
         async with self.__con_sem:
             if not os.path.isfile(full_file_path):
                 logger.warning(
-                    'Warning: file %s does not exist during uploading. Product: %s',
+                    '[S3] Warning: file %s does not exist during uploading. Product: %s',
                     full_file_path, product
                 )
                 failed.append(full_file_path)
                 return
             logger.debug(
-                '(%d/%d) Uploading %s to bucket %s',
+                '[S3] (%d/%d) Uploading %s to bucket %s',
                 index, total, full_file_path, main_bucket_name
             )
             main_path_key = os.path.join(key_prefix, path) if key_prefix else path
@@ -157,7 +160,7 @@
                 existed = await self.__run_async(self.__file_exists, main_file_object)
             except (ClientError, HTTPClientError) as e:
                 logger.error(
-                    "Error: file existence check failed due to error: %s", e
+                    "[S3] Error: file existence check failed due to error: %s", e
                 )
                 failed.append(full_file_path)
                 return
@@ -193,9 +196,9 @@
                     main_path_key, main_bucket_name,
                     [product]
                 )
-                logger.debug('Uploaded %s to bucket %s', path, main_bucket_name)
+                logger.debug('[S3] Uploaded %s to bucket %s', path, main_bucket_name)
             except (ClientError, HTTPClientError) as e:
-                logger.error("ERROR: file %s not uploaded to bucket"
+                logger.error("[S3] ERROR: file %s not uploaded to bucket"
                              " %s due to error: %s ", full_file_path,
                              main_bucket_name, e)
                 failed.append(full_file_path)
@@ -230,9 +233,9 @@
                         extra_path_key, extra_bucket_name,
                         [product]
                     )
                 except (ClientError, HTTPClientError) as e:
-                    logger.error("ERROR: copying failure happend for file %s to bucket"
-                                 " %s due to error: %s ", full_file_path,
-                                 extra_bucket_name, e)
+                    logger.error("[S3] ERROR: copying failure happened for file %s"
+                                 " to bucket %s due to error: %s ",
+                                 full_file_path, extra_bucket_name, e)
                     failed.append(full_file_path)
                 else:
                     await handle_existed(
@@ -613,7 +616,7 @@ async def path_delete_handler(
             if not updated:
                 failed.append(full_file_path)
                 return
-            logger.info("Deleted %s from bucket %s", path, bucket_name)
+            logger.info("[S3] Deleted %s from bucket %s", path, bucket_name)
             return
         except (ClientError, HTTPClientError) as e:
             logger.error(
@@ -759,7 +762,7 @@ def get_files(self, bucket_name: str, prefix=None, suffix=None) -> Tuple[List[st
         try:
             objs = list(bucket.objects.filter(Prefix=prefix))
         except (ClientError, HTTPClientError) as e:
-            logger.error("ERROR: Can not get files under %s in bucket"
+            logger.error("[S3] ERROR: Can not get files under %s in bucket"
                          " %s due to error: %s ", prefix,
                          bucket_name, e)
             return ([], False)
@@ -800,7 +803,7 @@ def list_folder_content(self, bucket_name: str, folder: str) -> List[str]:
             )
         except (ClientError, HTTPClientError) as e:
-            logger.error("ERROR: Can not get contents of %s from bucket"
+            logger.error("[S3] ERROR: Can not get contents of %s from bucket"
                          " %s due to error: %s ", folder,
                          bucket_name, e)
             return []
@@ -828,7 +831,7 @@ def __get_bucket(self, bucket_name: str):
         bucket = self.__buckets.get(bucket_name)
         if bucket:
             return bucket
-        logger.debug("Cache aws bucket %s", bucket_name)
+        logger.debug("[S3] Cache aws bucket %s", bucket_name)
         bucket = 
self.__client.Bucket(bucket_name)
         self.__buckets[bucket_name] = bucket
         return bucket
@@ -848,15 +851,15 @@ def __file_exists(self, file_object) -> bool:
     def __get_prod_info(
         self, file: str, bucket_name: str
     ) -> Tuple[List[str], bool]:
-        logger.debug("Getting product infomation for file %s", file)
+        logger.debug("[S3] Getting product information for file %s", file)
         prod_info_file = file + PROD_INFO_SUFFIX
         try:
             info_file_content = self.read_file_content(bucket_name, prod_info_file)
             prods = [p.strip() for p in info_file_content.split("\n")]
-            logger.debug("Got product information as below %s", prods)
+            logger.debug("[S3] Got product information as below %s", prods)
             return (prods, True)
         except (ClientError, HTTPClientError) as e:
-            logger.warning("WARN: Can not get product info for file %s "
+            logger.warning("[S3] WARN: Can not get product info for file %s "
                            "due to error: %s", file, e)
             return ([], False)
@@ -868,7 +871,7 @@ async def __update_prod_info(
         file_obj = bucket.Object(prod_info_file)
         content_type = "text/plain"
         if len(prods) > 0:
-            logger.debug("Updating product infomation for file %s "
+            logger.debug("[S3] Updating product information for file %s "
                          "with products: %s", file, prods)
             try:
                 await self.__run_async(
@@ -878,14 +881,14 @@
                         ContentType=content_type
                     )
                 )
-                logger.debug("Updated product infomation for file %s", file)
+                logger.debug("[S3] Updated product information for file %s", file)
                 return True
             except (ClientError, HTTPClientError) as e:
-                logger.warning("WARNING: Can not update product info for file %s "
+                logger.warning("[S3] WARNING: Can not update product info for file %s "
                                "due to error: %s", file, e)
                 return False
         else:
-            logger.debug("Removing product infomation file for file %s "
+            logger.debug("[S3] Removing product information file for file %s "
                          "because no products left", file)
             try:
                 result = await self.__run_async(
@@ -899,10 +902,10 @@
                         Delete={"Objects": [{"Key": prod_info_file}]}
                     )
                 )
-                logger.debug("Removed product infomation file for file %s", file)
+                logger.debug("[S3] Removed product information file for file %s", file)
                 return True
             except (ClientError, HTTPClientError) as e:
-                logger.warning("WARNING: Can not delete product info file for file %s "
+                logger.warning("[S3] WARNING: Can not delete product info file for file %s "
                                "due to error: %s", file, e)
                 return False
@@ -918,7 +921,7 @@ async def wrapper(
                 await path_handler(full_file_path, path, index, total, failed)
             finally:
                 if index % FILE_REPORT_LIMIT == 0:
-                    logger.info("######### %d/%d files finished", index, total)
+                    logger.info("[S3] ######### %d/%d files finished", index, total)
         return wrapper
 
     def __do_path_cut_and(
From a52ba7a56878dc85ae0bed8138a2037442e8126f Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Wed, 27 Mar 2024 17:35:27 +0800
Subject: [PATCH 17/31] Use wildcard for paths in maven CF invalidating

* And ignore the indexing(index.html) CF invalidating per cost
  consideration
* And change the root work dir for npm uploading

---
 charon/cache.py           | 61 ++++++++++++++++++++++++---------------
 charon/pkgs/indexing.py   | 13 +++++----
 charon/pkgs/maven.py      | 47 +++++++++++++++++++++---------
 charon/pkgs/npm.py        | 20 +++++++------
 charon/pkgs/pkg_utils.py  | 22 +++++++++-----
 tests/test_cf_reindex.py  |  9 +++---
 tests/test_cfclient.py    | 10 +++----
 tests/test_maven_index.py |  3 +-
 tests/test_npm_index.py   |  9 ++----
 9 files changed, 118 insertions(+), 76 deletions(-)

diff --git a/charon/cache.py b/charon/cache.py
index a5daf5fe..0112d5fe 100644
--- 
a/charon/cache.py
+++ b/charon/cache.py
@@ -59,7 +59,10 @@ def __get_endpoint(self, extra_conf) -> str:
             logger.debug("[CloudFront] No user-specified endpoint url is used.")
         return endpoint_url
 
-    def invalidate_paths(self, distr_id: str, paths: List[str]) -> Dict[str, str]:
+    def invalidate_paths(
+        self, distr_id: str, paths: List[str],
+        batch_size: int = 15
+    ) -> List[Dict[str, str]]:
         """Send a invalidating requests for the paths in distribution to CloudFront.
         This will invalidate the paths in the distribution to enforce
         the refreshment from backend S3 bucket for these paths. For details see:
@@ -67,30 +70,42 @@
         * The distr_id is the id for the distribution. This id can be get through
         get_dist_id_by_domain(domain) function
         * Can specify the invalidating paths through paths param.
+        * Batch size is the number of paths to be invalidated in one request.
+        Because paths may contain wildcards (*), the default value is 15, which
+        is the maximum number in the official doc:
+        https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html#InvalidationLimits
         """
-        caller_ref = str(uuid.uuid4())
         logger.debug("[CloudFront] Creating invalidation for paths: %s", paths)
-        try:
-            response = self.__client.create_invalidation(
-                DistributionId=distr_id,
-                InvalidationBatch={
-                    'CallerReference': caller_ref,
-                    'Paths': {
-                        'Quantity': len(paths),
-                        'Items': paths
+        real_paths = [paths]
+        # Split paths into batches by batch_size
+        if batch_size:
+            real_paths = [paths[i:i + batch_size] for i in range(0, len(paths), batch_size)]
+        results = []
+        for batch_paths in real_paths:
+            caller_ref = str(uuid.uuid4())
+            try:
+                response = self.__client.create_invalidation(
+                    DistributionId=distr_id,
+                    InvalidationBatch={
+                        'CallerReference': caller_ref,
+                        'Paths': {
+                            'Quantity': len(batch_paths),
+                            'Items': batch_paths
+                        }
                     }
-                }
-            )
-            if response:
-                invalidation = response.get('Invalidation', {})
-                return {
-                    'Id': invalidation.get('Id', None),
-                    'Status': invalidation.get('Status', None)
-                }
-        except Exception as err:
-            logger.error(
-                "[CloudFront] Error occurred while creating invalidation, error: %s", err
-            )
+                )
+                if response:
+                    invalidation = response.get('Invalidation', {})
+                    results.append({
+                        'Id': invalidation.get('Id', None),
+                        'Status': invalidation.get('Status', None)
+                    })
+            except Exception as err:
+                logger.error(
+                    "[CloudFront] Error occurred while creating invalidation"
+                    " for paths %s, error: %s", batch_paths, err
+                )
+        return results
 
     def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict:
         try:
@@ -115,7 +130,7 @@ def get_dist_id_by_domain(self, domain: str) -> str:
         """Get distribution id by a domain name.
The id can be used to send invalidating request through #invalidate_paths function * Domain are Ronda domains, like "maven.repository.redhat.com" - or "npm.repository.redhat.com" + or "npm.registry.redhat.com" """ try: response = self.__client.list_distributions() diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index bd9192ab..ee42a83c 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -15,8 +15,8 @@ """ from charon.config import get_template from charon.storage import S3Client -from charon.cache import CFClient -from charon.pkgs.pkg_utils import invalidate_cf_paths +# from charon.cache import CFClient +# from charon.pkgs.pkg_utils import invalidate_cf_paths from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE, PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX) from charon.utils.files import digest_content @@ -265,7 +265,7 @@ def re_index( path: str, package_type: str, aws_profile: str = None, - cf_enable: bool = False, + # cf_enable: bool = False, dry_run: bool = False ): """Refresh the index.html for the specified folder in the bucket. @@ -312,9 +312,10 @@ def re_index( index_path, index_content, (bucket_name, real_prefix), "text/html", digest_content(index_content) ) - if cf_enable: - cf_client = CFClient(aws_profile=aws_profile) - invalidate_cf_paths(cf_client, bucket, [index_path]) + # We will not invalidate index.html per cost consideration + # if cf_enable: + # cf_client = CFClient(aws_profile=aws_profile) + # invalidate_cf_paths(cf_client, bucket, [index_path]) else: logger.warning( "The path %s does not contain any contents in bucket %s. " diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 257bb2b9..c7413c80 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -58,7 +58,9 @@ def __get_mvn_template(kind: str, default: str) -> str: META_TEMPLATE = __get_mvn_template("maven-metadata.xml.j2", MAVEN_METADATA_TEMPLATE) ARCH_TEMPLATE = __get_mvn_template("archetype-catalog.xml.j2", ARCHETYPE_CATALOG_TEMPLATE) -STANDARD_GENERATED_IGNORES = ["maven-metadata.xml", "archetype-catalog.xml"] +MAVEN_METADATA_FILE = "maven-metadata.xml" +MAVEN_ARCH_FILE = "archetype-catalog.xml" +STANDARD_GENERATED_IGNORES = [MAVEN_METADATA_FILE, MAVEN_ARCH_FILE] class MavenMetadata(object): @@ -219,7 +221,7 @@ def gen_meta_file(group_id, artifact_id: str, versions: list, root="/", digest=T ).generate_meta_file_content() g_path = "/".join(group_id.split(".")) meta_files = [] - final_meta_path = os.path.join(root, g_path, artifact_id, "maven-metadata.xml") + final_meta_path = os.path.join(root, g_path, artifact_id, MAVEN_METADATA_FILE) try: overwrite_file(final_meta_path, content) meta_files.append(final_meta_path) @@ -374,7 +376,7 @@ def handle_maven_uploading( cf_invalidate_paths.extend(meta_files.get(META_FILE_GEN_KEY, [])) # 8. 
Determine refreshment of archetype-catalog.xml - if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")): + if os.path.exists(os.path.join(top_level, MAVEN_ARCH_FILE)): logger.info("Start generating archetype-catalog.xml for bucket %s", bucket_name) upload_archetype_file = _generate_upload_archetype_catalog( s3=s3_client, bucket=bucket_name, @@ -451,15 +453,16 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("Index files updating done\n") - # Add index files to Cf invalidate paths - if cf_enable: - cf_invalidate_paths.extend(created_indexes) + # We will not invalidate the index files per cost consideration + # if cf_enable: + # cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypass indexing") # Finally do the CF invalidating for metadata files if cf_enable and len(cf_invalidate_paths) > 0: cf_client = CFClient(aws_profile=aws_profile) + cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths) invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level) upload_post_process(failed_files, failed_metas, prod_key, bucket_name) @@ -578,7 +581,7 @@ def handle_maven_del( cf_invalidate_paths.extend(all_meta_files) # 7. Determine refreshment of archetype-catalog.xml - if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")): + if os.path.exists(os.path.join(top_level, MAVEN_ARCH_FILE)): logger.info("Start generating archetype-catalog.xml") archetype_action = _generate_rollback_archetype_catalog( s3=s3_client, bucket=bucket_name, @@ -630,13 +633,15 @@ def handle_maven_del( if len(_failed_index_files) > 0: failed_metas.extend(_failed_index_files) logger.info("Index files updating done.\n") - if cf_enable: - cf_invalidate_paths.extend(created_indexes) + # We will not invalidate the index files per cost consideration + # if cf_enable: + # cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypassing indexing") if cf_enable and len(cf_invalidate_paths): cf_client = CFClient(aws_profile=aws_profile) + cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths) invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level) rollback_post_process(failed_files, failed_metas, prod_key, bucket_name) @@ -1017,15 +1022,15 @@ def _generate_metadatas( "No poms found in s3 bucket %s for GA path %s", bucket, path ) meta_files_deletion = meta_files.get(META_FILE_DEL_KEY, []) - meta_files_deletion.append(os.path.join(path, "maven-metadata.xml")) - meta_files_deletion.extend(__hash_decorate_metadata(path, "maven-metadata.xml")) + meta_files_deletion.append(os.path.join(path, MAVEN_METADATA_FILE)) + meta_files_deletion.extend(__hash_decorate_metadata(path, MAVEN_METADATA_FILE)) meta_files[META_FILE_DEL_KEY] = meta_files_deletion else: logger.warning("An error happened when scanning remote " "artifacts under GA path %s", path) meta_failed_path = meta_files.get(META_FILE_FAILED, []) - meta_failed_path.append(os.path.join(path, "maven-metadata.xml")) - meta_failed_path.extend(__hash_decorate_metadata(path, "maven-metadata.xml")) + meta_failed_path.append(os.path.join(path, MAVEN_METADATA_FILE)) + meta_failed_path.extend(__hash_decorate_metadata(path, MAVEN_METADATA_FILE)) meta_files[META_FILE_FAILED] = meta_failed_path else: logger.debug( @@ -1090,6 +1095,22 @@ def __get_suffix(package_type: str, conf: CharonConfig) -> List[str]: return [] +def __wildcard_metadata_paths(paths: List[str]) -> List[str]: + new_paths = [] + for path in paths: + if path.endswith(MAVEN_METADATA_FILE)\ + or 
path.endswith(MAVEN_ARCH_FILE):
+            new_paths.append(path[:-len(".xml")] + ".*")
+        elif path.endswith(".md5")\
+                or path.endswith(".sha1")\
+                or path.endswith(".sha512")\
+                or path.endswith(".sha256"):
+            continue
+        else:
+            new_paths.append(path)
+    return new_paths
+
+
 class VersionCompareKey:
     'Used as key function for version sorting'
     def __init__(self, obj):
diff --git a/charon/pkgs/npm.py b/charon/pkgs/npm.py
index ad418ff1..894998a6 100644
--- a/charon/pkgs/npm.py
+++ b/charon/pkgs/npm.py
@@ -105,6 +105,8 @@ def handle_npm_uploading(
     client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
     generated_signs = []
+    succeeded = True
+    root_dir = mkdtemp(prefix=f"npm-charon-{product}-", dir=dir_)
     for bucket in buckets:
         # prepare cf invalidate files
         cf_invalidate_paths = []
@@ -113,7 +115,7 @@
         prefix = remove_prefix(bucket[2], "/")
         registry = bucket[3]
         target_dir, valid_paths, package_metadata = _scan_metadata_paths_from_archive(
-            tarball_path, registry, prod=product, dir__=dir_
+            tarball_path, registry, prod=product, dir__=root_dir
         )
         if not os.path.isdir(target_dir):
             logger.error("Error: the extracted target_dir path %s does not exist.", target_dir)
@@ -129,8 +131,6 @@
         )
         logger.info("Files uploading done\n")
 
-        succeeded = True
-
         if not manifest_bucket_name:
             logger.warning(
                 'Warning: No manifest bucket is provided, will ignore the process of manifest '
@@ -235,8 +235,9 @@
             )
             failed_metas.extend(_failed_metas)
             logger.info("Index files updating done\n")
-            if cf_enable:
-                cf_invalidate_paths.extend(created_indexes)
+            # We will not invalidate the index files per cost consideration
+            # if cf_enable:
+            #     cf_invalidate_paths.extend(created_indexes)
         else:
             logger.info("Bypass indexing\n")
 
@@ -248,7 +249,7 @@
         upload_post_process(failed_files, failed_metas, product, bucket_name)
         succeeded = succeeded and len(failed_files) == 0 and len(failed_metas) == 0
 
-    return (target_dir, succeeded)
+    return (root_dir, succeeded)
 
 
 def handle_npm_del(
@@ -360,9 +361,10 @@
             )
             failed_metas.extend(_failed_index_files)
             logger.info("Index files updating done.\n")
-            if cf_enable and len(created_indexes):
-                logger.debug("Add index files to cf invalidate list: %s", created_indexes)
-                cf_invalidate_paths.extend(created_indexes)
+            # We will not invalidate the index files per cost consideration
+            # if cf_enable and len(created_indexes):
+            #     logger.debug("Add index files to cf invalidate list: %s", created_indexes)
+            #     cf_invalidate_paths.extend(created_indexes)
         else:
             logger.info("Bypassing indexing\n")
 
diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py
index a206f697..00fa293a 100644
--- a/charon/pkgs/pkg_utils.py
+++ b/charon/pkgs/pkg_utils.py
@@ -87,11 +87,17 @@ def invalidate_cf_paths(
     logger.debug("Invalidating paths: %s", final_paths)
     if not domain:
         domain = cf_client.get_domain_by_bucket(bucket_name)
-    distr_id = cf_client.get_dist_id_by_domain(domain)
-    if distr_id:
-        result = cf_client.invalidate_paths(distr_id, final_paths)
-        if result:
-            logger.info(
-                "The CF invalidating request for metadata/indexing is sent, "
-                "request id %s, status is %s", result['Id'], result['Status']
-            )
+    if domain:
+        distr_id = cf_client.get_dist_id_by_domain(domain)
+        if distr_id:
+            result = cf_client.invalidate_paths(distr_id, final_paths)
+            if result:
+                logger.info(
+                    "The CF invalidating request for metadata/indexing is sent, "
+                    "request status as below:\n %s", result
+                )
+        else:
+            logger.error(
+                "CF 
invalidating will not be performed because domain not found for" + " bucket %s. ", bucket_name + ) diff --git a/tests/test_cf_reindex.py b/tests/test_cf_reindex.py index c8fc400a..42a6dbab 100644 --- a/tests/test_cf_reindex.py +++ b/tests/test_cf_reindex.py @@ -22,10 +22,12 @@ from tests.constants import INPUTS from moto import mock_aws import os +import pytest @mock_aws class CFReIndexTest(CFBasedTest): + @pytest.mark.skip(reason="Indexing CF invalidation is abandoned") def test_cf_maven_after_reindex(self): response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) self.assertIsNotNone(response) @@ -41,8 +43,7 @@ def test_cf_maven_after_reindex(self): re_index( (TEST_BUCKET, TEST_BUCKET, "ga", "", "maven.repository.redhat.com"), - "org/apache/httpcomponents/httpclient/", "maven", - cf_enable=True + "org/apache/httpcomponents/httpclient/", "maven" ) response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) @@ -51,6 +52,7 @@ def test_cf_maven_after_reindex(self): self.assertEqual(1, len(items)) self.assertEqual('completed', str.lower(items[0].get('Status'))) + @pytest.mark.skip(reason="Indexing CF invalidation is abandoned") def test_cf_npm_after_reindex(self): response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) self.assertIsNotNone(response) @@ -66,8 +68,7 @@ def test_cf_npm_after_reindex(self): re_index( (TEST_BUCKET, TEST_BUCKET, "", "", "npm.registry.redhat.com"), - "@babel/", "npm", - cf_enable=True + "@babel/", "npm" ) response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) diff --git a/tests/test_cfclient.py b/tests/test_cfclient.py index 610c454b..455af65c 100644 --- a/tests/test_cfclient.py +++ b/tests/test_cfclient.py @@ -48,10 +48,10 @@ def test_get_distribution_id(self): def test_invalidate_paths(self): dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") result = self.cf_client.invalidate_paths(dist_id, ["/*"]) - self.assertIsNotNone(result['Id']) - self.assertEqual('completed', str.lower(result['Status'])) + self.assertTrue(result[0]['Id']) + self.assertEqual('completed', str.lower(result[0]['Status'])) status = self.cf_client.invalidate_paths("noexists_id", ["/*"]) - self.assertIsNone(status) + self.assertFalse(status) @pytest.mark.skip(reason=""" Because current moto 5.0.3 has not implemented the get_invalidation(), @@ -60,6 +60,6 @@ def test_invalidate_paths(self): def test_check_invalidation(self): dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") result = self.cf_client.invalidate_paths(dist_id, ["/*"]) - invalidation = self.cf_client.check_invalidation(dist_id, result['Id']) + invalidation = self.cf_client.check_invalidation(dist_id, result[0]['Id']) self.assertIsNotNone(invalidation['Id']) - self.assertEqual('completed', str.lower(result['Status'])) + self.assertEqual('completed', str.lower(result[0]['Status'])) diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index 445b8e92..a137fb11 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -179,8 +179,7 @@ def test_re_index(self): ) re_index( (TEST_BUCKET, TEST_BUCKET, "", "", None), - commons_client_root, "maven", - cf_enable=True + commons_client_root, "maven" ) indedx_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) index_content = str(indedx_obj.get()["Body"].read(), "utf-8") diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py index f6745c3c..129f5278 100644 --- a/tests/test_npm_index.py +++ b/tests/test_npm_index.py @@ -224,8 +224,7 @@ 
def test_re_index(self): ) re_index( (TEST_BUCKET, TEST_BUCKET, prefix, "", None), - "@babel/", "npm", - cf_enable=True + "@babel/", "npm" ) index_obj = test_bucket.Object(prefixed_namespace_babel_index) index_content = str(index_obj.get()["Body"].read(), "utf-8") @@ -255,8 +254,7 @@ def test_re_index(self): ) re_index( (TEST_BUCKET, TEST_BUCKET, prefix, "", None), - "/", "npm", - cf_enable=True + "/", "npm" ) index_obj = test_bucket.Object(prefixed_root_index) index_content = str(index_obj.get()["Body"].read(), "utf-8") @@ -287,8 +285,7 @@ def test_re_index(self): ) re_index( (TEST_BUCKET, TEST_BUCKET, prefix, "", None), - metadata_path, "npm", - cf_enable=True + metadata_path, "npm" ) objs = list(test_bucket.objects.all()) actual_files = [obj.key for obj in objs] From a6ebcbb0f002befec487c57cfb758e3a82e834a5 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 28 Mar 2024 14:48:14 +0800 Subject: [PATCH 18/31] Change cf cmd name to cf-invalidate --- charon/cmd/__init__.py | 4 ++-- charon/cmd/cmd_cache.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py index 7a22f570..9f62b768 100644 --- a/charon/cmd/__init__.py +++ b/charon/cmd/__init__.py @@ -18,7 +18,7 @@ from charon.cmd.cmd_delete import delete from charon.cmd.cmd_index import index from charon.cmd.cmd_checksum import validate -from charon.cmd.cmd_cache import clear_cf +from charon.cmd.cmd_cache import cf_invalidate @group() @@ -34,4 +34,4 @@ def cli(): cli.add_command(delete) cli.add_command(index) cli.add_command(validate) -cli.add_command(clear_cf) +cli.add_command(cf_invalidate) diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py index e6cd61a4..348b1cb0 100644 --- a/charon/cmd/cmd_cache.py +++ b/charon/cmd/cmd_cache.py @@ -76,7 +76,7 @@ default=False ) @command() -def clear_cf( +def cf_invalidate( target: str, paths: List[str], path_file: str, From 626a346a1b27b4aa3c2264a83d01f9601da47f59 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 28 Mar 2024 15:25:16 +0800 Subject: [PATCH 19/31] Add command to do CF invalidation status check --- charon/cmd/__init__.py | 3 +- charon/cmd/cmd_cache.py | 118 +++++++++++++++++++++++++++++++++------ charon/cmd/internal.py | 4 +- charon/pkgs/pkg_utils.py | 7 ++- 4 files changed, 112 insertions(+), 20 deletions(-) diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py index 9f62b768..a2ccadaa 100644 --- a/charon/cmd/__init__.py +++ b/charon/cmd/__init__.py @@ -18,7 +18,7 @@ from charon.cmd.cmd_delete import delete from charon.cmd.cmd_index import index from charon.cmd.cmd_checksum import validate -from charon.cmd.cmd_cache import cf_invalidate +from charon.cmd.cmd_cache import cf_invalidate, cf_check @group() @@ -35,3 +35,4 @@ def cli(): cli.add_command(index) cli.add_command(validate) cli.add_command(cf_invalidate) +cli.add_command(cf_check) diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py index 348b1cb0..ed874723 100644 --- a/charon/cmd/cmd_cache.py +++ b/charon/cmd/cmd_cache.py @@ -18,8 +18,8 @@ from charon.cmd.internal import _decide_mode, _get_buckets from charon.cache import CFClient from charon.pkgs.pkg_utils import invalidate_cf_paths -from click import command, option -from typing import List +from click import command, option, argument +from typing import List, Tuple import traceback import logging @@ -34,9 +34,8 @@ "-t", "target", help=""" - The target to do the uploading, which will decide which s3 bucket - and what root path where all files will be uploaded to. 
-    Can accept more than one target.
+    The target to do the invalidating, which will decide the s3 bucket
+    and its related domain to get the distribution.
     """,
     required=True
 )
@@ -104,23 +103,110 @@ def cf_invalidate(
         for line in f.readlines():
             work_paths.append(str(line).strip())
 
+    use_wildcard = False
+    for path in work_paths:
+        if "*" in path:
+            use_wildcard = True
+            break
+
     try:
-        conf = get_config()
-        if not conf:
-            sys.exit(1)
+        (buckets, aws_profile) = _init_cmd(target)
 
-        aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
-        if not aws_profile:
-            logger.error("No AWS profile specified!")
-            sys.exit(1)
+        for b in buckets:
+            cf_client = CFClient(aws_profile=aws_profile)
+            # Per the aws official doc, if the paths contain wildcards, it is
+            # limited to 15 max items in one request. Otherwise it can
+            # be 3000
+            if use_wildcard:
+                invalidate_cf_paths(
+                    cf_client, b, work_paths
+                )
+            else:
+                invalidate_cf_paths(
+                    cf_client, b, work_paths, b, batch_size=3000
+                )
     except Exception:
         print(traceback.format_exc())
        sys.exit(2)
 
-        buckets = _get_buckets([target], conf)
 
+@argument(
+    "invalidation_id",
+    type=str
+)
+@option(
+    "--target",
+    "-t",
+    "target",
+    help="""
+    The target to do the invalidating, which will decide the s3 bucket
+    and its related domain to get the distribution.
+    """,
+    required=True
+)
+@option(
+    "--debug",
+    "-D",
+    "debug",
+    help="Debug mode, will print all debug logs for problem tracking.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--quiet",
+    "-q",
+    "quiet",
+    help="Quiet mode, will shrink most of the logs except warning and errors.",
+    is_flag=True,
+    default=False
+)
+@command()
+def cf_check(
+    invalidation_id: str,
+    target: str,
+    quiet: bool = False,
+    debug: bool = False
+):
+    """This command will check the invalidation status of the specified invalidation id.
+    """
+    _decide_mode(
+        f"cfcheck-{target}", "",
+        is_quiet=quiet, is_debug=debug
+    )
+    try:
+        (buckets, aws_profile) = _init_cmd(target)
+        for b in buckets:
+            cf_client = CFClient(aws_profile=aws_profile)
+            bucket_name = b[1]
+            domain = b[4]
+            if not domain:
+                domain = cf_client.get_domain_by_bucket(bucket_name)
+            if domain:
+                distr_id = cf_client.get_dist_id_by_domain(domain)
+                if distr_id:
+                    result = cf_client.check_invalidation(distr_id, invalidation_id)
+                    logger.info(
+                        "The status of invalidation %s is %s",
+                        invalidation_id, result
+                    )
+            else:
+                logger.error(
+                    "Can not check invalidation result for %s because domain not found"
+                    " for bucket %s. 
", invalidation_id, bucket_name + ) except Exception: print(traceback.format_exc()) sys.exit(2) + + +def _init_cmd(target: str) -> Tuple[List[Tuple[str, str, str, str, str]], str]: + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + return (_get_buckets([target], conf), aws_profile) diff --git a/charon/cmd/internal.py b/charon/cmd/internal.py index edc87c05..e901c8f4 100644 --- a/charon/cmd/internal.py +++ b/charon/cmd/internal.py @@ -28,7 +28,9 @@ logger = logging.getLogger(__name__) -def _get_buckets(targets: List[str], conf: CharonConfig) -> List[Tuple[str, str, str, str]]: +def _get_buckets( + targets: List[str], conf: CharonConfig +) -> List[Tuple[str, str, str, str, str]]: buckets = [] for target in targets: for bucket in conf.get_target(target): diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py index 00fa293a..ce0d20ff 100644 --- a/charon/pkgs/pkg_utils.py +++ b/charon/pkgs/pkg_utils.py @@ -66,7 +66,8 @@ def invalidate_cf_paths( cf_client: CFClient, bucket: Tuple[str, str, str, str, str], invalidate_paths: List[str], - root="/" + root="/", + batch_size=15 ): logger.info("Invalidating CF cache for %s", bucket[1]) bucket_name = bucket[1] @@ -90,7 +91,9 @@ def invalidate_cf_paths( if domain: distr_id = cf_client.get_dist_id_by_domain(domain) if distr_id: - result = cf_client.invalidate_paths(distr_id, final_paths) + result = cf_client.invalidate_paths( + distr_id, final_paths, batch_size + ) if result: logger.info( "The CF invalidating request for metadata/indexing is sent, " From 78e40bfe8df2067c254633e4b9059a88eb14aa37 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 28 Mar 2024 15:48:53 +0800 Subject: [PATCH 20/31] Refine the command * Change checksum command name to checksum-validate * Refine some help messages for the command --- charon/cmd/__init__.py | 4 ++-- charon/cmd/cmd_cache.py | 5 +++-- charon/cmd/cmd_checksum.py | 15 ++++++++------- charon/cmd/cmd_index.py | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py index a2ccadaa..b2cafd05 100644 --- a/charon/cmd/__init__.py +++ b/charon/cmd/__init__.py @@ -17,7 +17,7 @@ from charon.cmd.cmd_upload import upload from charon.cmd.cmd_delete import delete from charon.cmd.cmd_index import index -from charon.cmd.cmd_checksum import validate +from charon.cmd.cmd_checksum import checksum_validate from charon.cmd.cmd_cache import cf_invalidate, cf_check @@ -33,6 +33,6 @@ def cli(): cli.add_command(upload) cli.add_command(delete) cli.add_command(index) -cli.add_command(validate) +cli.add_command(checksum_validate) cli.add_command(cf_invalidate) cli.add_command(cf_check) diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py index ed874723..4aaedacb 100644 --- a/charon/cmd/cmd_cache.py +++ b/charon/cmd/cmd_cache.py @@ -82,7 +82,7 @@ def cf_invalidate( quiet: bool = False, debug: bool = False ): - """This command will do invalidating on AWS CloudFront for the specified paths. + """Do invalidating on AWS CloudFront for the specified paths. """ _decide_mode( f"cfclear-{target}", "", @@ -167,7 +167,8 @@ def cf_check( quiet: bool = False, debug: bool = False ): - """This command will check the invalidation status of the specified invalidation id. + """Check the invalidation status of the specified invalidation id + for AWS CloudFront. 
""" _decide_mode( f"cfcheck-{target}", "", diff --git a/charon/cmd/cmd_checksum.py b/charon/cmd/cmd_checksum.py index b06c01ce..1591df77 100644 --- a/charon/cmd/cmd_checksum.py +++ b/charon/cmd/cmd_checksum.py @@ -99,7 +99,7 @@ required=True ) @command() -def validate( +def checksum_validate( path: str, target: str, includes: List[str], @@ -109,12 +109,13 @@ def validate( quiet: bool = False, debug: bool = False ): - """This command will validate the checksum of the specified path for the - maven repository. It will calculate the sha1 checksum of all artifact - files in the specified path and compare with the companied .sha1 files - of the artifacts, then record all mismatched artifacts in the report file. - If some artifact files misses the companied .sha1 files, they will also - be recorded. + """ + Validate the checksum of the specified path for themaven repository. + It will calculate the sha1 checksum of all artifact files in the + specified path and compare with the companied .sha1 files of the + artifacts, then record all mismatched artifacts in the report file. + If some artifact files misses the companied .sha1 files, they will also + be recorded. """ _decide_mode( "checksum-{}".format(target), path.replace("/", "_"), diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py index e9a3e18c..418b05e6 100644 --- a/charon/cmd/cmd_index.py +++ b/charon/cmd/cmd_index.py @@ -65,7 +65,7 @@ def index( quiet: bool = False, dryrun: bool = False ): - """This command will re-generate the index.html files for the + """Generate or refresh the index.html files for the specified path. """ _decide_mode( From 8317e4be7d0e13d023ec04685a4d679a7062b83e Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 28 Mar 2024 17:26:07 +0800 Subject: [PATCH 21/31] Fix typo for domain check --- charon/cache.py | 10 +++++----- charon/cmd/cmd_cache.py | 2 +- charon/schemas/charon.json | 4 ++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/charon/cache.py b/charon/cache.py index 0112d5fe..1dc84e5c 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -12,10 +12,10 @@ DEFAULT_BUCKET_TO_DOMAIN = { "prod-maven-ga": "maven.repository.redhat.com", "prod-maven-ea": "maven.repository.redhat.com", - "stage-maven-ga": "maven.strage.repository.redhat.com", - "stage-maven-ea": "maven.strage.repository.redhat.com", - "prod-npm": "npm.repository.redhat.com", - "stage-npm": "npm.stage.repository.redhat.com" + "stage-maven-ga": "maven.stage.repository.redhat.com", + "stage-maven-ea": "maven.stage.repository.redhat.com", + "prod-npm": "npm.registry.redhat.com", + "stage-npm": "npm.stage.registry.redhat.com" } @@ -117,7 +117,7 @@ def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict: invalidation = response.get('Invalidation', {}) return { 'Id': invalidation.get('Id', None), - 'CreateTime': invalidation.get('CreateTime', None), + 'CreateTime': str(invalidation.get('CreateTime', None)), 'Status': invalidation.get('Status', None) } except Exception as err: diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py index 4aaedacb..e63bf222 100644 --- a/charon/cmd/cmd_cache.py +++ b/charon/cmd/cmd_cache.py @@ -123,7 +123,7 @@ def cf_invalidate( ) else: invalidate_cf_paths( - cf_client, b, work_paths, b, batch_size=3000 + cf_client, b, work_paths, batch_size=3000 ) except Exception: print(traceback.format_exc()) diff --git a/charon/schemas/charon.json b/charon/schemas/charon.json index d7850fad..f6a931d1 100644 --- a/charon/schemas/charon.json +++ b/charon/schemas/charon.json @@ -52,6 +52,10 @@ 
"registry": { "description": "npm registry", "type": "string" + }, + "domain": { + "description": "domain name for bucket", + "type": "string" } }, "required": [ From c7cdb9e1bf6a1d0841cf5cd69139c05c33635908 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Mon, 1 Apr 2024 15:36:15 +0800 Subject: [PATCH 22/31] Some updates * Add switch to disable file log handler * Use non-wildcard paths for invalidation * Fix a targets check in command upload and delete --- charon/cache.py | 11 ++++++++--- charon/cmd/cmd_cache.py | 7 +++++-- charon/cmd/cmd_delete.py | 6 ++++++ charon/cmd/cmd_index.py | 2 +- charon/cmd/cmd_upload.py | 7 +++++++ charon/cmd/internal.py | 17 +++++++++++++---- charon/pkgs/pkg_utils.py | 17 +++++++++++++---- charon/utils/logs.py | 8 ++++++-- 8 files changed, 59 insertions(+), 16 deletions(-) diff --git a/charon/cache.py b/charon/cache.py index 1dc84e5c..74406e4a 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -8,6 +8,8 @@ logger = logging.getLogger(__name__) ENDPOINT_ENV = "aws_endpoint_url" +INVALIDATION_BATCH_DEFAULT = 3000 +INVALIDATION_BATCH_WILDCARD = 15 DEFAULT_BUCKET_TO_DOMAIN = { "prod-maven-ga": "maven.repository.redhat.com", @@ -61,7 +63,7 @@ def __get_endpoint(self, extra_conf) -> str: def invalidate_paths( self, distr_id: str, paths: List[str], - batch_size: int = 15 + batch_size=INVALIDATION_BATCH_DEFAULT ) -> List[Dict[str, str]]: """Send a invalidating requests for the paths in distribution to CloudFront. This will invalidate the paths in the distribution to enforce the refreshment @@ -71,8 +73,7 @@ def invalidate_paths( get_dist_id_by_domain(domain) function * Can specify the invalidating paths through paths param. * Batch size is the number of paths to be invalidated in one request. - Because paths contains wildcard(*), so the default value is 15 which - is the maximum number in official doc: + The default value is 3000 which is the maximum number in official doc: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html#InvalidationLimits """ logger.debug("[CloudFront] Creating invalidation for paths: %s", paths) @@ -83,6 +84,10 @@ def invalidate_paths( results = [] for batch_paths in real_paths: caller_ref = str(uuid.uuid4()) + logger.debug( + "Processing invalidation for batch with ref %s, size: %s", + caller_ref, len(batch_paths) + ) try: response = self.__client.create_invalidation( DistributionId=distr_id, diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py index e63bf222..95aae658 100644 --- a/charon/cmd/cmd_cache.py +++ b/charon/cmd/cmd_cache.py @@ -86,7 +86,7 @@ def cf_invalidate( """ _decide_mode( f"cfclear-{target}", "", - is_quiet=quiet, is_debug=debug + is_quiet=quiet, is_debug=debug, use_log_file=False ) if not paths and not path_file: logger.error( @@ -172,10 +172,13 @@ def cf_check( """ _decide_mode( f"cfcheck-{target}", "", - is_quiet=quiet, is_debug=debug + is_quiet=quiet, is_debug=debug, use_log_file=False ) try: (buckets, aws_profile) = _init_cmd(target) + if not buckets: + sys.exit(1) + for b in buckets: cf_client = CFClient(aws_profile=aws_profile) bucket_name = b[1] diff --git a/charon/cmd/cmd_delete.py b/charon/cmd/cmd_delete.py index dda57d2e..d4752f26 100644 --- a/charon/cmd/cmd_delete.py +++ b/charon/cmd/cmd_delete.py @@ -150,6 +150,12 @@ def delete( product_key = f"{product}-{version}" manifest_bucket_name = conf.get_manifest_bucket() buckets = _get_buckets(targets, conf) + if not buckets: + logger.error( + "The targets %s can not be found! 
Please check" + " your charon configuration to confirm the targets" + " are set correctly.", targets + ) if npm_archive_type != NpmArchiveType.NOT_NPM: logger.info("This is a npm archive") tmp_dir, succeeded = handle_npm_del( diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py index 418b05e6..e27c5033 100644 --- a/charon/cmd/cmd_index.py +++ b/charon/cmd/cmd_index.py @@ -70,7 +70,7 @@ def index( """ _decide_mode( "index-{}".format(target), path.replace("/", "_"), - is_quiet=quiet, is_debug=debug + is_quiet=quiet, is_debug=debug, use_log_file=False ) try: conf = get_config() diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py index 2fe19901..55696c2e 100644 --- a/charon/cmd/cmd_upload.py +++ b/charon/cmd/cmd_upload.py @@ -169,6 +169,13 @@ def upload( product_key = f"{product}-{version}" manifest_bucket_name = conf.get_manifest_bucket() buckets = _get_buckets(targets, conf) + if not buckets: + logger.error( + "The targets %s can not be found! Please check" + " your charon configuration to confirm the targets" + " are set correctly.", targets + ) + sys.exit(1) if npm_archive_type != NpmArchiveType.NOT_NPM: logger.info("This is a npm archive") tmp_dir, succeeded = handle_npm_uploading( diff --git a/charon/cmd/internal.py b/charon/cmd/internal.py index e901c8f4..11c92a0c 100644 --- a/charon/cmd/internal.py +++ b/charon/cmd/internal.py @@ -90,14 +90,23 @@ def _validate_prod_key(product: str, version: str) -> bool: return True -def _decide_mode(product: str, version: str, is_quiet: bool, is_debug: bool): +def _decide_mode( + product: str, version: str, is_quiet: bool, + is_debug: bool, use_log_file=True +): if is_quiet: logger.info("Quiet mode enabled, " "will only give warning and error logs.") - set_logging(product, version, level=logging.WARNING) + set_logging( + product, version, level=logging.WARNING, use_log_file=use_log_file + ) elif is_debug: logger.info("Debug mode enabled, " "will give all debug logs for tracing.") - set_logging(product, version, level=logging.DEBUG) + set_logging( + product, version, level=logging.DEBUG, use_log_file=use_log_file + ) else: - set_logging(product, version, level=logging.INFO) + set_logging( + product, version, level=logging.INFO, use_log_file=use_log_file + ) diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py index ce0d20ff..88c31257 100644 --- a/charon/pkgs/pkg_utils.py +++ b/charon/pkgs/pkg_utils.py @@ -1,5 +1,9 @@ from typing import List, Tuple -from charon.cache import CFClient +from charon.cache import ( + CFClient, + INVALIDATION_BATCH_DEFAULT, + INVALIDATION_BATCH_WILDCARD +) import logging import os @@ -67,7 +71,7 @@ def invalidate_cf_paths( bucket: Tuple[str, str, str, str, str], invalidate_paths: List[str], root="/", - batch_size=15 + batch_size=INVALIDATION_BATCH_DEFAULT ): logger.info("Invalidating CF cache for %s", bucket[1]) bucket_name = bucket[1] @@ -85,14 +89,19 @@ def invalidate_cf_paths( if prefix: path = os.path.join(prefix, path) final_paths.append(path) - logger.debug("Invalidating paths: %s", final_paths) + logger.debug("Invalidating paths: %s, size: %s", final_paths, len(final_paths)) if not domain: domain = cf_client.get_domain_by_bucket(bucket_name) if domain: distr_id = cf_client.get_dist_id_by_domain(domain) if distr_id: + real_batch_size = batch_size + for path in final_paths: + if path.endswith('*'): + real_batch_size = INVALIDATION_BATCH_WILDCARD + break result = cf_client.invalidate_paths( - distr_id, final_paths, batch_size + distr_id, final_paths, real_batch_size ) if result: 
logger.info( diff --git a/charon/utils/logs.py b/charon/utils/logs.py index ed8469f9..9e273640 100644 --- a/charon/utils/logs.py +++ b/charon/utils/logs.py @@ -45,7 +45,10 @@ def __del__(self): pass -def set_logging(product: str, version: str, name="charon", level=logging.DEBUG, handler=None): +def set_logging( + product: str, version: str, name="charon", + level=logging.DEBUG, handler=None, use_log_file=True +): # create logger logger = logging.getLogger(name) for hdlr in list(logger.handlers): # make a copy so it doesn't change @@ -69,7 +72,8 @@ def set_logging(product: str, version: str, name="charon", level=logging.DEBUG, # add ch to logger logger.addHandler(handler) - set_log_file_handler(product, version, logger) + if use_log_file: + set_log_file_handler(product, version, logger) logger = logging.getLogger('charon') for hdlr in list(logger.handlers): # make a copy so it doesn't change From 6566aacbd6e9227b5e8766d2ea6f51fb74b08cd2 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 3 Apr 2024 09:13:25 +0800 Subject: [PATCH 23/31] Fix: re-index wrong usage of the type --- charon/pkgs/indexing.py | 4 ++-- tests/test_cf_reindex.py | 4 ++-- tests/test_maven_index.py | 2 +- tests/test_npm_index.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index ee42a83c..42faa49c 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -270,8 +270,8 @@ def re_index( ): """Refresh the index.html for the specified folder in the bucket. """ - bucket_name = bucket[1] - prefix = bucket[2] + bucket_name = bucket.get("bucket") + prefix = bucket.get("prefix") s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) real_prefix = prefix if prefix.strip() != "/" else "" s3_folder = os.path.join(real_prefix, path) diff --git a/tests/test_cf_reindex.py b/tests/test_cf_reindex.py index 42a6dbab..0e986af6 100644 --- a/tests/test_cf_reindex.py +++ b/tests/test_cf_reindex.py @@ -42,7 +42,7 @@ def test_cf_maven_after_reindex(self): ) re_index( - (TEST_BUCKET, TEST_BUCKET, "ga", "", "maven.repository.redhat.com"), + {"bucket": TEST_BUCKET, "prefix": "ga"}, "org/apache/httpcomponents/httpclient/", "maven" ) @@ -67,7 +67,7 @@ def test_cf_npm_after_reindex(self): ) re_index( - (TEST_BUCKET, TEST_BUCKET, "", "", "npm.registry.redhat.com"), + {"bucket": TEST_BUCKET, "prefix": ""}, "@babel/", "npm" ) diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index a137fb11..4952c5d7 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -178,7 +178,7 @@ def test_re_index(self): Body="Just a test content" ) re_index( - (TEST_BUCKET, TEST_BUCKET, "", "", None), + {"bucket": TEST_BUCKET, "prefix": ""}, commons_client_root, "maven" ) indedx_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py index 129f5278..b435f765 100644 --- a/tests/test_npm_index.py +++ b/tests/test_npm_index.py @@ -223,7 +223,7 @@ def test_re_index(self): Key=test_file_path, Body="test content" ) re_index( - (TEST_BUCKET, TEST_BUCKET, prefix, "", None), + {"bucket": TEST_BUCKET, "prefix": prefix}, "@babel/", "npm" ) index_obj = test_bucket.Object(prefixed_namespace_babel_index) @@ -253,7 +253,7 @@ def test_re_index(self): Key=test_file_path, Body="test content" ) re_index( - (TEST_BUCKET, TEST_BUCKET, prefix, "", None), + {"bucket": TEST_BUCKET, "prefix": prefix}, "/", "npm" ) index_obj = test_bucket.Object(prefixed_root_index) @@ -284,7 +284,7 @@ def test_re_index(self): 
Key=test_file_path, Body="test content" ) re_index( - (TEST_BUCKET, TEST_BUCKET, prefix, "", None), + {"bucket": TEST_BUCKET, "prefix": prefix}, metadata_path, "npm" ) objs = list(test_bucket.objects.all()) From 086cb8157e3804d1a1c37c9e7e12048d48a0c8e7 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 3 Apr 2024 09:54:09 +0800 Subject: [PATCH 24/31] Fix two issues * HTML indexing: remove first redundant slash for list items * Add some bucket to domain mapping for CF --- charon/cache.py | 8 +++++++- charon/pkgs/indexing.py | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/charon/cache.py b/charon/cache.py index 74406e4a..86841ead 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -12,12 +12,18 @@ INVALIDATION_BATCH_WILDCARD = 15 DEFAULT_BUCKET_TO_DOMAIN = { + "prod-ga": "maven.repository.redhat.com", "prod-maven-ga": "maven.repository.redhat.com", + "prod-ea": "maven.repository.redhat.com", "prod-maven-ea": "maven.repository.redhat.com", + "stage-ga": "maven.stage.repository.redhat.com", "stage-maven-ga": "maven.stage.repository.redhat.com", + "stage-ea": "maven.stage.repository.redhat.com", "stage-maven-ea": "maven.stage.repository.redhat.com", "prod-npm": "npm.registry.redhat.com", - "stage-npm": "npm.stage.registry.redhat.com" + "prod-npm-npmjs": "npm.registry.redhat.com", + "stage-npm": "npm.stage.registry.redhat.com", + "stage-npm-npmjs": "npm.stage.registry.redhat.com" } diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index 42faa49c..db7a8fb9 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -170,6 +170,10 @@ def __to_html_content(package_type: str, contents: List[str], folder: str) -> st # index.html does not need to be included in html content. if not c.endswith("index.html"): items.append(c[len(folder):]) + temp_items = [] + for item in items: + temp_items.append(item[1:] if item.startswith("/") else item) + items = temp_items else: items.extend(contents) items = __sort_index_items(items) From da9a6e441b3d0f7645869f626aca61397d7cef27 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 3 Apr 2024 22:12:28 +0800 Subject: [PATCH 25/31] Wait for each invalidation request's completion --- charon/cache.py | 29 ++++++++++++++++++++++++++--- tests/test_cfclient.py | 11 ++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/charon/cache.py b/charon/cache.py index 86841ead..17ea22ac 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -4,6 +4,7 @@ import os import logging import uuid +import time logger = logging.getLogger(__name__) @@ -82,13 +83,33 @@ def invalidate_paths( The default value is 3000 which is the maximum number in official doc: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html#InvalidationLimits """ - logger.debug("[CloudFront] Creating invalidation for paths: %s", paths) real_paths = [paths] # Split paths into batches by batch_size if batch_size: real_paths = [paths[i:i + batch_size] for i in range(0, len(paths), batch_size)] results = [] + current_invalidation = {} for batch_paths in real_paths: + while (current_invalidation and + 'InProgress' == current_invalidation.get('Status', '')): + time.sleep(5) + try: + result = self.check_invalidation(distr_id, current_invalidation.get('Id')) + if result: + current_invalidation = { + 'Id': result.get('Id', None), + 'Status': result.get('Status', None) + } + logger.debug("Check invalidation: %s", current_invalidation) + except Exception as err: + logger.warning( + "[CloudFront] Error occurred while checking invalidation 
status during"
+                    " creating invalidation, invalidation: %s, error: %s",
+                    current_invalidation, err
+                )
+                break
+            if current_invalidation:
+                results.append(current_invalidation)
             caller_ref = str(uuid.uuid4())
             logger.debug(
                 "Processing invalidation for batch with ref %s, size: %s",
                 caller_ref, len(batch_paths)
             )
@@ -107,15 +128,17 @@
                 )
                 if response:
                     invalidation = response.get('Invalidation', {})
-                    results.append({
+                    current_invalidation = {
                         'Id': invalidation.get('Id', None),
                         'Status': invalidation.get('Status', None)
-                    })
+                    }
             except Exception as err:
                 logger.error(
                     "[CloudFront] Error occurred while creating invalidation"
                     " for paths %s, error: %s", batch_paths, err
                 )
+            if current_invalidation:
+                results.append(current_invalidation)
         return results
 
     def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict:
diff --git a/tests/test_cfclient.py b/tests/test_cfclient.py
index 455af65c..8a38a68e 100644
--- a/tests/test_cfclient.py
+++ b/tests/test_cfclient.py
@@ -45,14 +45,23 @@ def test_get_distribution_id(self):
         dist_id = self.cf_client.get_dist_id_by_domain("notexists.redhat.com")
         self.assertIsNone(dist_id)
 
-    def test_invalidate_paths(self):
+    def test_invalidate_paths_single(self):
         dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com")
         result = self.cf_client.invalidate_paths(dist_id, ["/*"])
+        self.assertEqual(len(result), 1)
         self.assertTrue(result[0]['Id'])
         self.assertEqual('completed', str.lower(result[0]['Status']))
         status = self.cf_client.invalidate_paths("noexists_id", ["/*"])
         self.assertFalse(status)
 
+    def test_invalidate_paths_multi(self):
+        dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com")
+        result = self.cf_client.invalidate_paths(dist_id, ["/1", "/2", "/3"], batch_size=1)
+        self.assertEqual(len(result), 3)
+        for r in result:
+            self.assertTrue(r['Id'])
+            self.assertEqual('completed', str.lower(r['Status']))
+
     @pytest.mark.skip(reason="""
     Because current moto 5.0.3 has not implemented the get_invalidation(),
     this test will fail. Will enable it when it is implemented in a future moto

From 83aa6b4125909353fa4c70ce21ddbc2097b8bfc7 Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Wed, 3 Apr 2024 16:23:11 +0800
Subject: [PATCH 26/31] Fix wrong picking of the npm package.json

Sometimes the npm tarball will contain more than one package.json;
we should always use the root package.json as the first product
metadata.
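As a rough illustration of that selection rule (a hypothetical sketch assuming the
conventional npm tarball layout; the patch's actual implementation lives in
charon/utils/archive.py below):

    import tarfile

    def pick_package_json(tgz_path: str, pkg_root: str = "package") -> str:
        # Prefer the package.json at the archive root (conventionally
        # "package/package.json"); only fall back to the first one found.
        with tarfile.open(tgz_path) as tgz:
            try:
                member = tgz.getmember(pkg_root + "/package.json")
                if member.isfile():
                    return member.name
            except KeyError:
                pass
            for member in tgz:
                if member.isfile() and member.name.endswith("package.json"):
                    return member.name
        raise ValueError("no package.json found in " + tgz_path)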
---
 charon/pkgs/npm.py                            |  28 +++--
 charon/utils/archive.py                       |  69 +++++++----
 tests/__init__.py                             |  20 ++++
 tests/input/code-frame-7.14.5-multi-pkgs.tgz  | Bin 0 -> 3040 bytes
 tests/input/code-frame-7.14.5-no-root-pkg.tgz | Bin 0 -> 3000 bytes
 tests/test_npm_upload_diff_pkgs.py            | 109 ++++++++++++++++++
 6 files changed, 193 insertions(+), 33 deletions(-)
 create mode 100644 tests/input/code-frame-7.14.5-multi-pkgs.tgz
 create mode 100644 tests/input/code-frame-7.14.5-no-root-pkg.tgz
 create mode 100644 tests/test_npm_upload_diff_pkgs.py

diff --git a/charon/pkgs/npm.py b/charon/pkgs/npm.py
index 894998a6..3c183aac 100644
--- a/charon/pkgs/npm.py
+++ b/charon/pkgs/npm.py
@@ -78,9 +78,10 @@ def default(self, o):
 
 def handle_npm_uploading(
     tarball_path: str, product: str,
-    buckets: List[Tuple[str, str, str, str]] = None,
+    buckets: List[Tuple[str, str, str, str]],
     aws_profile=None,
     dir_=None,
+    root_path="package",
     do_index=True,
     gen_sign=False,
     cf_enable=False,
@@ -115,7 +116,7 @@
         prefix = remove_prefix(bucket[2], "/")
         registry = bucket[3]
         target_dir, valid_paths, package_metadata = _scan_metadata_paths_from_archive(
-            tarball_path, registry, prod=product, dir__=root_dir
+            tarball_path, registry, prod=product, dir__=dir_, pkg_root=root_path
         )
         if not os.path.isdir(target_dir):
             logger.error("Error: the extracted target_dir path %s does not exist.", target_dir)
@@ -255,9 +256,10 @@
 def handle_npm_del(
     tarball_path: str, product: str,
-    buckets: List[Tuple[str, str, str, str]] = None,
+    buckets: List[Tuple[str, str, str, str]],
     aws_profile=None,
     dir_=None,
+    root_path="package",
     do_index=True,
     cf_enable=False,
     dry_run=False,
@@ -276,7 +278,7 @@
     Returns the directory used for archive processing and if the rollback is successful
     """
     target_dir, package_name_path, valid_paths = _scan_paths_from_archive(
-        tarball_path, prod=product, dir__=dir_
+        tarball_path, prod=product, dir__=dir_, pkg_root=root_path
     )
 
     valid_dirs = __get_path_tree(valid_paths, target_dir)
@@ -474,11 +476,15 @@
     return meta_files
 
 
-def _scan_metadata_paths_from_archive(path: str, registry: str, prod="", dir__=None) ->\
-        Tuple[str, list, NPMPackageMetadata]:
+def _scan_metadata_paths_from_archive(
+    path: str, registry: str, prod="", dir__=None, pkg_root="package"
+) -> Tuple[str, list, NPMPackageMetadata]:
     tmp_root = mkdtemp(prefix=f"npm-charon-{prod}-", dir=dir__)
     try:
-        _, valid_paths = extract_npm_tarball(path, tmp_root, True, registry)
+        _, valid_paths = extract_npm_tarball(
+            path=path, target_dir=tmp_root, is_for_upload=True,
+            pkg_root=pkg_root, registry=registry
+        )
         if len(valid_paths) > 1:
             version = _scan_for_version(valid_paths[1])
             package = NPMPackageMetadata(version, True)
         sys.exit(1)
 
 
-def _scan_paths_from_archive(path: str, prod="", dir__=None) -> Tuple[str, str, list]:
+def _scan_paths_from_archive(
+    path: str, prod="", dir__=None, pkg_root="package"
+) -> Tuple[str, str, list]:
     tmp_root = mkdtemp(prefix=f"npm-charon-{prod}-", dir=dir__)
-    package_name_path, valid_paths = extract_npm_tarball(path, tmp_root, False)
+    package_name_path, valid_paths = extract_npm_tarball(
+        path=path, target_dir=tmp_root, is_for_upload=False, pkg_root=pkg_root
+    )
 
     return tmp_root, package_name_path, valid_paths
 
diff --git a/charon/utils/archive.py b/charon/utils/archive.py
index 5bcb2777..eca56ebe 100644
--- a/charon/utils/archive.py
+++ 
b/charon/utils/archive.py @@ -46,8 +46,9 @@ def extract_zip_with_files(zf: ZipFile, target_dir: str, file_suffix: str, debug zf.extractall(target_dir, members=filtered) -def extract_npm_tarball(path: str, target_dir: str, is_for_upload: bool, registry=DEFAULT_REGISTRY)\ - -> Tuple[str, list]: +def extract_npm_tarball( + path: str, target_dir: str, is_for_upload: bool, pkg_root="package", registry=DEFAULT_REGISTRY +) -> Tuple[str, list]: """ Extract npm tarball will relocate the tgz file and metadata files. * Locate tar path ( e.g.: jquery/-/jquery-7.6.1.tgz or @types/jquery/-/jquery-2.2.3.tgz). * Locate version metadata path (e.g.: jquery/7.6.1 or @types/jquery/2.2.3). @@ -56,30 +57,50 @@ def extract_npm_tarball(path: str, target_dir: str, is_for_upload: bool, registr valid_paths = [] package_name_path = str() tgz = tarfile.open(path) + pkg_file = None + root_pkg_file_exists = True + try: + root_pkg_path = os.path.join(pkg_root, "package.json") + logger.debug(root_pkg_path) + pkg_file = tgz.getmember(root_pkg_path) + root_pkg_file_exists = pkg_file.isfile() + except KeyError: + root_pkg_file_exists = False + pkg_file = None tgz.extractall() - for f in tgz: - if f.name.endswith("package.json"): - version_data, parse_paths = __parse_npm_package_version_paths(f.path) - package_name_path = parse_paths[0] - os.makedirs(os.path.join(target_dir, parse_paths[0])) - tarball_parent_path = os.path.join(target_dir, parse_paths[0], "-") - valid_paths.append(os.path.join(tarball_parent_path, _get_tgz_name(path))) - version_metadata_parent_path = os.path.join( - target_dir, parse_paths[0], parse_paths[1] + if not root_pkg_file_exists: + logger.info( + "Root package.json is not found for archive: %s, will search others", + path + ) + for f in tgz: + if f.name.endswith("package.json"): + logger.info("Found package.json as %s", f.path) + pkg_file = f + break + if pkg_file: + version_data, parse_paths = __parse_npm_package_version_paths(pkg_file.path) + package_name_path = parse_paths[0] + os.makedirs(os.path.join(target_dir, parse_paths[0])) + tarball_parent_path = os.path.join(target_dir, parse_paths[0], "-") + valid_paths.append(os.path.join(tarball_parent_path, _get_tgz_name(path))) + version_metadata_parent_path = os.path.join( + target_dir, parse_paths[0], parse_paths[1] + ) + valid_paths.append(os.path.join(version_metadata_parent_path, "package.json")) + + if is_for_upload: + tgz_relative_path = "/".join([parse_paths[0], "-", _get_tgz_name(path)]) + __write_npm_version_dist( + path, pkg_file.path, version_data, tgz_relative_path, registry ) - valid_paths.append(os.path.join(version_metadata_parent_path, "package.json")) - - if is_for_upload: - tgz_relative_path = "/".join([parse_paths[0], "-", _get_tgz_name(path)]) - __write_npm_version_dist(path, f.path, version_data, tgz_relative_path, registry) - - os.makedirs(tarball_parent_path) - target = os.path.join(tarball_parent_path, os.path.basename(path)) - shutil.copyfile(path, target) - os.makedirs(version_metadata_parent_path) - target = os.path.join(version_metadata_parent_path, os.path.basename(f.path)) - shutil.copyfile(f.path, target) - break + + os.makedirs(tarball_parent_path) + target = os.path.join(tarball_parent_path, os.path.basename(path)) + shutil.copyfile(path, target) + os.makedirs(version_metadata_parent_path) + target = os.path.join(version_metadata_parent_path, os.path.basename(pkg_file.path)) + shutil.copyfile(pkg_file.path, target) return package_name_path, valid_paths diff --git a/tests/__init__.py b/tests/__init__.py index 
e69de29b..e3cdc8ed 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,20 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import logging + +logging.basicConfig(level=logging.INFO) +logging.getLogger("charon").setLevel(logging.DEBUG) diff --git a/tests/input/code-frame-7.14.5-multi-pkgs.tgz b/tests/input/code-frame-7.14.5-multi-pkgs.tgz new file mode 100644 index 0000000000000000000000000000000000000000..b9a284a4b81263b7f64e5b92ae16891bf0a52f46 GIT binary patch literal 3040 zcmV<63m^0!iwFP!000001MM1XbK5pDpZO~gwdqI`nR-~Z8|$1_k?5FHJv@?JU&eN0 zii9Ml6sZzq9ABdUe!BqplpUuxaXQy>Cox4}-wW(6NUja@cVj_ohO=H8wI`1Pb%2At zJ-jv!_Uh?t1Wy{R{d%jl+uGmTeNt~U_V*5+kiExICkr7944;rE3&;K}-b3^Mr?Ae% zU*4(jnCE^y85rRGR_ng_H(IS`F8(dJeM0JAPlfy8A0GdY3Lz!eSkclUDg8V%X4I*f zo<(=&J`PrK`aSiT?YW3?pf+0So*1?$Gktp<$f?)VrM?kRLVe%!83~p~KupgK4BI6J zL8gQ=6*Ftkb^{W4gn6NFQsQ`~fvlJ+xEo=x^n7GIT~hK26(dt>tVnqo1Z#F!t8ryj zi@vV~FkTU4OK(Nju!NCt8`1JiR-s9R7X1K8BU%?=vqfFgrYzA?uC1lLSUT`OKTc#h@o*%F(DZPD5*}$_x2dX8{ zo9_)Lq=zK%L#k{lZAGH0STv5YW^_F#T)B)Kk^E4C4C0K?2>|D<{0XYOhp@bj%LUB& z^p6nARC&9UfLz6)0fEW`WsJ8G=Z5dv?qag^Zd{<98xEs@H4j}Ar^$i_JvwJ)UOFn= z6rTp6@5*Yh2!nw7hlJ0o3!mPKqOvml-;;5lTI$SmEI9{78}?Fu66;7C3W);E5DUmq zJMZg54V0=~`j_sfi%S=-VYA-+k^g;hRm&+l;4=GIt;*0QRb|UmCEhw zXXd@9z@V9kWUr{>-N4u`3Wh6MK|XyVA2;osC4dd{kth|Zb0}gdrICb@SsMf&VNH$; z48}YpLc#&p6lV;R10z_fE8{~sQEi8mQ${`~jY^f&D-j`Zi^*)aEMlfQ&~^mNYOL1t zluvLO6JpOvO2*OA5rn8BeHtlKZ}a9vV#g+CaE1Jg*qBP#j*@%!d@fBFCdHx?AFY!0 z2dEe(DL7&fiJW^r41(=8*AXuNI*O&h?epgmW?F?|xSCNPC~WXXO1wlwK~Gb`PFRq% zW)}&oE8v9*sV(s~gu?FUjBM_tdYXq^P25j??xxsnDW2e&##PFV56QnsXqs5 zQiu|=WKh+*yz=Jpq5h+?+49^@a@NYYozat2S0GT1ag#w8<1Q-X=%w_ebZgqwpNK>+ zN7#pwt}Ps!Nsg#Vb+I%z94GCtgT}1h*0JHuryfGImNXZ1Ar%CZ=Q_F zdc`+T$>sa8l<*M)1LMQFZSjph;nrf!sHArH)}n~Aeb#xoD9UE7l3H=1B{_`U7F9#> z^)WYyv1_zEDMU zsxnH6DwkHIM}=RiSrhMOCQtzXw-UFC8bLXycI8Ta=Rr?bDpg`v%Z?};Q`oLNhsu*d z+T|JZo*$ul5bQe@62#RktHe#4xKPO-GVfh{*1DIa%9l8^A`+BL733wULmI$MfTNph z*y*L%t-J=NJab3gl&T?vFOaQ;bDb)t6)FAFEhWu#+HDw-r%7&&8&{c~uD|~X zamYqJ&YjrcgGOm~n@jimkDOSj5x3;2G*`)1qv&kNXi(;q+))`fNlZT5mQNv*avvR$ z(`7#H#Hx76i+<(`KX0V(IU+|&-hYBlS|XV&q(folp5nO>GDfO-G~nvYJdmuam01Lqq$Z~sT{cWJ$yW_(v=-ZC6kSH$7DI)K&9T6dH1Z-s2PdT zHGHW7#=7)|@ZaV9M$99tu2@rd32mpucn(!>V7$D|x{_MjqBC*g0v4y^RKV-01|oW& zf(sz&P!3ZTiiZISc~npMV}T@=i3;tcRsDR=$pyzP&E~Iu##O-m5Iz6x>s@U)(H=#{ z1Mh#Ejb=0V{I{{U*D9X>ehV1rQ_>f2CKaXYt#5sPt}2@q(yTXHJ8P^|Za^k<2Ca>4 zxMX1dLf!w^l;?OaSVA_!x4gbR3j@z*igHT*mCf*tF(!VPr?XqK@C_G|e-)}4r3B6| zPzQa?$xsap_m-@otpWmXhOdhuMJo8lczuhQ!4lXoZyrFF6jGtyc&5#ZiRGE$3KSH% zZf-kpYAJ(KN@*e~u+IS@7Bw6=6#zF%qA7A?2XKB3AghT-;5=>FuIYpp>JUvkHdGTK z0m-?G7&3)wRpoj@3*=e$9Iup{CEx5;70YJAH!#Tf+g$W@744`&MPbx&kb@0+b6XSb zI1Wfr*67ecx`@lYS$eCiCE%htBxVpPDy)Ss*11G{a*JFLeeOAqcZ0PWLT1_gefy!J z;2U;>Kg2ZS72tXSs47&z0IZX!$|(k&j6-Hr5e@^n_@X~$j*sGDd%3V~9DG*vOgh6inJ{?Uo5Z_aVqoF=L9)lEZpbe)g zNCiyA#}jP=(iQ*tuu_cR!Mr73;A^Qx~2QD9WJ-_ZwE z((4R5uQg6J0xn~MXi~fL6O9jnybk>9PW91H0iSe7!|51qt6=4L8k3&u6Rk=*V|{`S zIUbLIJ-QPpMw|)I4K=|9T}?6(0TjGHn`ntHJ*@)=_SX|6P9v*-^Y`U{H;C&0<9zug 
z;NkDTcH{T|2aSEG|GV}5W>NpY1!TYf@~O2ng5m?P;sdba1F(O_2VhbDA8Vc7KvP%N zBgl9l|JV1Ljr{jN_2xm5|Gx!%xBVua`D;5Xp&X|l7! zjQ4a0a?Ksa1G=JsgpjkiHGDVbfj7jktZgb_3EVjCUmef_3Mim}0tzUgfC36Apnw7j iD4>7>3Mim}0tzUgfC36ApnwAYk?>!EF$ZA)Pyhfljm7W) literal 0 HcmV?d00001 diff --git a/tests/input/code-frame-7.14.5-no-root-pkg.tgz b/tests/input/code-frame-7.14.5-no-root-pkg.tgz new file mode 100644 index 0000000000000000000000000000000000000000..96c85af0020be6f88c0d5ccb930076b3bedb5dc8 GIT binary patch literal 3000 zcmV;p3rF-HiwFP!000001MOOSbK1rh&)@nK8`^YWg2ltwY0M=p0Xf!`hX-(dGqxKG zX#u4|qFTw0Z^C!K=j^VY*hzXPN$0jY(+1c*@7+DWvywN4`Ma^CHN)AgjM}5GLUo{L z2M74scy>@ve@67E(K@WRTKlcTgZ)SKM&t0{*&}lBb==8P$O6MBKhAqlW-`)gr>J@dVZv>Q3-}ih*f|U^v({lsE zc8NhyDd9@R%*M0bfCL_4Ug(>YIG$;sDy9nUMi{I-9~IA5l)Oa8$c!3mQeFkYh8@*v z+*#G4H?;u9D?)7Lt?34~FbeK6TAs;j)UCPnT_A?{^1;r-B})r0?O+woRnuDw0r{`Z zSV-AVVX*kI^F(VtdER^SeE)|(IyQT<-{q4}`B!g4U7^^qP3ki4*ifH|u}wI4Y_{ro z?!sQe46X*oZkm*FFF1o)g5fO}@ae{5cHsGU33qU};ab5{t`&wt-}!V5YD&PeeQHXz zC0Pa7ZiZq%*089^s@5EPUb9_`zJsC>?F)$6qONIEmUt=m*2-S49QZ##>EGmbiq*mz z_3xL)AX&{^8P4xqvZ>b9dgT2B!z_U9xo5b{=5sAR7u%xl(st7}T!<4HUvv)D=aN&| zDu49}{kjy@e_Y0|W#dobfBgXQe}Dg=!2fSS>Gh9}@t3yoar|#I8aTJV-#Bd559lr_B7q-LWm{=0l2t{}I7ZCqc}}=?8R?MxP{Iu6jL->y=Z*XduH3+~youog z=6w1`2ryONg%YrD|6_m z@KAgjguW}$U>ODh^^XXjSC>A$6QHs-{NIytpIYkNb1XRrpbcj!KZ!Wf1|U(O8Daq$ zY3Bodq=8elOaIdSaB=CvXV|Paf8_r@zpCXN9derkM5{8iNmbc0Rf)F=nDAYw#;qI- zJQH-wg)|tmmu^|aWb(Wpk-Bi#0fk3oufY)!`!AKtLoy?M0L1rd1NO+25Cq1Q*!x&L z>(0F!3JRKuN%oRD-Ytynqha{OD9DEo4zwM?sv5iX zG?f$F#)Q}llCsh1bYO`pGN+L<^)_!#BzGKQhF93nn2n`G>}a`fFBUR%5mIbA$>lMF+0V3{RF}-6OOLkEam*kO=c`6XIE^~oT#P0$LgaC^>>Ib7mDgs0 z68zkzp&|xPidCYFJZe1F!@D|EWIhUq+C2??Vt{Hrj=Xt(z<+c$3(xJOX0468IXy{r1%l-m4;geZ?y^EUFJvZVTGOHaL?nAT z;y#jcZIReaaYRp|WU6fA*g;vM_YT^9#;S?1cEH>bOO?yR%3w+76k{_k0N#YXMKUJw ziXWho%gKoVA_e+8CBatr%;r$@lMt z;PzS?-OJ{xl9m?jpp;9i+L8~~X+~H#vBJQcE)qibRhrc?SZX%_|KnW&2@i3_jMXQF z5VN~Zt0|Q-*xB`c<4(01|4b>^#XzBD3RgndAw-~$%nhu2+yKQYF6vhZi`ev3pxfOz z25{Cce!C2=YD>5*kW!_cGa;fHFN6ZV!D4NVE37$<`@jYZYw6M@aOubvfHKw#Rjf`` zu2SO4r4{*6;UCp(i1#ZKEWrO;iAP0^pqz8Na;3iYprP@g#(TjEsu8~B_3e2Wcs^5jA}pXd zPZ2|=alBi^YFIMM=I`5&6b0X~ z8~h=r8Sens3&2(30xrNNSyeg3pp$XPoGKz=U>9HXr^4~kJRC0<_KidEQ9s(thpB@7 z6OByA$FuY9L?il?oKD8S>V2(GO5G{^h6eLopPh`)W&}7U-O=nd86T7G=r#F8AN8w> z_V3e)Hl333gy_T5fv)wdL?87AXMKJ2io685(HOS54x1g6&c*}{N>#cBa>vS0oAgfL zXZNK(&}Xl!5WIT3H+==l#^L3dah5kD(O!2DJJB2 zG6waSPLLRLB_KD_gceLS$*c&l;P11kmiW@wx^Q8CHAUexv-&rGU;cNa2>&Nqw?EX> zwe=Njd;vYkfNON`>4BF1GrsZU$d10-+`Y` zlf6A=+|WIX-rN&Buqz5!2)T+oxR}K{@CPw0YgY Date: Thu, 11 Apr 2024 12:10:12 +0800 Subject: [PATCH 27/31] Refine the output for cf invalidation request --- charon/cache.py | 5 ++++- charon/pkgs/pkg_utils.py | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/charon/cache.py b/charon/cache.py index 17ea22ac..652fe8b6 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -12,6 +12,9 @@ INVALIDATION_BATCH_DEFAULT = 3000 INVALIDATION_BATCH_WILDCARD = 15 +INVALIDATION_STATUS_COMPLETED = "Completed" +INVALIDATION_STATUS_INPROGRESS = "InProgress" + DEFAULT_BUCKET_TO_DOMAIN = { "prod-ga": "maven.repository.redhat.com", "prod-maven-ga": "maven.repository.redhat.com", @@ -91,7 +94,7 @@ def invalidate_paths( current_invalidation = {} for batch_paths in real_paths: while (current_invalidation and - 'InProgress' == current_invalidation.get('Status', '')): + INVALIDATION_STATUS_INPROGRESS == current_invalidation.get('Status', '')): time.sleep(5) try: result = self.check_invalidation(distr_id, current_invalidation.get('Id')) diff --git a/charon/pkgs/pkg_utils.py 
b/charon/pkgs/pkg_utils.py index 88c31257..7d9cabc7 100644 --- a/charon/pkgs/pkg_utils.py +++ b/charon/pkgs/pkg_utils.py @@ -104,9 +104,15 @@ def invalidate_cf_paths( distr_id, final_paths, real_batch_size ) if result: + output = {} + for invalidation in result: + status = invalidation.get('Status') + if status not in output: + output[status] = [] + output[status].append(invalidation["Id"]) logger.info( "The CF invalidating request for metadata/indexing is sent, " - "request status as below:\n %s", result + "request result as below:\n %s", output ) else: logger.error( From 28a7241445e4d1b1f7db8416e0efe26675316014 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 11 Apr 2024 12:32:00 +0800 Subject: [PATCH 28/31] Add extra 1s wait for next CF invalidation request --- charon/cache.py | 3 +++ charon/pkgs/maven.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/charon/cache.py b/charon/cache.py index 652fe8b6..5dd103d4 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -113,6 +113,9 @@ def invalidate_paths( break if current_invalidation: results.append(current_invalidation) + # To avoid conflict rushing request, we can wait 1s here + # for next invalidation request sending. + time.sleep(1) caller_ref = str(uuid.uuid4()) logger.debug( "Processing invalidation for batch with ref %s, size: %s", diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index c7413c80..f8ba8abc 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -459,7 +459,7 @@ def handle_maven_uploading( else: logger.info("Bypass indexing") - # Finally do the CF invalidating for metadata files + # 11. Finally do the CF invalidating for metadata files if cf_enable and len(cf_invalidate_paths) > 0: cf_client = CFClient(aws_profile=aws_profile) cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths) @@ -639,6 +639,7 @@ def handle_maven_del( else: logger.info("Bypassing indexing") + # 9. 
Finally do the CF invalidating for metadata files if cf_enable and len(cf_invalidate_paths): cf_client = CFClient(aws_profile=aws_profile) cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths) From fbe8af7d5c65d352cd517bd14919f0f804f99327 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 11 Apr 2024 20:18:36 +0800 Subject: [PATCH 29/31] Add progress counting for CF requests processing --- charon/cache.py | 16 ++++++++++++++-- charon/pkgs/pkg_utils.py | 15 ++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/charon/cache.py b/charon/cache.py index 5dd103d4..45a57751 100644 --- a/charon/cache.py +++ b/charon/cache.py @@ -86,16 +86,23 @@ def invalidate_paths( The default value is 3000 which is the maximum number in official doc: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html#InvalidationLimits """ + INPRO_W_SECS = 5 + NEXT_W_SECS = 1 real_paths = [paths] # Split paths into batches by batch_size if batch_size: real_paths = [paths[i:i + batch_size] for i in range(0, len(paths), batch_size)] + total_time_approx = len(real_paths) * (INPRO_W_SECS * 2 + NEXT_W_SECS) + logger.info("There will be %d invalidating requests in total," + " will take more than %d seconds", + len(real_paths), total_time_approx) results = [] current_invalidation = {} + processed_count = 0 for batch_paths in real_paths: while (current_invalidation and INVALIDATION_STATUS_INPROGRESS == current_invalidation.get('Status', '')): - time.sleep(5) + time.sleep(INPRO_W_SECS) try: result = self.check_invalidation(distr_id, current_invalidation.get('Id')) if result: @@ -113,9 +120,14 @@ def invalidate_paths( break if current_invalidation: results.append(current_invalidation) + processed_count += 1 + if processed_count % 10 == 0: + logger.info( + "[CloudFront] ######### %d/%d requests finished", + processed_count, len(real_paths)) # To avoid conflict rushing request, we can wait 1s here # for next invalidation request sending. 
-            time.sleep(1)
+            time.sleep(NEXT_W_SECS)
             caller_ref = str(uuid.uuid4())
             logger.debug(
                 "Processing invalidation for batch with ref %s, size: %s",
                 caller_ref, len(batch_paths)
             )
diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py
index 7d9cabc7..c340236e 100644
--- a/charon/pkgs/pkg_utils.py
+++ b/charon/pkgs/pkg_utils.py
@@ -2,7 +2,8 @@
 from charon.cache import (
     CFClient,
     INVALIDATION_BATCH_DEFAULT,
-    INVALIDATION_BATCH_WILDCARD
+    INVALIDATION_BATCH_WILDCARD,
+    INVALIDATION_STATUS_COMPLETED
 )
 import logging
 import os
@@ -110,9 +111,17 @@ def invalidate_cf_paths(
                     if status not in output:
                         output[status] = []
                     output[status].append(invalidation["Id"])
+                non_completed = {}
+                for status, ids in output.items():
+                    if status != INVALIDATION_STATUS_COMPLETED:
+                        non_completed[status] = ids
                 logger.info(
-                    "The CF invalidating request for metadata/indexing is sent, "
-                    "request result as below:\n %s", output
+                    "The CF invalidating requests done, these following requests "
+                    "are not completed yet:\n %s\nPlease use cf-check command to "
+                    "check its details.", non_completed
+                )
+                logger.debug(
+                    "All invalidations requested in this process:\n %s", output
                 )
             else:
                 logger.error(

From a18fb7f77c34b2b8ac8ff8e6e6b33956e8896af7 Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Thu, 11 Apr 2024 21:29:48 +0800
Subject: [PATCH 30/31] Fix a simple logging issue
---
 charon/pkgs/pkg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py
index c340236e..9325f14b 100644
--- a/charon/pkgs/pkg_utils.py
+++ b/charon/pkgs/pkg_utils.py
@@ -116,7 +116,7 @@ def invalidate_cf_paths(
                     if status != INVALIDATION_STATUS_COMPLETED:
                         non_completed[status] = ids
                 logger.info(
-                    "The CF invalidating requests done, these following requests "
+                    "The CF invalidating requests are done, the following requests "
                     "are not completed yet:\n %s\nPlease use cf-check command to "
                     "check its details.", non_completed
                 )

From 13b14599d16c45ae18f0b3cfd0f48cee1e6f8554 Mon Sep 17 00:00:00 2001
From: Gang Li
Date: Fri, 12 Apr 2024 19:42:05 +0800
Subject: [PATCH 31/31] Update release info for spec file of 1.3.0
---
 charon.spec | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/charon.spec b/charon.spec
index 249a5527..9c7210fa 100644
--- a/charon.spec
+++ b/charon.spec
@@ -80,12 +80,14 @@ export LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8
 
 %changelog
-* Mon Mar 25 2024 Gang Li
+* Fri Apr 12 2024 Gang Li
 - 1.3.0 release
 - Add checksum-validate command: validate the checksum for maven artifacts
 - Add index command: support re-indexing of the specified folder
-- Add CF invalidating feature: invalidate generated metadata files (maven-metadata*/package.json/index.html) after product uploading/deleting in CloudFront
-- Add CF invalidating feature: add command to do CF invalidating and checking
+- Add CF invalidating features:
+  - Invalidate generated metadata files (maven-metadata*/package.json/index.html) after product uploading/deleting in CloudFront
+  - Add command to do CF invalidating and checking
+- Fix bug: pick the root package.json as the first-priority source when generating the npm package path
 * Mon Sep 18 2023 Harsh Modi
 - 1.2.2 release
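For reference, typical invocations of the cf-check command added in this series might look
like the following (a sketch assuming the console script is installed as "charon"; the
invalidation id and target name are placeholder values):

    # Check the status of one CloudFront invalidation for a target
    charon cf-check I2J3K4EXAMPLE -t stage-maven-ga

    # The same check with debug logging enabled
    charon cf-check I2J3K4EXAMPLE -t stage-maven-ga -D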