diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml index 86293445..92c165f1 100644 --- a/.github/workflows/linters.yaml +++ b/.github/workflows/linters.yaml @@ -29,16 +29,16 @@ jobs: - name: Run flake8 on python${{ matrix.python-version }} run: python -m tox -e flake8 - markdownlint: - name: Markdownlint - runs-on: ubuntu-latest + # markdownlint: + # name: Markdownlint + # runs-on: ubuntu-latest - steps: - - name: Check out repo - uses: actions/checkout@v2 + # steps: + # - name: Check out repo + # uses: actions/checkout@v2 - - name: Run markdownlint - uses: containerbuildsystem/actions/markdownlint@master + # - name: Run markdownlint + # uses: containerbuildsystem/actions/markdownlint@master pylint: name: Pylint analyzer for Python ${{ matrix.python-version }} @@ -91,22 +91,22 @@ jobs: # - name: Run mypy on python${{ matrix.python-version }} # run: python -m tox -e mypy - bandit: - name: Bandit analyzer for Python ${{ matrix.python-version }} - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [ "3.8" ] - - steps: - - uses: actions/checkout@v1 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools tox - - - name: Run bandit analyzer on python${{ matrix.python-version }} - run: python -m tox -e bandit + # bandit: + # name: Bandit analyzer for Python ${{ matrix.python-version }} + # runs-on: ubuntu-latest + + # strategy: + # matrix: + # python-version: [ "3.8" ] + + # steps: + # - uses: actions/checkout@v1 + # - uses: actions/setup-python@v4 + # with: + # python-version: ${{ matrix.python-version }} + # - name: Install dependencies + # run: | + # python -m pip install --upgrade pip setuptools tox + + # - name: Run bandit analyzer on python${{ matrix.python-version }} + # run: python -m tox -e bandit diff --git a/.gitignore b/.gitignore index 8ca90496..f4df6301 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,12 @@ coverage .vscode package/ .local +local .DS_Store # Unit test __pytest_reports htmlcov + +# Generated during local runs +*.log diff --git a/README.md b/README.md index cdd1608d..af0f6c89 100644 --- a/README.md +++ b/README.md @@ -96,3 +96,21 @@ This command will delete some paths from repo in S3. but not delete the artifacts themselves. * During or after the paths' deletion, regenerate the metadata files and index files for both types. + +### charon-index: refresh the index.html for the specified path + +```bash +usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet] +``` + +This command will refresh the index.html for the specified path. + +* Note that if the path is an NPM metadata path which contains a package.json, this refresh will not work, because this type of folder serves the package.json instead of the index.html in HTTP requests. + +### charon-validate: validate the checksums of files in the specified path in a maven repository + +```bash +usage: charon validate $PATH [-t, --target] [-f, --report-file-path] [-i, --includes] [-r, --recursive] [-D, --debug] [-q, --quiet] +``` + +This command will validate the checksums of files under the specified path in the maven repository. It will calculate the sha1 checksum of every artifact file in the specified path, compare it with the artifact's accompanying .sha1 file, and then record all mismatched artifacts in the report file. Artifact files that are missing their accompanying .sha1 files will also be recorded.
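A couple of hypothetical invocations of the two commands documented above may help; the target name `prod-maven-ga`, the paths, and the report directory are placeholders chosen for illustration, and the valid target names depend on your charon configuration:

```bash
# Refresh the index.html for one folder in the configured target (example values)
charon index org/apache/commons/ -t prod-maven-ga

# Validate sha1 checksums of .jar and .pom files under a path, recursively,
# writing the mismatched/missing/error reports to /tmp/charon-reports
charon validate org/apache/commons/ -t prod-maven-ga -r -i ".jar,.pom" -f /tmp/charon-reports
```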
diff --git a/charon.spec b/charon.spec index dfe39b55..9c7210fa 100644 --- a/charon.spec +++ b/charon.spec @@ -51,7 +51,6 @@ Requires: python%{python3_pkgversion}-zipp Requires: python%{python3_pkgversion}-attrs Requires: python%{python3_pkgversion}-pyrsistent - %description Simple Python tool with command line interface for charon init, upload, delete, gen and ls functions. @@ -81,6 +80,15 @@ export LANG=en_US.UTF-8 LANGUAGE=en_US.en LC_ALL=en_US.UTF-8 %changelog +* Fri Apr 12 2024 Gang Li +- 1.3.0 release +- Add validate command: validate the checksums of maven artifacts +- Add index command: support re-indexing of the specified folder +- Add CF invalidation features: + - Invalidate generated metadata files (maven-metadata*/package.json/index.html) after product uploading/deleting in CloudFront + - Add commands to do CF invalidating and checking +- Fix bug: pick the root package.json as the first priority when generating the npm package path + * Mon Sep 18 2023 Harsh Modi - 1.2.2 release - hot fix for "dist_tags" derived issue diff --git a/charon/__init__.py b/charon/__init__.py index ca82d84e..9eefcae0 100644 --- a/charon/__init__.py +++ b/charon/__init__.py @@ -13,9 +13,3 @@ See the License for the specific language governing permissions and limitations under the License. """ - -from charon.cmd.command import cli, upload, delete - -# init group command -cli.add_command(upload) -cli.add_command(delete) diff --git a/charon/cache.py b/charon/cache.py new file mode 100644 index 00000000..45a57751 --- /dev/null +++ b/charon/cache.py @@ -0,0 +1,204 @@ +from boto3 import session +from botocore.exceptions import ClientError +from typing import Dict, List +import os +import logging +import uuid +import time + +logger = logging.getLogger(__name__) + +ENDPOINT_ENV = "aws_endpoint_url" +INVALIDATION_BATCH_DEFAULT = 3000 +INVALIDATION_BATCH_WILDCARD = 15 + +INVALIDATION_STATUS_COMPLETED = "Completed" +INVALIDATION_STATUS_INPROGRESS = "InProgress" + +DEFAULT_BUCKET_TO_DOMAIN = { + "prod-ga": "maven.repository.redhat.com", + "prod-maven-ga": "maven.repository.redhat.com", + "prod-ea": "maven.repository.redhat.com", + "prod-maven-ea": "maven.repository.redhat.com", + "stage-ga": "maven.stage.repository.redhat.com", + "stage-maven-ga": "maven.stage.repository.redhat.com", + "stage-ea": "maven.stage.repository.redhat.com", + "stage-maven-ea": "maven.stage.repository.redhat.com", + "prod-npm": "npm.registry.redhat.com", + "prod-npm-npmjs": "npm.registry.redhat.com", + "stage-npm": "npm.stage.registry.redhat.com", + "stage-npm-npmjs": "npm.stage.registry.redhat.com" +} + + +class CFClient(object): + """The CFClient is a wrapper of the original boto3 cloudfront client, + which provides CloudFront functions to be used in charon.
+ """ + + def __init__( + self, + aws_profile=None, + extra_conf=None + ) -> None: + self.__client = self.__init_aws_client(aws_profile, extra_conf) + + def __init_aws_client( + self, aws_profile=None, extra_conf=None + ): + if aws_profile: + logger.debug("[CloudFront] Using aws profile: %s", aws_profile) + cf_session = session.Session(profile_name=aws_profile) + else: + cf_session = session.Session() + endpoint_url = self.__get_endpoint(extra_conf) + return cf_session.client( + 'cloudfront', + endpoint_url=endpoint_url + ) + + def __get_endpoint(self, extra_conf) -> str: + endpoint_url = os.getenv(ENDPOINT_ENV) + if not endpoint_url or not endpoint_url.strip(): + if isinstance(extra_conf, Dict): + endpoint_url = extra_conf.get(ENDPOINT_ENV, None) + if endpoint_url: + logger.info( + "[CloudFront] Using endpoint url for aws CF client: %s", + endpoint_url + ) + else: + logger.debug("[CloudFront] No user-specified endpoint url is used.") + return endpoint_url + + def invalidate_paths( + self, distr_id: str, paths: List[str], + batch_size=INVALIDATION_BATCH_DEFAULT + ) -> List[Dict[str, str]]: + """Send a invalidating requests for the paths in distribution to CloudFront. + This will invalidate the paths in the distribution to enforce the refreshment + from backend S3 bucket for these paths. For details see: + https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html + * The distr_id is the id for the distribution. This id can be get through + get_dist_id_by_domain(domain) function + * Can specify the invalidating paths through paths param. + * Batch size is the number of paths to be invalidated in one request. + The default value is 3000 which is the maximum number in official doc: + https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html#InvalidationLimits + """ + INPRO_W_SECS = 5 + NEXT_W_SECS = 1 + real_paths = [paths] + # Split paths into batches by batch_size + if batch_size: + real_paths = [paths[i:i + batch_size] for i in range(0, len(paths), batch_size)] + total_time_approx = len(real_paths) * (INPRO_W_SECS * 2 + NEXT_W_SECS) + logger.info("There will be %d invalidating requests in total," + " will take more than %d seconds", + len(real_paths), total_time_approx) + results = [] + current_invalidation = {} + processed_count = 0 + for batch_paths in real_paths: + while (current_invalidation and + INVALIDATION_STATUS_INPROGRESS == current_invalidation.get('Status', '')): + time.sleep(INPRO_W_SECS) + try: + result = self.check_invalidation(distr_id, current_invalidation.get('Id')) + if result: + current_invalidation = { + 'Id': result.get('Id', None), + 'Status': result.get('Status', None) + } + logger.debug("Check invalidation: %s", current_invalidation) + except Exception as err: + logger.warning( + "[CloudFront] Error occurred while checking invalidation status during" + " creating invalidation, invalidation: %s, error: %s", + current_invalidation, err + ) + break + if current_invalidation: + results.append(current_invalidation) + processed_count += 1 + if processed_count % 10 == 0: + logger.info( + "[CloudFront] ######### %d/%d requests finished", + processed_count, len(real_paths)) + # To avoid conflict rushing request, we can wait 1s here + # for next invalidation request sending. 
+ time.sleep(NEXT_W_SECS) + caller_ref = str(uuid.uuid4()) + logger.debug( + "Processing invalidation for batch with ref %s, size: %s", + caller_ref, len(batch_paths) + ) + try: + response = self.__client.create_invalidation( + DistributionId=distr_id, + InvalidationBatch={ + 'CallerReference': caller_ref, + 'Paths': { + 'Quantity': len(batch_paths), + 'Items': batch_paths + } + } + ) + if response: + invalidation = response.get('Invalidation', {}) + current_invalidation = { + 'Id': invalidation.get('Id', None), + 'Status': invalidation.get('Status', None) + } + except Exception as err: + logger.error( + "[CloudFront] Error occurred while creating invalidation" + " for paths %s, error: %s", batch_paths, err + ) + if current_invalidation: + results.append(current_invalidation) + return results + + def check_invalidation(self, distr_id: str, invalidation_id: str) -> dict: + try: + response = self.__client.get_invalidation( + DistributionId=distr_id, + Id=invalidation_id + ) + if response: + invalidation = response.get('Invalidation', {}) + return { + 'Id': invalidation.get('Id', None), + 'CreateTime': str(invalidation.get('CreateTime', None)), + 'Status': invalidation.get('Status', None) + } + except Exception as err: + logger.error( + "[CloudFront] Error occurred while check invalidation of id %s, " + "error: %s", invalidation_id, err + ) + + def get_dist_id_by_domain(self, domain: str) -> str: + """Get distribution id by a domain name. The id can be used to send invalidating + request through #invalidate_paths function + * Domain are Ronda domains, like "maven.repository.redhat.com" + or "npm.registry.redhat.com" + """ + try: + response = self.__client.list_distributions() + if response: + dist_list_items = response.get("DistributionList", {}).get("Items", []) + for distr in dist_list_items: + aliases_items = distr.get('Aliases', {}).get('Items', []) + if aliases_items and domain in aliases_items: + return distr['Id'] + logger.error("[CloudFront]: Distribution not found for domain %s", domain) + except ClientError as err: + logger.error( + "[CloudFront]: Error occurred while get distribution for domain %s: %s", + domain, err + ) + return None + + def get_domain_by_bucket(self, bucket: str) -> str: + return DEFAULT_BUCKET_TO_DOMAIN.get(bucket, None) diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py index 9eefcae0..b2cafd05 100644 --- a/charon/cmd/__init__.py +++ b/charon/cmd/__init__.py @@ -13,3 +13,26 @@ See the License for the specific language governing permissions and limitations under the License. """ +from click import group +from charon.cmd.cmd_upload import upload +from charon.cmd.cmd_delete import delete +from charon.cmd.cmd_index import index +from charon.cmd.cmd_checksum import checksum_validate +from charon.cmd.cmd_cache import cf_invalidate, cf_check + + +@group() +def cli(): + """Charon is a tool to synchronize several types of + artifacts repository data to Red Hat Ronda + service (maven.repository.redhat.com). + """ + + +# init group command +cli.add_command(upload) +cli.add_command(delete) +cli.add_command(index) +cli.add_command(checksum_validate) +cli.add_command(cf_invalidate) +cli.add_command(cf_check) diff --git a/charon/cmd/cmd_cache.py b/charon/cmd/cmd_cache.py new file mode 100644 index 00000000..95aae658 --- /dev/null +++ b/charon/cmd/cmd_cache.py @@ -0,0 +1,216 @@ +""" +Copyright (C) 2022 Red Hat, Inc. 
(https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from charon.config import get_config +from charon.cmd.internal import _decide_mode, _get_buckets +from charon.cache import CFClient +from charon.pkgs.pkg_utils import invalidate_cf_paths +from click import command, option, argument +from typing import List, Tuple + +import traceback +import logging +import sys +import os + +logger = logging.getLogger(__name__) + + +@option( + "--target", + "-t", + "target", + help=""" + The target to do the invalidating, which will decide the s3 bucket + which and its related domain to get the distribution. + """, + required=True +) +@option( + "--path", + "-p", + "paths", + help=""" + The paths which will be invalidated in CF. The path can use the format as CF defining + in https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Invalidation.html + """, + multiple=True +) +@option( + "--path-file", + "-f", + "path_file", + help=""" + The file which contain the paths to be invalidated in CF. Pahts in this file follow the + format of CF defining too, and each path should be in a single line. + """ +) +@option( + "--debug", + "-D", + "debug", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + "quiet", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@command() +def cf_invalidate( + target: str, + paths: List[str], + path_file: str, + quiet: bool = False, + debug: bool = False +): + """Do invalidating on AWS CloudFront for the specified paths. + """ + _decide_mode( + f"cfclear-{target}", "", + is_quiet=quiet, is_debug=debug, use_log_file=False + ) + if not paths and not path_file: + logger.error( + "No path specified, please specify at least one path " + "through --path or --path-file.") + sys.exit(1) + + work_paths = [] + if paths: + work_paths.extend(paths) + + if path_file: + with open(path_file, "r", encoding="utf-8") as f: + for line in f.readlines(): + work_paths.append(str(line).strip()) + + use_wildcard = False + for path in work_paths: + if "*" in path: + use_wildcard = True + break + + try: + (buckets, aws_profile) = _init_cmd(target) + + for b in buckets: + cf_client = CFClient(aws_profile=aws_profile) + # Per aws official doc, if the paths contains wildcard, it is + # limited to 15 as max items in one request. Otherwise it could + # be 3000 + if use_wildcard: + invalidate_cf_paths( + cf_client, b, work_paths + ) + else: + invalidate_cf_paths( + cf_client, b, work_paths, batch_size=3000 + ) + except Exception: + print(traceback.format_exc()) + sys.exit(2) + + +@argument( + "invalidation_id", + type=str +) +@option( + "--target", + "-t", + "target", + help=""" + The target to do the invalidating, which will decide the s3 bucket + which and its related domain to get the distribution. 
+ """, + required=True +) +@option( + "--debug", + "-D", + "debug", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + "quiet", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@command() +def cf_check( + invalidation_id: str, + target: str, + quiet: bool = False, + debug: bool = False +): + """Check the invalidation status of the specified invalidation id + for AWS CloudFront. + """ + _decide_mode( + f"cfcheck-{target}", "", + is_quiet=quiet, is_debug=debug, use_log_file=False + ) + try: + (buckets, aws_profile) = _init_cmd(target) + if not buckets: + sys.exit(1) + + for b in buckets: + cf_client = CFClient(aws_profile=aws_profile) + bucket_name = b[1] + domain = b[4] + if not domain: + domain = cf_client.get_domain_by_bucket(bucket_name) + if domain: + distr_id = cf_client.get_dist_id_by_domain(domain) + if distr_id: + result = cf_client.check_invalidation(distr_id, invalidation_id) + logger.info( + "The status of invalidation %s is %s", + invalidation_id, result + ) + else: + logger.error( + "Can not check invalidation result for %s because domain not found" + " for bucket %s. ", invalidation_id, bucket_name + ) + except Exception: + print(traceback.format_exc()) + sys.exit(2) + + +def _init_cmd(target: str) -> Tuple[List[Tuple[str, str, str, str, str]], str]: + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + return (_get_buckets([target], conf), aws_profile) diff --git a/charon/cmd/cmd_checksum.py b/charon/cmd/cmd_checksum.py new file mode 100644 index 00000000..1591df77 --- /dev/null +++ b/charon/cmd/cmd_checksum.py @@ -0,0 +1,155 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import List + +from charon.config import get_config +from charon.pkgs.checksum_http import handle_checksum_validation_http +from charon.cmd.internal import _decide_mode +from click import command, option, argument + +import traceback +import logging +import os +import sys + +logger = logging.getLogger(__name__) + + +@argument( + "path", + type=str +) +@option( + "--debug", + "-D", + "debug", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + "quiet", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@option( + "--skip", + "-k", + "skips", + multiple=True, + help=""" + Paths to be skipped. This is used for recursive mode when $PATH has sub folders. + """ +) +@option( + "--recursive", + "-r", + "recursive", + help=""" + Decide if do validation recursively in the specified path. 
+ Warning: if the path is high level which contains lots of sub path(e.g org/ + or com/), set this flag will take very long time to do the validation. + """, + is_flag=True, + default=False +) +@option( + "--report-file-path", + "-f", + "report_file_path", + help=""" + The path where the final report files will be generated + """ +) +@option( + "--includes", + "-i", + "includes", + help=""" + The comma splitted file suffix for all files that need to + validate. e.g, ".jar,.pom,.xml". If not specified, will use + default file types + """ +) +@option( + "--target", + "-t", + "target", + help=""" + The target to do the uploading, which will decide which s3 bucket + and what root path where all files will be uploaded to. + Can accept more than one target. + """, + required=True +) +@command() +def checksum_validate( + path: str, + target: str, + includes: List[str], + report_file_path: str, + skips: List[str], + recursive: bool = False, + quiet: bool = False, + debug: bool = False +): + """ + Validate the checksum of the specified path for themaven repository. + It will calculate the sha1 checksum of all artifact files in the + specified path and compare with the companied .sha1 files of the + artifacts, then record all mismatched artifacts in the report file. + If some artifact files misses the companied .sha1 files, they will also + be recorded. + """ + _decide_mode( + "checksum-{}".format(target), path.replace("/", "_"), + is_quiet=quiet, is_debug=debug + ) + try: + conf = get_config() + if not conf: + sys.exit(1) + + aws_bucket = "" + root_path = "" + t = conf.get_target(target) + if not t: + sys.exit(1) + for b in t: + aws_bucket = b.get('bucket') + prefix = b.get('prefix', '') + + # NOTE: This is a liitle hacky, which constrain the configuration of + # of target should define the bucket to contain "prod-maven" + # or "stage-maven" to decide that the bucket is for maven repo + # in our defined aws env for production or stage + if "prod-maven" not in aws_bucket and "stage-maven" not in aws_bucket: + logger.error("The target %s is not a maven repository.", target) + sys.exit(1) + + root_path = os.path.join(prefix, path) + skip_paths = [os.path.join(prefix, p) for p in skips if p != "" and p != "/"] + if path == "/": + root_path = prefix + handle_checksum_validation_http( + aws_bucket, root_path, includes, report_file_path, recursive, skip_paths + ) + except Exception: + print(traceback.format_exc()) + sys.exit(2) diff --git a/charon/cmd/cmd_delete.py b/charon/cmd/cmd_delete.py new file mode 100644 index 00000000..d4752f26 --- /dev/null +++ b/charon/cmd/cmd_delete.py @@ -0,0 +1,199 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from typing import List + +from charon.config import get_config +from charon.utils.archive import detect_npm_archive, NpmArchiveType +from charon.pkgs.maven import handle_maven_del +from charon.pkgs.npm import handle_npm_del +from charon.cmd.internal import ( + _decide_mode, _validate_prod_key, + _get_local_repo, _get_buckets, + _get_ignore_patterns, _safe_delete +) +from click import command, option, argument + +import traceback +import logging +import os +import sys + +logger = logging.getLogger(__name__) + + +@argument( + "repo", + type=str, +) +@option( + "--product", + "-p", + help=""" + The product key, will combine with version to decide + the metadata of the files in tarball. + """, + nargs=1, + required=True, + multiple=False, +) +@option( + "--version", + "-v", + help=""" + The product version, will combine with product to decide + the metadata of the files in tarball. + """, + required=True, + multiple=False, +) +@option( + "--target", + "-t", + 'targets', + help=""" + The target to do the deletion, which will decide which s3 bucket + and what root path where all files will be deleted from. + Can accept more than one target. + """, + required=True, + multiple=True, +) +@option( + "--root_path", + "-r", + default="maven-repository", + help="""The root path in the tarball before the real maven paths, + will be trailing off before uploading + """, +) +@option( + "--ignore_patterns", + "-i", + multiple=True, + help=""" + The regex patterns list to filter out the files which should + not be allowed to upload to S3. Can accept more than one pattern. + """, +) +@option( + "--work_dir", + "-w", + help=""" + The temporary working directory into which archives should + be extracted, when needed. + """, +) +@option( + "--debug", + "-D", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@option("--dryrun", "-n", is_flag=True, default=False) +@command() +def delete( + repo: str, + product: str, + version: str, + targets: List[str], + root_path="maven-repository", + ignore_patterns: List[str] = None, + work_dir: str = None, + debug=False, + quiet=False, + dryrun=False +): + """Roll back all files in a released product REPO from + Ronda Service. The REPO points to a product released + tarball which is hosted in a remote url or a local path. + """ + tmp_dir = work_dir + try: + _decide_mode(product, version, is_quiet=quiet, is_debug=debug) + if dryrun: + logger.info("Running in dry-run mode," + "no files will be deleted.") + if not _validate_prod_key(product, version): + return + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + archive_path = _get_local_repo(repo) + npm_archive_type = detect_npm_archive(archive_path) + product_key = f"{product}-{version}" + manifest_bucket_name = conf.get_manifest_bucket() + buckets = _get_buckets(targets, conf) + if not buckets: + logger.error( + "The targets %s can not be found! 
Please check" + " your charon configuration to confirm the targets" + " are set correctly.", targets + ) + if npm_archive_type != NpmArchiveType.NOT_NPM: + logger.info("This is a npm archive") + tmp_dir, succeeded = handle_npm_del( + archive_path, + product_key, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + cf_enable=conf.is_aws_cf_enable(), + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + else: + ignore_patterns_list = None + if ignore_patterns: + ignore_patterns_list = ignore_patterns + else: + ignore_patterns_list = _get_ignore_patterns(conf) + logger.info("This is a maven archive") + tmp_dir, succeeded = handle_maven_del( + archive_path, + product_key, + ignore_patterns_list, + root=root_path, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + cf_enable=conf.is_aws_cf_enable(), + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + except Exception: + print(traceback.format_exc()) + sys.exit(2) # distinguish between exception and bad config or bad state + finally: + if not debug: + _safe_delete(tmp_dir) diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py new file mode 100644 index 00000000..e27c5033 --- /dev/null +++ b/charon/cmd/cmd_index.py @@ -0,0 +1,117 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from charon.config import get_config +from charon.cmd.internal import _decide_mode +from charon.pkgs.indexing import re_index +from charon.constants import PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM +from click import command, option, argument + +import traceback +import logging +import os +import sys + +logger = logging.getLogger(__name__) + + +@argument( + "path", + type=str, +) +@option( + "--target", + "-t", + help=""" + The target to do the index refreshing, which will decide + which s3 bucket and what root path where all files will + be deleted from. + """, + required=True +) +@option( + "--debug", + "-D", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@option("--dryrun", "-n", is_flag=True, default=False) +@command() +def index( + path: str, + target: str, + debug: bool = False, + quiet: bool = False, + dryrun: bool = False +): + """Generate or refresh the index.html files for the + specified path. 
+ """ + _decide_mode( + "index-{}".format(target), path.replace("/", "_"), + is_quiet=quiet, is_debug=debug, use_log_file=False + ) + try: + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + tgt = conf.get_target(target) + if not tgt: + # log is recorded get_target + sys.exit(1) + + for b in tgt: + aws_bucket = b.get('bucket') + + package_type = None + if "maven" in aws_bucket: + logger.info( + "The target is a maven repository. Will refresh the index as maven package type" + ) + package_type = PACKAGE_TYPE_MAVEN + elif "npm" in aws_bucket: + package_type = PACKAGE_TYPE_NPM + logger.info( + "The target is a npm repository. Will refresh the index as npm package type" + ) + else: + logger.error( + "The target %s is not supported. Only maven or npm target is supported.", + target + ) + + if not aws_bucket: + logger.error("No bucket specified for target %s!", target) + else: + re_index(b, path, package_type, aws_profile, dryrun) + + except Exception: + print(traceback.format_exc()) + sys.exit(2) # distinguish between exception and bad config or bad state diff --git a/charon/cmd/cmd_upload.py b/charon/cmd/cmd_upload.py new file mode 100644 index 00000000..55696c2e --- /dev/null +++ b/charon/cmd/cmd_upload.py @@ -0,0 +1,223 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import List + +from charon.config import get_config +from charon.utils.archive import detect_npm_archive, NpmArchiveType +from charon.pkgs.maven import handle_maven_uploading +from charon.pkgs.npm import handle_npm_uploading +from charon.cmd.internal import ( + _decide_mode, _validate_prod_key, + _get_local_repo, _get_buckets, + _get_ignore_patterns, _safe_delete +) +from click import command, option, argument + +import traceback +import logging +import os +import sys + +logger = logging.getLogger(__name__) + + +@argument( + "repo", + type=str, +) +@option( + "--product", + "-p", + help=""" + The product key, will combine with version to decide + the metadata of the files in tarball. + """, + nargs=1, + required=True, + multiple=False, +) +@option( + "--version", + "-v", + help=""" + The product version, will combine with key to decide + the metadata of the files in tarball. + """, + required=True, + multiple=False, +) +@option( + "--target", + "-t", + 'targets', + help=""" + The target to do the uploading, which will decide which s3 bucket + and what root path where all files will be uploaded to. + Can accept more than one target. + """, + required=True, + multiple=True, +) +@option( + "--root_path", + "-r", + default="maven-repository", + help=""" + The root path in the tarball before the real maven paths, + will be trailing off before uploading. 
+ """, +) +@option( + "--ignore_patterns", + "-i", + multiple=True, + help=""" + The regex patterns list to filter out the files which should + not be allowed to upload to S3. Can accept more than one pattern. + """, +) +@option( + "--work_dir", + "-w", + help=""" + The temporary working directory into which archives should + be extracted, when needed. + """, +) +@option( + "--contain_signature", + "-s", + is_flag=True, + help=""" + Toggle signature generation and upload feature in charon. + """ +) +@option( + "--sign_key", + "-k", + help=""" + rpm-sign key to be used, will replace {{ key }} in default configuration for signature. + Does noting if detach_signature_command does not contain {{ key }} field. + """, +) +@option( + "--debug", + "-D", + help="Debug mode, will print all debug logs for problem tracking.", + is_flag=True, + default=False +) +@option( + "--quiet", + "-q", + help="Quiet mode, will shrink most of the logs except warning and errors.", + is_flag=True, + default=False +) +@option("--dryrun", "-n", is_flag=True, default=False) +@command() +def upload( + repo: str, + product: str, + version: str, + targets: List[str], + root_path="maven-repository", + ignore_patterns: List[str] = None, + work_dir: str = None, + contain_signature: bool = False, + sign_key: str = "redhatdevel", + debug=False, + quiet=False, + dryrun=False +): + """Upload all files from a released product REPO to Ronda + Service. The REPO points to a product released tarball which + is hosted in a remote url or a local path. + """ + tmp_dir = work_dir + try: + _decide_mode(product, version, is_quiet=quiet, is_debug=debug) + if dryrun: + logger.info("Running in dry-run mode," + "no files will be uploaded.") + if not _validate_prod_key(product, version): + return + conf = get_config() + if not conf: + sys.exit(1) + + aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() + if not aws_profile: + logger.error("No AWS profile specified!") + sys.exit(1) + + archive_path = _get_local_repo(repo) + npm_archive_type = detect_npm_archive(archive_path) + product_key = f"{product}-{version}" + manifest_bucket_name = conf.get_manifest_bucket() + buckets = _get_buckets(targets, conf) + if not buckets: + logger.error( + "The targets %s can not be found! 
Please check" + " your charon configuration to confirm the targets" + " are set correctly.", targets + ) + sys.exit(1) + if npm_archive_type != NpmArchiveType.NOT_NPM: + logger.info("This is a npm archive") + tmp_dir, succeeded = handle_npm_uploading( + archive_path, + product_key, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + gen_sign=contain_signature, + cf_enable=conf.is_aws_cf_enable(), + key=sign_key, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + else: + ignore_patterns_list = None + if ignore_patterns: + ignore_patterns_list = ignore_patterns + else: + ignore_patterns_list = _get_ignore_patterns(conf) + logger.info("This is a maven archive") + tmp_dir, succeeded = handle_maven_uploading( + archive_path, + product_key, + ignore_patterns_list, + root=root_path, + buckets=buckets, + aws_profile=aws_profile, + dir_=work_dir, + gen_sign=contain_signature, + cf_enable=conf.is_aws_cf_enable(), + key=sign_key, + dry_run=dryrun, + manifest_bucket_name=manifest_bucket_name + ) + if not succeeded: + sys.exit(1) + except Exception: + print(traceback.format_exc()) + sys.exit(2) # distinguish between exception and bad config or bad state + finally: + if not debug: + _safe_delete(tmp_dir) diff --git a/charon/cmd/command.py b/charon/cmd/command.py deleted file mode 100644 index 2ef88aed..00000000 --- a/charon/cmd/command.py +++ /dev/null @@ -1,450 +0,0 @@ -""" -Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from typing import List, Tuple - -from charon.config import CharonConfig, get_config -from charon.constants import DEFAULT_REGISTRY -from charon.utils.logs import set_logging -from charon.utils.archive import detect_npm_archive, download_archive, NpmArchiveType -from charon.pkgs.maven import handle_maven_uploading, handle_maven_del -from charon.pkgs.npm import handle_npm_uploading, handle_npm_del -from click import command, option, argument, group -from json import loads -from shutil import rmtree - -import traceback -import logging -import os -import sys - -logger = logging.getLogger(__name__) - - -@argument( - "repo", - type=str, -) -@option( - "--product", - "-p", - help=""" - The product key, will combine with version to decide - the metadata of the files in tarball. - """, - nargs=1, - required=True, - multiple=False, -) -@option( - "--version", - "-v", - help=""" - The product version, will combine with key to decide - the metadata of the files in tarball. - """, - required=True, - multiple=False, -) -@option( - "--target", - "-t", - 'targets', - help=""" - The target to do the uploading, which will decide which s3 bucket - and what root path where all files will be uploaded to. - Can accept more than one target. - """, - required=True, - multiple=True, -) -@option( - "--root_path", - "-r", - default="maven-repository", - help=""" - The root path in the tarball before the real maven paths, - will be trailing off before uploading. 
- """, -) -@option( - "--ignore_patterns", - "-i", - multiple=True, - help=""" - The regex patterns list to filter out the files which should - not be allowed to upload to S3. Can accept more than one pattern. - """, -) -@option( - "--work_dir", - "-w", - help=""" - The temporary working directory into which archives should - be extracted, when needed. - """, -) -@option( - "--contain_signature", - "-s", - is_flag=True, - help=""" - Toggle signature generation and upload feature in charon. - """ -) -@option( - "--sign_key", - "-k", - help=""" - rpm-sign key to be used, will replace {{ key }} in default configuration for signature. - Does noting if detach_signature_command does not contain {{ key }} field. - """, -) -@option( - "--debug", - "-D", - help="Debug mode, will print all debug logs for problem tracking.", - is_flag=True, - default=False -) -@option( - "--quiet", - "-q", - help="Quiet mode, will shrink most of the logs except warning and errors.", - is_flag=True, - default=False -) -@option("--dryrun", "-n", is_flag=True, default=False) -@command() -def upload( - repo: str, - product: str, - version: str, - targets: List[str], - root_path="maven-repository", - ignore_patterns: List[str] = None, - work_dir: str = None, - contain_signature: bool = False, - sign_key: str = "redhatdevel", - debug=False, - quiet=False, - dryrun=False -): - """Upload all files from a released product REPO to Ronda - Service. The REPO points to a product released tarball which - is hosted in a remote url or a local path. - """ - tmp_dir = work_dir - try: - __decide_mode(product, version, is_quiet=quiet, is_debug=debug) - if dryrun: - logger.info("Running in dry-run mode," - "no files will be uploaded.") - if not __validate_prod_key(product, version): - return - conf = get_config() - if not conf: - sys.exit(1) - - aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() - if not aws_profile: - logger.error("No AWS profile specified!") - sys.exit(1) - - archive_path = __get_local_repo(repo) - npm_archive_type = detect_npm_archive(archive_path) - product_key = f"{product}-{version}" - manifest_bucket_name = conf.get_manifest_bucket() - buckets = __get_buckets(targets, conf) - if npm_archive_type != NpmArchiveType.NOT_NPM: - logger.info("This is a npm archive") - tmp_dir, succeeded = handle_npm_uploading( - archive_path, - product_key, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - gen_sign=contain_signature, - key=sign_key, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - else: - ignore_patterns_list = None - if ignore_patterns: - ignore_patterns_list = ignore_patterns - else: - ignore_patterns_list = __get_ignore_patterns(conf) - logger.info("This is a maven archive") - tmp_dir, succeeded = handle_maven_uploading( - archive_path, - product_key, - ignore_patterns_list, - root=root_path, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - gen_sign=contain_signature, - key=sign_key, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - except Exception: - print(traceback.format_exc()) - sys.exit(2) # distinguish between exception and bad config or bad state - finally: - if not debug: - __safe_delete(tmp_dir) - - -@argument( - "repo", - type=str, -) -@option( - "--product", - "-p", - help=""" - The product key, will combine with version to decide - the metadata of the files in tarball. 
- """, - nargs=1, - required=True, - multiple=False, -) -@option( - "--version", - "-v", - help=""" - The product version, will combine with product to decide - the metadata of the files in tarball. - """, - required=True, - multiple=False, -) -@option( - "--target", - "-t", - 'targets', - help=""" - The target to do the deletion, which will decide which s3 bucket - and what root path where all files will be deleted from. - Can accept more than one target. - """, - required=True, - multiple=True, -) -@option( - "--root_path", - "-r", - default="maven-repository", - help="""The root path in the tarball before the real maven paths, - will be trailing off before uploading - """, -) -@option( - "--ignore_patterns", - "-i", - multiple=True, - help=""" - The regex patterns list to filter out the files which should - not be allowed to upload to S3. Can accept more than one pattern. - """, -) -@option( - "--work_dir", - "-w", - help=""" - The temporary working directory into which archives should - be extracted, when needed. - """, -) -@option( - "--debug", - "-D", - help="Debug mode, will print all debug logs for problem tracking.", - is_flag=True, - default=False -) -@option( - "--quiet", - "-q", - help="Quiet mode, will shrink most of the logs except warning and errors.", - is_flag=True, - default=False -) -@option("--dryrun", "-n", is_flag=True, default=False) -@command() -def delete( - repo: str, - product: str, - version: str, - targets: List[str], - root_path="maven-repository", - ignore_patterns: List[str] = None, - work_dir: str = None, - debug=False, - quiet=False, - dryrun=False -): - """Roll back all files in a released product REPO from - Ronda Service. The REPO points to a product released - tarball which is hosted in a remote url or a local path. 
- """ - tmp_dir = work_dir - try: - __decide_mode(product, version, is_quiet=quiet, is_debug=debug) - if dryrun: - logger.info("Running in dry-run mode," - "no files will be deleted.") - if not __validate_prod_key(product, version): - return - conf = get_config() - if not conf: - sys.exit(1) - - aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile() - if not aws_profile: - logger.error("No AWS profile specified!") - sys.exit(1) - - archive_path = __get_local_repo(repo) - npm_archive_type = detect_npm_archive(archive_path) - product_key = f"{product}-{version}" - manifest_bucket_name = conf.get_manifest_bucket() - buckets = __get_buckets(targets, conf) - if npm_archive_type != NpmArchiveType.NOT_NPM: - logger.info("This is a npm archive") - tmp_dir, succeeded = handle_npm_del( - archive_path, - product_key, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - else: - ignore_patterns_list = None - if ignore_patterns: - ignore_patterns_list = ignore_patterns - else: - ignore_patterns_list = __get_ignore_patterns(conf) - logger.info("This is a maven archive") - tmp_dir, succeeded = handle_maven_del( - archive_path, - product_key, - ignore_patterns_list, - root=root_path, - buckets=buckets, - aws_profile=aws_profile, - dir_=work_dir, - dry_run=dryrun, - manifest_bucket_name=manifest_bucket_name - ) - if not succeeded: - sys.exit(1) - except Exception: - print(traceback.format_exc()) - sys.exit(2) # distinguish between exception and bad config or bad state - finally: - if not debug: - __safe_delete(tmp_dir) - - -def __get_buckets(targets: List[str], conf: CharonConfig) -> List[Tuple[str, str, str, str]]: - buckets = [] - for target in targets: - for bucket in conf.get_target(target): - aws_bucket = bucket.get('bucket') - prefix = bucket.get('prefix', '') - registry = bucket.get('registry', DEFAULT_REGISTRY) - buckets.append((target, aws_bucket, prefix, registry)) - return buckets - - -def __safe_delete(tmp_dir: str): - if tmp_dir and os.path.exists(tmp_dir): - logger.info("Cleaning up work directory: %s", tmp_dir) - try: - rmtree(tmp_dir) - except Exception as e: - logger.error("Failed to clear work directory. %s", e) - - -def __get_ignore_patterns(conf: CharonConfig) -> List[str]: - ignore_patterns = os.getenv("CHARON_IGNORE_PATTERNS") - if ignore_patterns: - try: - return loads(ignore_patterns) - except (ValueError, TypeError): - logger.warning("Warning: ignore_patterns %s specified in " - "system environment, but not a valid json " - "style array. 
Will skip it.", ignore_patterns) - if conf: - return conf.get_ignore_patterns() - return None - - -def __get_local_repo(url: str) -> str: - archive_path = url - if url.startswith("http://") or url.startswith("https://"): - logger.info("Start downloading tarball %s", url) - archive_path = download_archive(url) - logger.info("Tarball downloaded at: %s", archive_path) - return archive_path - - -def __validate_prod_key(product: str, version: str) -> bool: - if not product or product.strip() == "": - logger.error("Error: product can not be empty!") - return False - if not version or version.strip() == "": - logger.error("Error: version can not be empty!") - return False - if "," in product: - logger.error("Error: there are invalid characters in product!") - return False - if "," in version: - logger.error("Error: there are invalid characters in version!") - return False - return True - - -def __decide_mode(product: str, version: str, is_quiet: bool, is_debug: bool): - if is_quiet: - logger.info("Quiet mode enabled, " - "will only give warning and error logs.") - set_logging(product, version, level=logging.WARNING) - elif is_debug: - logger.info("Debug mode enabled, " - "will give all debug logs for tracing.") - set_logging(product, version, level=logging.DEBUG) - else: - set_logging(product, version, level=logging.INFO) - - -@group() -def cli(): - """Charon is a tool to synchronize several types of - artifacts repository data to Red Hat Ronda - service (maven.repository.redhat.com). - """ diff --git a/charon/cmd/internal.py b/charon/cmd/internal.py new file mode 100644 index 00000000..11c92a0c --- /dev/null +++ b/charon/cmd/internal.py @@ -0,0 +1,112 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import List, Tuple + +from charon.config import CharonConfig +from charon.constants import DEFAULT_REGISTRY +from charon.utils.logs import set_logging +from charon.utils.archive import download_archive +from json import loads +from shutil import rmtree + +import logging +import os + +logger = logging.getLogger(__name__) + + +def _get_buckets( + targets: List[str], conf: CharonConfig +) -> List[Tuple[str, str, str, str, str]]: + buckets = [] + for target in targets: + for bucket in conf.get_target(target): + aws_bucket = bucket.get('bucket') + prefix = bucket.get('prefix', '') + registry = bucket.get('registry', DEFAULT_REGISTRY) + cf_domain = bucket.get('domain', None) + buckets.append((target, aws_bucket, prefix, registry, cf_domain)) + return buckets + + +def _safe_delete(tmp_dir: str): + if tmp_dir and os.path.exists(tmp_dir): + logger.info("Cleaning up work directory: %s", tmp_dir) + try: + rmtree(tmp_dir) + except Exception as e: + logger.error("Failed to clear work directory. 
%s", e) + + +def _get_ignore_patterns(conf: CharonConfig) -> List[str]: + ignore_patterns = os.getenv("CHARON_IGNORE_PATTERNS") + if ignore_patterns: + try: + return loads(ignore_patterns) + except (ValueError, TypeError): + logger.warning("Warning: ignore_patterns %s specified in " + "system environment, but not a valid json " + "style array. Will skip it.", ignore_patterns) + if conf: + return conf.get_ignore_patterns() + return None + + +def _get_local_repo(url: str) -> str: + archive_path = url + if url.startswith("http://") or url.startswith("https://"): + logger.info("Start downloading tarball %s", url) + archive_path = download_archive(url) + logger.info("Tarball downloaded at: %s", archive_path) + return archive_path + + +def _validate_prod_key(product: str, version: str) -> bool: + if not product or product.strip() == "": + logger.error("Error: product can not be empty!") + return False + if not version or version.strip() == "": + logger.error("Error: version can not be empty!") + return False + if "," in product: + logger.error("Error: there are invalid characters in product!") + return False + if "," in version: + logger.error("Error: there are invalid characters in version!") + return False + return True + + +def _decide_mode( + product: str, version: str, is_quiet: bool, + is_debug: bool, use_log_file=True +): + if is_quiet: + logger.info("Quiet mode enabled, " + "will only give warning and error logs.") + set_logging( + product, version, level=logging.WARNING, use_log_file=use_log_file + ) + elif is_debug: + logger.info("Debug mode enabled, " + "will give all debug logs for tracing.") + set_logging( + product, version, level=logging.DEBUG, use_log_file=use_log_file + ) + else: + set_logging( + product, version, level=logging.INFO, use_log_file=use_log_file + ) diff --git a/charon/config.py b/charon/config.py index 8f128617..f9b6403c 100644 --- a/charon/config.py +++ b/charon/config.py @@ -38,6 +38,7 @@ def __init__(self, data: Dict): self.__manifest_bucket: str = data.get("manifest_bucket", None) self.__ignore_signature_suffix: Dict = data.get("ignore_signature_suffix", None) self.__signature_command: str = data.get("detach_signature_command", None) + self.__aws_cf_enable: bool = data.get("aws_cf_enable", False) def get_ignore_patterns(self) -> List[str]: return self.__ignore_patterns @@ -63,6 +64,9 @@ def get_ignore_signature_suffix(self, package_type: str) -> List[str]: def get_detach_signature_command(self) -> str: return self.__signature_command + def is_aws_cf_enable(self) -> bool: + return self.__aws_cf_enable + def get_config() -> Optional[CharonConfig]: config_file_path = os.path.join(os.getenv("HOME"), ".charon", CONFIG_FILE) diff --git a/charon/pkgs/checksum_http.py b/charon/pkgs/checksum_http.py new file mode 100644 index 00000000..515bf5a3 --- /dev/null +++ b/charon/pkgs/checksum_http.py @@ -0,0 +1,268 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from charon.utils.files import digest +from typing import Tuple, List, Dict +from html.parser import HTMLParser +import tempfile +import os +import logging +import requests +import shutil + +logger = logging.getLogger(__name__) + +DEFAULT_ARTIFACT_TYPES = ['.pom', '.jar', '.war', '.ear', '.zip', '.tar', '.gz', '.xml'] + + +def handle_checksum_validation_http( + bucket: str, + path: str, + includes: str, + report_file_path: str, + recursive: bool = False, + skips: List[str] = None +): + """ Handle the checksum check for maven artifacts. + * target contains bucket name and prefix for the bucket, which will + be used to store artifacts with the prefix. See target definition + in Charon configuration for details. + * path is the root path where to start the validation in the bucket. + * includes are the file suffixes which will decide the types of files + to do the validation. + * recursive decide if to validate the path recursively, default false. + Becareful to set true because it will be very time-consuming to do the + recursive validation as it will recursively scan all sub paths in + the path. + + This will generate a file contains all artifacts which mismatched with its + checksum files. Will use sha1 to do the validation. + """ + local_dir = tempfile.mkdtemp() + results = ([], [], []) + try: + if not os.path.exists(local_dir): + os.makedirs(local_dir) + root_url = _decide_root_url(bucket) + logger.debug("Root url is %s", root_url) + _collect_invalid_files( + root_url, path, includes, local_dir, recursive, skips, results + ) + finally: + shutil.rmtree(local_dir) + if results and any([ + results[0] and len(results[0]) > 0, + results[1] and len(results[1]) > 0, + results[2] and len(results[2]) > 0 + ]): + _gen_report(report_file_path, results) + + +def _collect_invalid_files( + root_url: str, + path: str, + includes: str, + work_dir: str, + recursive: bool, + skips: List[str], + results: Tuple[List[str], List[str], List[Dict[str, str]]] +): + if skips and path in skips: + logger.info("Path %s is in skips list, will not check it", path) + return + logger.info("Validating path %s", path) + + try: + folder_url = os.path.join(root_url, path) + items = _list_folder_content(folder_url, path) + sub_folders = [item for item in items if item.endswith("/")] + files = [item for item in items if not item.endswith("/")] + if path+"/" in sub_folders: + sub_folders.remove(path+"/") + logger.debug("Folders in path %s: %s", path, sub_folders) + logger.debug("Files in path %s: %s", path, files) + include_types = DEFAULT_ARTIFACT_TYPES + if includes and includes.strip() != "": + include_types = includes.split(",") + for f in files: + if any(f.endswith(filetype) for filetype in include_types): + _do_validation(root_url, f, work_dir, results) + except Exception as e: + logger.error("Error happened during checking path %s: %s", path, e) + if recursive: + for folder in sub_folders: + _collect_invalid_files(root_url, folder, includes, work_dir, recursive, skips, results) + + +def _do_validation( + root_url: str, file: str, work_dir: str, + results: Tuple[List[str], List[str], List[Dict[str, str]]] +): + mismatch_files = results[0] + missing_checksum_files = results[1] + error_files = results[2] + item_path = file + checksum_file_url = os.path.join(root_url, item_path + ".sha1") + checksum = None + if not _remote_file_exists(checksum_file_url): + logger.info("Missing checksum file for file %s", item_path) + missing_checksum_files.append(item_path) + else: + local_path = os.path.join(work_dir, item_path) + 
try: + # At first we want to get checksum from s3 metadata for files, but found it + # does not match with the file itself after checking. So here we download + # the file itself and do digesting directly + _download_file(root_url, item_path, work_dir) + checksum = digest(local_path) + except Exception as e: + logger.error("Validation failed for file %s: %s", item_path, e) + error_files.append({"path": item_path, "error": str(e)}) + finally: + if os.path.exists(local_path): + os.remove(local_path) + if checksum and checksum.strip() != "": + remote_checksum = _read_remote_file_content(checksum_file_url) + if remote_checksum is None: + logger.info("Missing checksum file for file %s", item_path) + missing_checksum_files.append(item_path) + elif checksum.strip().lower() != remote_checksum.strip().lower(): + logger.info("""Found mismatched file %s, file checksum %s, + remote checksum: %s""", item_path, checksum, remote_checksum) + mismatch_files.append(item_path) + + +def _gen_report( + report_file_path: str, + content: Tuple[List[str], List[str], List[Dict[str, str]]] +): + """Generate a report file.""" + work_dir = report_file_path + if work_dir and work_dir.strip() != "": + if not os.path.isdir(work_dir): + tmp_dir = tempfile.gettempdir() + work_dir = os.path.join(tmp_dir, work_dir) + if not os.path.isdir(work_dir): + os.makedirs(work_dir) + logger.debug("Created %s as report file directory.", work_dir) + else: + work_dir = tempfile.mkdtemp() + logger.debug("""The report file path is empty. + Created temp dir %s as report file path.""", work_dir) + + def _check_and_remove_file(file_name: str): + if os.path.isfile(file_name): + os.remove(file_name) + + def _write_one_col_file(items: List[str], file_name: str): + if items and len(items) > 0: + _check_and_remove_file(file_name) + with open(file_name, "w") as f: + for i in items: + f.write(i + "\n") + logger.info("The report file %s is generated.", file_name) + + _write_one_col_file(content[0], os.path.join(work_dir, "mismatched_files.csv")) + _write_one_col_file(content[1], os.path.join(work_dir, "missing_checksum_files.csv")) + + if content[2] and len(content[2]) > 0: + error_file = os.path.join(work_dir, "error_files.csv") + _check_and_remove_file(error_file) + with open(error_file, "w") as f: + f.write("path,error\n") + for d in content[2]: + f.write("{path},{error}\n".format(path=d["path"], error=d["error"])) + logger.info("The report file %s is generated.", error_file) + + +def _remote_file_exists(file_url: str) -> bool: + with requests.head(file_url) as r: + if r.status_code == 200: + return True + return False + + +def _download_file(root_url: str, file_path: str, work_dir: str): + file_url = os.path.join(root_url, file_path) + logger.debug("Start downloading file %s", file_url) + local_filename = os.path.join(work_dir, file_path) + local_dir = os.path.dirname(local_filename) + if not os.path.exists(local_dir): + logger.debug("Creating dir %s", local_dir) + os.makedirs(local_dir) + # NOTE the stream=True parameter below + try: + with requests.get(file_url, stream=True) as r: + if r.status_code == 200: + with open(local_filename, 'wb') as f: + # shutil.copyfileobj(r.raw, f) + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + logger.debug("Downloaded file %s to %s", file_path, local_filename) + except Exception as e: + logger.error("Download file %s failed: %s", file_path, e) + raise e + return local_filename + + +def _list_folder_content(folder_url: str, folder_path: str) -> List[str]: + try: + with requests.get(folder_url) 
as r: + if r.status_code == 200: + contentType = r.headers.get('Content-Type') + if contentType and "text/html" in contentType: + pageContent = r.text + p = _IndexParser() + p.feed(pageContent) + return p.get_content(folder_path) + else: + logger.warning("%s is not a folder!", folder_url) + except Exception as e: + logger.error("Can not list folder %s. The error is %s", folder_url, e) + return [] + + +class _IndexParser(HTMLParser): + def __init__(self): + super().__init__() + self.reset() + self.__content = [] + + def handle_starttag(self, tag, attrs): + if tag == "a": + for name, link in attrs: + if name == "href" and link.strip() not in ['../', '']: + self.__content.append(link) + + def get_content(self, parent): + return [os.path.join(parent, i) for i in self.__content] + + +def _read_remote_file_content(remote_file_url: str) -> str: + try: + with requests.get(remote_file_url) as r: + if r.status_code == 200: + return r.text.strip() if r.text else "" + except Exception as e: + logger.error("Can not read file %s. The error is %s", remote_file_url, e) + return None + + +def _decide_root_url(bucket: str) -> str: + if bucket.strip().startswith("prod-maven"): + return "https://maven.repository.redhat.com" + if bucket.strip().startswith("stage-maven"): + return "https://maven.stage.repository.redhat.com" + return None diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py index f478e0a5..db7a8fb9 100644 --- a/charon/pkgs/indexing.py +++ b/charon/pkgs/indexing.py @@ -15,12 +15,15 @@ """ from charon.config import get_template from charon.storage import S3Client +# from charon.cache import CFClient +# from charon.pkgs.pkg_utils import invalidate_cf_paths from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE, PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX) +from charon.utils.files import digest_content from jinja2 import Template import os import logging -from typing import List, Set +from typing import List, Set, Tuple from charon.utils.strings import remove_prefix @@ -149,6 +152,17 @@ def __generate_index_html( def __to_html(package_type: str, contents: List[str], folder: str, top_level: str) -> str: + html_content = __to_html_content(package_type, contents, folder) + html_path = os.path.join(top_level, folder, "index.html") + if folder == "/": + html_path = os.path.join(top_level, "index.html") + os.makedirs(os.path.dirname(html_path), exist_ok=True) + with open(html_path, 'w', encoding='utf-8') as html: + html.write(html_content) + return html_path + + +def __to_html_content(package_type: str, contents: List[str], folder: str) -> str: items = [] if folder != "/": items.append("../") @@ -156,17 +170,15 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st # index.html does not need to be included in html content. 
if not c.endswith("index.html"): items.append(c[len(folder):]) + temp_items = [] + for item in items: + temp_items.append(item[1:] if item.startswith("/") else item) + items = temp_items else: items.extend(contents) items = __sort_index_items(items) index = IndexedHTML(title=folder, header=folder, items=items) - html_path = os.path.join(top_level, folder, "index.html") - if folder == "/": - html_path = os.path.join(top_level, "index.html") - os.makedirs(os.path.dirname(html_path), exist_ok=True) - with open(html_path, 'w', encoding='utf-8') as html: - html.write(index.generate_index_file_content(package_type)) - return html_path + return index.generate_index_file_content(package_type) def __sort_index_items(items): @@ -250,3 +262,67 @@ def __compare(self, other) -> int: return -1 else: return 0 + + +def re_index( + bucket: Tuple[str, str, str, str, str], + path: str, + package_type: str, + aws_profile: str = None, + # cf_enable: bool = False, + dry_run: bool = False +): + """Refresh the index.html for the specified folder in the bucket. + """ + bucket_name = bucket.get("bucket") + prefix = bucket.get("prefix") + s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) + real_prefix = prefix if prefix.strip() != "/" else "" + s3_folder = os.path.join(real_prefix, path) + if path.strip() == "" or path.strip() == "/": + s3_folder = prefix + items: List[str] = s3_client.list_folder_content(bucket_name, s3_folder) + contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)] + if PACKAGE_TYPE_NPM == package_type: + if any([True if "package.json" in c else False for c in contents]): + logger.warning( + "The path %s contains NPM package.json which will work as " + "package metadata for indexing. This indexing is ignored.", + path + ) + return + + if len(contents) >= 1: + real_contents = [] + if real_prefix and real_prefix.strip() != "": + for c in contents: + if c.strip() != "": + if c.startswith(real_prefix): + real_c = remove_prefix(c, real_prefix) + real_c = remove_prefix(real_c, "/") + real_contents.append(real_c) + else: + real_contents.append(c) + else: + real_contents = contents + logger.debug(real_contents) + index_content = __to_html_content(package_type, real_contents, path) + if not dry_run: + index_path = os.path.join(path, "index.html") + if path == "/": + index_path = "index.html" + s3_client.simple_delete_file(index_path, (bucket_name, real_prefix)) + s3_client.simple_upload_file( + index_path, index_content, (bucket_name, real_prefix), + "text/html", digest_content(index_content) + ) + # We will not invalidate index.html per cost consideration + # if cf_enable: + # cf_client = CFClient(aws_profile=aws_profile) + # invalidate_cf_paths(cf_client, bucket, [index_path]) + else: + logger.warning( + "The path %s does not contain any contents in bucket %s. 
" + "Will not do any re-indexing", + path, bucket_name + ) diff --git a/charon/pkgs/maven.py b/charon/pkgs/maven.py index 9fd57422..f8ba8abc 100644 --- a/charon/pkgs/maven.py +++ b/charon/pkgs/maven.py @@ -20,7 +20,12 @@ from charon.utils.archive import extract_zip_all from charon.utils.strings import remove_prefix from charon.storage import S3Client -from charon.pkgs.pkg_utils import upload_post_process, rollback_post_process +from charon.cache import CFClient +from charon.pkgs.pkg_utils import ( + upload_post_process, + rollback_post_process, + invalidate_cf_paths +) from charon.config import CharonConfig, get_template, get_config from charon.constants import (META_FILE_GEN_KEY, META_FILE_DEL_KEY, META_FILE_FAILED, MAVEN_METADATA_TEMPLATE, @@ -53,7 +58,9 @@ def __get_mvn_template(kind: str, default: str) -> str: META_TEMPLATE = __get_mvn_template("maven-metadata.xml.j2", MAVEN_METADATA_TEMPLATE) ARCH_TEMPLATE = __get_mvn_template("archetype-catalog.xml.j2", ARCHETYPE_CATALOG_TEMPLATE) -STANDARD_GENERATED_IGNORES = ["maven-metadata.xml", "archetype-catalog.xml"] +MAVEN_METADATA_FILE = "maven-metadata.xml" +MAVEN_ARCH_FILE = "archetype-catalog.xml" +STANDARD_GENERATED_IGNORES = [MAVEN_METADATA_FILE, MAVEN_ARCH_FILE] class MavenMetadata(object): @@ -214,7 +221,7 @@ def gen_meta_file(group_id, artifact_id: str, versions: list, root="/", digest=T ).generate_meta_file_content() g_path = "/".join(group_id.split(".")) meta_files = [] - final_meta_path = os.path.join(root, g_path, artifact_id, "maven-metadata.xml") + final_meta_path = os.path.join(root, g_path, artifact_id, MAVEN_METADATA_FILE) try: overwrite_file(final_meta_path, content) meta_files.append(final_meta_path) @@ -257,11 +264,12 @@ def handle_maven_uploading( prod_key: str, ignore_patterns=None, root="maven-repository", - buckets: List[Tuple[str, str, str, str]] = None, + buckets: List[Tuple[str, str, str, str, str]] = None, aws_profile=None, dir_=None, do_index=True, gen_sign=False, + cf_enable=False, key=None, dry_run=False, manifest_bucket_name=None @@ -322,6 +330,9 @@ def handle_maven_uploading( succeeded = True generated_signs = [] for bucket in buckets: + # prepare cf invalidate files + cf_invalidate_paths = [] + # 5. Do manifest uploading if not manifest_bucket_name: logger.warning( @@ -360,9 +371,12 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("maven-metadata.xml updating done in bucket %s\n", bucket_name) + # Add maven-metadata.xml to CF invalidate paths + if cf_enable: + cf_invalidate_paths.extend(meta_files.get(META_FILE_GEN_KEY, [])) # 8. Determine refreshment of archetype-catalog.xml - if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")): + if os.path.exists(os.path.join(top_level, MAVEN_ARCH_FILE)): logger.info("Start generating archetype-catalog.xml for bucket %s", bucket_name) upload_archetype_file = _generate_upload_archetype_catalog( s3=s3_client, bucket=bucket_name, @@ -386,6 +400,9 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("archetype-catalog.xml updating done in bucket %s\n", bucket_name) + # Add archtype-catalog to invalidate paths + if cf_enable: + cf_invalidate_paths.extend(archetype_files) # 10. 
Generate signature file if contain_signature is set to True if gen_sign: @@ -436,9 +453,18 @@ def handle_maven_uploading( ) failed_metas.extend(_failed_metas) logger.info("Index files updating done\n") + # We will not invalidate the index files per cost consideration + # if cf_enable: + # cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypass indexing") + # 11. Finally do the CF invalidating for metadata files + if cf_enable and len(cf_invalidate_paths) > 0: + cf_client = CFClient(aws_profile=aws_profile) + cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level) + upload_post_process(failed_files, failed_metas, prod_key, bucket_name) succeeded = succeeded and len(failed_files) <= 0 and len(failed_metas) <= 0 @@ -450,10 +476,11 @@ def handle_maven_del( prod_key: str, ignore_patterns=None, root="maven-repository", - buckets: List[Tuple[str, str, str, str]] = None, + buckets: List[Tuple[str, str, str, str, str]] = None, aws_profile=None, dir_=None, do_index=True, + cf_enable=False, dry_run=False, manifest_bucket_name=None ) -> Tuple[str, bool]: @@ -487,6 +514,9 @@ def handle_maven_del( logger.debug("Valid poms: %s", valid_poms) succeeded = True for bucket in buckets: + # prepare cf invalidation paths + cf_invalidate_paths = [] + prefix = remove_prefix(bucket[2], "/") s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run) bucket_name = bucket[1] @@ -544,9 +574,14 @@ def handle_maven_del( if len(_failed_metas) > 0: failed_metas.extend(_failed_metas) logger.info("maven-metadata.xml updating done\n") + if cf_enable: + logger.debug( + "Extending invalidate_paths with %s:", all_meta_files + ) + cf_invalidate_paths.extend(all_meta_files) # 7. Determine refreshment of archetype-catalog.xml - if os.path.exists(os.path.join(top_level, "archetype-catalog.xml")): + if os.path.exists(os.path.join(top_level, MAVEN_ARCH_FILE)): logger.info("Start generating archetype-catalog.xml") archetype_action = _generate_rollback_archetype_catalog( s3=s3_client, bucket=bucket_name, @@ -578,6 +613,8 @@ def handle_maven_del( if len(_failed_metas) > 0: failed_metas.extend(_failed_metas) logger.info("archetype-catalog.xml updating done\n") + if cf_enable: + cf_invalidate_paths.extend(archetype_files) if do_index: logger.info("Start generating index files for all changed entries") @@ -596,9 +633,18 @@ def handle_maven_del( if len(_failed_index_files) > 0: failed_metas.extend(_failed_index_files) logger.info("Index files updating done.\n") + # We will not invalidate the index files per cost consideration + # if cf_enable: + # cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypassing indexing") + # 9. 
Finally do the CF invalidating for metadata files + if cf_enable and len(cf_invalidate_paths): + cf_client = CFClient(aws_profile=aws_profile) + cf_invalidate_paths = __wildcard_metadata_paths(cf_invalidate_paths) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, top_level) + rollback_post_process(failed_files, failed_metas, prod_key, bucket_name) succeeded = succeeded and len(failed_files) == 0 and len(failed_metas) == 0 @@ -977,15 +1023,15 @@ def _generate_metadatas( "No poms found in s3 bucket %s for GA path %s", bucket, path ) meta_files_deletion = meta_files.get(META_FILE_DEL_KEY, []) - meta_files_deletion.append(os.path.join(path, "maven-metadata.xml")) - meta_files_deletion.extend(__hash_decorate_metadata(path, "maven-metadata.xml")) + meta_files_deletion.append(os.path.join(path, MAVEN_METADATA_FILE)) + meta_files_deletion.extend(__hash_decorate_metadata(path, MAVEN_METADATA_FILE)) meta_files[META_FILE_DEL_KEY] = meta_files_deletion else: logger.warning("An error happened when scanning remote " "artifacts under GA path %s", path) meta_failed_path = meta_files.get(META_FILE_FAILED, []) - meta_failed_path.append(os.path.join(path, "maven-metadata.xml")) - meta_failed_path.extend(__hash_decorate_metadata(path, "maven-metadata.xml")) + meta_failed_path.append(os.path.join(path, MAVEN_METADATA_FILE)) + meta_failed_path.extend(__hash_decorate_metadata(path, MAVEN_METADATA_FILE)) meta_files[META_FILE_FAILED] = meta_failed_path else: logger.debug( @@ -1050,6 +1096,22 @@ def __get_suffix(package_type: str, conf: CharonConfig) -> List[str]: return [] +def __wildcard_metadata_paths(paths: List[str]) -> List[str]: + new_paths = [] + for path in paths: + if path.endswith(MAVEN_METADATA_FILE)\ + or path.endswith(MAVEN_ARCH_FILE): + new_paths.append(path[:-len(".xml")] + ".*") + elif path.endswith(".md5")\ + or path.endswith(".sha1")\ + or path.endswith(".sha128")\ + or path.endswith(".sha256"): + continue + else: + new_paths.append(path) + return new_paths + + class VersionCompareKey: 'Used as key function for version sorting' def __init__(self, obj): diff --git a/charon/pkgs/npm.py b/charon/pkgs/npm.py index 684e8457..3c183aac 100644 --- a/charon/pkgs/npm.py +++ b/charon/pkgs/npm.py @@ -28,8 +28,13 @@ from charon.config import CharonConfig, get_config from charon.constants import META_FILE_GEN_KEY, META_FILE_DEL_KEY, PACKAGE_TYPE_NPM from charon.storage import S3Client +from charon.cache import CFClient from charon.utils.archive import extract_npm_tarball -from charon.pkgs.pkg_utils import upload_post_process, rollback_post_process +from charon.pkgs.pkg_utils import ( + upload_post_process, + rollback_post_process, + invalidate_cf_paths +) from charon.utils.strings import remove_prefix from charon.utils.files import write_manifest from charon.utils.map import del_none, replace_field @@ -73,11 +78,13 @@ def default(self, o): def handle_npm_uploading( tarball_path: str, product: str, - buckets: List[Tuple[str, str, str, str]] = None, + buckets: List[Tuple[str, str, str, str]], aws_profile=None, dir_=None, + root_path="package", do_index=True, gen_sign=False, + cf_enable=False, key=None, dry_run=False, manifest_bucket_name=None @@ -96,14 +103,20 @@ def handle_npm_uploading( Returns the directory used for archive processing and if uploading is successful """ + client = S3Client(aws_profile=aws_profile, dry_run=dry_run) generated_signs = [] + succeeded = True + root_dir = mkdtemp(prefix=f"npm-charon-{product}-", dir=dir_) for bucket in buckets: + # prepare cf invalidate files + 
cf_invalidate_paths = [] + bucket_name = bucket[1] prefix = remove_prefix(bucket[2], "/") registry = bucket[3] target_dir, valid_paths, package_metadata = _scan_metadata_paths_from_archive( - tarball_path, registry, prod=product, dir__=dir_ + tarball_path, registry, prod=product, dir__=dir_, pkg_root=root_path ) if not os.path.isdir(target_dir): logger.error("Error: the extracted target_dir path %s does not exist.", target_dir) @@ -119,8 +132,6 @@ def handle_npm_uploading( ) logger.info("Files uploading done\n") - succeeded = True - if not manifest_bucket_name: logger.warning( 'Warning: No manifest bucket is provided, will ignore the process of manifest ' @@ -159,6 +170,13 @@ def handle_npm_uploading( client, bucket_name, target_dir, package_metadata, prefix ) logger.info("package.json generation done\n") + if cf_enable: + meta_f = meta_files.get(META_FILE_GEN_KEY, []) + logger.debug("Add invalidating metafiles: %s", meta_f) + if isinstance(meta_f, str): + cf_invalidate_paths.append(meta_f) + elif isinstance(meta_f, list): + cf_invalidate_paths.extend(meta_f) if META_FILE_GEN_KEY in meta_files: _failed_metas = client.upload_metadatas( @@ -218,22 +236,32 @@ def handle_npm_uploading( ) failed_metas.extend(_failed_metas) logger.info("Index files updating done\n") + # We will not invalidate the index files per cost consideration + # if cf_enable: + # cf_invalidate_paths.extend(created_indexes) else: logger.info("Bypass indexing\n") + # Do CloudFront invalidating for generated metadata + if cf_enable and len(cf_invalidate_paths): + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, target_dir) + upload_post_process(failed_files, failed_metas, product, bucket_name) succeeded = succeeded and len(failed_files) == 0 and len(failed_metas) == 0 - return (target_dir, succeeded) + return (root_dir, succeeded) def handle_npm_del( tarball_path: str, product: str, - buckets: List[Tuple[str, str, str, str]] = None, + buckets: List[Tuple[str, str, str, str]], aws_profile=None, dir_=None, + root_path="package", do_index=True, + cf_enable=False, dry_run=False, manifest_bucket_name=None ) -> Tuple[str, str]: @@ -250,7 +278,7 @@ def handle_npm_del( Returns the directory used for archive processing and if the rollback is successful """ target_dir, package_name_path, valid_paths = _scan_paths_from_archive( - tarball_path, prod=product, dir__=dir_ + tarball_path, prod=product, dir__=dir_, pkg_root=root_path ) valid_dirs = __get_path_tree(valid_paths, target_dir) @@ -258,6 +286,9 @@ def handle_npm_del( client = S3Client(aws_profile=aws_profile, dry_run=dry_run) succeeded = True for bucket in buckets: + # prepare cf invalidate files + cf_invalidate_paths = [] + bucket_name = bucket[1] prefix = remove_prefix(bucket[2], "/") logger.info("Start deleting files from s3 bucket %s", bucket_name) @@ -309,6 +340,9 @@ def handle_npm_del( ) failed_metas.extend(_failed_metas) logger.info("package.json uploading done") + if cf_enable and len(all_meta_files): + logger.debug("Add meta files to cf invalidate list: %s", all_meta_files) + cf_invalidate_paths.extend(all_meta_files) if do_index: logger.info( @@ -329,9 +363,18 @@ def handle_npm_del( ) failed_metas.extend(_failed_index_files) logger.info("Index files updating done.\n") + # We will not invalidate the index files per cost consideration + # if cf_enable and len(created_indexes): + # logger.debug("Add index files to cf invalidate list: %s", created_indexes) + # cf_invalidate_paths.extend(created_indexes) else: 
logger.info("Bypassing indexing\n") + # Do CloudFront invalidating for generated metadata + if cf_enable and len(cf_invalidate_paths): + cf_client = CFClient(aws_profile=aws_profile) + invalidate_cf_paths(cf_client, bucket, cf_invalidate_paths, target_dir) + rollback_post_process(failed_files, failed_metas, product, bucket_name) succeeded = succeeded and len(failed_files) <= 0 and len(failed_metas) <= 0 @@ -433,11 +476,15 @@ def _gen_npm_package_metadata_for_del( return meta_files -def _scan_metadata_paths_from_archive(path: str, registry: str, prod="", dir__=None) ->\ - Tuple[str, list, NPMPackageMetadata]: +def _scan_metadata_paths_from_archive( + path: str, registry: str, prod="", dir__=None, pkg_root="pakage" +) -> Tuple[str, list, NPMPackageMetadata]: tmp_root = mkdtemp(prefix=f"npm-charon-{prod}-", dir=dir__) try: - _, valid_paths = extract_npm_tarball(path, tmp_root, True, registry) + _, valid_paths = extract_npm_tarball( + path=path, target_dir=tmp_root, is_for_upload=True, + pkg_root=pkg_root, registry=registry + ) if len(valid_paths) > 1: version = _scan_for_version(valid_paths[1]) package = NPMPackageMetadata(version, True) @@ -447,9 +494,13 @@ def _scan_metadata_paths_from_archive(path: str, registry: str, prod="", dir__=N sys.exit(1) -def _scan_paths_from_archive(path: str, prod="", dir__=None) -> Tuple[str, str, list]: +def _scan_paths_from_archive( + path: str, prod="", dir__=None, pkg_root="package" +) -> Tuple[str, str, list]: tmp_root = mkdtemp(prefix=f"npm-charon-{prod}-", dir=dir__) - package_name_path, valid_paths = extract_npm_tarball(path, tmp_root, False) + package_name_path, valid_paths = extract_npm_tarball( + path=path, target_dir=tmp_root, is_for_upload=False, pkg_root=pkg_root + ) return tmp_root, package_name_path, valid_paths @@ -476,7 +527,7 @@ def _scan_for_version(path: str): logger.error('Error: Failed to parse json!') -def _is_latest_version(source_version: str, versions: list()): +def _is_latest_version(source_version: str, versions: List[str]): for v in versions: if compare(source_version, v) <= 0: return False diff --git a/charon/pkgs/pkg_utils.py b/charon/pkgs/pkg_utils.py index 20ffc71b..9325f14b 100644 --- a/charon/pkgs/pkg_utils.py +++ b/charon/pkgs/pkg_utils.py @@ -1,5 +1,12 @@ -from typing import List +from typing import List, Tuple +from charon.cache import ( + CFClient, + INVALIDATION_BATCH_DEFAULT, + INVALIDATION_BATCH_WILDCARD, + INVALIDATION_STATUS_COMPLETED +) import logging +import os logger = logging.getLogger(__name__) @@ -44,15 +51,80 @@ def __post_process( product_key, operation, bucket) else: total = len(failed_files) + len(failed_metas) - logger.error("%d file(s) occur errors/warnings in bucket %s, " - "please see errors.log for details.\n", - bucket, total) - logger.error("Product release %s is %s Ronda service in bucket %s, " - "but has some failures as below:", - product_key, operation, bucket) + logger.error( + "%d file(s) occur errors/warnings in bucket %s, " + "please see errors.log for details.\n", + bucket, total + ) + logger.error( + "Product release %s is %s Ronda service in bucket %s, " + "but has some failures as below:", + product_key, operation, bucket + ) if len(failed_files) > 0: - logger.error("Failed files: \n%s\n", - failed_files) + logger.error("Failed files: \n%s\n", failed_files) if len(failed_metas) > 0: - logger.error("Failed metadata files: \n%s\n", - failed_metas) + logger.error("Failed metadata files: \n%s\n", failed_metas) + + +def invalidate_cf_paths( + cf_client: CFClient, + bucket: Tuple[str, str, str, 
str, str], + invalidate_paths: List[str], + root="/", + batch_size=INVALIDATION_BATCH_DEFAULT +): + logger.info("Invalidating CF cache for %s", bucket[1]) + bucket_name = bucket[1] + prefix = bucket[2] + prefix = "/" + prefix if not prefix.startswith("/") else prefix + domain = bucket[4] + slash_root = root + if not root.endswith("/"): + slash_root = slash_root + "/" + final_paths = [] + for full_path in invalidate_paths: + path = full_path + if path.startswith(slash_root): + path = path[len(slash_root):] + if prefix: + path = os.path.join(prefix, path) + final_paths.append(path) + logger.debug("Invalidating paths: %s, size: %s", final_paths, len(final_paths)) + if not domain: + domain = cf_client.get_domain_by_bucket(bucket_name) + if domain: + distr_id = cf_client.get_dist_id_by_domain(domain) + if distr_id: + real_batch_size = batch_size + for path in final_paths: + if path.endswith('*'): + real_batch_size = INVALIDATION_BATCH_WILDCARD + break + result = cf_client.invalidate_paths( + distr_id, final_paths, real_batch_size + ) + if result: + output = {} + for invalidation in result: + status = invalidation.get('Status') + if status not in output: + output[status] = [] + output[status].append(invalidation["Id"]) + non_completed = {} + for status, ids in output.items(): + if status != INVALIDATION_STATUS_COMPLETED: + non_completed[status] = ids + logger.info( + "The CF invalidating requests done, following requests " + "are not completed yet:\n %s\nPlease use cf-check command to " + "check its details.", non_completed + ) + logger.debug( + "All invalidations requested in this process:\n %s", output + ) + else: + logger.error( + "CF invalidating will not be performed because domain not found for" + " bucket %s. ", bucket_name + ) diff --git a/charon/schemas/charon.json b/charon/schemas/charon.json index bf745f9a..f6a931d1 100644 --- a/charon/schemas/charon.json +++ b/charon/schemas/charon.json @@ -52,6 +52,10 @@ "registry": { "description": "npm registry", "type": "string" + }, + "domain": { + "description": "domain name for bucket", + "type": "string" } }, "required": [ @@ -66,6 +70,10 @@ "type": "string", "description": "aws profile to use with S3" }, + "aws_cf_enable": { + "type": "boolean", + "description": "enable aws cloudfront support" + }, "manifest_bucket": { "type": "string", "description": "which bucket to use for storing manifests" diff --git a/charon/storage.py b/charon/storage.py index 45963c42..34ae1274 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -65,14 +65,14 @@ def __init_aws_client( self, aws_profile=None, extra_conf=None ): if aws_profile: - logger.debug("Using aws profile: %s", aws_profile) + logger.debug("[S3] Using aws profile: %s", aws_profile) s3_session = session.Session(profile_name=aws_profile) else: s3_session = session.Session() endpoint_url = self.__get_endpoint(extra_conf) config = None if self.__enable_acceleration(extra_conf): - logger.info("S3 acceleration config enabled, " + logger.info("[S3] S3 acceleration config enabled, " "will enable s3 use_accelerate_endpoint config") config = Config(s3={"use_accelerate_endpoint": True}) return s3_session.resource( @@ -87,9 +87,12 @@ def __get_endpoint(self, extra_conf) -> str: if isinstance(extra_conf, Dict): endpoint_url = extra_conf.get(ENDPOINT_ENV, None) if endpoint_url: - logger.info("Using endpoint url for aws client: %s", endpoint_url) + logger.info( + "[S3] Using endpoint url for aws S3 client: %s", + endpoint_url + ) else: - logger.debug("No user-specified endpoint url is used.") + 
logger.debug("[S3] No user-specified endpoint url is used.") return endpoint_url def __enable_acceleration(self, extra_conf) -> bool: @@ -140,14 +143,14 @@ async def path_upload_handler( async with self.__con_sem: if not os.path.isfile(full_file_path): logger.warning( - 'Warning: file %s does not exist during uploading. Product: %s', + '[S3] Warning: file %s does not exist during uploading. Product: %s', full_file_path, product ) failed.append(full_file_path) return logger.debug( - '(%d/%d) Uploading %s to bucket %s', + '[S3] (%d/%d) Uploading %s to bucket %s', index, total, full_file_path, main_bucket_name ) main_path_key = os.path.join(key_prefix, path) if key_prefix else path @@ -157,7 +160,7 @@ async def path_upload_handler( existed = await self.__run_async(self.__file_exists, main_file_object) except (ClientError, HTTPClientError) as e: logger.error( - "Error: file existence check failed due to error: %s", e + "[S3] Error: file existence check failed due to error: %s", e ) failed.append(full_file_path) return @@ -193,9 +196,9 @@ async def path_upload_handler( main_path_key, main_bucket_name, [product] ) - logger.debug('Uploaded %s to bucket %s', path, main_bucket_name) + logger.debug('[S3] Uploaded %s to bucket %s', path, main_bucket_name) except (ClientError, HTTPClientError) as e: - logger.error("ERROR: file %s not uploaded to bucket" + logger.error("[S3] ERROR: file %s not uploaded to bucket" " %s due to error: %s ", full_file_path, main_bucket_name, e) failed.append(full_file_path) @@ -230,9 +233,9 @@ async def path_upload_handler( extra_path_key, extra_bucket_name, [product] ) except (ClientError, HTTPClientError) as e: - logger.error("ERROR: copying failure happend for file %s to bucket" - " %s due to error: %s ", full_file_path, - extra_bucket_name, e) + logger.error("[S3] ERROR: copying failure happend for file %s" + " to bucket %s due to error: %s ", + full_file_path, extra_bucket_name, e) failed.append(full_file_path) else: await handle_existed( @@ -530,10 +533,11 @@ def delete_files( self, file_paths: List[str], target: Tuple[str, str], product: Optional[str], root="/" ) -> List[str]: - """ Deletes a list of files to s3 bucket. * Use the cut down file path as s3 key. The cut - down way is move root from the file path if it starts with root. Example: if file_path is - /tmp/maven-repo/org/apache/.... and root is /tmp/maven-repo Then the key will be - org/apache/..... + """ Deletes a list of files to s3 bucket. + * Use the cut down file path as s3 key. The cut + down way is move root from the file path if it starts with root. + Example: if file_path is /tmp/maven-repo/org/apache/.... and + root is /tmp/maven-repo Then the key will be org/apache/..... * The removing will happen with conditions of product checking. First the deletion will remove The product from the file metadata "rh-products". After the metadata removing, if there still are extra products left in that metadata, the file will not @@ -612,7 +616,7 @@ async def path_delete_handler( if not updated: failed.append(full_file_path) return - logger.info("Deleted %s from bucket %s", path, bucket_name) + logger.info("[S3] Deleted %s from bucket %s", path, bucket_name) return except (ClientError, HTTPClientError) as e: logger.error( @@ -637,6 +641,90 @@ async def path_delete_handler( return failed_files + def simple_delete_file( + self, file_path: str, target: Tuple[str, str] + ): + """ Deletes file in s3 bucket, regardless of any extra + information like product and version info. 
+ * Warning: this will directly delete the files even if + it has lots of product info, so please use it carefully. + If you want to delete product artifact files, please use + delete_files + """ + bucket = target[0] + prefix = target[1] + bucket_obj = self.__get_bucket(bucket) + path_key = os.path.join(prefix, file_path) + file_object = bucket_obj.Object(path_key) + existed = False + try: + existed = self.__file_exists(file_object) + if existed: + bucket_obj.delete_objects(Delete={"Objects": [{"Key": path_key}]}) + else: + logger.warning( + 'Warning: File %s does not exist in S3 bucket %s, will skip its deletion', + file_path, bucket + ) + except (ClientError, HTTPClientError) as e: + logger.error( + "Error: file existence check failed due to error: %s", e + ) + + def simple_upload_file( + self, file_path: str, file_content: str, + target: Tuple[str, str], + mime_type: str = None, + check_sum_sha1: str = None + ): + """ Uploads file to s3 bucket, regardless of any extra + information like product and version info. + * Warning: this will directly overwrite the files even if + it has lots of product info, so please use it carefully. + If you want to upload product artifact files, please use + upload_files + """ + bucket = target[0] + prefix = target[1] + bucket_obj = self.__get_bucket(bucket) + path_key = os.path.join(prefix, file_path) + file_object = bucket_obj.Object(path_key) + existed = False + logger.debug( + 'Uploading %s to bucket %s', path_key, bucket + ) + existed = False + try: + existed = self.__file_exists(file_object) + except (ClientError, HTTPClientError) as e: + logger.error( + "Error: file existence check failed due to error: %s", e + ) + return + + content_type = mime_type + if not content_type: + content_type = DEFAULT_MIME_TYPE + if not existed: + f_meta = {} + if check_sum_sha1 and check_sum_sha1.strip() != "": + f_meta[CHECKSUM_META_KEY] = check_sum_sha1 + try: + if not self.__dry_run: + file_object.put( + Body=file_content, + Metadata=f_meta, + ContentType=content_type + ) + logger.debug('Uploaded %s to bucket %s', path_key, bucket) + except (ClientError, HTTPClientError) as e: + logger.error( + "ERROR: file %s not uploaded to bucket %s due to error: %s ", + file_path, bucket, e + ) + else: + raise FileExistsError(f"Error: file {file_path} already exists, upload is forbidden.") + def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str): if not manifest_bucket_name: logger.warning( @@ -674,7 +762,7 @@ def get_files(self, bucket_name: str, prefix=None, suffix=None) -> Tuple[List[st try: objs = list(bucket.objects.filter(Prefix=prefix)) except (ClientError, HTTPClientError) as e: - logger.error("ERROR: Can not get files under %s in bucket" + logger.error("[S3] ERROR: Can not get files under %s in bucket" " %s due to error: %s ", prefix, bucket_name, e) return ([], False) @@ -715,7 +803,7 @@ def list_folder_content(self, bucket_name: str, folder: str) -> List[str]: ) except (ClientError, HTTPClientError) as e: - logger.error("ERROR: Can not get contents of %s from bucket" + logger.error("[S3] ERROR: Can not get contents of %s from bucket" " %s due to error: %s ", folder, bucket_name, e) return [] @@ -743,7 +831,7 @@ def __get_bucket(self, bucket_name: str): bucket = self.__buckets.get(bucket_name) if bucket: return bucket - logger.debug("Cache aws bucket %s", bucket_name) + logger.debug("[S3] Cache aws bucket %s", bucket_name) bucket = self.__client.Bucket(bucket_name) self.__buckets[bucket_name] = bucket return bucket @@ -763,15 +851,15 @@ def 
__file_exists(self, file_object) -> bool: def __get_prod_info( self, file: str, bucket_name: str ) -> Tuple[List[str], bool]: - logger.debug("Getting product infomation for file %s", file) + logger.debug("[S3] Getting product infomation for file %s", file) prod_info_file = file + PROD_INFO_SUFFIX try: info_file_content = self.read_file_content(bucket_name, prod_info_file) prods = [p.strip() for p in info_file_content.split("\n")] - logger.debug("Got product information as below %s", prods) + logger.debug("[S3] Got product information as below %s", prods) return (prods, True) except (ClientError, HTTPClientError) as e: - logger.warning("WARN: Can not get product info for file %s " + logger.warning("[S3] WARN: Can not get product info for file %s " "due to error: %s", file, e) return ([], False) @@ -783,7 +871,7 @@ async def __update_prod_info( file_obj = bucket.Object(prod_info_file) content_type = "text/plain" if len(prods) > 0: - logger.debug("Updating product infomation for file %s " + logger.debug("[S3] Updating product infomation for file %s " "with products: %s", file, prods) try: await self.__run_async( @@ -793,14 +881,14 @@ async def __update_prod_info( ContentType=content_type ) ) - logger.debug("Updated product infomation for file %s", file) + logger.debug("[S3] Updated product infomation for file %s", file) return True except (ClientError, HTTPClientError) as e: - logger.warning("WARNING: Can not update product info for file %s " + logger.warning("[S3] WARNING: Can not update product info for file %s " "due to error: %s", file, e) return False else: - logger.debug("Removing product infomation file for file %s " + logger.debug("[S3] Removing product infomation file for file %s " "because no products left", file) try: result = await self.__run_async( @@ -814,10 +902,10 @@ async def __update_prod_info( Delete={"Objects": [{"Key": prod_info_file}]} ) ) - logger.debug("Removed product infomation file for file %s", file) + logger.debug("[S3] Removed product infomation file for file %s", file) return True except (ClientError, HTTPClientError) as e: - logger.warning("WARNING: Can not delete product info file for file %s " + logger.warning("[S3] WARNING: Can not delete product info file for file %s " "due to error: %s", file, e) return False @@ -833,7 +921,7 @@ async def wrapper( await path_handler(full_file_path, path, index, total, failed) finally: if index % FILE_REPORT_LIMIT == 0: - logger.info("######### %d/%d files finished", index, total) + logger.info("[S3] ######### %d/%d files finished", index, total) return wrapper def __do_path_cut_and( diff --git a/charon/utils/archive.py b/charon/utils/archive.py index 5bcb2777..eca56ebe 100644 --- a/charon/utils/archive.py +++ b/charon/utils/archive.py @@ -46,8 +46,9 @@ def extract_zip_with_files(zf: ZipFile, target_dir: str, file_suffix: str, debug zf.extractall(target_dir, members=filtered) -def extract_npm_tarball(path: str, target_dir: str, is_for_upload: bool, registry=DEFAULT_REGISTRY)\ - -> Tuple[str, list]: +def extract_npm_tarball( + path: str, target_dir: str, is_for_upload: bool, pkg_root="package", registry=DEFAULT_REGISTRY +) -> Tuple[str, list]: """ Extract npm tarball will relocate the tgz file and metadata files. * Locate tar path ( e.g.: jquery/-/jquery-7.6.1.tgz or @types/jquery/-/jquery-2.2.3.tgz). * Locate version metadata path (e.g.: jquery/7.6.1 or @types/jquery/2.2.3). 
@@ -56,30 +57,50 @@ def extract_npm_tarball(path: str, target_dir: str, is_for_upload: bool, registr valid_paths = [] package_name_path = str() tgz = tarfile.open(path) + pkg_file = None + root_pkg_file_exists = True + try: + root_pkg_path = os.path.join(pkg_root, "package.json") + logger.debug(root_pkg_path) + pkg_file = tgz.getmember(root_pkg_path) + root_pkg_file_exists = pkg_file.isfile() + except KeyError: + root_pkg_file_exists = False + pkg_file = None tgz.extractall() - for f in tgz: - if f.name.endswith("package.json"): - version_data, parse_paths = __parse_npm_package_version_paths(f.path) - package_name_path = parse_paths[0] - os.makedirs(os.path.join(target_dir, parse_paths[0])) - tarball_parent_path = os.path.join(target_dir, parse_paths[0], "-") - valid_paths.append(os.path.join(tarball_parent_path, _get_tgz_name(path))) - version_metadata_parent_path = os.path.join( - target_dir, parse_paths[0], parse_paths[1] + if not root_pkg_file_exists: + logger.info( + "Root package.json is not found for archive: %s, will search others", + path + ) + for f in tgz: + if f.name.endswith("package.json"): + logger.info("Found package.json as %s", f.path) + pkg_file = f + break + if pkg_file: + version_data, parse_paths = __parse_npm_package_version_paths(pkg_file.path) + package_name_path = parse_paths[0] + os.makedirs(os.path.join(target_dir, parse_paths[0])) + tarball_parent_path = os.path.join(target_dir, parse_paths[0], "-") + valid_paths.append(os.path.join(tarball_parent_path, _get_tgz_name(path))) + version_metadata_parent_path = os.path.join( + target_dir, parse_paths[0], parse_paths[1] + ) + valid_paths.append(os.path.join(version_metadata_parent_path, "package.json")) + + if is_for_upload: + tgz_relative_path = "/".join([parse_paths[0], "-", _get_tgz_name(path)]) + __write_npm_version_dist( + path, pkg_file.path, version_data, tgz_relative_path, registry ) - valid_paths.append(os.path.join(version_metadata_parent_path, "package.json")) - - if is_for_upload: - tgz_relative_path = "/".join([parse_paths[0], "-", _get_tgz_name(path)]) - __write_npm_version_dist(path, f.path, version_data, tgz_relative_path, registry) - - os.makedirs(tarball_parent_path) - target = os.path.join(tarball_parent_path, os.path.basename(path)) - shutil.copyfile(path, target) - os.makedirs(version_metadata_parent_path) - target = os.path.join(version_metadata_parent_path, os.path.basename(f.path)) - shutil.copyfile(f.path, target) - break + + os.makedirs(tarball_parent_path) + target = os.path.join(tarball_parent_path, os.path.basename(path)) + shutil.copyfile(path, target) + os.makedirs(version_metadata_parent_path) + target = os.path.join(version_metadata_parent_path, os.path.basename(pkg_file.path)) + shutil.copyfile(pkg_file.path, target) return package_name_path, valid_paths diff --git a/charon/utils/files.py b/charon/utils/files.py index ffe08bef..f15f77c4 100644 --- a/charon/utils/files.py +++ b/charon/utils/files.py @@ -58,9 +58,30 @@ def read_sha1(file: str) -> str: def digest(file: str, hash_type=HashType.SHA1) -> str: + hash_obj = _hash_object(hash_type) + # BUF_SIZE is totally arbitrary, change for your app! BUF_SIZE = 65536 # lets read stuff in 64kb chunks! 
+ with open(file, "rb") as f: + while True: + data = f.read(BUF_SIZE) + if not data: + break + hash_obj.update(data) + + return hash_obj.hexdigest() + +def digest_content(content: str, hash_type=HashType.SHA1) -> str: + """This function will calculate the hash value for the string content with the specified + hash type + """ + hash_obj = _hash_object(hash_type) + hash_obj.update(content.encode('utf-8')) + return hash_obj.hexdigest() + + +def _hash_object(hash_type: HashType): hash_obj = None if hash_type == HashType.SHA1: hash_obj = hashlib.sha1() @@ -70,15 +91,7 @@ def digest(file: str, hash_type=HashType.SHA1) -> str: hash_obj = hashlib.md5() else: raise Exception("Error: Unknown hash type for digesting.") - - with open(file, "rb") as f: - while True: - data = f.read(BUF_SIZE) - if not data: - break - hash_obj.update(data) - - return hash_obj.hexdigest() + return hash_obj def write_manifest(paths: List[str], root: str, product_key: str) -> Tuple[str, str]: diff --git a/charon/utils/logs.py b/charon/utils/logs.py index ed8469f9..9e273640 100644 --- a/charon/utils/logs.py +++ b/charon/utils/logs.py @@ -45,7 +45,10 @@ def __del__(self): pass -def set_logging(product: str, version: str, name="charon", level=logging.DEBUG, handler=None): +def set_logging( + product: str, version: str, name="charon", + level=logging.DEBUG, handler=None, use_log_file=True +): # create logger logger = logging.getLogger(name) for hdlr in list(logger.handlers): # make a copy so it doesn't change @@ -69,7 +72,8 @@ def set_logging(product: str, version: str, name="charon", level=logging.DEBUG, # add ch to logger logger.addHandler(handler) - set_log_file_handler(product, version, logger) + if use_log_file: + set_log_file_handler(product, version, logger) logger = logging.getLogger('charon') for hdlr in list(logger.handlers): # make a copy so it doesn't change diff --git a/config/README b/config/README new file mode 100644 index 00000000..239129e1 --- /dev/null +++ b/config/README @@ -0,0 +1,6 @@ +# Sample config files + +There are two sample config files here: + +* [aws-credentials.sample](./aws-credentials.sample): sample aws credentials file. It has the same format as the official AWS credentials config file. +* [charon.yaml.sample](./charon.yaml.sample): sample charon config file. You can put it into $HOME/.charon/charon.yaml and adjust the content to your requirements. 
diff --git a/config/aws-credentials b/config/aws-credentials.sample similarity index 100% rename from config/aws-credentials rename to config/aws-credentials.sample diff --git a/config/charon.yaml b/config/charon.yaml.sample similarity index 100% rename from config/charon.yaml rename to config/charon.yaml.sample diff --git a/requirements.txt b/requirements.txt index 563abc26..cc669871 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ setuptools-rust==1.7.0 -Jinja2==3.1.2 +Jinja2==3.1.3 boto3==1.28.46 botocore==1.31.46 click==8.1.7 @@ -8,3 +8,4 @@ PyYAML==6.0.1 defusedxml==0.7.1 subresource-integrity==0.2 jsonschema==4.19.0 +urllib3==1.26.18 \ No newline at end of file diff --git a/setup.py b/setup.py index da42d21f..98d6be4d 100755 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ """ from setuptools import setup, find_packages -version = "1.2.2" +version = "1.3.0" # f = open('README.md') # long_description = f.read().strip() @@ -48,6 +48,6 @@ package_data={'charon': ['schemas/*.json']}, test_suite="tests", entry_points={ - "console_scripts": ["charon = charon:cli"], + "console_scripts": ["charon = charon.cmd:cli"], }, ) diff --git a/template/README b/template/README new file mode 100644 index 00000000..46e47f9a --- /dev/null +++ b/template/README @@ -0,0 +1,3 @@ +# Sample template files to generate metadata + +These template files are used to generate the related metadata. You can make changes based on the sample templates and then put them into $HOME/.charon/template/ to replace the default templates defined in the charon sources. diff --git a/template/archetype-catalog.xml.j2 b/template/archetype-catalog.xml.j2.sample similarity index 100% rename from template/archetype-catalog.xml.j2 rename to template/archetype-catalog.xml.j2.sample diff --git a/template/index.html.j2 b/template/index.html.j2.sample similarity index 100% rename from template/index.html.j2 rename to template/index.html.j2.sample diff --git a/template/maven-metadata.xml.j2 b/template/maven-metadata.xml.j2.sample similarity index 100% rename from template/maven-metadata.xml.j2 rename to template/maven-metadata.xml.j2.sample diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..e3cdc8ed 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,20 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import logging + +logging.basicConfig(level=logging.INFO) +logging.getLogger("charon").setLevel(logging.DEBUG) diff --git a/tests/base.py b/tests/base.py index 49cd2f1e..11ff9af3 100644 --- a/tests/base.py +++ b/tests/base.py @@ -25,9 +25,12 @@ from charon.pkgs.pkg_utils import is_metadata from charon.storage import PRODUCT_META_KEY, CHECKSUM_META_KEY from tests.commons import TEST_BUCKET, TEST_MANIFEST_BUCKET -from moto import mock_s3 +from tests.constants import HERE, TEST_DS_CONFIG +from moto import mock_aws +import logging -from tests.constants import HERE +logging.basicConfig(level=logging.INFO) +logging.getLogger("charon").setLevel(logging.DEBUG) SHORT_TEST_PREFIX = "ga" LONG_TEST_PREFIX = "earlyaccess/all" @@ -38,7 +41,21 @@ def setUp(self): self.change_home() config_base = self.get_config_base() self.__prepare_template(config_base) - default_config_content = """ + config_content = self.get_config_content() + self.prepare_config(config_base, config_content) + + def tearDown(self): + shutil.rmtree(self.tempdir, ignore_errors=True) + os.environ = self.old_environ + + def change_home(self): + self.old_environ = os.environ.copy() + self.tempdir = tempfile.mkdtemp(prefix='charon-test-') + # Configure environment and copy templates + os.environ['HOME'] = self.tempdir + + def get_config_content(self): + return """ ignore_patterns: - ".*^(redhat).*" - ".*snapshot.*" @@ -69,17 +86,6 @@ def setUp(self): aws_profile: "test" manifest_bucket: "manifest" """ - self.prepare_config(config_base, default_config_content) - - def tearDown(self): - shutil.rmtree(self.tempdir, ignore_errors=True) - os.environ = self.old_environ - - def change_home(self): - self.old_environ = os.environ.copy() - self.tempdir = tempfile.mkdtemp(prefix='charon-test-') - # Configure environment and copy templates - os.environ['HOME'] = self.tempdir def __prepare_template(self, config_base): template_path = os.path.join(config_base, 'template') @@ -101,7 +107,7 @@ def get_config_base(self) -> str: return os.path.join(self.get_temp_dir(), '.charon') -@mock_s3 +@mock_aws class PackageBaseTest(BaseTest): def setUp(self): super().setUp() @@ -158,3 +164,22 @@ def check_content(self, objs: List, products: List[str], msg=None): self.assertEqual(sha1_checksum, sha1_file_content, msg=msg) self.assertIn(CHECKSUM_META_KEY, file_obj.metadata, msg=msg) self.assertNotEqual("", file_obj.metadata[CHECKSUM_META_KEY].strip(), msg=msg) + + +@mock_aws +class CFBasedTest(PackageBaseTest): + def setUp(self): + super().setUp() + # mock_cf is used to generate expected content + self.mock_cf = self.__prepare_cf() + response = self.mock_cf.create_distribution(DistributionConfig=TEST_DS_CONFIG) + self.test_dist_id = response.get('Distribution').get('Id') + + def tearDown(self): + super().tearDown() + # The IfMatch-value is ignored - any value is considered valid. 
+ # Calling this function without a value is invalid, per AWS’ behaviour + self.mock_cf.delete_distribution(Id=self.test_dist_id, IfMatch='..') + + def __prepare_cf(self): + return boto3.client('cloudfront') diff --git a/tests/constants.py b/tests/constants.py index 2e6d111f..0e202deb 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -17,3 +17,33 @@ HERE = os.path.dirname(__file__) INPUTS = os.path.join(HERE, 'input') + +TEST_DS_CONFIG = { + 'CallerReference': 'test', + "Aliases": { + "Quantity": 1, + "Items": [ + "maven.repository.redhat.com", + "npm.registry.redhat.com" + ] + }, + "Origins": { + "Quantity": 1, + "Items": [ + { + "Id": "prod-maven-ga", + "DomainName": "prod-maven-ga.s3.us-east-1.amazonaws.com", + "OriginPath": "", + "CustomHeaders": { + "Quantity": 0 + }, + } + ] + }, + "DefaultCacheBehavior": { + "TargetOriginId": "prod-maven-ga", + "ViewerProtocolPolicy": "allow-all", + }, + "Comment": "", + "Enabled": True + } diff --git a/tests/input/code-frame-7.14.5-multi-pkgs.tgz b/tests/input/code-frame-7.14.5-multi-pkgs.tgz new file mode 100644 index 00000000..b9a284a4 Binary files /dev/null and b/tests/input/code-frame-7.14.5-multi-pkgs.tgz differ diff --git a/tests/input/code-frame-7.14.5-no-root-pkg.tgz b/tests/input/code-frame-7.14.5-no-root-pkg.tgz new file mode 100644 index 00000000..96c85af0 Binary files /dev/null and b/tests/input/code-frame-7.14.5-no-root-pkg.tgz differ diff --git a/tests/requirements.txt b/tests/requirements.txt index af22ba64..4acad34d 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,9 +1,9 @@ flexmock>=0.10.6 -responses>=0.9.0,<0.10.8 +responses>=0.9.0 pytest<=7.1.3 pytest-cov pytest-html flake8 requests-mock -moto==3.0.2.dev12 +moto==5.0.3 python-gnupg==0.5.0 diff --git a/tests/test_cf_maven_ops.py b/tests/test_cf_maven_ops.py new file mode 100644 index 00000000..46bb5780 --- /dev/null +++ b/tests/test_cf_maven_ops.py @@ -0,0 +1,73 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from charon.pkgs.maven import handle_maven_uploading, handle_maven_del +from tests.base import CFBasedTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +from moto import mock_aws +import os + + +@mock_aws +class CFInMavenOPSTest(CFBasedTest): + def test_cf_after_upload(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir, + do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) + + def test_cf_after_del(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product_456 = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product_456, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir, + do_index=True + ) + + product_456 = "commons-client-4.5.6" + handle_maven_del( + test_zip, product_456, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir, do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) diff --git a/tests/test_cf_npm_ops.py b/tests/test_cf_npm_ops.py new file mode 100644 index 00000000..8b1c11b9 --- /dev/null +++ b/tests/test_cf_npm_ops.py @@ -0,0 +1,71 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from charon.pkgs.npm import handle_npm_uploading, handle_npm_del +from charon.constants import DEFAULT_REGISTRY +from tests.base import CFBasedTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +from moto import mock_aws +import os + + +@mock_aws +class CFInNPMOPSTest(CFBasedTest): + def test_cf_after_upload(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, "/", DEFAULT_REGISTRY, "npm.registry.redhat.com")], + dir_=self.tempdir, do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) + + def test_cf_after_del(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '/', DEFAULT_REGISTRY, 'npm.registry.redhat.com')], + dir_=self.tempdir, do_index=True + ) + + handle_npm_del( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '/', DEFAULT_REGISTRY, 'npm.registry.redhat.com')], + dir_=self.tempdir, do_index=True, + cf_enable=True + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) diff --git a/tests/test_cf_reindex.py b/tests/test_cf_reindex.py new file mode 100644 index 00000000..0e986af6 --- /dev/null +++ b/tests/test_cf_reindex.py @@ -0,0 +1,78 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from charon.pkgs.maven import handle_maven_uploading +from charon.pkgs.npm import handle_npm_uploading +from charon.pkgs.indexing import re_index +from charon.constants import DEFAULT_REGISTRY +from tests.base import CFBasedTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +from moto import mock_aws +import os +import pytest + + +@mock_aws +class CFReIndexTest(CFBasedTest): + @pytest.mark.skip(reason="Indexing CF invalidation is abandoned") + def test_cf_maven_after_reindex(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product_456 = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product_456, + buckets=[('', TEST_BUCKET, 'ga', '', 'maven.repository.redhat.com')], + dir_=self.tempdir + ) + + re_index( + {"bucket": TEST_BUCKET, "prefix": "ga"}, + "org/apache/httpcomponents/httpclient/", "maven" + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) + + @pytest.mark.skip(reason="Indexing CF invalidation is abandoned") + def test_cf_npm_after_reindex(self): + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertIsNotNone(response) + self.assertEqual(0, response.get('InvalidationList').get('Quantity')) + + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '/', DEFAULT_REGISTRY, 'npm.registry.redhat.com')], + dir_=self.tempdir, do_index=True + ) + + re_index( + {"bucket": TEST_BUCKET, "prefix": ""}, + "@babel/", "npm" + ) + + response = self.mock_cf.list_invalidations(DistributionId=self.test_dist_id) + self.assertEqual(1, response.get('InvalidationList').get('Quantity')) + items = response.get('InvalidationList').get('Items') + self.assertEqual(1, len(items)) + self.assertEqual('completed', str.lower(items[0].get('Status'))) diff --git a/tests/test_cfclient.py b/tests/test_cfclient.py new file mode 100644 index 00000000..8a38a68e --- /dev/null +++ b/tests/test_cfclient.py @@ -0,0 +1,74 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from tests.base import BaseTest +from tests.constants import TEST_DS_CONFIG +from charon.cache import CFClient +from moto import mock_aws +import boto3 +import pytest + + +@mock_aws +class CFClientTest(BaseTest): + def setUp(self): + super().setUp() + # mock_cf is used to generate expected content + self.mock_cf = self.__prepare_cf() + response = self.mock_cf.create_distribution(DistributionConfig=TEST_DS_CONFIG) + self.test_dist_id = response.get('Distribution').get('Id') + # cf_client is the client we will test + self.cf_client = CFClient() + + def tearDown(self): + self.mock_cf.delete_distribution(Id=self.test_dist_id, IfMatch=".") + super().tearDown() + + def __prepare_cf(self): + return boto3.client('cloudfront') + + def test_get_distribution_id(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + self.assertIsNotNone(dist_id) + dist_id = self.cf_client.get_dist_id_by_domain("notexists.redhat.com") + self.assertIsNone(dist_id) + + def test_invalidate_paths_single(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + result = self.cf_client.invalidate_paths(dist_id, ["/*"]) + self.assertEqual(len(result), 1) + self.assertTrue(result[0]['Id']) + self.assertEqual('completed', str.lower(result[0]['Status'])) + status = self.cf_client.invalidate_paths("noexists_id", ["/*"]) + self.assertFalse(status) + + def test_invalidate_paths_multi(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + result = self.cf_client.invalidate_paths(dist_id, ["/1", "/2", "/3"], batch_size=1) + self.assertEqual(len(result), 3) + for r in result: + self.assertTrue(r['Id']) + self.assertEqual('completed', str.lower(r['Status'])) + + @pytest.mark.skip(reason=""" + Because current moto 5.0.3 has not implemented the get_invalidation(), + this test will fail. 
Will enable it when it is implemented in a future moto release + """) + def test_check_invalidation(self): + dist_id = self.cf_client.get_dist_id_by_domain("maven.repository.redhat.com") + result = self.cf_client.invalidate_paths(dist_id, ["/*"]) + invalidation = self.cf_client.check_invalidation(dist_id, result[0]['Id']) + self.assertIsNotNone(invalidation['Id']) + self.assertEqual('completed', str.lower(result[0]['Status'])) diff --git a/tests/test_manifest_del.py b/tests/test_manifest_del.py index fc5ff35c..b5d42255 100644 --- a/tests/test_manifest_del.py +++ b/tests/test_manifest_del.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.maven import handle_maven_uploading, handle_maven_del from charon.pkgs.npm import handle_npm_uploading, handle_npm_del @@ -28,7 +28,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class ManifestDeleteTest(PackageBaseTest): def test_maven_manifest_delete(self): diff --git a/tests/test_manifest_upload.py b/tests/test_manifest_upload.py index e6aa43e9..8a76de8d 100644 --- a/tests/test_manifest_upload.py +++ b/tests/test_manifest_upload.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.maven import handle_maven_uploading from charon.pkgs.npm import handle_npm_uploading @@ -29,7 +29,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class ManifestUploadTest(PackageBaseTest): def test_maven_manifest_upload(self): diff --git a/tests/test_maven_del.py b/tests/test_maven_del.py index c26e6d4a..9ce85eaa 100644 --- a/tests/test_maven_del.py +++ b/tests/test_maven_del.py @@ -24,13 +24,13 @@ ARCHETYPE_CATALOG, ARCHETYPE_CATALOG_FILES, COMMONS_CLIENT_459_MVN_NUM, COMMONS_CLIENT_META_NUM ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenDeleteTest(PackageBaseTest): def test_maven_deletion(self): self.__test_prefix_deletion("") diff --git a/tests/test_maven_del_multi_tgts.py b/tests/test_maven_del_multi_tgts.py index ffc60954..c3c93713 100644 --- a/tests/test_maven_del_multi_tgts.py +++ b/tests/test_maven_del_multi_tgts.py @@ -24,13 +24,13 @@ ARCHETYPE_CATALOG, ARCHETYPE_CATALOG_FILES, COMMONS_CLIENT_459_MVN_NUM, COMMONS_CLIENT_META_NUM, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenDeleteMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py index d5647ecd..4952c5d7 100644 --- a/tests/test_maven_index.py +++ b/tests/test_maven_index.py @@ -15,6 +15,7 @@ """ from charon.constants import PROD_INFO_SUFFIX from charon.pkgs.maven import handle_maven_uploading, handle_maven_del +from charon.pkgs.indexing import re_index from charon.storage import CHECKSUM_META_KEY from charon.utils.strings import remove_prefix from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, PackageBaseTest @@ -23,13 +24,13 @@ COMMONS_LOGGING_INDEXES, COMMONS_CLIENT_INDEX, COMMONS_CLIENT_456_INDEX, COMMONS_LOGGING_INDEX, COMMONS_ROOT_INDEX ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenFileIndexTest(PackageBaseTest): def test_uploading_index(self): @@ -45,8 +46,6 @@ def test_uploading_index(self): objs = list(test_bucket.objects.all()) actual_files = [obj.key for obj in objs] - self.assertEqual(41, len(actual_files)) - for f in
COMMONS_LOGGING_INDEXES: self.assertIn(f, actual_files) @@ -127,6 +126,88 @@ def test_overlap_upload_index(self): self.assertNotIn("../", index_content) self.assertNotIn(PROD_INFO_SUFFIX, index_content) + def test_re_index(self): + test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") + product = "commons-client-4.5.6" + handle_maven_uploading( + test_zip, product, + buckets=[('', TEST_BUCKET, '', '')], + dir_=self.tempdir + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + + for f in COMMONS_CLIENT_456_INDEXES: + self.assertIn(f, actual_files) + + self.check_content(objs, [product]) + + index_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('../', index_content) + self.assertIn('4.5.6/', index_content) + self.assertIn( + '' + 'maven-metadata.xml', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.md5', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha1', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha256', + index_content + ) + self.assertNotIn("4.5.7/", index_content) + + # insert a new file under commons-client + commons_client_root = "org/apache/httpcomponents/httpclient/" + commons_client_457_test = commons_client_root + "4.5.7/httpclient-4.5.7.txt" + self.mock_s3.Bucket(TEST_BUCKET).put_object( + Key=commons_client_457_test, + Body="Just a test content" + ) + re_index( + {"bucket": TEST_BUCKET, "prefix": ""}, + commons_client_root, "maven" + ) + index_obj = test_bucket.Object(COMMONS_CLIENT_INDEX) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('../', index_content) + self.assertIn('4.5.6/', index_content) + self.assertIn( + '' + 'maven-metadata.xml', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.md5', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha1', + index_content + ) + self.assertIn( + '' + 'maven-metadata.xml.sha256', + index_content + ) + self.assertIn("4.5.7/", index_content) + self.assertNotIn(PROD_INFO_SUFFIX, index_content) + def test_upload_index_with_short_prefix(self): self.__test_upload_index_with_prefix(SHORT_TEST_PREFIX) diff --git a/tests/test_maven_index_multi_tgts.py b/tests/test_maven_index_multi_tgts.py index a02707f2..ddd7bb12 100644 --- a/tests/test_maven_index_multi_tgts.py +++ b/tests/test_maven_index_multi_tgts.py @@ -23,13 +23,13 @@ COMMONS_LOGGING_INDEXES, COMMONS_CLIENT_INDEX, COMMONS_CLIENT_456_INDEX, COMMONS_LOGGING_INDEX, COMMONS_ROOT_INDEX, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenFileIndexMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_maven_sign.py b/tests/test_maven_sign.py index 41cab15e..52df5690 100644 --- a/tests/test_maven_sign.py +++ b/tests/test_maven_sign.py @@ -19,13 +19,13 @@ TEST_BUCKET, COMMONS_CLIENT_456_SIGNS, COMMONS_LOGGING_SIGNS, COMMONS_CLIENT_456_INDEX, COMMONS_CLIENT_459_SIGNS ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenFileSignTest(PackageBaseTest): def test_uploading_sign(self): diff --git a/tests/test_maven_upload.py b/tests/test_maven_upload.py index 431475a8..c47d1695 100644 --- a/tests/test_maven_upload.py +++ b/tests/test_maven_upload.py @@ -23,13 +23,13 @@ COMMONS_CLIENT_456_MVN_NUM, COMMONS_CLIENT_MVN_NUM,
COMMONS_CLIENT_META_NUM ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenUploadTest(PackageBaseTest): def test_fresh_upload(self): self.__test_prefix_upload("") diff --git a/tests/test_maven_upload_multi_tgts.py b/tests/test_maven_upload_multi_tgts.py index ffb41d20..921e8a9d 100644 --- a/tests/test_maven_upload_multi_tgts.py +++ b/tests/test_maven_upload_multi_tgts.py @@ -24,13 +24,13 @@ COMMONS_CLIENT_456_MVN_NUM, COMMONS_CLIENT_MVN_NUM, COMMONS_CLIENT_META_NUM, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class MavenUploadMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_del.py b/tests/test_npm_del.py index 8d6a6df0..5f734b26 100644 --- a/tests/test_npm_del.py +++ b/tests/test_npm_del.py @@ -14,7 +14,7 @@ limitations under the License. """ import os -from moto import mock_s3 +from moto import mock_aws from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY from charon.pkgs.npm import handle_npm_uploading, handle_npm_del from charon.storage import CHECKSUM_META_KEY @@ -23,7 +23,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMDeleteTest(PackageBaseTest): def test_npm_deletion(self): self.__test_prefix() diff --git a/tests/test_npm_del_multi_tgts.py b/tests/test_npm_del_multi_tgts.py index ac1e2c32..a6401db6 100644 --- a/tests/test_npm_del_multi_tgts.py +++ b/tests/test_npm_del_multi_tgts.py @@ -14,7 +14,7 @@ limitations under the License. """ import os -from moto import mock_s3 +from moto import mock_aws from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY from charon.pkgs.npm import handle_npm_uploading, handle_npm_del from charon.storage import CHECKSUM_META_KEY @@ -23,7 +23,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMDeleteMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_dist_gen.py b/tests/test_npm_dist_gen.py index 438cc094..7fbf58c0 100644 --- a/tests/test_npm_dist_gen.py +++ b/tests/test_npm_dist_gen.py @@ -15,7 +15,7 @@ """ import os import subresource_integrity -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading from charon.utils.files import digest, HashType from tests.base import PackageBaseTest @@ -26,7 +26,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMUploadTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py index fa0ebc3a..b435f765 100644 --- a/tests/test_npm_index.py +++ b/tests/test_npm_index.py @@ -15,13 +15,14 @@ """ from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY from charon.pkgs.npm import handle_npm_uploading, handle_npm_del +from charon.pkgs.indexing import re_index from charon.storage import CHECKSUM_META_KEY from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, PackageBaseTest from tests.commons import ( TEST_BUCKET, CODE_FRAME_7_14_5_INDEXES, CODE_FRAME_7_15_8_INDEXES, COMMONS_ROOT_INDEX ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS @@ -29,7 +30,7 @@ NAMESPACE_BABEL_INDEX = "@babel/index.html" -@mock_s3 +@mock_aws class NpmFileIndexTest(PackageBaseTest): def test_uploading_index(self): self.__test_upload_prefix() @@ -182,3 +183,117 @@ def __prepare_content(self, prefix: str = None): buckets=[('', TEST_BUCKET, prefix, 
DEFAULT_REGISTRY)], dir_=self.tempdir ) + + def test_re_index(self): + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz") + product_7_14_5 = "code-frame-7.14.5" + prefix = SHORT_TEST_PREFIX + + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, SHORT_TEST_PREFIX, DEFAULT_REGISTRY)], + dir_=self.tempdir, + ) + + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + + prefixed_7158_indexes = [ + os.path.join(prefix, f) for f in CODE_FRAME_7_15_8_INDEXES + ] + prefixed_namespace_babel_index = os.path.join(prefix, NAMESPACE_BABEL_INDEX) + prefixed_root_index = os.path.join(prefix, COMMONS_ROOT_INDEX) + + for assert_file in prefixed_7158_indexes: + self.assertNotIn(assert_file, actual_files) + + # test package path + index_obj = test_bucket.Object(prefixed_namespace_babel_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('code-frame/', + index_content) + test_file_path = os.path.join(prefix, "@babel/test/test-file.txt") + self.assertNotIn( + '' + 'test/test-file.txt', index_content + ) + # Add entry and re-index package path + test_bucket.put_object( + Key=test_file_path, Body="test content" + ) + re_index( + {"bucket": TEST_BUCKET, "prefix": prefix}, + "@babel/", "npm" + ) + index_obj = test_bucket.Object(prefixed_namespace_babel_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn( + 'code-frame/', index_content + ) + self.assertIn( + 'test/', index_content + ) + self.assertIn( + '../', index_content + ) + self.assertNotIn(PROD_INFO_SUFFIX, index_content) + + # test root path + index_obj = test_bucket.Object(prefixed_root_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('@babel/', index_content) + test_file_path = os.path.join(prefix, "test/test-file.txt") + self.assertNotIn( + '' + 'test/test-file.txt', index_content + ) + # Add entry and re-index root + test_bucket.put_object( + Key=test_file_path, Body="test content" + ) + re_index( + {"bucket": TEST_BUCKET, "prefix": prefix}, + "/", "npm" + ) + index_obj = test_bucket.Object(prefixed_root_index) + index_content = str(index_obj.get()["Body"].read(), "utf-8") + self.assertIn('@babel/', index_content) + self.assertIn( + '' + 'test/', index_content + ) + self.assertNotIn('../', index_content) + self.assertNotIn(PROD_INFO_SUFFIX, index_content) + + # Test metadata path + metadata_path = "@babel/code-frame/" + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertIn( + os.path.join(prefix, metadata_path, "package.json"), + actual_files + ) + self.assertNotIn( + os.path.join(prefix, metadata_path, "index.html"), + actual_files + ) + # Add entry and re-index metadata path + test_file_path = os.path.join(prefix, metadata_path, "test/test-file.txt") + test_bucket.put_object( + Key=test_file_path, Body="test content" + ) + re_index( + {"bucket": TEST_BUCKET, "prefix": prefix}, + metadata_path, "npm" + ) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + self.assertIn( + os.path.join(prefix, metadata_path, "package.json"), + actual_files + ) + self.assertNotIn( + os.path.join(prefix, metadata_path, "index.html"), + actual_files + ) diff --git a/tests/test_npm_index_multi_tgts.py b/tests/test_npm_index_multi_tgts.py index ef653303..acb882a4 100644 --- a/tests/test_npm_index_multi_tgts.py +++ b/tests/test_npm_index_multi_tgts.py @@ -22,7 +22,7 @@ 
CODE_FRAME_7_15_8_INDEXES, COMMONS_ROOT_INDEX, TEST_BUCKET_2 ) -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS @@ -30,7 +30,7 @@ NAMESPACE_BABEL_INDEX = "@babel/index.html" -@mock_s3 +@mock_aws class NpmFileIndexMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_meta.py b/tests/test_npm_meta.py index a0627e2b..6d112efd 100644 --- a/tests/test_npm_meta.py +++ b/tests/test_npm_meta.py @@ -16,7 +16,7 @@ import os import boto3 -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading, read_package_metadata_from_content from charon.storage import S3Client @@ -27,7 +27,7 @@ MY_BUCKET = "npm_bucket" -@mock_s3 +@mock_aws class NPMMetadataOnS3Test(BaseTest): def setUp(self): super().setUp() diff --git a/tests/test_npm_upload.py b/tests/test_npm_upload.py index 3438ad61..53767301 100644 --- a/tests/test_npm_upload.py +++ b/tests/test_npm_upload.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading from charon.pkgs.pkg_utils import is_metadata @@ -29,7 +29,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMUploadTest(PackageBaseTest): def test_npm_upload(self): diff --git a/tests/test_npm_upload_diff_pkgs.py b/tests/test_npm_upload_diff_pkgs.py new file mode 100644 index 00000000..0dab2e66 --- /dev/null +++ b/tests/test_npm_upload_diff_pkgs.py @@ -0,0 +1,109 @@ +""" +Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os + +from moto import mock_aws + +from charon.pkgs.npm import handle_npm_uploading +from charon.constants import DEFAULT_REGISTRY +from tests.base import PackageBaseTest +from tests.commons import TEST_BUCKET +from tests.constants import INPUTS +import logging + +logger = logging.getLogger(f"charon.tests.{__name__}") + +CODE_FRAME_FILES_REDHAT = [ + "@redhat/code-frame/7.14.5/package.json", + "@redhat/code-frame/-/code-frame-7.14.5-multi-pkgs.tgz" +] + +CODE_FRAME_META_REDHAT = "@redhat/code-frame/package.json" + +CODE_FRAME_FILES_BABEL = [ + "@babel/code-frame/7.14.5/package.json", + "@babel/code-frame/-/code-frame-7.14.5-no-root-pkg.tgz" +] + +CODE_FRAME_META_BABEL = "@babel/code-frame/package.json" + + +@mock_aws +class NPMUploadTest(PackageBaseTest): + + def test_npm_uploads_multi_pkgjson_with_root(self): + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5-multi-pkgs.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '', DEFAULT_REGISTRY)], + dir_=self.tempdir, do_index=False + ) + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + logger.debug("actual_files: %s", actual_files) + self.assertEqual(5, len(actual_files)) + + for f in CODE_FRAME_FILES_REDHAT: + self.assertIn(f, actual_files) + self.check_product(f, [product_7_14_5]) + self.assertIn(CODE_FRAME_META_REDHAT, actual_files) + + meta_obj_client = test_bucket.Object(CODE_FRAME_META_REDHAT) + meta_content_client = str(meta_obj_client.get()["Body"].read(), "utf-8") + self.assertIn("\"name\": \"@redhat/code-frame\"", meta_content_client) + self.assertIn("\"description\": \"Generate errors that contain a code frame that point to " + "source locations.\"", meta_content_client) + self.assertIn("\"repository\": {\"type\": \"git\", \"url\": " + "\"https://github.com/babel/babel.git\"", meta_content_client) + self.assertIn("\"version\": \"7.14.5\"", meta_content_client) + self.assertIn("\"versions\": {", meta_content_client) + self.assertIn("\"7.14.5\": {\"name\":", meta_content_client) + self.assertIn("\"license\": \"MIT\"", meta_content_client) + self.assertNotIn("\"dist_tags\":", meta_content_client) + + def test_npm_uploads_multi_pkgjson_with_no_root(self): + test_tgz = os.path.join(INPUTS, "code-frame-7.14.5-no-root-pkg.tgz") + product_7_14_5 = "code-frame-7.14.5" + handle_npm_uploading( + test_tgz, product_7_14_5, + buckets=[('', TEST_BUCKET, '', DEFAULT_REGISTRY)], + dir_=self.tempdir, do_index=False + ) + test_bucket = self.mock_s3.Bucket(TEST_BUCKET) + objs = list(test_bucket.objects.all()) + actual_files = [obj.key for obj in objs] + logger.debug("actual_files: %s", actual_files) + self.assertEqual(5, len(actual_files)) + + for f in CODE_FRAME_FILES_BABEL: + self.assertIn(f, actual_files) + self.check_product(f, [product_7_14_5]) + self.assertIn(CODE_FRAME_META_BABEL, actual_files) + + meta_obj_client = test_bucket.Object(CODE_FRAME_META_BABEL) + meta_content_client = str(meta_obj_client.get()["Body"].read(), "utf-8") + self.assertIn("\"name\": \"@babel/code-frame\"", meta_content_client) + self.assertIn("\"description\": \"Generate errors that contain a code frame that point to " + "source locations.\"", meta_content_client) + self.assertIn("\"repository\": {\"type\": \"git\", \"url\": " + "\"https://github.com/babel/babel.git\"", meta_content_client) + self.assertIn("\"version\": \"7.14.5\"", meta_content_client) + self.assertIn("\"versions\": {", 
meta_content_client) + self.assertIn("\"7.14.5\": {\"name\":", meta_content_client) + self.assertIn("\"license\": \"MIT\"", meta_content_client) + self.assertNotIn("\"dist_tags\":", meta_content_client) diff --git a/tests/test_npm_upload_multi_tgts.py b/tests/test_npm_upload_multi_tgts.py index 82a265f7..242937a7 100644 --- a/tests/test_npm_upload_multi_tgts.py +++ b/tests/test_npm_upload_multi_tgts.py @@ -15,7 +15,7 @@ """ import os -from moto import mock_s3 +from moto import mock_aws from charon.pkgs.npm import handle_npm_uploading from charon.pkgs.pkg_utils import is_metadata @@ -29,7 +29,7 @@ from tests.constants import INPUTS -@mock_s3 +@mock_aws class NPMUploadMultiTgtsTest(PackageBaseTest): def setUp(self): super().setUp() diff --git a/tests/test_pkgs_dryrun.py b/tests/test_pkgs_dryrun.py index 7f2b004e..3b82d1b4 100644 --- a/tests/test_pkgs_dryrun.py +++ b/tests/test_pkgs_dryrun.py @@ -18,13 +18,13 @@ from charon.constants import DEFAULT_REGISTRY from tests.base import PackageBaseTest from tests.commons import TEST_BUCKET -from moto import mock_s3 +from moto import mock_aws import os from tests.constants import INPUTS -@mock_s3 +@mock_aws class PkgsDryRunTest(PackageBaseTest): def test_maven_upload_dry_run(self): test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip") diff --git a/tests/test_s3client.py b/tests/test_s3client.py index 1c78db2b..48063daa 100644 --- a/tests/test_s3client.py +++ b/tests/test_s3client.py @@ -19,7 +19,7 @@ from charon.utils.files import overwrite_file, read_sha1 from charon.constants import PROD_INFO_SUFFIX from tests.base import BaseTest, SHORT_TEST_PREFIX -from moto import mock_s3 +from moto import mock_aws import boto3 import os import sys @@ -35,7 +35,7 @@ COMMONS_LANG3_ZIP_MVN_ENTRY = 26 -@mock_s3 +@mock_aws class S3ClientTest(BaseTest): def setUp(self): super().setUp() @@ -390,6 +390,79 @@ def test_exists_override_failing(self): file_obj = bucket.Object(path) self.assertEqual(sha1, file_obj.metadata[CHECKSUM_META_KEY]) + def test_simple_upload_file(self): + (temp_root, _, all_files) = self.__prepare_files() + for file_path in all_files: + file_key = file_path[len(temp_root) + 1:] + file_content = open(file_path, "rb").read() + sha1 = read_sha1(file_path) + self.s3_client.simple_upload_file( + file_path=file_key, + file_content=file_content, + check_sum_sha1=sha1, + target=(MY_BUCKET, '') + ) + bucket = self.mock_s3.Bucket(MY_BUCKET) + + objects = list(bucket.objects.all()) + self.assertEqual(len(all_files), len(objects)) + file_path = all_files[0] + file_key = file_path[len(temp_root) + 1:] + file_content = open(file_path, "rb").read() + sha1 = read_sha1(file_path) + obj = bucket.Object(file_key) + self.assertEqual(sha1, obj.metadata[CHECKSUM_META_KEY]) + self.assertEqual(file_key, obj.key) + self.assertEqual( + str(file_content, sys.getdefaultencoding()), + str(obj.get()["Body"].read(), sys.getdefaultencoding()) + ) + + # test upload exists + self.assertRaises( + FileExistsError, + self.s3_client.simple_upload_file, + file_path=file_key, + file_content="file_content", + check_sum_sha1=sha1, + target=(MY_BUCKET, '') + ) + + shutil.rmtree(temp_root) + + def test_simple_delete_file(self): + # prepare files + (temp_root, _, all_files) = self.__prepare_files() + for file_path in all_files: + file_key = file_path[len(temp_root) + 1:] + file_content = open(file_path, "rb").read() + sha1 = read_sha1(file_path) + self.s3_client.simple_upload_file( + file_path=file_key, + file_content=file_content, + check_sum_sha1=sha1, + target=(MY_BUCKET, '') + ) + 
bucket = self.mock_s3.Bucket(MY_BUCKET) + + objects = list(bucket.objects.all()) + self.assertEqual(len(all_files), len(objects)) + + # test delete file start + file_key = all_files[0][len(temp_root) + 1:] + objects = list(bucket.objects.all()) + self.assertIn(file_key, [o.key for o in objects]) + self.s3_client.simple_delete_file( + file_path=file_key, + target=(MY_BUCKET, "") + ) + + objects = list(bucket.objects.all()) + self.assertEqual(len(all_files) - 1, len(objects)) + self.assertNotIn(file_key, [o.key for o in objects]) + + shutil.rmtree(temp_root) + def __prepare_files(self): test_zip = zipfile.ZipFile( os.path.join(INPUTS, "commons-lang3.zip") diff --git a/tests/test_util.py b/tests/test_util.py index 35c9deff..7105491d 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from charon.utils.files import digest, read_sha1, HashType +from charon.utils.files import digest, digest_content, read_sha1, HashType import os import unittest @@ -29,6 +29,14 @@ def test_digest(self): digest(test_file, HashType.SHA256), ) + def test_digest_content(self): + test_content = "test common content" + self.assertEqual("8c7b70f25fb88bc6a0372f70f6805132e90e2029", digest_content(test_content)) + self.assertEqual( + "1a1c26da1f6830614ed0388bb30d9e849e05bba5de4031e2a2fa6b48032f5354", + digest_content(test_content, HashType.SHA256), + ) + def test_read_sha1(self): test_file = os.path.join(INPUTS, "commons-lang3.zip") # read the real sha1 hash