Merge e7cf9ef into 380df25

ligangty · web-flow · commit e46ae6ba0699 · 2024-02-22T09:10:57.000Z
diff --git a/.gitignore b/.gitignore
@@ -16,3 +16,6 @@ package/
 # Unit test
 __pytest_reports
 htmlcov
+
+# Generated by local runs
+*.log
diff --git a/README.md b/README.md
@@ -96,3 +96,13 @@ This command will delete some paths from repo in S3.
   but not delete the artifacts themselves.
 * During or after the paths' deletion, regenerate the
   metadata files and index files for both types.
+
+### charon-index: refresh the index.html for the specified path
+
+```bash
+usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet]
+```
+
+This command will refresh the index.html for the specified path.
+
+* Note that if the path is an NPM metadata path that contains package.json, this refresh will have no effect, because this type of folder serves the package.json instead of the index.html in HTTP requests.
diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py
@@ -16,6 +16,7 @@
 from click import group
 from charon.cmd.cmd_upload import upload
 from charon.cmd.cmd_delete import delete
+from charon.cmd.cmd_index import index
 
 
 @group()
@@ -29,3 +30,4 @@ def cli():
 # init group command
 cli.add_command(upload)
 cli.add_command(delete)
+cli.add_command(index)
diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py
@@ -0,0 +1,120 @@
+"""
+Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+         http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from charon.config import get_config
+from charon.cmd.internal import _decide_mode
+from charon.pkgs.indexing import re_index
+from charon.constants import PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM
+from click import command, option, argument
+
+import traceback
+import logging
+import os
+import sys
+
+logger = logging.getLogger(__name__)
+
+
+@argument(
+    "path",
+    type=str,
+)
+@option(
+    "--target",
+    "-t",
+    help="""
+    The target to do the index refreshing, which will decide
+    the s3 bucket and the root path (prefix) under which the
+    index.html will be refreshed.
+    """,
+    required=True
+)
+@option(
+    "--debug",
+    "-D",
+    help="Debug mode, will print all debug logs for problem tracking.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--quiet",
+    "-q",
+    help="Quiet mode, will shrink most of the logs except warning and errors.",
+    is_flag=True,
+    default=False
+)
+@option("--dryrun", "-n", is_flag=True, default=False)
+@command()
+def index(
+    path: str,
+    target: str,
+    debug: bool = False,
+    quiet: bool = False,
+    dryrun: bool = False
+):
+    """This command will re-generate the index.html files for the
+    specified path.
+    """
+    _decide_mode(
+        "index-{}".format(target), path.replace("/", "_"),
+        is_quiet=quiet, is_debug=debug
+    )
+    try:
+        conf = get_config()
+        if not conf:
+            sys.exit(1)
+
+        aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
+        if not aws_profile:
+            logger.error("No AWS profile specified!")
+            sys.exit(1)
+
+        tgt = conf.get_target(target)
+        if not tgt:
+            # log is recorded get_target
+            sys.exit(1)
+
+        aws_bucket = None
+        prefix = None
+        for b in tgt:
+            aws_bucket = b.get('bucket')
+            prefix = b.get('prefix', '')
+
+        if not aws_bucket:
+            logger.error("No bucket specified!")
+            sys.exit(1)
+
+        # Bucket is validated above: `in` on a None bucket would raise TypeError.
+        if "maven" in aws_bucket:
+            logger.info(
+                "The target is a maven repository. Will refresh the index as maven package type"
+            )
+            package_type = PACKAGE_TYPE_MAVEN
+        elif "npm" in aws_bucket:
+            package_type = PACKAGE_TYPE_NPM
+            logger.info(
+                "The target is a npm repository. Will refresh the index as npm package type"
+            )
+        else:
+            logger.error(
+                "The target is not supported. Only maven or npm target is supported."
+            )
+            sys.exit(1)
+
+        re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun)
+    except Exception:
+        print(traceback.format_exc())
+        sys.exit(2)  # distinguish between exception and bad config or bad state
diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py
@@ -17,6 +17,7 @@
 from charon.storage import S3Client
 from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
                               PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX)
+from charon.utils.files import digest_content
 from jinja2 import Template
 import os
 import logging
@@ -149,6 +150,17 @@ def __generate_index_html(
 
 
 def __to_html(package_type: str, contents: List[str], folder: str, top_level: str) -> str:
+    html_content = __to_html_content(package_type, contents, folder)
+    html_path = os.path.join(top_level, folder, "index.html")
+    if folder == "/":
+        html_path = os.path.join(top_level, "index.html")
+    os.makedirs(os.path.dirname(html_path), exist_ok=True)
+    with open(html_path, 'w', encoding='utf-8') as html:
+        html.write(html_content)
+    return html_path
+
+
+def __to_html_content(package_type: str, contents: List[str], folder: str) -> str:
     items = []
     if folder != "/":
         items.append("../")
@@ -160,13 +172,7 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st
         items.extend(contents)
     items = __sort_index_items(items)
     index = IndexedHTML(title=folder, header=folder, items=items)
-    html_path = os.path.join(top_level, folder, "index.html")
-    if folder == "/":
-        html_path = os.path.join(top_level, "index.html")
-    os.makedirs(os.path.dirname(html_path), exist_ok=True)
-    with open(html_path, 'w', encoding='utf-8') as html:
-        html.write(index.generate_index_file_content(package_type))
-    return html_path
+    return index.generate_index_file_content(package_type)
 
 
 def __sort_index_items(items):
@@ -250,3 +256,60 @@ def __compare(self, other) -> int:
             return -1
         else:
             return 0
+
+
+def re_index(
+    bucket: str,
+    prefix: str,
+    path: str,
+    package_type: str,
+    aws_profile: str = None,
+    dry_run: bool = False
+):
+    """Refresh the index.html for the specified folder in the bucket;
+    skipped for NPM package.json paths and for folders without content."""
+    s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
+    s3_folder = os.path.join(prefix, path)
+    if path.strip() == "" or path.strip() == "/":
+        s3_folder = prefix
+    items: List[str] = s3_client.list_folder_content(bucket, s3_folder)
+    contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)]
+    if PACKAGE_TYPE_NPM == package_type:
+        if any("package.json" in c for c in contents):
+            logger.warning(
+                "The path %s contains NPM package.json which will work as "
+                "package metadata for indexing. This indexing is ignored.",
+                path
+            )
+            return
+
+    if contents:
+        real_contents = []
+        if prefix and prefix.strip():
+            for c in contents:
+                if c.strip():
+                    if c.startswith(prefix):
+                        real_c = remove_prefix(c, prefix)
+                        real_c = remove_prefix(real_c, "/")
+                        real_contents.append(real_c)
+                    else:
+                        real_contents.append(c)
+        else:
+            real_contents = contents
+        logger.debug(real_contents)
+        index_content = __to_html_content(package_type, real_contents, path)
+        if not dry_run:
+            index_path = os.path.join(path, "index.html")
+            if path == "/":
+                index_path = "index.html"
+            s3_client.simple_delete_file(index_path, (bucket, prefix))
+            s3_client.simple_upload_file(
+                index_path, index_content, (bucket, prefix),
+                "text/html", digest_content(index_content)
+            )
+    else:
+        logger.warning(
+            "The path %s does not contain any contents in bucket %s. "
+            "Will not do any re-indexing",
+            path, bucket
+        )
diff --git a/charon/storage.py b/charon/storage.py
@@ -530,10 +530,11 @@ def delete_files(
         self, file_paths: List[str], target: Tuple[str, str],
         product: Optional[str], root="/"
     ) -> List[str]:
-        """ Deletes a list of files to s3 bucket. * Use the cut down file path as s3 key. The cut
-        down way is move root from the file path if it starts with root. Example: if file_path is
-        /tmp/maven-repo/org/apache/.... and root is /tmp/maven-repo Then the key will be
-        org/apache/.....
+        """ Deletes a list of files from s3 bucket.
+            * Use the cut down file path as s3 key. The cut
+            down way is to remove root from the file path if it starts
+            with root. Example: if file_path is /tmp/maven-repo/org/apache/....
+            and root is /tmp/maven-repo, then the key will be org/apache/.....
             * The removing will happen with conditions of product checking. First the deletion
             will remove The product from the file metadata "rh-products". After the metadata
             removing, if there still are extra products left in that metadata, the file will not
@@ -637,6 +638,90 @@ async def path_delete_handler(
 
         return failed_files
 
+    def simple_delete_file(
+        self, file_path: str, target: Tuple[str, str]
+    ):
+        """ Deletes file in s3 bucket, regardless of any extra
+            information like product and version info.
+            * Warning: this will directly delete the file even if
+            it has lots of product info, so please be careful to use.
+            If you want to delete product artifact files, please use
+            delete_files. Nothing is deleted in dry-run mode.
+        """
+        bucket = target[0]
+        prefix = target[1]
+        bucket_obj = self.__get_bucket(bucket)
+        path_key = os.path.join(prefix, file_path)
+        file_object = bucket_obj.Object(path_key)
+        # Like simple_upload_file, never change the bucket content in dry-run mode.
+        try:
+            existed = self.__file_exists(file_object)
+            if not existed:
+                logger.warning(
+                    'Warning: File %s does not exist in S3 bucket %s, will ignore its deleting',
+                    file_path, bucket
+                )
+            elif not self.__dry_run:
+                bucket_obj.delete_objects(Delete={"Objects": [{"Key": path_key}]})
+        except (ClientError, HTTPClientError) as e:
+            logger.error(
+                "Error: file %s not deleted from bucket %s due to error: %s", path_key, bucket, e
+            )
+
+    def simple_upload_file(
+        self, file_path: str, file_content: str,
+        target: Tuple[str, str],
+        mime_type: str = None,
+        check_sum_sha1: str = None
+    ):
+        """ Uploads file to s3 bucket, regardless of any extra
+            information like product and version info.
+            * Warning: this will directly upload the file without
+            any product info, and raises FileExistsError if the file
+            already exists. If you want to upload product artifact
+            files, please use upload_files
+        """
+        bucket = target[0]
+        prefix = target[1]
+        bucket_obj = self.__get_bucket(bucket)
+        path_key = os.path.join(prefix, file_path)
+        file_object = bucket_obj.Object(path_key)
+        logger.debug(
+            'Uploading %s to bucket %s', path_key, bucket
+        )
+        # Abort when the existence check fails, rather than risk an overwrite.
+        try:
+            existed = self.__file_exists(file_object)
+        except (ClientError, HTTPClientError) as e:
+            logger.error(
+                "Error: file existence check failed due to error: %s", e
+            )
+            return
+
+        if existed:
+            raise FileExistsError(
+                "Error: file %s already exists, upload is forbidden." % path_key
+            )
+
+        content_type = mime_type or DEFAULT_MIME_TYPE
+        # Only attach the checksum metadata when a non-blank digest is given.
+        f_meta = {}
+        if check_sum_sha1 and check_sum_sha1.strip() != "":
+            f_meta[CHECKSUM_META_KEY] = check_sum_sha1
+        try:
+            if not self.__dry_run:
+                file_object.put(
+                    Body=file_content,
+                    Metadata=f_meta,
+                    ContentType=content_type
+                )
+            logger.debug('Uploaded %s to bucket %s', file_path, bucket)
+        except (ClientError, HTTPClientError) as e:
+            logger.error(
+                "ERROR: file %s not uploaded to bucket %s due to error: %s ",
+                file_path, bucket, e
+            )
+
     def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str):
         if not manifest_bucket_name:
             logger.warning(
diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py
diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py
diff --git a/tests/test_s3client.py b/tests/test_s3client.py