diff --git a/.gitignore b/.gitignore
index 8ca90496..b32671f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@ package/
# Unit test
__pytest_reports
htmlcov
+
+# Generated during local runs
+*.log
diff --git a/README.md b/README.md
index cdd1608d..887166c0 100644
--- a/README.md
+++ b/README.md
@@ -96,3 +96,13 @@ This command will delete some paths from repo in S3.
but not delete the artifacts themselves.
* During or after the paths' deletion, regenerate the
metadata files and index files for both types.
+
+### charon-index: refresh the index.html for the specified path
+
+```bash
+usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet] [-n, --dryrun]
+```
+
+This command will refresh the index.html for the specified path.
+
+* Note that if the path is an NPM metadata path (one that contains a package.json), the refresh will be skipped, because this type of folder serves the package.json instead of index.html in HTTP requests.
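+
+For example, to refresh the index of a maven path (the target name below is illustrative and depends on your charon configuration):
+
+```bash
+charon index org/apache/httpcomponents/httpclient/ -t prod-maven
+```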
diff --git a/charon/cmd/__init__.py b/charon/cmd/__init__.py
index a9834e1a..9a3084d0 100644
--- a/charon/cmd/__init__.py
+++ b/charon/cmd/__init__.py
@@ -16,6 +16,7 @@
from click import group
from charon.cmd.cmd_upload import upload
from charon.cmd.cmd_delete import delete
+from charon.cmd.cmd_index import index
@group()
@@ -29,3 +30,4 @@ def cli():
# init group command
cli.add_command(upload)
cli.add_command(delete)
+cli.add_command(index)
diff --git a/charon/cmd/cmd_index.py b/charon/cmd/cmd_index.py
new file mode 100644
index 00000000..281ed876
--- /dev/null
+++ b/charon/cmd/cmd_index.py
@@ -0,0 +1,120 @@
+"""
+Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from charon.config import get_config
+from charon.cmd.internal import _decide_mode
+from charon.pkgs.indexing import re_index
+from charon.constants import PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM
+from click import command, option, argument
+
+import traceback
+import logging
+import os
+import sys
+
+logger = logging.getLogger(__name__)
+
+
+@argument(
+ "path",
+ type=str,
+)
+@option(
+ "--target",
+ "-t",
+ help="""
+    The target to do the index refreshing, which decides
+    which s3 bucket and which root path the index files
+    will be refreshed in.
+ """,
+ required=True
+)
+@option(
+ "--debug",
+ "-D",
+ help="Debug mode, will print all debug logs for problem tracking.",
+ is_flag=True,
+ default=False
+)
+@option(
+ "--quiet",
+ "-q",
+ help="Quiet mode, will shrink most of the logs except warning and errors.",
+ is_flag=True,
+ default=False
+)
+@option("--dryrun", "-n", is_flag=True, default=False)
+@command()
+def index(
+ path: str,
+ target: str,
+ debug: bool = False,
+ quiet: bool = False,
+ dryrun: bool = False
+):
+ """This command will re-generate the index.html files for the
+ specified path.
+ """
+ _decide_mode(
+ "index-{}".format(target), path.replace("/", "_"),
+ is_quiet=quiet, is_debug=debug
+ )
+ try:
+ conf = get_config()
+ if not conf:
+ sys.exit(1)
+
+ aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
+ if not aws_profile:
+ logger.error("No AWS profile specified!")
+ sys.exit(1)
+
+ tgt = conf.get_target(target)
+ if not tgt:
+            # the error log is recorded in get_target
+ sys.exit(1)
+
+        aws_bucket = None
+        prefix = None
+        # if multiple buckets are configured for the target, the last one wins
+        for b in tgt:
+            aws_bucket = b.get('bucket')
+            prefix = b.get('prefix', '')
+
+        if not aws_bucket:
+            logger.error("No bucket specified!")
+            sys.exit(1)
+
+        package_type = None
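+        # the package type is inferred from the bucket name: bucket names
+        # containing "maven" or "npm" map to the corresponding index type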
+ if "maven" in aws_bucket:
+ logger.info(
+ "The target is a maven repository. Will refresh the index as maven package type"
+ )
+ package_type = PACKAGE_TYPE_MAVEN
+ elif "npm" in aws_bucket:
+ package_type = PACKAGE_TYPE_NPM
+ logger.info(
+ "The target is a npm repository. Will refresh the index as npm package type"
+ )
+ else:
+ logger.error(
+ "The target is not supported. Only maven or npm target is supported."
+ )
+ sys.exit(1)
+
+ re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun)
+ except Exception:
+ print(traceback.format_exc())
+ sys.exit(2) # distinguish between exception and bad config or bad state
diff --git a/charon/pkgs/indexing.py b/charon/pkgs/indexing.py
index f478e0a5..b342c071 100644
--- a/charon/pkgs/indexing.py
+++ b/charon/pkgs/indexing.py
@@ -17,6 +17,7 @@
from charon.storage import S3Client
from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX)
+from charon.utils.files import digest_content
+from charon.utils.strings import remove_prefix
from jinja2 import Template
import os
import logging
@@ -149,6 +150,17 @@ def __generate_index_html(
def __to_html(package_type: str, contents: List[str], folder: str, top_level: str) -> str:
+ html_content = __to_html_content(package_type, contents, folder)
+ html_path = os.path.join(top_level, folder, "index.html")
+ if folder == "/":
+ html_path = os.path.join(top_level, "index.html")
+ os.makedirs(os.path.dirname(html_path), exist_ok=True)
+ with open(html_path, 'w', encoding='utf-8') as html:
+ html.write(html_content)
+ return html_path
+
+
+def __to_html_content(package_type: str, contents: List[str], folder: str) -> str:
items = []
if folder != "/":
items.append("../")
@@ -160,13 +172,7 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st
items.extend(contents)
items = __sort_index_items(items)
index = IndexedHTML(title=folder, header=folder, items=items)
- html_path = os.path.join(top_level, folder, "index.html")
- if folder == "/":
- html_path = os.path.join(top_level, "index.html")
- os.makedirs(os.path.dirname(html_path), exist_ok=True)
- with open(html_path, 'w', encoding='utf-8') as html:
- html.write(index.generate_index_file_content(package_type))
- return html_path
+ return index.generate_index_file_content(package_type)
def __sort_index_items(items):
@@ -250,3 +256,60 @@ def __compare(self, other) -> int:
return -1
else:
return 0
+
+
+def re_index(
+ bucket: str,
+ prefix: str,
+ path: str,
+ package_type: str,
+ aws_profile: str = None,
+ dry_run: bool = False
+):
+ """Refresh the index.html for the specified folder in the bucket.
+ """
+ s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
+ s3_folder = os.path.join(prefix, path)
+ if path.strip() == "" or path.strip() == "/":
+ s3_folder = prefix
+ items: List[str] = s3_client.list_folder_content(bucket, s3_folder)
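+    # the internal product-info files must never show up in the index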
+ contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)]
+ if PACKAGE_TYPE_NPM == package_type:
+        if any("package.json" in c for c in contents):
+            logger.warning(
+                "The path %s contains an NPM package.json, which serves as "
+                "the package metadata for indexing. Skipping the re-indexing.",
+                path
+            )
+ return
+
+    if contents:
+ real_contents = []
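+        # strip the configured bucket prefix so that the index entries are
+        # relative to the repository root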
+ if prefix and prefix.strip() != "":
+ for c in contents:
+ if c.strip() != "":
+ if c.startswith(prefix):
+ real_c = remove_prefix(c, prefix)
+ real_c = remove_prefix(real_c, "/")
+ real_contents.append(real_c)
+ else:
+ real_contents.append(c)
+ else:
+ real_contents = contents
+ logger.debug(real_contents)
+ index_content = __to_html_content(package_type, real_contents, path)
+ if not dry_run:
+ index_path = os.path.join(path, "index.html")
+ if path == "/":
+ index_path = "index.html"
+ s3_client.simple_delete_file(index_path, (bucket, prefix))
+ s3_client.simple_upload_file(
+ index_path, index_content, (bucket, prefix),
+ "text/html", digest_content(index_content)
+ )
+ else:
+        logger.warning(
+            "The path %s does not contain any content in bucket %s. "
+            "Will not do any re-indexing",
+            path, bucket
+        )
diff --git a/charon/storage.py b/charon/storage.py
index 45963c42..f07b1c50 100644
--- a/charon/storage.py
+++ b/charon/storage.py
@@ -530,10 +530,11 @@ def delete_files(
self, file_paths: List[str], target: Tuple[str, str],
product: Optional[str], root="/"
) -> List[str]:
- """ Deletes a list of files to s3 bucket. * Use the cut down file path as s3 key. The cut
- down way is move root from the file path if it starts with root. Example: if file_path is
- /tmp/maven-repo/org/apache/.... and root is /tmp/maven-repo Then the key will be
- org/apache/.....
+ """ Deletes a list of files to s3 bucket.
+ * Use the cut down file path as s3 key. The cut
+ down way is move root from the file path if it starts with root.
+ Example: if file_path is /tmp/maven-repo/org/apache/.... and
+ root is /tmp/maven-repo Then the key will be org/apache/.....
* The removing will happen with conditions of product checking. First the deletion
will remove The product from the file metadata "rh-products". After the metadata
removing, if there still are extra products left in that metadata, the file will not
@@ -637,6 +638,90 @@ async def path_delete_handler(
return failed_files
+ def simple_delete_file(
+ self, file_path: str, target: Tuple[str, str]
+ ):
+ """ Deletes file in s3 bucket, regardless of any extra
+ information like product and version info.
+ * Warning: this will directly delete the files even if
+ it has lots of product info, so please be careful to use.
+ If you want to delete product artifact files, please use
+ delete_files
+ """
+ bucket = target[0]
+ prefix = target[1]
+ bucket_obj = self.__get_bucket(bucket)
+ path_key = os.path.join(prefix, file_path)
+ file_object = bucket_obj.Object(path_key)
+ existed = False
+ try:
+ existed = self.__file_exists(file_object)
+ if existed:
+ bucket_obj.delete_objects(Delete={"Objects": [{"Key": path_key}]})
+ else:
+                logger.warning(
+                    'Warning: file %s does not exist in S3 bucket %s, '
+                    'skipping its deletion', file_path, bucket
+                )
+ except (ClientError, HTTPClientError) as e:
+            logger.error(
+                "Error: file deletion failed due to error: %s", e
+            )
+
+ def simple_upload_file(
+ self, file_path: str, file_content: str,
+ target: Tuple[str, str],
+ mime_type: str = None,
+ check_sum_sha1: str = None
+ ):
+ """ Uploads file to s3 bucket, regardless of any extra
+ information like product and version info.
+ * Warning: this will directly delete the files even if
+ it has lots of product info, so please be careful to use.
+ If you want to upload product artifact files, please use
+ upload_files
+ """
+ bucket = target[0]
+ prefix = target[1]
+ bucket_obj = self.__get_bucket(bucket)
+ path_key = os.path.join(prefix, file_path)
+ file_object = bucket_obj.Object(path_key)
+        logger.debug(
+            'Uploading %s to bucket %s', path_key, bucket
+        )
+        existed = False
+ try:
+ existed = self.__file_exists(file_object)
+ except (ClientError, HTTPClientError) as e:
+ logger.error(
+ "Error: file existence check failed due to error: %s", e
+ )
+ return
+
+ content_type = mime_type
+ if not content_type:
+ content_type = DEFAULT_MIME_TYPE
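+        # upload only when the key is absent: this method never overwrites
+        # an existing object (an existing key raises FileExistsError below)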
+ if not existed:
+ f_meta = {}
+ if check_sum_sha1 and check_sum_sha1.strip() != "":
+ f_meta[CHECKSUM_META_KEY] = check_sum_sha1
+ try:
+ if not self.__dry_run:
+ file_object.put(
+ Body=file_content,
+ Metadata=f_meta,
+ ContentType=content_type
+ )
+ logger.debug('Uploaded %s to bucket %s', file_path, bucket)
+ except (ClientError, HTTPClientError) as e:
+ logger.error(
+ "ERROR: file %s not uploaded to bucket %s due to error: %s ",
+ file_path, bucket, e
+ )
+        else:
+            raise FileExistsError(
+                "Error: file {} already exists, upload is forbidden.".format(file_path)
+            )
+
def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str):
if not manifest_bucket_name:
logger.warning(
diff --git a/tests/test_maven_index.py b/tests/test_maven_index.py
index d5647ecd..7468310d 100644
--- a/tests/test_maven_index.py
+++ b/tests/test_maven_index.py
@@ -15,6 +15,7 @@
"""
from charon.constants import PROD_INFO_SUFFIX
from charon.pkgs.maven import handle_maven_uploading, handle_maven_del
+from charon.pkgs.indexing import re_index
from charon.storage import CHECKSUM_META_KEY
from charon.utils.strings import remove_prefix
from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, PackageBaseTest
@@ -45,8 +46,6 @@ def test_uploading_index(self):
objs = list(test_bucket.objects.all())
actual_files = [obj.key for obj in objs]
- self.assertEqual(41, len(actual_files))
-
for f in COMMONS_LOGGING_INDEXES:
self.assertIn(f, actual_files)
@@ -127,6 +126,85 @@ def test_overlap_upload_index(self):
self.assertNotIn("../", index_content)
self.assertNotIn(PROD_INFO_SUFFIX, index_content)
+ def test_re_index(self):
+ test_zip = os.path.join(INPUTS, "commons-client-4.5.6.zip")
+ product = "commons-client-4.5.6"
+ handle_maven_uploading(
+ test_zip, product,
+ buckets=[('', TEST_BUCKET, '', '')],
+ dir_=self.tempdir
+ )
+
+ test_bucket = self.mock_s3.Bucket(TEST_BUCKET)
+ objs = list(test_bucket.objects.all())
+ actual_files = [obj.key for obj in objs]
+
+ for f in COMMONS_CLIENT_456_INDEXES:
+ self.assertIn(f, actual_files)
+
+ self.check_content(objs, [product])
+
+        index_obj = test_bucket.Object(COMMONS_CLIENT_INDEX)
+        index_content = str(index_obj.get()["Body"].read(), "utf-8")
+ self.assertIn('../', index_content)
+ self.assertIn('4.5.6/', index_content)
+        self.assertIn('maven-metadata.xml', index_content)
+        self.assertIn('maven-metadata.xml.md5', index_content)
+        self.assertIn('maven-metadata.xml.sha1', index_content)
+        self.assertIn('maven-metadata.xml.sha256', index_content)
+ self.assertNotIn("4.5.7/", index_content)
+
+ # insert new in commons-client
+ commons_client_root = "org/apache/httpcomponents/httpclient/"
+ commons_client_457_test = commons_client_root + "4.5.7/httpclient-4.5.7.txt"
+ self.mock_s3.Bucket(TEST_BUCKET).put_object(
+ Key=commons_client_457_test,
+ Body="Just a test content"
+ )
+ re_index(TEST_BUCKET, "", commons_client_root, "maven")
+        index_obj = test_bucket.Object(COMMONS_CLIENT_INDEX)
+        index_content = str(index_obj.get()["Body"].read(), "utf-8")
+ self.assertIn('../', index_content)
+ self.assertIn('4.5.6/', index_content)
+        self.assertIn('maven-metadata.xml', index_content)
+        self.assertIn('maven-metadata.xml.md5', index_content)
+        self.assertIn('maven-metadata.xml.sha1', index_content)
+        self.assertIn('maven-metadata.xml.sha256', index_content)
+ self.assertIn("4.5.7/", index_content)
+ self.assertNotIn(PROD_INFO_SUFFIX, index_content)
+
def test_upload_index_with_short_prefix(self):
self.__test_upload_index_with_prefix(SHORT_TEST_PREFIX)
diff --git a/tests/test_npm_index.py b/tests/test_npm_index.py
index fa0ebc3a..02dc64e0 100644
--- a/tests/test_npm_index.py
+++ b/tests/test_npm_index.py
@@ -15,6 +15,7 @@
"""
from charon.constants import PROD_INFO_SUFFIX, DEFAULT_REGISTRY
from charon.pkgs.npm import handle_npm_uploading, handle_npm_del
+from charon.pkgs.indexing import re_index
from charon.storage import CHECKSUM_META_KEY
from tests.base import LONG_TEST_PREFIX, SHORT_TEST_PREFIX, PackageBaseTest
from tests.commons import (
@@ -182,3 +183,108 @@ def __prepare_content(self, prefix: str = None):
buckets=[('', TEST_BUCKET, prefix, DEFAULT_REGISTRY)],
dir_=self.tempdir
)
+
+ def test_re_index(self):
+ test_tgz = os.path.join(INPUTS, "code-frame-7.14.5.tgz")
+ product_7_14_5 = "code-frame-7.14.5"
+ prefix = SHORT_TEST_PREFIX
+
+ handle_npm_uploading(
+ test_tgz, product_7_14_5,
+            buckets=[('', TEST_BUCKET, prefix, DEFAULT_REGISTRY)],
+ dir_=self.tempdir,
+ )
+
+ test_bucket = self.mock_s3.Bucket(TEST_BUCKET)
+ objs = list(test_bucket.objects.all())
+ actual_files = [obj.key for obj in objs]
+
+ prefixed_7158_indexes = [
+ os.path.join(prefix, f) for f in CODE_FRAME_7_15_8_INDEXES
+ ]
+ prefixed_namespace_babel_index = os.path.join(prefix, NAMESPACE_BABEL_INDEX)
+ prefixed_root_index = os.path.join(prefix, COMMONS_ROOT_INDEX)
+
+ for assert_file in prefixed_7158_indexes:
+ self.assertNotIn(assert_file, actual_files)
+
+ # test package path
+ index_obj = test_bucket.Object(prefixed_namespace_babel_index)
+ index_content = str(index_obj.get()["Body"].read(), "utf-8")
+ self.assertIn('code-frame/',
+ index_content)
+ test_file_path = os.path.join(prefix, "@babel/test/test-file.txt")
+        self.assertNotIn('test/test-file.txt', index_content)
+ # Add entry and re-index package path
+ test_bucket.put_object(
+ Key=test_file_path, Body="test content"
+ )
+ re_index(TEST_BUCKET, prefix, "@babel/", "npm")
+ index_obj = test_bucket.Object(prefixed_namespace_babel_index)
+ index_content = str(index_obj.get()["Body"].read(), "utf-8")
+ self.assertIn(
+ 'code-frame/', index_content
+ )
+ self.assertIn(
+ 'test/', index_content
+ )
+ self.assertIn(
+ '../', index_content
+ )
+ self.assertNotIn(PROD_INFO_SUFFIX, index_content)
+
+ # test root path
+ index_obj = test_bucket.Object(prefixed_root_index)
+ index_content = str(index_obj.get()["Body"].read(), "utf-8")
+ self.assertIn('@babel/', index_content)
+ test_file_path = os.path.join(prefix, "test/test-file.txt")
+        self.assertNotIn('test/test-file.txt', index_content)
+ # Add entry and re-index root
+ test_bucket.put_object(
+ Key=test_file_path, Body="test content"
+ )
+ re_index(TEST_BUCKET, prefix, "/", "npm")
+ index_obj = test_bucket.Object(prefixed_root_index)
+ index_content = str(index_obj.get()["Body"].read(), "utf-8")
+ self.assertIn('@babel/', index_content)
+        self.assertIn('test/', index_content)
+ self.assertNotIn('../', index_content)
+ self.assertNotIn(PROD_INFO_SUFFIX, index_content)
+
+ # Test metadata path
+ metadata_path = "@babel/code-frame/"
+ objs = list(test_bucket.objects.all())
+ actual_files = [obj.key for obj in objs]
+ self.assertIn(
+ os.path.join(prefix, metadata_path, "package.json"),
+ actual_files
+ )
+ self.assertNotIn(
+ os.path.join(prefix, metadata_path, "index.html"),
+ actual_files
+ )
+ # Add entry and re-index metadata path
+ test_file_path = os.path.join(prefix, metadata_path, "test/test-file.txt")
+ test_bucket.put_object(
+ Key=test_file_path, Body="test content"
+ )
+ re_index(TEST_BUCKET, prefix, metadata_path, "npm")
+ objs = list(test_bucket.objects.all())
+ actual_files = [obj.key for obj in objs]
+ self.assertIn(
+ os.path.join(prefix, metadata_path, "package.json"),
+ actual_files
+ )
+ self.assertNotIn(
+ os.path.join(prefix, metadata_path, "index.html"),
+ actual_files
+ )
diff --git a/tests/test_s3client.py b/tests/test_s3client.py
index 1c78db2b..b33e68d7 100644
--- a/tests/test_s3client.py
+++ b/tests/test_s3client.py
@@ -390,6 +390,79 @@ def test_exists_override_failing(self):
file_obj = bucket.Object(path)
self.assertEqual(sha1, file_obj.metadata[CHECKSUM_META_KEY])
+ def test_simple_upload_file(self):
+ (temp_root, _, all_files) = self.__prepare_files()
+ for file_path in all_files:
+ file_key = file_path[len(temp_root) + 1:]
+ file_content = open(file_path, "rb").read()
+ sha1 = read_sha1(file_path)
+ self.s3_client.simple_upload_file(
+ file_path=file_key,
+ file_content=file_content,
+ check_sum_sha1=sha1,
+ target=(MY_BUCKET, '')
+ )
+ bucket = self.mock_s3.Bucket(MY_BUCKET)
+
+ objects = list(bucket.objects.all())
+ self.assertEqual(len(all_files), len(objects))
+ file_path = all_files[0]
+ file_key = file_path[len(temp_root) + 1:]
+ file_content = open(file_path, "rb").read()
+ sha1 = read_sha1(file_path)
+ obj = bucket.Object(file_key)
+ self.assertEqual(sha1, obj.metadata[CHECKSUM_META_KEY])
+ self.assertEqual(file_key, obj.key)
+ self.assertEqual(
+ str(file_content, sys.getdefaultencoding()),
+ str(obj.get()["Body"].read(), sys.getdefaultencoding())
+ )
+
+ # test upload exists
+ self.assertRaises(
+ FileExistsError,
+ self.s3_client.simple_upload_file,
+ file_path=file_key,
+ file_content="file_content",
+ check_sum_sha1=sha1,
+ target=(MY_BUCKET, '')
+ )
+
+ shutil.rmtree(temp_root)
+
+ def test_simple_delete_file(self):
+ # prepare files
+ (temp_root, _, all_files) = self.__prepare_files()
+ for file_path in all_files:
+ file_key = file_path[len(temp_root) + 1:]
+ file_content = open(file_path, "rb").read()
+ sha1 = read_sha1(file_path)
+ self.s3_client.simple_upload_file(
+ file_path=file_key,
+ file_content=file_content,
+ check_sum_sha1=sha1,
+ target=(MY_BUCKET, '')
+ )
+ bucket = self.mock_s3.Bucket(MY_BUCKET)
+
+ objects = list(bucket.objects.all())
+ self.assertEqual(len(all_files), len(objects))
+
+ # test delete file start
+ file_key = all_files[0][len(temp_root) + 1:]
+ objects = list(bucket.objects.all())
+ self.assertIn(file_key, [o.key for o in objects])
+ self.s3_client.simple_delete_file(
+ file_path=file_key,
+ target=(MY_BUCKET, "")
+ )
+
+ objects = list(bucket.objects.all())
+ self.assertEqual(len(all_files) - 1, len(objects))
+ self.assertNotIn(file_key, [o.key for o in objects])
+
+ shutil.rmtree(temp_root)
+
def __prepare_files(self):
test_zip = zipfile.ZipFile(
os.path.join(INPUTS, "commons-lang3.zip")