Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ package/
# Unit test
__pytest_reports
htmlcov

# Generated during local runs
*.log
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,13 @@ This command will delete some paths from repo in S3.
but not delete the artifacts themselves.
* During or after the paths' deletion, regenerate the
metadata files and index files for both types.

### charon-index: refresh the index.html for the specified path

```bash
usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet]
```

This command will refresh the index.html for the specified path.

* Note that if the path is an NPM metadata path which contains a package.json file, this refresh will not take effect, because this type of folder serves the package.json instead of index.html in HTTP requests.
2 changes: 2 additions & 0 deletions charon/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from click import group
from charon.cmd.cmd_upload import upload
from charon.cmd.cmd_delete import delete
from charon.cmd.cmd_index import index


@group()
Expand All @@ -29,3 +30,4 @@ def cli():
# init group command
cli.add_command(upload)
cli.add_command(delete)
cli.add_command(index)
120 changes: 120 additions & 0 deletions charon/cmd/cmd_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""
Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from charon.config import get_config
from charon.cmd.internal import _decide_mode
from charon.pkgs.indexing import re_index
from charon.constants import PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM
from click import command, option, argument

import traceback
import logging
import os
import sys

logger = logging.getLogger(__name__)


@argument(
    "path",
    type=str,
)
@option(
    "--target",
    "-t",
    help="""
    The target to do the index refreshing, which will decide
    which s3 bucket and what root path where the index files
    will be refreshed.
    """,
    required=True
)
@option(
    "--debug",
    "-D",
    help="Debug mode, will print all debug logs for problem tracking.",
    is_flag=True,
    default=False
)
@option(
    "--quiet",
    "-q",
    help="Quiet mode, will shrink most of the logs except warning and errors.",
    is_flag=True,
    default=False
)
@option("--dryrun", "-n", is_flag=True, default=False)
@command()
def index(
    path: str,
    target: str,
    debug: bool = False,
    quiet: bool = False,
    dryrun: bool = False
):
    """This command will re-generate the index.html files for the
    specified path.

    Exits with code 1 on bad configuration (missing config, AWS
    profile, target or bucket, or unsupported bucket type) and 2
    on unexpected errors.
    """
    _decide_mode(
        "index-{}".format(target), path.replace("/", "_"),
        is_quiet=quiet, is_debug=debug
    )
    try:
        conf = get_config()
        if not conf:
            sys.exit(1)

        aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
        if not aws_profile:
            logger.error("No AWS profile specified!")
            sys.exit(1)

        tgt = conf.get_target(target)
        if not tgt:
            # a missing target is already logged inside get_target
            sys.exit(1)

        # NOTE(review): when a target defines several buckets only the
        # last one wins here — preserved from the original behavior;
        # confirm whether multi-bucket targets should be supported.
        aws_bucket = None
        prefix = None
        for b in tgt:
            aws_bucket = b.get('bucket')
            prefix = b.get('prefix', '')

        # Guard BEFORE using the bucket name: otherwise a target with no
        # bucket crashes with TypeError on the "in" membership test below.
        if not aws_bucket:
            logger.error("No bucket specified!")
            sys.exit(1)

        if "maven" in aws_bucket:
            logger.info(
                "The target is a maven repository. Will refresh the index as maven package type"
            )
            package_type = PACKAGE_TYPE_MAVEN
        elif "npm" in aws_bucket:
            package_type = PACKAGE_TYPE_NPM
            logger.info(
                "The target is a npm repository. Will refresh the index as npm package type"
            )
        else:
            logger.error(
                "The target is not supported. Only maven or npm target is supported."
            )
            sys.exit(1)

        re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun)
    except Exception:
        print(traceback.format_exc())
        sys.exit(2)  # distinguish between exception and bad config or bad state
77 changes: 70 additions & 7 deletions charon/pkgs/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from charon.storage import S3Client
from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX)
from charon.utils.files import digest_content
from jinja2 import Template
import os
import logging
Expand Down Expand Up @@ -149,6 +150,17 @@ def __generate_index_html(


def __to_html(package_type: str, contents: List[str], folder: str, top_level: str) -> str:
    """Render the index page for *folder* and write it under *top_level*.

    Returns the filesystem path of the generated index.html file.
    """
    page = __to_html_content(package_type, contents, folder)
    target_file = (
        os.path.join(top_level, "index.html")
        if folder == "/"
        else os.path.join(top_level, folder, "index.html")
    )
    os.makedirs(os.path.dirname(target_file), exist_ok=True)
    with open(target_file, 'w', encoding='utf-8') as out:
        out.write(page)
    return target_file


def __to_html_content(package_type: str, contents: List[str], folder: str) -> str:
items = []
if folder != "/":
items.append("../")
Expand All @@ -160,13 +172,7 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st
items.extend(contents)
items = __sort_index_items(items)
index = IndexedHTML(title=folder, header=folder, items=items)
html_path = os.path.join(top_level, folder, "index.html")
if folder == "/":
html_path = os.path.join(top_level, "index.html")
os.makedirs(os.path.dirname(html_path), exist_ok=True)
with open(html_path, 'w', encoding='utf-8') as html:
html.write(index.generate_index_file_content(package_type))
return html_path
return index.generate_index_file_content(package_type)


def __sort_index_items(items):
Expand Down Expand Up @@ -250,3 +256,60 @@ def __compare(self, other) -> int:
return -1
else:
return 0


def re_index(
    bucket: str,
    prefix: str,
    path: str,
    package_type: str,
    aws_profile: str = None,
    dry_run: bool = False
):
    """Refresh the index.html for the specified folder in the bucket.

    :param bucket: name of the s3 bucket to refresh
    :param prefix: key prefix inside the bucket ("" for none)
    :param path: folder (relative to prefix) whose index is refreshed
    :param package_type: PACKAGE_TYPE_MAVEN or PACKAGE_TYPE_NPM
    :param aws_profile: optional AWS profile for the s3 client
    :param dry_run: when True, no s3 modification is performed
    """
    s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
    s3_folder = os.path.join(prefix, path)
    if path.strip() in ("", "/"):
        s3_folder = prefix
    items: List[str] = s3_client.list_folder_content(bucket, s3_folder)
    contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)]
    if PACKAGE_TYPE_NPM == package_type:
        # NPM metadata folders serve package.json instead of index.html
        # over http, so re-indexing them would have no visible effect.
        if any("package.json" in c for c in contents):
            # logger.warn is a deprecated alias; use logger.warning
            logger.warning(
                "The path %s contains NPM package.json which will work as "
                "package metadata for indexing. This indexing is ignored.",
                path
            )
            return

    if contents:
        real_contents = []
        if prefix and prefix.strip() != "":
            # Strip the bucket prefix so entries are relative to the repo root.
            for c in contents:
                if c.strip() != "":
                    if c.startswith(prefix):
                        real_c = remove_prefix(c, prefix)
                        real_c = remove_prefix(real_c, "/")
                        real_contents.append(real_c)
                    else:
                        real_contents.append(c)
        else:
            real_contents = contents
        logger.debug(real_contents)
        index_content = __to_html_content(package_type, real_contents, path)
        if not dry_run:
            index_path = os.path.join(path, "index.html")
            if path == "/":
                index_path = "index.html"
            # Delete then re-upload because simple_upload_file refuses
            # to overwrite an existing key.
            s3_client.simple_delete_file(index_path, (bucket, prefix))
            s3_client.simple_upload_file(
                index_path, index_content, (bucket, prefix),
                "text/html", digest_content(index_content)
            )
    else:
        logger.warning(
            "The path %s does not contain any contents in bucket %s. "
            "Will not do any re-indexing",
            path, bucket
        )
93 changes: 89 additions & 4 deletions charon/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,10 +530,11 @@ def delete_files(
self, file_paths: List[str], target: Tuple[str, str],
product: Optional[str], root="/"
) -> List[str]:
""" Deletes a list of files to s3 bucket. * Use the cut down file path as s3 key. The cut
down way is move root from the file path if it starts with root. Example: if file_path is
/tmp/maven-repo/org/apache/.... and root is /tmp/maven-repo Then the key will be
org/apache/.....
""" Deletes a list of files to s3 bucket.
* Use the cut down file path as s3 key. The cut
down way is move root from the file path if it starts with root.
Example: if file_path is /tmp/maven-repo/org/apache/.... and
root is /tmp/maven-repo Then the key will be org/apache/.....
* The removing will happen with conditions of product checking. First the deletion
will remove The product from the file metadata "rh-products". After the metadata
removing, if there still are extra products left in that metadata, the file will not
Expand Down Expand Up @@ -637,6 +638,90 @@ async def path_delete_handler(

return failed_files

def simple_delete_file(
    self, file_path: str, target: Tuple[str, str]
):
    """Delete a single object from an s3 bucket unconditionally.

    Unlike delete_files, no product/version bookkeeping is consulted:
    the object is removed outright, so use with care. If you want to
    delete product artifact files, please use delete_files.
    *target* is a (bucket, prefix) tuple; *file_path* is joined onto
    the prefix to form the s3 key.
    """
    bucket_name, key_prefix = target
    s3_bucket = self.__get_bucket(bucket_name)
    key = os.path.join(key_prefix, file_path)
    try:
        if self.__file_exists(s3_bucket.Object(key)):
            s3_bucket.delete_objects(Delete={"Objects": [{"Key": key}]})
        else:
            logger.warning(
                'Warning: File %s does not exist in S3 bucket %s, will ignore its deleting',
                file_path, bucket_name
            )
    except (ClientError, HTTPClientError) as e:
        logger.error(
            "Error: file existence check failed due to error: %s", e
        )

def simple_upload_file(
    self, file_path: str, file_content: str,
    target: Tuple[str, str],
    mime_type: str = None,
    check_sum_sha1: str = None
):
    """ Uploads file to s3 bucket, regardless of any extra
    information like product and version info.
    * Warning: this will directly upload the file content without
    any product metadata handling, so please be careful to use.
    If you want to upload product artifact files, please use
    upload_files
    * Raises FileExistsError if the key already exists in the bucket.
    """
    bucket = target[0]
    prefix = target[1]
    bucket_obj = self.__get_bucket(bucket)
    path_key = os.path.join(prefix, file_path)
    file_object = bucket_obj.Object(path_key)
    logger.debug(
        'Uploading %s to bucket %s', path_key, bucket
    )
    try:
        existed = self.__file_exists(file_object)
    except (ClientError, HTTPClientError) as e:
        logger.error(
            "Error: file existence check failed due to error: %s", e
        )
        return

    if existed:
        # Refuse to overwrite: callers must delete the old key first
        # (see re_index), which keeps this operation explicit.
        raise FileExistsError(
            "Error: file %s already exists, upload is forbidden." % path_key
        )

    content_type = mime_type or DEFAULT_MIME_TYPE
    f_meta = {}
    if check_sum_sha1 and check_sum_sha1.strip() != "":
        f_meta[CHECKSUM_META_KEY] = check_sum_sha1
    try:
        if not self.__dry_run:
            file_object.put(
                Body=file_content,
                Metadata=f_meta,
                ContentType=content_type
            )
            logger.debug('Uploaded %s to bucket %s', file_path, bucket)
    except (ClientError, HTTPClientError) as e:
        logger.error(
            "ERROR: file %s not uploaded to bucket %s due to error: %s ",
            file_path, bucket, e
        )

def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str):
if not manifest_bucket_name:
logger.warning(
Expand Down
Loading