Skip to content

Commit e46ae6b

Browse files
authored
Merge e7cf9ef into 380df25
2 parents 380df25 + e7cf9ef commit e46ae6b

File tree

9 files changed

+553
-13
lines changed

9 files changed

+553
-13
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,6 @@ package/
1616
# Unit test
1717
__pytest_reports
1818
htmlcov
19+
20+
# Generated during local runs
21+
*.log

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,13 @@ This command will delete some paths from repo in S3.
9696
but not delete the artifacts themselves.
9797
* During or after the paths' deletion, regenerate the
9898
metadata files and index files for both types.
99+
100+
### charon-index: refresh the index.html for the specified path
101+
102+
```bash
103+
usage: charon index $PATH [-t, --target] [-D, --debug] [-q, --quiet]
104+
```
105+
106+
This command will refresh the index.html for the specified path.
107+
108+
* Note that if the path is an NPM metadata path that contains a package.json, the refresh will have no effect, because this type of folder serves the package.json instead of the index.html in HTTP responses.

charon/cmd/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from click import group
1717
from charon.cmd.cmd_upload import upload
1818
from charon.cmd.cmd_delete import delete
19+
from charon.cmd.cmd_index import index
1920

2021

2122
@group()
@@ -29,3 +30,4 @@ def cli():
2930
# init group command
3031
cli.add_command(upload)
3132
cli.add_command(delete)
33+
cli.add_command(index)

charon/cmd/cmd_index.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
"""
2+
Copyright (C) 2022 Red Hat, Inc. (https://github.com/Commonjava/charon)
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
from charon.config import get_config
18+
from charon.cmd.internal import _decide_mode
19+
from charon.pkgs.indexing import re_index
20+
from charon.constants import PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM
21+
from click import command, option, argument
22+
23+
import traceback
24+
import logging
25+
import os
26+
import sys
27+
28+
logger = logging.getLogger(__name__)
29+
30+
31+
@argument(
    "path",
    type=str,
)
@option(
    "--target",
    "-t",
    help="""
    The target to do the index refreshing, which will decide
    which s3 bucket and what root path where the index files
    will be refreshed.
    """,
    required=True
)
@option(
    "--debug",
    "-D",
    help="Debug mode, will print all debug logs for problem tracking.",
    is_flag=True,
    default=False
)
@option(
    "--quiet",
    "-q",
    help="Quiet mode, will shrink most of the logs except warning and errors.",
    is_flag=True,
    default=False
)
@option(
    "--dryrun",
    "-n",
    help="Dry run mode, the refreshed index.html will not be written to the s3 bucket.",
    is_flag=True,
    default=False
)
@command()
def index(
    path: str,
    target: str,
    debug: bool = False,
    quiet: bool = False,
    dryrun: bool = False
):
    """This command will re-generate the index.html files for the
    specified path.

    Exit codes: 1 for bad configuration or bad state (no config, no AWS
    profile, unknown target, no bucket, unsupported repository type),
    2 for an unexpected exception.
    """
    _decide_mode(
        "index-{}".format(target), path.replace("/", "_"),
        is_quiet=quiet, is_debug=debug
    )
    try:
        conf = get_config()
        if not conf:
            sys.exit(1)

        # The environment variable takes precedence over the config file.
        aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
        if not aws_profile:
            logger.error("No AWS profile specified!")
            sys.exit(1)

        tgt = conf.get_target(target)
        if not tgt:
            # log is recorded get_target
            sys.exit(1)

        aws_bucket = None
        prefix = None
        # Reuse the target already fetched instead of querying the config
        # again. If the target lists several entries the last one wins.
        for b in tgt:
            aws_bucket = b.get('bucket')
            prefix = b.get('prefix', '')

        # Validate the bucket before inspecting its name: testing
        # `"maven" in None` would raise TypeError and be reported as an
        # unexpected exception (exit code 2) instead of a config error.
        if not aws_bucket:
            logger.error("No bucket specified!")
            sys.exit(1)

        # The package type is derived from the bucket name.
        package_type = None
        if "maven" in aws_bucket:
            logger.info(
                "The target is a maven repository. Will refresh the index as maven package type"
            )
            package_type = PACKAGE_TYPE_MAVEN
        elif "npm" in aws_bucket:
            package_type = PACKAGE_TYPE_NPM
            logger.info(
                "The target is a npm repository. Will refresh the index as npm package type"
            )
        else:
            logger.error(
                "The target is not supported. Only maven or npm target is supported."
            )
            sys.exit(1)

        re_index(aws_bucket, prefix, path, package_type, aws_profile, dryrun)
    except Exception:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exit(1) calls above pass through this handler untouched.
        print(traceback.format_exc())
        sys.exit(2)  # distinguish between exception and bad config or bad state

charon/pkgs/indexing.py

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from charon.storage import S3Client
1818
from charon.constants import (INDEX_HTML_TEMPLATE, NPM_INDEX_HTML_TEMPLATE,
1919
PACKAGE_TYPE_MAVEN, PACKAGE_TYPE_NPM, PROD_INFO_SUFFIX)
20+
from charon.utils.files import digest_content
2021
from jinja2 import Template
2122
import os
2223
import logging
@@ -149,6 +150,17 @@ def __generate_index_html(
149150

150151

151152
def __to_html(package_type: str, contents: List[str], folder: str, top_level: str) -> str:
    """Render the index page for ``folder`` and write it under ``top_level``.

    The page body comes from ``__to_html_content``. The file is written as
    ``<top_level>/<folder>/index.html``, or ``<top_level>/index.html`` when
    ``folder`` is the root "/". Missing parent directories are created.

    Returns the path of the written index.html file.
    """
    rendered = __to_html_content(package_type, contents, folder)
    if folder == "/":
        # Joining "/" would discard top_level, so the root is special-cased.
        target_file = os.path.join(top_level, "index.html")
    else:
        target_file = os.path.join(top_level, folder, "index.html")
    parent_dir = os.path.dirname(target_file)
    os.makedirs(parent_dir, exist_ok=True)
    with open(target_file, 'w', encoding='utf-8') as out:
        out.write(rendered)
    return target_file
161+
162+
163+
def __to_html_content(package_type: str, contents: List[str], folder: str) -> str:
152164
items = []
153165
if folder != "/":
154166
items.append("../")
@@ -160,13 +172,7 @@ def __to_html(package_type: str, contents: List[str], folder: str, top_level: st
160172
items.extend(contents)
161173
items = __sort_index_items(items)
162174
index = IndexedHTML(title=folder, header=folder, items=items)
163-
html_path = os.path.join(top_level, folder, "index.html")
164-
if folder == "/":
165-
html_path = os.path.join(top_level, "index.html")
166-
os.makedirs(os.path.dirname(html_path), exist_ok=True)
167-
with open(html_path, 'w', encoding='utf-8') as html:
168-
html.write(index.generate_index_file_content(package_type))
169-
return html_path
175+
return index.generate_index_file_content(package_type)
170176

171177

172178
def __sort_index_items(items):
@@ -250,3 +256,60 @@ def __compare(self, other) -> int:
250256
return -1
251257
else:
252258
return 0
259+
260+
261+
def re_index(
    bucket: str,
    prefix: str,
    path: str,
    package_type: str,
    aws_profile: str = None,
    dry_run: bool = False
):
    """Refresh the index.html for the specified folder in the bucket.

    The folder content is listed from S3, product info files are filtered
    out, and a new index.html is rendered from what remains. Unless
    ``dry_run`` is set, the existing index.html is deleted and the new one
    is uploaded in its place.

    Nothing is done when the folder has no content, or when the package
    type is NPM and the folder contains a package.json.
    """
    s3_client = S3Client(aws_profile=aws_profile, dry_run=dry_run)
    # A missing prefix is treated as "no prefix" so the joins below work.
    prefix = prefix or ""
    # One root test used everywhere: "", "/" and whitespace-padded variants.
    is_root = path.strip().strip("/") == ""
    # os.path.join drops everything before an absolute component, so a
    # leading "/" in path would silently discard the prefix.
    rel_path = "" if is_root else path.lstrip("/")
    s3_folder = prefix if is_root else os.path.join(prefix, rel_path)

    items: List[str] = s3_client.list_folder_content(bucket, s3_folder)
    contents = [i for i in items if not i.endswith(PROD_INFO_SUFFIX)]

    if PACKAGE_TYPE_NPM == package_type:
        if any("package.json" in c for c in contents):
            logger.warning(
                "The path %s contains NPM package.json which will work as "
                "package metadata for indexing. This indexing is ignored.",
                path
            )
            return

    if not contents:
        logger.warning(
            "The path %s does not contain any contents in bucket %s. "
            "Will not do any re-indexing",
            path, bucket
        )
        return

    if prefix.strip() != "":
        # Strip the target prefix (and the separator after it) from each
        # listed entry; blank entries are dropped.
        real_contents = []
        for c in contents:
            if c.strip() == "":
                continue
            if c.startswith(prefix):
                real_c = remove_prefix(c, prefix)
                real_c = remove_prefix(real_c, "/")
                real_contents.append(real_c)
            else:
                real_contents.append(c)
    else:
        real_contents = contents
    logger.debug(real_contents)

    # The root folder is rendered as "/" so no "../" parent link is added.
    folder = "/" if is_root else path
    index_content = __to_html_content(package_type, real_contents, folder)
    if not dry_run:
        if is_root:
            index_path = "index.html"
        else:
            index_path = os.path.join(rel_path, "index.html")
        target = (bucket, prefix)
        # The upload refuses to overwrite, so the old index goes first.
        s3_client.simple_delete_file(index_path, target)
        s3_client.simple_upload_file(
            index_path, index_content, target,
            "text/html", digest_content(index_content)
        )

charon/storage.py

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -530,10 +530,11 @@ def delete_files(
530530
self, file_paths: List[str], target: Tuple[str, str],
531531
product: Optional[str], root="/"
532532
) -> List[str]:
533-
""" Deletes a list of files to s3 bucket. * Use the cut down file path as s3 key. The cut
534-
down way is move root from the file path if it starts with root. Example: if file_path is
535-
/tmp/maven-repo/org/apache/.... and root is /tmp/maven-repo Then the key will be
536-
org/apache/.....
533+
""" Deletes a list of files to s3 bucket.
534+
* Use the cut down file path as s3 key. The cut
535+
down way is move root from the file path if it starts with root.
536+
Example: if file_path is /tmp/maven-repo/org/apache/.... and
537+
root is /tmp/maven-repo Then the key will be org/apache/.....
537538
* The removing will happen with conditions of product checking. First the deletion
538539
will remove The product from the file metadata "rh-products". After the metadata
539540
removing, if there still are extra products left in that metadata, the file will not
@@ -637,6 +638,90 @@ async def path_delete_handler(
637638

638639
return failed_files
639640

641+
def simple_delete_file(
    self, file_path: str, target: Tuple[str, str]
):
    """ Deletes file in s3 bucket, regardless of any extra
        information like product and version info.
        * Warning: this will directly delete the files even if
        it has lots of product info, so please be careful to use.
        If you want to delete product artifact files, please use
        delete_files
        * target is a (bucket name, prefix) tuple; the s3 key is
        the prefix joined with file_path.
        * A missing file is only logged as a warning. S3 client
        errors are logged and not re-raised.
        * In dry run mode nothing is deleted.
    """
    bucket = target[0]
    prefix = target[1]
    bucket_obj = self.__get_bucket(bucket)
    path_key = os.path.join(prefix, file_path)
    file_object = bucket_obj.Object(path_key)

    try:
        existed = self.__file_exists(file_object)
    except (ClientError, HTTPClientError) as e:
        logger.error(
            "Error: file existence check failed due to error: %s", e
        )
        return

    if not existed:
        logger.warning(
            'Warning: File %s does not exist in S3 bucket %s, will ignore its deleting',
            file_path, bucket
        )
        return

    # Honour the dry-run flag the same way simple_upload_file does.
    if self.__dry_run:
        logger.debug('Dry run: skip deleting %s from bucket %s', path_key, bucket)
        return

    try:
        bucket_obj.delete_objects(Delete={"Objects": [{"Key": path_key}]})
    except (ClientError, HTTPClientError) as e:
        # Reported separately so a failed delete is not mistaken for a
        # failed existence check.
        logger.error(
            "ERROR: file %s not deleted from bucket %s due to error: %s ",
            file_path, bucket, e
        )
671+
def simple_upload_file(
    self, file_path: str, file_content: str,
    target: Tuple[str, str],
    mime_type: str = None,
    check_sum_sha1: str = None
):
    """ Uploads file to s3 bucket, regardless of any extra
        information like product and version info.
        * Warning: this will directly upload the content without
        recording any product info, so please be careful to use.
        If you want to upload product artifact files, please use
        upload_files
        * target is a (bucket name, prefix) tuple; the s3 key is
        the prefix joined with file_path.
        * mime_type falls back to DEFAULT_MIME_TYPE when empty. A
        non-blank check_sum_sha1 is stored in the object metadata.
        * In dry run mode the object is not written.
        * Raises FileExistsError if the key already exists. S3
        client errors are logged and not re-raised.
    """
    bucket = target[0]
    prefix = target[1]
    bucket_obj = self.__get_bucket(bucket)
    path_key = os.path.join(prefix, file_path)
    file_object = bucket_obj.Object(path_key)
    logger.debug(
        'Uploading %s to bucket %s', path_key, bucket
    )
    try:
        existed = self.__file_exists(file_object)
    except (ClientError, HTTPClientError) as e:
        logger.error(
            "Error: file existence check failed due to error: %s", e
        )
        return

    # Never overwrite: callers must delete the old object first.
    if existed:
        raise FileExistsError(
            "Error: file {} already exists, upload is forbidden.".format(file_path)
        )

    content_type = mime_type
    if not content_type:
        content_type = DEFAULT_MIME_TYPE
    f_meta = {}
    if check_sum_sha1 and check_sum_sha1.strip() != "":
        f_meta[CHECKSUM_META_KEY] = check_sum_sha1
    try:
        if not self.__dry_run:
            file_object.put(
                Body=file_content,
                Metadata=f_meta,
                ContentType=content_type
            )
        logger.debug('Uploaded %s to bucket %s', file_path, bucket)
    except (ClientError, HTTPClientError) as e:
        logger.error(
            "ERROR: file %s not uploaded to bucket %s due to error: %s ",
            file_path, bucket, e
        )
724+
640725
def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str):
641726
if not manifest_bucket_name:
642727
logger.warning(

0 commit comments

Comments
 (0)