65 changes: 38 additions & 27 deletions charon/cmd/cmd_upload.py
@@ -16,12 +16,12 @@
from typing import List

from charon.config import get_config
from charon.utils.archive import detect_npm_archive, NpmArchiveType
from charon.utils.archive import detect_npm_archives, NpmArchiveType
from charon.pkgs.maven import handle_maven_uploading
from charon.pkgs.npm import handle_npm_uploading
from charon.cmd.internal import (
_decide_mode, _validate_prod_key,
_get_local_repo, _get_targets,
_get_local_repos, _get_targets,
_get_ignore_patterns, _safe_delete
)
from click import command, option, argument
@@ -35,8 +35,9 @@


@argument(
"repo",
"repos",
type=str,
nargs=-1  # allow multiple arguments for zip URLs
)
@option(
"--product",
@@ -138,7 +139,7 @@
@option("--dryrun", "-n", is_flag=True, default=False)
@command()
def upload(
repo: str,
repos: List[str],
product: str,
version: str,
targets: List[str],
@@ -152,9 +153,10 @@ def upload(
quiet=False,
dryrun=False
):
"""Upload all files from a released product REPO to Ronda
Service. The REPO points to a product released tarball which
is hosted in a remote url or a local path.
"""Upload all files from released product REPOs to Ronda
Service. The REPOs point to a product released tarballs which
are hosted in remote urls or local paths.
Notes: It does not support multiple repos for NPM archives
"""
tmp_dir = work_dir
try:
@@ -173,8 +175,8 @@
logger.error("No AWS profile specified!")
sys.exit(1)

archive_path = _get_local_repo(repo)
npm_archive_type = detect_npm_archive(archive_path)
archive_paths = _get_local_repos(repos)
archive_types = detect_npm_archives(archive_paths)
product_key = f"{product}-{version}"
manifest_bucket_name = conf.get_manifest_bucket()
targets_ = _get_targets(targets, conf)
@@ -185,31 +187,18 @@
" are set correctly.", targets_
)
sys.exit(1)
if npm_archive_type != NpmArchiveType.NOT_NPM:
logger.info("This is a npm archive")
tmp_dir, succeeded = handle_npm_uploading(
archive_path,
product_key,
targets=targets_,
aws_profile=aws_profile,
dir_=work_dir,
gen_sign=contain_signature,
cf_enable=conf.is_aws_cf_enable(),
key=sign_key,
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
)
if not succeeded:
sys.exit(1)
else:

maven_count = archive_types.count(NpmArchiveType.NOT_NPM)
npm_count = len(archive_types) - maven_count
if maven_count == len(archive_types):
ignore_patterns_list = None
if ignore_patterns:
ignore_patterns_list = ignore_patterns
else:
ignore_patterns_list = _get_ignore_patterns(conf)
logger.info("This is a maven archive")
tmp_dir, succeeded = handle_maven_uploading(
archive_path,
archive_paths,
product_key,
ignore_patterns_list,
root=root_path,
Expand All @@ -225,6 +214,28 @@ def upload(
)
if not succeeded:
sys.exit(1)
elif npm_count == len(archive_types) and len(archive_types) == 1:
logger.info("This is a npm archive")
tmp_dir, succeeded = handle_npm_uploading(
archive_paths[0],
product_key,
targets=targets_,
aws_profile=aws_profile,
dir_=work_dir,
gen_sign=contain_signature,
cf_enable=conf.is_aws_cf_enable(),
key=sign_key,
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
)
if not succeeded:
sys.exit(1)
elif npm_count == len(archive_types) and len(archive_types) > 1:
logger.error("Doesn't support multiple upload for npm")
sys.exit(1)
else:
logger.error("Upload types are not consistent")
sys.exit(1)
except Exception:
print(traceback.format_exc())
sys.exit(2) # distinguish between exception and bad config or bad state
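
For illustration, here is a minimal sketch of the dispatch rule the new command implements: any number of maven zips is merged and uploaded, exactly one npm tarball is accepted, and anything else is rejected. The helper name classify_upload is hypothetical and not part of the PR.

from typing import List

from charon.utils.archive import NpmArchiveType, detect_npm_archives


def classify_upload(archive_paths: List[str]) -> str:
    """Hypothetical helper mirroring the upload command's dispatch logic."""
    types = detect_npm_archives(archive_paths)
    maven_count = types.count(NpmArchiveType.NOT_NPM)
    npm_count = len(types) - maven_count
    if maven_count == len(types):
        return "maven"  # any number of maven zips gets merged and uploaded
    if npm_count == len(types) and len(types) == 1:
        return "npm"  # exactly one npm tarball is allowed
    if npm_count == len(types):
        raise ValueError("multiple npm archives are not supported")
    raise ValueError("mixed maven and npm archives are not supported")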
8 changes: 8 additions & 0 deletions charon/cmd/internal.py
@@ -75,6 +75,14 @@ def _get_local_repo(url: str) -> str:
return archive_path


def _get_local_repos(urls: list) -> list:
archive_paths = []
for url in urls:
archive_path = _get_local_repo(url)
archive_paths.append(archive_path)
return archive_paths
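
A quick usage sketch, assuming `_get_local_repo` resolves a remote URL to a downloaded local file while leaving local paths unchanged, as the upload command's docstring suggests. The paths below are invented for illustration:

archive_paths = _get_local_repos([
    "https://example.com/releases/product-1.0.zip",  # downloaded to a temp file first
    "/mnt/releases/product-extras-1.0.zip",          # local path, used as-is
])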


def _validate_prod_key(product: str, version: str) -> bool:
if not product or product.strip() == "":
logger.error("Error: product can not be empty!")
197 changes: 194 additions & 3 deletions charon/pkgs/maven.py
@@ -37,6 +37,7 @@
from datetime import datetime
from zipfile import ZipFile, BadZipFile
from tempfile import mkdtemp
from shutil import rmtree, copy2
from defusedxml import ElementTree

import os
@@ -261,7 +262,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) -


def handle_maven_uploading(
repo: str,
repos: List[str],
prod_key: str,
ignore_patterns=None,
root="maven-repository",
@@ -294,8 +295,9 @@
"""
if targets is None:
targets = []
# 1. extract tarball
tmp_root = _extract_tarball(repo, prod_key, dir__=dir_)

# 1. extract tarballs
tmp_root = _extract_tarballs(repos, root, prod_key, dir__=dir_)

# 2. scan for paths and filter out the ignored paths,
# and also collect poms for later metadata generation
@@ -673,6 +675,195 @@ def _extract_tarball(repo: str, prefix="", dir__=None) -> str:
sys.exit(1)


def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str:
""" Extract multiple zip archives to a temporary directory.
* repos are the list of repo paths to extract
* root is a prefix in the tarball to identify which path is
the beginning of the maven GAV path
* prefix is the prefix for temporary directory name
* dir__ is the directory where temporary directories will be created.

Returns the path to the merged temporary directory containing all extracted files
"""
# Create final merge directory
final_tmp_root = mkdtemp(prefix=f"charon-{prefix}-final-", dir=dir__)

total_copied = 0
total_duplicated = 0
total_merged = 0
total_processed = 0

# Collect all extracted directories first
extracted_dirs = []

for repo in repos:
if os.path.exists(repo):
try:
logger.info("Extracting tarball %s", repo)
repo_zip = ZipFile(repo)
tmp_root = mkdtemp(prefix=f"charon-{prefix}-", dir=dir__)
extract_zip_all(repo_zip, tmp_root)
extracted_dirs.append(tmp_root)

except BadZipFile as e:
logger.error("Tarball extraction error for repo %s: %s", repo, e)
sys.exit(1)
else:
logger.error("Error: archive %s does not exist", repo)
sys.exit(1)

# Merge all extracted directories
if extracted_dirs:
# Create merged directory name
merged_dir_name = "merged_repositories"
merged_dest_dir = os.path.join(final_tmp_root, merged_dir_name)

# Merge content from all extracted directories
for extracted_dir in extracted_dirs:
copied, duplicated, merged, processed = _merge_directories_with_rename(
extracted_dir, merged_dest_dir, root
)
total_copied += copied
total_duplicated += duplicated
total_merged += merged
total_processed += processed

# Clean up temporary extraction directory
rmtree(extracted_dir)

logger.info(
"All zips merged! Total copied: %s, Total duplicated: %s, "
"Total merged: %s, Total processed: %s",
total_copied,
total_duplicated,
total_merged,
total_processed,
)
return final_tmp_root
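
A hedged usage sketch of the new helper (paths and prefix are invented for illustration): each zip is extracted into its own temporary directory, merged into a single tree, and the per-zip temp dirs are removed.

tmp_root = _extract_tarballs(
    ["/tmp/product-core-1.0.zip", "/tmp/product-extras-1.0.zip"],
    root="maven-repository",
    prefix="product-1.0",
)
# Merged content ends up under <tmp_root>/merged_repositories/...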


def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
""" Recursively copy files from src_dir to dest_dir, skipping files that
already exist in dest_dir and merging archetype catalogs.
* src_dir is the source directory to copy from
* dest_dir is the destination directory to copy to
* root is the prefix used to locate the repository content root

Returns Tuple of (copied_count, duplicated_count, merged_count, processed_count)
"""
copied_count = 0
duplicated_count = 0
merged_count = 0
processed_count = 0

# Find the actual content directory
content_root = src_dir
for item in os.listdir(src_dir):
item_path = os.path.join(src_dir, item)
# Check whether this item contains the root maven-repository subdirectory
maven_repo_path = os.path.join(item_path, root)
if os.path.isdir(item_path) and os.path.exists(maven_repo_path):
content_root = item_path
break

# pylint: disable=unused-variable
for root_dir, dirs, files in os.walk(content_root):
# Calculate relative path from content root
rel_path = os.path.relpath(root_dir, content_root)
dest_root = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir

# Create destination directory if it doesn't exist
os.makedirs(dest_root, exist_ok=True)

# Copy all files: merge archetype catalogs and skip duplicates
for file in files:
src_file = os.path.join(root_dir, file)
dest_file = os.path.join(dest_root, file)

if file == ARCHETYPE_CATALOG_FILENAME:
_handle_archetype_catalog_merge(src_file, dest_file)
merged_count += 1
logger.debug("Merged archetype catalog: %s -> %s", src_file, dest_file)
elif os.path.exists(dest_file):
duplicated_count += 1
logger.debug("Duplicated: %s, skipped", dest_file)
else:
copy2(src_file, dest_file)
copied_count += 1
logger.debug("Copied: %s -> %s", src_file, dest_file)

processed_count += 1

logger.info(
"One zip merged! Files copied: %s, Files duplicated: %s, "
"Files merged: %s, Total files processed: %s",
copied_count,
duplicated_count,
merged_count,
processed_count,
)
return copied_count, duplicated_count, merged_count, processed_count
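
To make the counters concrete, here is a small self-contained demo (the layout and artifact names are hypothetical): two extracted zips share one artifact, so the second merge reports it as a duplicate.

import os
from tempfile import mkdtemp

from charon.pkgs.maven import _merge_directories_with_rename

# Build two fake extracted zips that both contain the same pom.
src_a, src_b, dest = mkdtemp(), mkdtemp(), mkdtemp()
for src in (src_a, src_b):
    gav = os.path.join(src, "product-1.0", "maven-repository", "org", "foo", "1.0")
    os.makedirs(gav)
    with open(os.path.join(gav, "foo-1.0.pom"), "w") as f:
        f.write("<project/>")

# The first merge copies the pom; the second one skips it as a duplicate.
for src in (src_a, src_b):
    print(_merge_directories_with_rename(src, dest, "maven-repository"))
# -> (1, 0, 0, 1) then (0, 1, 0, 1)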


def _handle_archetype_catalog_merge(src_catalog: str, dest_catalog: str):
"""
Handle merging of archetype-catalog.xml files during directory merge.

Args:
src_catalog: Source archetype-catalog.xml file path
dest_catalog: Destination archetype-catalog.xml file path
"""
try:
with open(src_catalog, "rb") as sf:
src_archetypes = _parse_archetypes(sf.read())
except ElementTree.ParseError as e:
logger.warning("Failed to read source archetype catalog %s: %s", src_catalog, e)
return

if len(src_archetypes) < 1:
logger.warning(
"No archetypes found in source archetype-catalog.xml: %s, "
"even though the file exists! Skipping.",
src_catalog
)
return

# Copy directly if dest_catalog doesn't exist
if not os.path.exists(dest_catalog):
copy2(src_catalog, dest_catalog)
return

try:
with open(dest_catalog, "rb") as df:
dest_archetypes = _parse_archetypes(df.read())
except ElementTree.ParseError as e:
logger.warning("Failed to read dest archetype catalog %s: %s", dest_catalog, e)
return

if len(dest_archetypes) < 1:
logger.warning(
"No archetypes found in dest archetype-catalog.xml: %s, "
"even though the file exists! Copy directly from the src_catalog, %s.",
dest_catalog, src_catalog
)
copy2(src_catalog, dest_catalog)
return

original_dest_size = len(dest_archetypes)
for sa in src_archetypes:
if sa not in dest_archetypes:
dest_archetypes.append(sa)
else:
logger.debug("DUPLICATE ARCHETYPE: %s", sa)

if len(dest_archetypes) != original_dest_size:
content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content()
try:
overwrite_file(dest_catalog, content)
except Exception as e:
logger.error("Failed to merge archetype catalog: %s", dest_catalog)
raise e


def _scan_paths(files_root: str, ignore_patterns: List[str],
root: str) -> Tuple[str, List[str], List[str], List[str]]:
# 2. scan for paths and filter out the ignored paths,
13 changes: 13 additions & 0 deletions charon/utils/archive.py
@@ -182,6 +182,19 @@ def detect_npm_archive(repo):
return NpmArchiveType.NOT_NPM


def detect_npm_archives(repos):
"""Detects, if the archives need to have npm workflow.
:parameter repos list of repository directories
:return list of NpmArchiveType values
"""
results = []
for repo in repos:
result = detect_npm_archive(repo)
results.append(result)

return results
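
For illustration, a hedged sketch of the expected result shape (the file names are invented; anything not detected as npm comes back as NOT_NPM):

from charon.utils.archive import NpmArchiveType, detect_npm_archives

# Hypothetical archives: one maven zip, one npm tarball.
types = detect_npm_archives(["/tmp/product-1.0.zip", "/tmp/pkg-1.0.tgz"])
assert types[0] == NpmArchiveType.NOT_NPM   # maven-style archive
assert types[1] != NpmArchiveType.NOT_NPM   # detected as an npm archive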


def download_archive(url: str, base_dir=None) -> str:
dir_ = base_dir
if not dir_ or not os.path.isdir(dir_):