Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ RUN if [ "$DOTNET_VERSION" = "6" ]; then \
fi

# Install additional tools
RUN npm install @coana-tech/cli -g && \
RUN npm install @coana-tech/cli socket -g && \
gem install bundler && \
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
. ~/.cargo/env && \
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "hatchling.build"

[project]
name = "socketsecurity"
version = "2.2.51"
version = "2.2.55"
requires-python = ">= 3.10"
license = {"file" = "LICENSE"}
dependencies = [
Expand Down
2 changes: 1 addition & 1 deletion socketsecurity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__author__ = 'socket.dev'
__version__ = '2.2.51'
__version__ = '2.2.55'
USER_AGENT = f'SocketPythonCLI/{__version__}'
15 changes: 15 additions & 0 deletions socketsecurity/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class CliConfig:
reach_additional_params: Optional[List[str]] = None
only_facts_file: bool = False
reach_use_only_pregenerated_sboms: bool = False
max_purl_batch_size: int = 5000

@classmethod
def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
Expand Down Expand Up @@ -106,6 +107,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
'commit_sha': args.commit_sha,
'generate_license': args.generate_license,
'enable_debug': args.enable_debug,
'enable_diff': args.enable_diff,
'allow_unverified': args.allow_unverified,
'enable_json': args.enable_json,
'enable_sarif': args.enable_sarif,
Expand Down Expand Up @@ -141,6 +143,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
'reach_additional_params': args.reach_additional_params,
'only_facts_file': args.only_facts_file,
'reach_use_only_pregenerated_sboms': args.reach_use_only_pregenerated_sboms,
'max_purl_batch_size': args.max_purl_batch_size,
'version': __version__
}
try:
Expand Down Expand Up @@ -187,6 +190,11 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
logging.error("--reach-concurrency must be >= 1")
exit(1)

# Validate max_purl_batch_size is within allowed range
if args.max_purl_batch_size < 1 or args.max_purl_batch_size > 9999:
logging.error("--max-purl-batch-size must be between 1 and 9999")
exit(1)

return cls(**config_args)

def to_dict(self) -> dict:
Expand Down Expand Up @@ -446,6 +454,13 @@ def create_argument_parser() -> argparse.ArgumentParser:
action="store_true",
help="Exclude license details from the diff report (boosts performance for large repos)"
)
output_group.add_argument(
"--max-purl-batch-size",
dest="max_purl_batch_size",
type=int,
default=5000,
help="Maximum batch size for PURL endpoint calls when generating license info (default: 5000, min: 1, max: 9999)"
)

output_group.add_argument(
"--disable-security-issue",
Expand Down
126 changes: 54 additions & 72 deletions socketsecurity/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,54 +659,6 @@ def create_full_scan_with_report_url(
# Return result in the format expected by the user
return diff

def check_full_scans_status(
    self,
    head_full_scan_id: str,
    new_full_scan_id: str,
    poll_interval: float = 1.0,
) -> bool:
    """Wait until both full scans report the ``"resolve"`` scan state.

    First calls ``self.sdk.fullscans.stream`` once per scan with the SDK
    timeout temporarily lowered to 0.5 seconds; any exception from those
    calls is swallowed and only logged at debug level. Then polls
    ``self.sdk.fullscans.metadata`` until both scans have a ``scan_state``
    of ``"resolve"`` or ``self.config.timeout`` seconds have elapsed.

    Args:
        head_full_scan_id: ID of the head full scan.
        new_full_scan_id: ID of the newly created full scan.
        poll_interval: Seconds to wait between polling rounds. Negative
            values are treated as 0.

    Returns:
        True if both scans reached the ``"resolve"`` state before the
        timeout, False otherwise.
    """
    org_slug = self.config.org_slug
    scan_ids = (head_full_scan_id, new_full_scan_id)

    # NOTE(review): the short-timeout stream call presumably just triggers
    # server-side processing and is expected to time out - confirm.
    current_timeout = self.config.timeout
    self.sdk.set_timeout(0.5)
    try:
        for scan_id in scan_ids:
            try:
                self.sdk.fullscans.stream(org_slug, scan_id)
            except Exception:
                log.debug(f"Queued up full scan for processing ({scan_id})")
    finally:
        # Always restore the timeout, even if something unexpected escapes.
        self.sdk.set_timeout(current_timeout)

    start_check = time.time()
    is_ready = False
    # Scans still waiting to reach "resolve"; resolved scans are not re-polled.
    pending = list(dict.fromkeys(scan_ids))
    while True:
        still_pending = []
        for scan_id in pending:
            metadata = self.sdk.fullscans.metadata(org_slug, scan_id)
            state = metadata.get("scan_state") if metadata else None
            if state != "resolve":
                still_pending.append(scan_id)
        pending = still_pending

        # Check readiness before the timeout so a scan that just finished
        # is not also reported as timed out.
        if not pending:
            is_ready = True
            break

        remaining = self.config.timeout - (time.time() - start_check)
        if remaining <= 0:
            log.debug(
                f"Timeout reached while waiting for full scans to be ready "
                f"({head_full_scan_id}, {new_full_scan_id})"
            )
            break

        # Pause between rounds instead of issuing back-to-back requests,
        # without sleeping past the deadline.
        time.sleep(max(0.0, min(poll_interval, remaining)))

    total_time = time.time() - start_check
    if is_ready:
        log.info(f"Full scans are ready in {total_time:.2f} seconds")
    else:
        log.warning(f"Full scans are not ready yet ({head_full_scan_id}, {new_full_scan_id})")
    return is_ready

def get_full_scan(self, full_scan_id: str) -> FullScan:
"""
Get a FullScan object for an existing full scan including sbom_artifacts and packages.
Expand Down Expand Up @@ -846,28 +798,54 @@ def update_package_values(pkg: Package) -> Package:
pkg.url += f"/{pkg.name}/overview/{pkg.version}"
return pkg

def get_license_text_via_purl(self, packages: dict[str, Package], batch_size: int = 5000) -> dict:
    """Populate license attribution and details on *packages* via the PURL endpoint.

    Every key of ``packages`` is sent to ``self.sdk.purl.post`` as
    ``"pkg:/<key>"``, in batches of at most ``batch_size`` components per
    call. Each result is matched back to ``packages`` by the key
    ``"<type>/<name>@<version>"`` built from the result's fields; results
    with no matching key are ignored.

    Args:
        packages: Dictionary of packages to get license info for.
        batch_size: Maximum number of packages to process per API call.
            Values outside 1-9999 are clamped into that range.

    Returns:
        The same ``packages`` dictionary, with ``licenseAttrib`` and
        ``licenseDetails`` set on every entry the endpoint returned.
    """
    # Clamp rather than raise so callers with out-of-range values still work.
    batch_size = max(1, min(9999, batch_size))

    all_components = [{"purl": f"pkg:/{purl}"} for purl in packages]
    total_components = len(all_components)
    # Ceiling division; computed once because it does not change per batch.
    total_batches = (total_components + batch_size - 1) // batch_size
    log.debug(f"Processing {total_components} packages in batches of {batch_size}")

    for batch_num, start in enumerate(range(0, total_components, batch_size), start=1):
        batch_components = all_components[start:start + batch_size]
        log.debug(f"Processing batch {batch_num}/{total_batches} ({len(batch_components)} packages)")

        results = self.sdk.purl.post(
            license=True,
            components=batch_components,
            licenseattrib=True,
            licensedetails=True
        )

        for result in results:
            ecosystem = result["type"]
            name = result["name"]
            package_version = result["version"]
            purl = f"{ecosystem}/{name}@{package_version}"
            if purl in packages:
                packages[purl].licenseAttrib = result.get("licenseAttrib")
                packages[purl].licenseDetails = result.get("licenseDetails")

    return packages

def get_added_and_removed_packages(
Expand Down Expand Up @@ -960,7 +938,14 @@ def get_added_and_removed_packages(
log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
log.error("No matching packages found in head_full_scan")

packages = self.get_license_text_via_purl(packages)
# Only fetch license details if generate_license is enabled
if self.cli_config and self.cli_config.generate_license:
log.debug("Fetching license details via PURL endpoint")
batch_size = self.cli_config.max_purl_batch_size if self.cli_config else 5000
packages = self.get_license_text_via_purl(packages, batch_size=batch_size)
else:
log.debug("Skipping PURL endpoint call (--generate-license not set)")

return added_packages, removed_packages, packages

def create_new_diff(
Expand Down Expand Up @@ -1092,9 +1077,6 @@ def create_new_diff(
log.warning(f"Failed to clean up temporary file {temp_file}: {e}")

# Handle diff generation - now we always have both scans
scans_ready = self.check_full_scans_status(head_full_scan_id, new_full_scan.id)
if scans_ready is False:
log.error(f"Full scans did not complete within {self.config.timeout} seconds")
(
added_packages,
removed_packages,
Expand Down
41 changes: 27 additions & 14 deletions socketsecurity/socketcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def main_code():
# Determine files to check based on the new logic
files_to_check = []
force_api_mode = False
force_diff_mode = False

if files_explicitly_specified:
# Case 2: Files are specified - use them and don't check commit details
Expand All @@ -365,10 +366,21 @@ def main_code():
# Case 1: Files not specified and --ignore-commit-files not set - try to find changed files from commit
files_to_check = git_repo.changed_files
log.debug(f"Using changed files from commit: {files_to_check}")
elif config.ignore_commit_files and is_repo:
# Case 3: Git repo with --ignore-commit-files - force diff mode
files_to_check = []
force_diff_mode = True
log.debug("Git repo with --ignore-commit-files: forcing diff mode")
else:
# ignore_commit_files is set or not a repo - scan everything but force API mode if no supported files
# Case 4: Not a git repo (ignore_commit_files was auto-set to True)
files_to_check = []
log.debug("No files to check from commit (ignore_commit_files=True or not a repo)")
# If --enable-diff is set, force diff mode for non-git repos
log.debug(f"Case 4: Non-git repo - config.enable_diff={config.enable_diff}, type={type(config.enable_diff)}")
if config.enable_diff:
force_diff_mode = True
log.debug("Non-git repo with --enable-diff: forcing diff mode")
else:
log.debug("Non-git repo without --enable-diff: will use full scan mode")

# Check if we have supported manifest files
has_supported_files = files_to_check and core.has_manifest_files(files_to_check)
Expand All @@ -389,22 +401,21 @@ def main_code():
has_supported_files = False

# Case 3: If no supported files or files are empty, force API mode (no PR comments)
if not has_supported_files:
# BUT: Don't force API mode if we're in force_diff_mode
log.debug(f"files_to_check={files_to_check}, has_supported_files={has_supported_files}, force_diff_mode={force_diff_mode}, config.enable_diff={config.enable_diff}")
if not has_supported_files and not force_diff_mode:
force_api_mode = True
log.debug("No supported manifest files found, forcing API mode")
log.debug(f"force_api_mode={force_api_mode}")

# Determine scan behavior
should_skip_scan = False # Always perform scan, but behavior changes based on supported files
if config.ignore_commit_files and not files_explicitly_specified:
# Force full scan when ignoring commit files and no explicit files
should_skip_scan = False
log.debug("Forcing full scan due to ignore_commit_files")
elif not has_supported_files:
# No supported files - still scan but in API mode
if not has_supported_files and not force_diff_mode:
# No supported files and not forcing diff - still scan but in API mode
should_skip_scan = False
log.debug("No supported files but will scan in API mode")
else:
log.debug("Found supported manifest files, proceeding with normal scan")
log.debug("Found supported manifest files or forcing diff mode, proceeding with normal scan")

org_slug = core.config.org_slug
if config.repo_is_public:
Expand Down Expand Up @@ -457,6 +468,7 @@ def main_code():
diff.report_url = ""

# Handle SCM-specific flows
log.debug(f"Flow decision: scm={scm is not None}, force_diff_mode={force_diff_mode}, force_api_mode={force_api_mode}, enable_diff={config.enable_diff}")
if scm is not None and scm.check_event_type() == "comment":
# FIXME: This entire flow should be a separate command called "filter_ignored_alerts_in_comments"
# It's not related to scanning or diff generation - it just:
Expand Down Expand Up @@ -531,14 +543,15 @@ def main_code():

output_handler.handle_output(diff)

elif config.enable_diff and not force_api_mode:
# New logic: --enable-diff forces diff mode even with --integration api (no SCM)
elif (config.enable_diff or force_diff_mode) and not force_api_mode:
# New logic: --enable-diff or force_diff_mode (from --ignore-commit-files in git repos) forces diff mode
log.info("Diff mode enabled without SCM integration")
diff = core.create_new_diff(scan_paths, params, no_change=should_skip_scan, save_files_list_path=config.save_submitted_files_list, save_manifest_tar_path=config.save_manifest_tar, base_paths=base_paths, explicit_files=sbom_files_to_submit)
output_handler.handle_output(diff)

elif config.enable_diff and force_api_mode:
# User requested diff mode but no manifest files were detected
elif (config.enable_diff or force_diff_mode) and force_api_mode:
# User requested diff mode but no manifest files were detected - this should not happen with new logic
# but keeping as a safety net
log.warning("--enable-diff was specified but no supported manifest files were detected in the changed files. Falling back to full scan mode.")
log.info("Creating Socket Report (full scan)")
serializable_params = {
Expand Down
Loading