Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 64 additions & 18 deletions src/rsmetacheck/cli.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,76 @@
import argparse
import os
from pathlib import Path
from rsmetacheck.run_somef import run_somef_batch, run_somef_single, ensure_somef_configured

from rsmetacheck.run_analyzer import run_analysis
from rsmetacheck.run_somef import (
ensure_somef_configured,
run_somef_batch,
run_somef_single,
)


def cli():
parser = argparse.ArgumentParser(description="Detect metadata pitfalls in software repositories using SoMEF.")
parser = argparse.ArgumentParser(
description="Detect metadata pitfalls in software repositories using SoMEF."
)
parser.add_argument(
"--input",
nargs="+",
required=True,
help="One or more: GitHub/GitLab URLs, JSON files containing repositories, OR existing SoMEF output files when using --skip-somef."
help="One or more: GitHub/GitLab URLs, JSON files containing repositories, OR existing SoMEF output files when using --skip-somef.",
)
parser.add_argument(
"--skip-somef",
action="store_true",
help="Skip SoMEF execution and analyze existing SoMEF output files directly. --input should point to SoMEF JSON files."
help="Skip SoMEF execution and analyze existing SoMEF output files directly. --input should point to SoMEF JSON files.",
)
parser.add_argument(
"--pitfalls-output",
default=os.path.join(os.getcwd(), "pitfalls_outputs"),
help="Directory to store pitfall JSON-LD files (default: ./pitfalls_outputs)."
help="Directory to store pitfall JSON-LD files (default: ./pitfalls_outputs).",
)
parser.add_argument(
"--somef-output",
default=os.path.join(os.getcwd(), "somef_outputs"),
help="Directory to store SoMEF output files (default: ./somef_outputs)."
help="Directory to store SoMEF output files (default: ./somef_outputs).",
)
parser.add_argument(
"--analysis-output",
default=os.path.join(os.getcwd(), "analysis_results.json"),
help="File path for summary results (default: ./analysis_results.json)."
help="File path for summary results (default: ./analysis_results.json).",
)
parser.add_argument(
"--threshold",
type=float,
default=0.8,
help="SoMEF confidence threshold (default: 0.8). Only used when running SoMEF."
help="SoMEF confidence threshold (default: 0.8). Only used when running SoMEF.",
)
parser.add_argument(
"-b",
"--branch",
help="Branch of the repository to analyze. Overrides the default branch. Only used when running SoMEF.",
)

parser.add_argument(
"-b", "--branch",
help="Branch of the repository to analyze. Overrides the default branch. Only used when running SoMEF."
"-c",
"--generate-codemeta",
action="store_true",
help="Generate codemeta files for each repository. Only used when running SoMEF.",
)

parser.add_argument(
"--verbose",
action="store_true",
help="Include both detected AND undetected pitfalls in the output JSON-LD."
help="Include both detected AND undetected pitfalls in the output JSON-LD.",
)

args = parser.parse_args()

if args.skip_somef:
print(f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files...")
print(
f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files..."
)

somef_json_paths = []
for json_path in args.input:
Expand All @@ -67,29 +84,58 @@ def cli():
return

print(f"Analyzing {len(somef_json_paths)} SoMEF output files...")
run_analysis(somef_json_paths, args.pitfalls_output, args.analysis_output, verbose=args.verbose)
run_analysis(
somef_json_paths,
args.pitfalls_output,
args.analysis_output,
verbose=args.verbose,
)

else:
ensure_somef_configured()

threshold = args.threshold
somef_output_dir = args.somef_output
generate_codemeta = args.generate_codemeta

print(f"Detected {len(args.input)} input(s):")
if generate_codemeta:
print(
"Codemeta generation is ENABLED. Codemeta files will be created for each repository."
)

for input_item in args.input:
if input_item.startswith("http://") or input_item.startswith("https://"):
print(f"Processing repository URL: {input_item}")
run_somef_single(input_item, somef_output_dir, threshold, branch=args.branch)
run_somef_single(
input_item,
somef_output_dir,
threshold,
branch=args.branch,
generate_codemeta=generate_codemeta,
)
elif os.path.exists(input_item):
print(f"Processing repositories from file: {input_item}")
run_somef_batch(input_item, somef_output_dir, threshold, branch=args.branch)
run_somef_batch(
input_item,
somef_output_dir,
threshold,
branch=args.branch,
generate_codemeta=generate_codemeta,
)
else:
print(f"Warning: Skipping invalid input (not a URL or existing file): {input_item}")
print(
f"Warning: Skipping invalid input (not a URL or existing file): {input_item}"
)

print(f"\nRunning analysis on outputs in {somef_output_dir}...")
run_analysis(somef_output_dir, args.pitfalls_output, args.analysis_output, verbose=args.verbose)
run_analysis(
somef_output_dir,
args.pitfalls_output,
args.analysis_output,
verbose=args.verbose,
)


# Run the command-line interface only when this file is executed as a script.
# The entry point is invoked exactly once.
if __name__ == "__main__":
    cli()
6 changes: 5 additions & 1 deletion src/rsmetacheck/detect_pitfalls_main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from pathlib import Path
from typing import Iterable, Union
from rsmetacheck.run_somef import CODEMETA_DEFAULT_NAME
from rsmetacheck.utils.pitfall_utils import extract_programming_languages
from rsmetacheck.utils.json_ld_utils import create_pitfall_jsonld, save_individual_pitfall_jsonld
from rsmetacheck.utils.somef_compat import normalize_somef_data
Expand Down Expand Up @@ -467,7 +468,10 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
if not input_dir.exists():
print(f"Error: Directory not found: {input_dir}")
return
json_files = list(input_dir.glob("*.json"))
json_files = [
f for f in input_dir.glob("*.json")
if not f.stem.endswith(CODEMETA_DEFAULT_NAME)
]
print(f"Found {len(json_files)} JSON files in {input_dir}")
else:
print("Error: No input directory or JSON file list provided.")
Expand Down
61 changes: 53 additions & 8 deletions src/rsmetacheck/run_somef.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
import json
import os
import subprocess

from pathlib import Path

# Base name (without extension) of the codemeta files SoMEF is asked to write.
# Single-repository runs use "<name>.json" inside the output directory; batch
# runs use "<base_name>_<name>_<idx>.json".
# NOTE(review): detect_pitfalls_main excludes JSON files whose stem *ends with*
# this name; the batch pattern has a trailing "_<idx>", so it would not match
# that filter -- confirm whether batch codemeta files are meant to be skipped.
CODEMETA_DEFAULT_NAME = "somef_generated_codemeta"


def ensure_somef_configured():
"""Run 'somef configure -a' only if it hasn't been configured yet."""
config_file = Path.home() / ".somef" / "config.json"
Expand All @@ -18,11 +20,14 @@ def ensure_somef_configured():
return False
return True

def run_somef(repo_url, output_file, threshold, branch=None):

def run_somef(repo_url, output_file, threshold, branch=None, codemeta_file=None):
"""Run SoMEF on a given repository and save results."""
cmd = ["somef", "describe", "-r", repo_url, "-o", output_file, "-t", str(threshold)]
if branch:
cmd.extend(["-b", branch])
if codemeta_file:
cmd.extend(["-c", codemeta_file])
try:
subprocess.run(cmd, check=True)
print(f"SoMEF finished for: {repo_url}")
Expand All @@ -31,16 +36,38 @@ def run_somef(repo_url, output_file, threshold, branch=None):
print(f"Error running SoMEF for {repo_url}: {e}")
return False

def run_somef_single(
    repo_url,
    output_dir="somef_outputs",
    threshold=0.8,
    branch=None,
    generate_codemeta=False,
):
    """Run SoMEF for a single repository.

    Args:
        repo_url: URL of the repository to describe.
        output_dir: Directory that receives the SoMEF output; created if it
            does not exist yet.
        threshold: Confidence threshold forwarded to SoMEF.
        branch: Optional branch to analyze instead of the default branch.
        generate_codemeta: When true, SoMEF is additionally asked to write a
            codemeta file named ``CODEMETA_DEFAULT_NAME + ".json"`` into
            ``output_dir``. When false, no codemeta path is passed on.

    Returns:
        ``output_dir`` if ``run_somef`` reported success, otherwise ``None``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The file name is fixed, so repeated calls with the same output_dir
    # target the same path.
    output_file = os.path.join(output_dir, "output_1.json")

    # Only build the codemeta path when it was requested; run_somef treats
    # None as "do not generate codemeta".
    codemeta_file = None
    if generate_codemeta:
        codemeta_file = os.path.join(output_dir, CODEMETA_DEFAULT_NAME + ".json")

    print(f"Running SoMEF for {repo_url}...")

    # SoMEF is invoked exactly once per call.
    success = run_somef(
        repo_url,
        output_file,
        threshold,
        branch,
        codemeta_file=codemeta_file,
    )
    return output_dir if success else None

def run_somef_batch(json_file, output_dir="somef_outputs", threshold=0.8, branch=None):

def run_somef_batch(
json_file,
output_dir="somef_outputs",
threshold=0.8,
branch=None,
generate_codemeta=False,
):
"""Run SoMEF for all repositories listed in a JSON file."""
os.makedirs(output_dir, exist_ok=True)

Expand All @@ -57,8 +84,26 @@ def run_somef_batch(json_file, output_dir="somef_outputs", threshold=0.8, branch

for idx, repo_url in enumerate(repos, start=1):
output_file = os.path.join(output_dir, f"{base_name}_output_{idx}.json")
codemeta_file = os.path.join(
output_dir, f"{base_name}_{CODEMETA_DEFAULT_NAME}_{idx}.json"
)
print(f"[{idx}/{len(repos)}] {repo_url}")
run_somef(repo_url, output_file, threshold, branch)
run_somef(
repo_url,
output_file,
threshold,
branch,
codemeta_file=codemeta_file if generate_codemeta else None,
)

print(f"Completed SoMEF for {base_name}. Results in {output_dir}")
return True
return True

success = run_somef(
repo_url,
output_file,
threshold,
branch,
codemeta_file=codemeta_file if generate_codemeta else None,
)
return output_dir if success else None
72 changes: 72 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Unit tests to verify CLI behavior for codemeta generation."""

import importlib
from unittest.mock import MagicMock

cli_module = importlib.import_module("rsmetacheck.cli")


REPO_URL = "https://github.com/SoftwareUnderstanding/sw-metadata-bot"


def test_cli_with_generate_codemeta_adds_codemeta_output(monkeypatch, tmp_path):
    """Check that --generate-codemeta makes the SoMEF command request a codemeta file."""
    out_dir = tmp_path / "somef_outputs"
    codemeta_path = str(out_dir / "somef_generated_codemeta.json")
    argv = [
        "rsmetacheck",
        "--input",
        REPO_URL,
        "--somef-output",
        str(out_dir),
        "--generate-codemeta",
    ]

    fake_analysis = MagicMock()
    fake_subprocess_run = MagicMock()

    # Replace everything that would actually configure/run SoMEF or the analyzer.
    monkeypatch.setattr("sys.argv", argv)
    monkeypatch.setattr(cli_module, "ensure_somef_configured", lambda: True)
    monkeypatch.setattr(cli_module, "run_analysis", fake_analysis)
    monkeypatch.setattr("rsmetacheck.run_somef.subprocess.run", fake_subprocess_run)

    cli_module.cli()

    # The first positional argument of the recorded subprocess.run call is
    # the command list handed to SoMEF.
    somef_cmd = fake_subprocess_run.call_args.args[0]
    assert somef_cmd[:2] == ["somef", "describe"]
    assert "-c" in somef_cmd
    assert codemeta_path in somef_cmd

    fake_analysis.assert_called_once()


def test_cli_without_generate_codemeta_keeps_default_behavior(monkeypatch, tmp_path):
    """Check that a plain CLI call does not ask SoMEF for codemeta output."""
    out_dir = tmp_path / "somef_outputs"
    argv = [
        "rsmetacheck",
        "--input",
        REPO_URL,
        "--somef-output",
        str(out_dir),
    ]

    fake_analysis = MagicMock()
    fake_subprocess_run = MagicMock()

    # Replace everything that would actually configure/run SoMEF or the analyzer.
    monkeypatch.setattr("sys.argv", argv)
    monkeypatch.setattr(cli_module, "ensure_somef_configured", lambda: True)
    monkeypatch.setattr(cli_module, "run_analysis", fake_analysis)
    monkeypatch.setattr("rsmetacheck.run_somef.subprocess.run", fake_subprocess_run)

    cli_module.cli()

    # The first positional argument of the recorded subprocess.run call is
    # the command list handed to SoMEF.
    somef_cmd = fake_subprocess_run.call_args.args[0]
    assert somef_cmd[:2] == ["somef", "describe"]
    assert "-c" not in somef_cmd

    fake_analysis.assert_called_once()
Loading