POUTINE _____________________________________________________________________________________________

./poutine analyze_local . --format json > tools_output/poutine/findings.json

In [12]:
import json
from pathlib import Path
from collections import defaultdict

# Input and output paths
input_file = Path("../tools_output/poutine/findings.json")
output_dir = Path("../tools_output/poutine/workflow_with_issues")
output_dir.mkdir(parents=True, exist_ok=True)

# Load raw findings
with open(input_file, "r", encoding="utf-8") as f:
    data = json.load(f)

all_findings = data.get("findings", [])

# Group findings by workflow file
grouped = defaultdict(list)
for finding in all_findings:
    meta = finding.get("meta", {})
    path = meta.get("path")
    if not path:
        continue

    workflow_file = Path(path).name

    # Extract fields exactly as reported
    grouped[workflow_file].append({
        "rule_id": finding.get("rule_id", ""),
        "line": str(meta["line"]) if "line" in meta else "",
        "column": "",  # poutine does not give column
        "job": meta.get("job", ""),
        "step": meta.get("step", ""),
        "details": meta.get("details", ""),
        "event_triggers": meta.get("event_triggers", []),
    })

# Save per-workflow JSON files for workflows with issues
saved_files = 0
for workflow_file, findings in grouped.items():
    rule_summary = {}
    for f in findings:
        rule_id = f.get("rule_id")
        if rule_id:
            rule_summary[rule_id] = rule_summary.get(rule_id, 0) + 1

    output_data = {
        "workflow": workflow_file,
        "tool": "poutine",
        "summary": {
            "total_findings": len(findings),
            "by_rule": rule_summary
        },
        "findings": findings
    }

    output_path = output_dir / f"{workflow_file}.json"
    with open(output_path, "w", encoding="utf-8") as f_out:
        json.dump(output_data, f_out, indent=2)
    saved_files += 1

print(f"Saved {saved_files} workflow files to {output_dir}")


Saved 91 workflow files to ../tools_output/poutine/workflow_with_issues


In [13]:
import json
from pathlib import Path

with open("../tools_output/poutine/findings.json") as f:
    data = json.load(f)

for f in data["findings"]:
    if f["rule_id"] == "default_permissions_on_risky_events":
        print(f)


{'meta': {'event_triggers': ['issue_comment'], 'path': '.github/workflows/electron_electron__issue-commented.yml'}, 'purl': 'pkg:github/madjda32-del/github-actions-security', 'rule_id': 'default_permissions_on_risky_events'}
{'meta': {'event_triggers': ['issue_comment'], 'path': '.github/workflows/excalidraw_excalidraw__autorelease-preview.yml'}, 'purl': 'pkg:github/madjda32-del/github-actions-security', 'rule_id': 'default_permissions_on_risky_events'}
{'meta': {'event_triggers': ['issue_comment'], 'path': '.github/workflows/facebook_facebook-android-sdk__needs-attention.yml'}, 'purl': 'pkg:github/madjda32-del/github-actions-security', 'rule_id': 'default_permissions_on_risky_events'}
{'meta': {'event_triggers': ['issue_comment'], 'path': '.github/workflows/facebook_facebook-ios-sdk__needs-attention.yml'}, 'purl': 'pkg:github/madjda32-del/github-actions-security', 'rule_id': 'default_permissions_on_risky_events'}
{'meta': {'event_triggers': ['issue_comment', 'workflow_dispatch'], 'pat

ACTIONLINT ____________________________________________________________________________________

./actionlint .github/workflows/*.yml > tools_output/actionlint/findings.txt

In [1]:
import os
import re
import json
import subprocess
from pathlib import Path
from collections import defaultdict

# Paths
WORKFLOW_DIR = "../.github/workflows"
ACTIONLINT = "./../../tools/actionlint/actionlint"
OUTPUT_DIR = Path("../tools_output/actionlint/workflow_with_issues")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

total_files_with_findings = 0

for file in sorted(os.listdir(WORKFLOW_DIR)):
    if file.endswith((".yml", ".yaml")):
        file_path = os.path.join(WORKFLOW_DIR, file)

        # Run actionlint on a single file
        try:
            completed = subprocess.run(
                [ACTIONLINT, file_path],
                capture_output=True,
                text=True
            )
            stdout = completed.stdout.strip()
            if not stdout:
                continue  # No findings, skip

            findings = []
            for line in stdout.splitlines():
                # Example: ../.github/workflows/foo.yml:8:1: Some issue
                match = re.match(rf".+?{re.escape(file)}:(\d+):(\d+):\s+(.*)", line)
                if match:
                    line_num, col_num, message = match.groups()
                    findings.append({
                        "line": int(line_num),
                        "column": int(col_num),
                        "message": message.strip()
                    })

            if findings:
                output_data = {
                    "workflow": file,
                    "tool": "actionlint",
                    "summary": {
                        "total_findings": len(findings)
                    },
                    "findings": findings
                }

                output_path = OUTPUT_DIR / f"{file}.json"
                with open(output_path, "w", encoding="utf-8") as f_out:
                    json.dump(output_data, f_out, indent=2)
                total_files_with_findings += 1

        except Exception as e:
            print(f"Error processing {file}: {e}")

print(f"Saved findings for {total_files_with_findings} workflow files to {OUTPUT_DIR}")


Saved findings for 162 workflow files to ../tools_output/actionlint/workflow_with_issues


FRIZBEE _____________________________________________________________________________

./frizbee actions -n > tools_output/frizbee/findings.txt

In [72]:
from pathlib import Path

# Input folder and output file
input_dir = Path("../tools_output/frizbee")
output_file = Path("../tools_output/frizbee/findings.txt")

# Collect all unique lines from all txt files
all_lines = set()
for file_path in sorted(input_dir.glob("*.txt")):
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            all_lines.add(line.rstrip())

# Write the unified lines to findings.txt
with open(output_file, "w", encoding="utf-8") as f_out:
    for line in sorted(all_lines):
        f_out.write(line + "\n")

print(f"[‚úì] Union completed. {len(all_lines)} unique lines saved to {output_file}")


[‚úì] Union completed. 369 unique lines saved to ../tools_output/frizbee/findings.txt


In [42]:
import re
import json
from pathlib import Path

# Paths
original_dir = Path("../.github/workflows")
modified_dir = Path("../tools_output/frizbee/modified_workflows")
output_dir = Path("../tools_output/frizbee/workflow_with_issues")
output_dir.mkdir(parents=True, exist_ok=True)

# Pattern to detect pinned actions with hash and version comment
pattern = re.compile(r"uses:\s+([\w\-./]+)@([a-f0-9]{10,})\s+#\s*(\S+)", re.IGNORECASE)

# Gather workflow names directly from modified_workflows directory
modified_files = [p.name for p in modified_dir.glob("*.yml")] + [p.name for p in modified_dir.glob("*.yaml")]

saved = 0

for wf_name in modified_files:
    original_path = original_dir / wf_name
    modified_path = modified_dir / wf_name

    if not original_path.exists() or not modified_path.exists():
        print(f"[!] Skipped: {wf_name} (missing original or modified)")
        continue

    original_lines = original_path.read_text(encoding="utf-8").splitlines()
    modified_lines = modified_path.read_text(encoding="utf-8").splitlines()

    findings = []

    # Compare line by line
    for i, (orig, mod) in enumerate(zip(original_lines, modified_lines)):
        match = pattern.search(mod)
        if match:
            action, sha, tag = match.groups()
            expected_unpinned = f"{action}@{tag}"
            actual_pinned = f"{action}@{sha}"
            if expected_unpinned in orig:
                findings.append({
                    "rule": "unpinned-github-actions",
                    "original": expected_unpinned,
                    "pinned": actual_pinned,
                    "line": i + 1
                })

    if not findings:
        continue

    summary = {
        "total_findings": len(findings),
        "by_rule": {"unpinned-github-actions": len(findings)}
    }

    result = {
        "workflow": wf_name,
        "tool": "frizbee",
        "summary": summary,
        "findings": findings
    }

    with open(output_dir / f"{wf_name}.json", "w", encoding="utf-8") as out_f:
        json.dump(result, out_f, indent=2)
        saved += 1

print(f"Saved {saved} normalized workflow result files to {output_dir}")


Saved 384 normalized workflow result files to ../tools_output/frizbee/workflow_with_issues


SCHARF _______________________________________________________________________________

./scharf audit . > tools_output/scharf/findings.txt

In [5]:
import re
import json
from pathlib import Path
from collections import defaultdict

# Input/output paths
input_file = Path("../tools_output/scharf/findings.txt")
output_dir = Path("../tools_output/scharf/workflow_with_issues")
output_dir.mkdir(parents=True, exist_ok=True)

# Regex to remove ANSI escape codes
ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")

# Read and clean lines
lines = []
with open(input_file, "r", encoding="utf-8") as f:
    for raw in f:
        clean = ansi_escape.sub("", raw).strip()
        if clean:
            lines.append(clean)

# Initialize parsing
grouped = defaultdict(list)
current_workflow = None

# Go through each line
for line in lines:
    # Detect workflow line
    if line.endswith(".yml") or line.endswith(".yaml"):
        current_workflow = Path(line).name
        continue

    # Detect and parse issue line (new format)
    if current_workflow and "[Line" in line:
        match = re.search(r"\[Line (\d+), Col (\d+)\] (.+)", line)
        if match:
            line_num, col_num, message = match.groups()
            grouped[current_workflow].append({
                "line": int(line_num),
                "column": int(col_num),
                "message": message
            })
        else:
            print(f"[NO MATCH] {line}")

# Save per-workflow JSONs
for workflow_file, findings in grouped.items():
    output_data = {
        "workflow": workflow_file,
        "tool": "scharf",
        "summary": {
            "total_findings": len(findings),
            "by_rule": {
                "unpinned-github-actions": len(findings)
            }
        },
        "findings": findings
    }

    out_path = output_dir / f"{workflow_file}.json"
    with open(out_path, "w", encoding="utf-8") as f_out:
        json.dump(output_data, f_out, indent=2)

print(f"Saved {len(grouped)} workflow result files to {output_dir}")


Saved 402 workflow result files to ../tools_output/scharf/workflow_with_issues


PINNY ___________________________________________________________________________

./pinny actions pin --dry-run > tools_output/pinny/findings.txt

In [8]:
import json
from pathlib import Path
from collections import defaultdict

# Paths
FINDINGS_FILE = Path("../tools_output/pinny/findings.txt")
WORKFLOWS_DIR = Path("../.github/workflows")
OUTPUT_DIR = Path("../tools_output/pinny/workflow_with_issues")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Step 1: Extract all unpinned actions from findings.txt
unresolved_refs = set()
with open(FINDINGS_FILE, "r", encoding="utf-8") as f:
    for line in f:
        if "Branch references are being used" in line or "No exact match found for ref" in line:
            parts = line.strip().split(":")
            if parts:
                action = parts[-1].strip().strip("`")
                if action:
                    unresolved_refs.add(action)

print(f"Found {len(unresolved_refs)} unpinned actions in findings.txt")

# Step 2: Search these refs in all workflow files
findings_by_file = defaultdict(list)

yml_files = list(WORKFLOWS_DIR.glob("*.yml")) + list(WORKFLOWS_DIR.glob("*.yaml"))
for yml_file in yml_files:
    with open(yml_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    for idx, line in enumerate(lines, 1):
        for ref in unresolved_refs:
            if ref in line:
                findings_by_file[yml_file.name].append({
                    "rule_id": "unpinned-github-actions",
                    "meta": {
                        "action": ref,
                        "line_snippet": line.strip(),
                        "line_number": idx
                    }
                })

# Step 3: Save per-workflow normalized output
for wf_file, findings in findings_by_file.items():
    summary = {
        "total_findings": len(findings),
        "by_rule": {"unpinned-github-actions": len(findings)}
    }

    result = {
        "workflow": wf_file,
        "tool": "pinny",
        "summary": summary,
        "findings": findings
    }

    with open(OUTPUT_DIR / f"{wf_file}.json", "w", encoding="utf-8") as f_out:
        json.dump(result, f_out, indent=2)

print(f"Saved {len(findings_by_file)} workflow result files to {OUTPUT_DIR}")


Found 18 unpinned actions in findings.txt
Saved 30 workflow result files to ../tools_output/pinny/workflow_with_issues


ZIZMOR ________________________________________________________________________________

./zizmor .github/workflows > tools_output/zizmor/findings.txt

In [8]:
import os
import subprocess
from pathlib import Path

WORKFLOW_DIR = "../.github/workflows"
ZIZMOR = "./../../tools/zizmor/zizmor"
RAW_DIR = Path("../tools_output/zizmor/raw_logs")
RAW_DIR.mkdir(parents=True, exist_ok=True)

for file in sorted(os.listdir(WORKFLOW_DIR)):
    if not file.endswith((".yml", ".yaml")):
        continue

    file_path = os.path.join(WORKFLOW_DIR, file)
    out_path = RAW_DIR / f"{file}.txt"

    try:
        with open(out_path, "w", encoding="utf-8") as out:
            subprocess.run(
                [ZIZMOR, file_path],
                stdout=out,
                stderr=subprocess.DEVNULL,
                text=True
            )
        print(f"‚úÖ Saved findings: {file}")
    except Exception as e:
        print(f"‚ùå Failed on {file}: {e}")


‚úÖ Saved findings: AUTOMATIC1111_stable-diffusion-webui__on_pull_request.yaml
‚úÖ Saved findings: AUTOMATIC1111_stable-diffusion-webui__run_tests.yaml
‚úÖ Saved findings: AUTOMATIC1111_stable-diffusion-webui__warns_merge_master.yml
‚úÖ Saved findings: Chalarangelo_30-seconds-of-code__deploy-production.yml
‚úÖ Saved findings: Chalarangelo_30-seconds-of-code__label.yml
‚úÖ Saved findings: Chalarangelo_30-seconds-of-code__stale.yml
‚úÖ Saved findings: Chalarangelo_30-seconds-of-code__test.yml
‚úÖ Saved findings: EbookFoundation_free-programming-books__check-urls.yml
‚úÖ Saved findings: EbookFoundation_free-programming-books__comment-pr.yml
‚úÖ Saved findings: EbookFoundation_free-programming-books__detect-conflicting-prs.yml
‚úÖ Saved findings: EbookFoundation_free-programming-books__fpb-lint.yml
‚úÖ Saved findings: EbookFoundation_free-programming-books__issues-pinner.yml
‚úÖ Saved findings: EbookFoundation_free-programming-books__rtl-ltr-linter.yml
‚úÖ Saved findings: EbookFoundation_f

In [6]:
import re
import json
from pathlib import Path

# Paths
input_dir = Path("../tools_output/zizmor/raw_logs")
output_dir = Path("../tools_output/zizmor/workflow_with_issues")
output_dir.mkdir(parents=True, exist_ok=True)

# Regex patterns
ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")  # standard ANSI colors
osc8_link = re.compile(r"\x1b]8;;[^\x1b]*\x1b\\(.*?)\x1b]8;;\x1b\\")  # hyperlinks
finding_pattern = re.compile(r"(info|warning|error)\[(.+?)\]: (.+)")
# FIXED: allow optional "../" before .github
location_pattern = re.compile(r"-->\s+(?:\.\./)?\.github/workflows/(.+?):(\d+):(\d+)")
note_pattern = re.compile(r"= note: (.+)", re.IGNORECASE)

saved_files = 0

for input_file in input_dir.glob("*.txt"):
    findings = []
    current = {}
    workflow_file = input_file.stem

    with open(input_file, encoding="utf-8") as f:
        raw_lines = f.readlines()

    # Strip ANSI & hyperlinks
    clean_lines = []
    for line in raw_lines:
        line = ansi_escape.sub("", line)
        line = osc8_link.sub(r"\1", line)
        clean_lines.append(line.strip())

    if any("No findings to report" in line for line in clean_lines):
        continue

    for line in clean_lines:
        if not line:
            continue

        m = finding_pattern.match(line)
        if m:
            if current.get("rule_id"):
                findings.append(current)

            level, rule_id, message = m.groups()
            rule_id = osc8_link.sub(r"\1", rule_id)
            current = {
                "rule_id": rule_id.strip(),
                "level": level,
                "message": message.strip(),
                "workflow": workflow_file,
                "line": None,
                "column": None,
                "note": None
            }
            continue

        m = location_pattern.match(line)
        if m:
            _, line_num, col_num = m.groups()
            current["line"] = int(line_num)
            current["column"] = int(col_num)
            continue

        m = note_pattern.match(line)
        if m:
            current["note"] = m.group(1).strip()
            continue

    if current.get("rule_id"):
        findings.append(current)

    if not findings:
        continue

    rule_summary = {}
    for f in findings:
        rule_summary[f["rule_id"]] = rule_summary.get(f["rule_id"], 0) + 1

    result = {
        "workflow": workflow_file,
        "tool": "zizmor",
        "summary": {
            "total_findings": len(findings),
            "by_rule": rule_summary
        },
        "findings": findings
    }

    out_path = output_dir / f"{workflow_file}.json"
    with open(out_path, "w", encoding="utf-8") as out:
        json.dump(result, out, indent=2)

    saved_files += 1
    print(f"‚úÖ {workflow_file} ‚Üí {len(findings)} findings")

print(f"\n‚úÖ Parsed {saved_files} workflow result files to {output_dir}")


‚úÖ huggingface_transformers__self-comment-ci.yml ‚Üí 36 findings
‚úÖ open-webui_open-webui__release-pypi.yml ‚Üí 2 findings
‚úÖ excalidraw_excalidraw__sentry-production.yml ‚Üí 2 findings
‚úÖ oracle_opengrok__apiary.yml ‚Üí 2 findings
‚úÖ alibaba_ice__release.yml ‚Üí 4 findings
‚úÖ f_awesome-chatgpt-prompts__auto_commands.yml ‚Üí 1 findings
‚úÖ axios_axios__pr-guard.yml ‚Üí 2 findings
‚úÖ nodejs_node__comment-labeled.yml ‚Üí 1 findings
‚úÖ oracle_graal__micronaut.yml ‚Üí 2 findings
‚úÖ Significant-Gravitas_AutoGPT__classic-autogpt-docker-ci.yml ‚Üí 10 findings
‚úÖ awslabs_amazon-ecr-credential-helper__build.yaml ‚Üí 8 findings
‚úÖ facebook_mcrouter__build.yml ‚Üí 2 findings
‚úÖ n8n-io_n8n__benchmark-nightly.yml ‚Üí 1 findings
‚úÖ ossu_computer-science__delete-empty-issues.yml ‚Üí 2 findings
‚úÖ freeCodeCamp_freeCodeCamp__github-no-web-commits.yml ‚Üí 2 findings
‚úÖ oracle_graal__main.yml ‚Üí 4 findings
‚úÖ facebook_react-native__create-release.yml ‚Üí 4 findings
‚úÖ vercel_next.js__rs

SCORECARD ___________________________________________________________________________________

./scorecard --local=. > tools_output/scorecard/findings.txt

In [4]:
import os
import shutil
import subprocess
import json
from pathlib import Path
from tqdm import tqdm

# Paths
WORKFLOW_DIR = Path("../.github/workflows")
TEMP_REPO_DIR = Path("scorecard_tmp_repo")
FINDINGS_PATH = Path("../tools_output/scorecard/findings_all.json")
FINDINGS_PATH.parent.mkdir(parents=True, exist_ok=True)

# Clean previous run
if TEMP_REPO_DIR.exists():
    shutil.rmtree(TEMP_REPO_DIR)
TEMP_REPO_DIR.mkdir(parents=True)
(TEMP_REPO_DIR / ".github" / "workflows").mkdir(parents=True, exist_ok=True)

all_workflow_results = []

yml_files = list(WORKFLOW_DIR.glob("*.yml")) + list(WORKFLOW_DIR.glob("*.yaml"))
print(f"Running Scorecard simulation on {len(yml_files)} workflows...")

for wf in tqdm(yml_files, desc="Simulating"):
    # Clean temp workflows folder
    temp_wf_dir = TEMP_REPO_DIR / ".github" / "workflows"
    for f in temp_wf_dir.glob("*"):
        f.unlink()

    # Copy workflow into temp repo
    temp_wf_path = temp_wf_dir / wf.name
    shutil.copy(wf, temp_wf_path)

    # Run Scorecard
    result = subprocess.run(
        ["../../tools/scorecard/scorecard", f"--local={TEMP_REPO_DIR}", "--show-details", "--format=json"],
        capture_output=True,
        text=True
    )

    try:
        json_data = json.loads(result.stdout)
    except json.JSONDecodeError:
        continue

    json_data["workflow"] = wf.name
    all_workflow_results.append(json_data)

# Save all results to a single JSON file
with open(FINDINGS_PATH, "w", encoding="utf-8") as f:
    json.dump(all_workflow_results, f, indent=2)

print(f"[‚úì] All scorecard workflow results saved to: {FINDINGS_PATH}")


Running Scorecard simulation on 596 workflows...


Simulating:   0%|          | 0/596 [00:00<?, ?it/s]

Simulating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 596/596 [03:31<00:00,  2.82it/s]

[‚úì] All scorecard workflow results saved to: ../tools_output/scorecard/findings_all.json





In [5]:
import json
import re
from pathlib import Path
from collections import defaultdict

# Paths
input_file = Path("../tools_output/scorecard/findings_all.json")
output_dir = Path("../tools_output/scorecard/workflow_with_issues1")
output_dir.mkdir(parents=True, exist_ok=True)

# Relevant rules
relevant_rules = {
    "Dangerous-Workflow",
    "Pinned-Dependencies",
    "Token-Permissions",
    "SAST",
}

# Regex to capture line numbers like ":171"
line_regex = re.compile(r":(\d+)\b")

with open(input_file, "r", encoding="utf-8") as f:
    workflows_data = json.load(f)

saved = 0
for wf in workflows_data:
    workflow_name = wf.get("workflow")
    checks = wf.get("checks", [])
    findings = []

    for check in checks:
        name = check.get("name")
        score = check.get("score")
        if name in relevant_rules and score is not None and score < 10 and score != -1:
            raw_details = check.get("details") or []
            affected_lines = []
            for detail in raw_details:
                for m in line_regex.finditer(detail):
                    affected_lines.append(int(m.group(1)))

            findings.append({
                "rule": name,
                "score": score,
                "reason": check.get("reason"),
                "lines": sorted(set(affected_lines)),
                "raw_details": raw_details,
                "documentation": check.get("documentation", {})
            })

    if not findings:
        continue

    by_rule = defaultdict(int)
    for f in findings:
        by_rule[f["rule"]] += 1

    result = {
        "workflow": workflow_name,
        "tool": "scorecard",
        "summary": {
            "total_findings": len(findings),
            "by_rule": dict(by_rule)
        },
        "findings": findings
    }

    out_path = output_dir / f"{workflow_name}.json"
    with open(out_path, "w", encoding="utf-8") as f_out:
        json.dump(result, f_out, indent=2)
        saved += 1

print(f"Saved {saved} workflow files to {output_dir}")


Saved 593 workflow files to ../tools_output/scorecard/workflow_with_issues1


SEMGREP _________________________________________________________________________________________

semgrep --config p/github-actions .github/workflows --json > tools_output/semgrep/findings.json

In [88]:
import json
from pathlib import Path
from collections import defaultdict

# Input and output paths
input_file = Path("../tools_output/semgrep/findings.json")
workflow_output_dir = Path("../tools_output/semgrep/workflow_with_issues")
workflow_output_dir.mkdir(parents=True, exist_ok=True)

# Load Semgrep findings JSON
with open(input_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# Group findings by file (workflow)
grouped = defaultdict(list)
for finding in data.get("results", []):
    path = Path(finding.get("path", ""))
    if not path.name:
        continue

    workflow_name = path.name
    grouped[workflow_name].append(finding)

# Normalize and save
for wf_name, findings in grouped.items():
    structured = []
    rule_counts = defaultdict(int)

    for f in findings:
        rule_id = f.get("check_id", "unknown").split('.')[-1]
        rule_counts[rule_id] += 1

        structured.append({
            "rule": rule_id,
            "line": f.get("start", {}).get("line"),
            "code": f.get("extra", {}).get("lines"),
            "note": f.get("extra", {}).get("message"),
            "documentation": f.get("extra", {}).get("shortlink")
        })

    output_data = {
        "workflow": wf_name,
        "tool": "semgrep",
        "summary": {
            "total_findings": len(structured),
            "by_rule": dict(rule_counts)
        },
        "findings": structured
    }

    with open(workflow_output_dir / f"{wf_name}.json", "w", encoding="utf-8") as f_out:
        json.dump(output_data, f_out, indent=2)

print(f"[‚úì] Saved {len(list(workflow_output_dir.glob('*.json')))} normalized workflow result files to {workflow_output_dir}")


[‚úì] Saved 55 normalized workflow result files to ../tools_output/semgrep/workflow_with_issues


GGSHIELD _______________________________________________________________________________________

./ggshield secret scan --all-secrets path .github/workflows/ --recursive --json > tools_output/ggshield/findings.json

In [11]:
import json
from pathlib import Path

# Paths
input_path = Path("../tools_output/ggshield/findings.json")
output_dir = Path("../tools_output/ggshield/workflow_with_issues")
output_dir.mkdir(parents=True, exist_ok=True)

# Load JSON data
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)

saved = 0

# Extract and normalize
for entity in data.get("entities_with_incidents", []):
    incidents = entity.get("incidents", [])
    if not incidents:
        continue

    filename = Path(entity["filename"]).name
    findings = []

    for inc in incidents:
        rule = inc.get("policy", "secret-detected")
        documentation = inc.get("detector_documentation")
        note = inc.get("type")

        for occ in inc.get("occurrences", []):
            findings.append({
                "rule": rule,
                "line": occ.get("line_start"),
                "code": occ.get("match"),
                "note": note,
                "documentation": documentation
            })

    if not findings:
        continue

    # Build summary by rule
    by_rule = {}
    for finding in findings:
        r = finding["rule"]
        by_rule[r] = by_rule.get(r, 0) + 1

    result = {
        "workflow": filename,
        "tool": "ggshield",
        "summary": {
            "total_findings": len(findings),
            "by_rule": by_rule
        },
        "findings": findings
    }

    with open(output_dir / f"{filename}.json", "w", encoding="utf-8") as out_f:
        json.dump(result, out_f, indent=2)
        saved += 1

print(f"[‚úì] Saved {saved} normalized workflow result files to {output_dir}")


[‚úì] Saved 1 normalized workflow result files to ../tools_output/ggshield/workflow_with_issues


In [27]:
import subprocess
import csv
from pathlib import Path

# Setup
TOOL = "ggshield"
WORKFLOW_DIR = Path("../.github/workflows").resolve()
GGSHIELD_PATH = Path("../../tools/ggshield/ggshield").resolve()
OUTPUT_CSV = f"{TOOL}_issue_status.csv"

results = []
issue_count = 0

workflow_files = list(WORKFLOW_DIR.glob("*.yml")) + list(WORKFLOW_DIR.glob("*.yaml"))
print(f"[INFO] Found {len(workflow_files)} workflow files.\n")

for wf in sorted(workflow_files):
    print(f"[‚ñ∂] Scanning: {wf.name}")

    try:
        completed = subprocess.run(
            [str(GGSHIELD_PATH), "secret", "scan", "--all-secrets", "path", str(wf)],
            capture_output=True,
            text=True
        )

        stdout = completed.stdout.strip()
        stderr = completed.stderr.strip()

        if "> " in stdout and "secret detected" in stdout:
            status = "issue"
            issue_count += 1
        else:
            status = "clean"

        results.append((wf.name, status))
        print(f"    [‚úì] {status.upper()}")

    except Exception as e:
        print(f"    [‚úó] Error on {wf.name}: {e}")
        results.append((wf.name, "error"))

# Save to CSV
with open(OUTPUT_CSV, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["file_name", "status"])
    writer.writerows(results)

print(f"\n[‚úÖ] Total workflows with issues: {issue_count}")
print(f"[üìÅ] CSV saved to: {OUTPUT_CSV}")


[INFO] Found 596 workflow files.

[‚ñ∂] Scanning: AUTOMATIC1111_stable-diffusion-webui__on_pull_request.yaml
    [‚úì] CLEAN
[‚ñ∂] Scanning: AUTOMATIC1111_stable-diffusion-webui__run_tests.yaml
    [‚úì] CLEAN
[‚ñ∂] Scanning: AUTOMATIC1111_stable-diffusion-webui__warns_merge_master.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: Chalarangelo_30-seconds-of-code__deploy-production.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: Chalarangelo_30-seconds-of-code__label.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: Chalarangelo_30-seconds-of-code__stale.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: Chalarangelo_30-seconds-of-code__test.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: EbookFoundation_free-programming-books__check-urls.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: EbookFoundation_free-programming-books__comment-pr.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: EbookFoundation_free-programming-books__detect-conflicting-prs.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: EbookFoundation_free-programming-books__fpb-lint.yml
    [‚úì] CLEAN
[‚ñ∂] Scanning: EbookFoundat

In [40]:
import re
import json
from pathlib import Path

modified_dir = Path("../tools_output/pinny/modified_workflow")
output_dir = Path("../tools_output/pinny/workflow_with_issues")
output_dir.mkdir(parents=True, exist_ok=True)

sha_pattern = re.compile(r"@([0-9a-fA-F]{7,40})")

saved = 0
skipped = []

files = list(modified_dir.glob("*.yml")) + list(modified_dir.glob("*.yaml"))

for modified_path in files:
    modified_lines = modified_path.read_text(encoding="utf-8").splitlines()
    findings = []

    for i, line in enumerate(modified_lines, start=1):
        if "uses:" in line and "@" in line:
            sha_match = sha_pattern.search(line)
            if sha_match:
                findings.append({
                    "rule": "unpinned-github-actions",
                    "pinned": line.strip(),
                    "line": i
                })

    if findings:
        # Normalize workflow name to always end with .yml
        workflow_name = modified_path.name
        if workflow_name.endswith(".yaml"):
            workflow_name = workflow_name[:-5] + ".yml"

        result = {
            "workflow": workflow_name,
            "tool": "pinny",
            "summary": {
                "total_findings": len(findings),
                "by_rule": {"unpinned-github-actions": len(findings)}
            },
            "findings": findings
        }

        # Save file with workflow_name + ".json"
        save_name = workflow_name + ".json"
        with open(output_dir / save_name, "w", encoding="utf-8") as out_f:
            json.dump(result, out_f, indent=2)

        saved += 1
    else:
        skipped.append(modified_path.name)

print(f"Saved {saved} normalized workflow result files to {output_dir}")
print(f"Skipped {len(skipped)} workflows with no findings:")
for name in skipped[:20]:
    print(f" - {name}")


Saved 544 normalized workflow result files to ../tools_output/pinny/workflow_with_issues
Skipped 41 workflows with no findings:
 - electron_electron__pipeline-electron-build-and-test-and-nan.yml
 - huggingface_transformers__self-past-caller.yml
 - Significant-Gravitas_AutoGPT__repo-pr-enforce-base-branch.yml
 - huggingface_transformers__self-push-amd-mi210-caller.yml
 - huggingface_transformers__self-scheduled-amd-caller.yml
 - facebook_react__shared_cleanup_stale_branch_caches.yml
 - microsoft_PowerToys__package-submissions.yml
 - nodejs_node__update-release-links.yml
 - microsoft_TypeScript__pr-modified-files.yml
 - IBM_sarama__fvt-main.yml
 - Significant-Gravitas_AutoGPT__repo-stats.yml
 - electron_electron__pipeline-electron-build-and-test.yml
 - vercel_next.js__rspack-nextjs-dev-integration-tests.yml
 - n8n-io_n8n__test-workflows-nightly.yml
 - nodejs_node__label-flaky-test-issue.yml
 - huggingface_transformers__self-nightly-caller.yml
 - vercel_next.js__turbopack-nextjs-build-int