From 1d6b02b083d1808c8d5f15ce3f0bf3eaefc92da7 Mon Sep 17 00:00:00 2001
From: Jeffrin
Date: Sat, 28 Mar 2026 16:38:05 +0530
Subject: [PATCH] Fix detect-secrets API usage for adhoc text scanning

---
NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch. Line counts were checked against
every @@ hunk header and the diffstat; indentation widths (4 spaces) and the
spacing before inline "#" comments are inferred -- confirm against the target
tree before applying, or apply with --ignore-whitespace.
NOTE(review): pyproject.toml selects hatchling as the build backend, while
MANIFEST.in is a setuptools convention and is presumably not read by
hatchling -- confirm that policies/*.yaml end up in the built sdist/wheel.

 MANIFEST.in                 |  3 +++
 pyproject.toml              | 12 ++++++++++++
 shadowaudit/cli.py          | 12 ++++++++++++
 shadowaudit/core/secrets.py | 15 +++++++--------
 tests/test_cli_scan.py      | 23 +++++++++++++++++++++++
 5 files changed, 57 insertions(+), 8 deletions(-)
 create mode 100644 MANIFEST.in
 create mode 100644 tests/test_cli_scan.py

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..9af7b06
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include policies/gdpr.yaml
+include policies/hipaa.yaml
+include policies/pci_dss.yaml
diff --git a/pyproject.toml b/pyproject.toml
index ec3120c..b189086 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,7 @@
+[build-system]
+requires = ["hatchling>=1.25.0"]
+build-backend = "hatchling.build"
+
 [project]
 name = "shadowaudit"
 version = "0.1.0"
@@ -6,8 +10,16 @@ requires-python = ">=3.10"
 dependencies = [
     "PyYAML>=6.0",
     "detect-secrets>=1.5.0",
+]
+
+[project.optional-dependencies]
+ner = [
     "chromadb>=0.5.0",
     "sentence-transformers>=3.0.0",
+    "spacy-transformers>=1.3.5",
+]
+siem = [
+    "datadog-api-client>=2.23.0",
 ]
 
 [project.scripts]
diff --git a/shadowaudit/cli.py b/shadowaudit/cli.py
index 89931f9..4805ad0 100644
--- a/shadowaudit/cli.py
+++ b/shadowaudit/cli.py
@@ -4,8 +4,10 @@
 
 import argparse
 import json
+from dataclasses import asdict
 
 from shadowaudit.core.policy import PolicyEngine
+from shadowaudit.core.scanner import PIIScanner
 from shadowaudit.reports.gdpr_report import generate_gdpr_report
 
 
@@ -28,6 +30,9 @@ def _build_parser() -> argparse.ArgumentParser:
     proxy_parser.add_argument("--port", type=int, default=8080, help="Local listening port")
     proxy_parser.add_argument("--target", default="https://api.openai.com", help="Upstream API base URL")
 
+    scan_parser = subparsers.add_parser("scan", help="Scan input text for PII entities")
+    scan_parser.add_argument("text", nargs="+", help="Text to scan")
+
     return parser
 
 
@@ -56,6 +61,13 @@ def main() -> int:
         run_proxy_server(port=args.port, target=args.target)
         return 0
 
+    if args.command == "scan":
+        scanner = PIIScanner(fast_mode=True)
+        result = scanner.scan(" ".join(args.text))
+        payload = result.model_dump() if hasattr(result, "model_dump") else asdict(result)
+        print(json.dumps(payload, indent=2, ensure_ascii=False))
+        return 0
+
     parser.print_help()
     return 2
 
diff --git a/shadowaudit/core/secrets.py b/shadowaudit/core/secrets.py
index 84414e3..a0df866 100644
--- a/shadowaudit/core/secrets.py
+++ b/shadowaudit/core/secrets.py
@@ -7,10 +7,10 @@
 from collections import Counter
 
 try:  # pragma: no cover - optional dependency.
-    from detect_secrets import SecretsCollection
+    from detect_secrets.core import scan
     from detect_secrets.settings import transient_settings
 except Exception:  # pragma: no cover - fallback path when package is unavailable.
-    SecretsCollection = None
+    scan = None
     transient_settings = None
 
 
@@ -36,7 +36,7 @@ def _candidate_strings(text: str) -> list[str]:
         return re.findall(r"[A-Za-z0-9_\-+/=]{8,}", text)
 
     def _detect_with_library(self, text: str) -> list[str]:
-        if SecretsCollection is None or transient_settings is None:
+        if scan is None or transient_settings is None:
             return []
 
         plugin_settings = {
@@ -48,11 +48,10 @@ def _detect_with_library(self, text: str) -> list[str]:
 
         findings: list[str] = []
         with transient_settings(plugin_settings):
-            collection = SecretsCollection()
-            collection.scan_file_content(text, filename="")
-            for line_secrets in collection.data.values():
-                for secret in line_secrets:
-                    findings.append(secret.secret_value)
+            for secret in scan.scan_line(text):
+                value = getattr(secret, "secret_value", None)
+                if value:
+                    findings.append(value)
         return findings
 
     def detect(self, text: str) -> list[str]:
diff --git a/tests/test_cli_scan.py b/tests/test_cli_scan.py
new file mode 100644
index 0000000..10ab11b
--- /dev/null
+++ b/tests/test_cli_scan.py
@@ -0,0 +1,23 @@
+"""CLI tests for scan command."""
+
+from __future__ import annotations
+
+import json
+import sys
+
+from shadowaudit.cli import main
+
+
+def test_scan_command_outputs_detected_entities(capsys) -> None:
+    old_argv = sys.argv
+    sys.argv = ["shadowaudit", "scan", "Email", "alice@example.com"]
+    try:
+        code = main()
+    finally:
+        sys.argv = old_argv
+
+    captured = capsys.readouterr()
+    payload = json.loads(captured.out)
+
+    assert code == 0
+    assert "EMAIL" in payload["detected_entities"]