diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..94e0276
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,27 @@
+[submodule "app_tests/NodeGoat"]
+	path = app_tests/NodeGoat
+	url = https://github.com/OWASP/NodeGoat.git
+[submodule "app_tests/DVWA"]
+	path = app_tests/DVWA
+	url = https://github.com/digininja/DVWA.git
+[submodule "app_tests/WebGoat"]
+	path = app_tests/WebGoat
+	url = https://github.com/WebGoat/WebGoat.git
+[submodule "app_tests/juice-shop"]
+	path = app_tests/juice-shop
+	url = https://github.com/juice-shop/juice-shop.git
+[submodule "app_tests/railsgoat"]
+	path = app_tests/railsgoat
+	url = https://github.com/OWASP/railsgoat.git
+[submodule "app_tests/IWA-DotNet"]
+	path = app_tests/IWA-DotNet
+	url = https://github.com/fortify/IWA-DotNet.git
+[submodule "app_tests/pygoat"]
+	path = app_tests/pygoat
+	url = https://github.com/adeyosemanputra/pygoat.git
+[submodule "app_tests/DVIA-v2"]
+	path = app_tests/DVIA-v2
+	url = https://github.com/prateek147/DVIA-v2.git
+[submodule "app_tests/scala-woof"]
+	path = app_tests/scala-woof
+	url = https://github.com/snyk/scala-woof.git
diff --git a/.hooks/version-check.py b/.hooks/version-check.py
index 6450df0..ddb23a5 100755
--- a/.hooks/version-check.py
+++ b/.hooks/version-check.py
@@ -1,4 +1,23 @@
 #!/usr/bin/env python3
+"""
+Version management script for Socket Basics.
+
+This script:
+1. Ensures version.py and pyproject.toml are in sync
+2. Auto-bumps version on commits if unchanged
+3. Automatically updates version references in:
+   - README.md (GitHub Action versions and Docker build tags)
+   - docs/github-action.md (all action version references)
+   - docs/pre-commit-hook.md (Docker build tags)
+
+Pattern matching:
+- GitHub Actions: SocketDev/socket-basics@vX.X.X -> @vNEW_VERSION
+- Docker builds: docker build -t IMAGE_NAME -> docker build -t IMAGE_NAME:NEW_VERSION
+
+Usage:
+- Normal commit: Will auto-bump patch version if unchanged
+- Dev mode: python3 .hooks/version-check.py --dev
+"""
 import subprocess
 import pathlib
 import re
@@ -8,9 +27,18 @@
 VERSION_FILE = pathlib.Path("socket_basics/version.py")
 PYPROJECT_FILE = pathlib.Path("pyproject.toml")
+README_FILES = [
+    pathlib.Path("README.md"),
+    pathlib.Path("docs/github-action.md"),
+    pathlib.Path("docs/pre-commit-hook.md"),
+]
 
 VERSION_PATTERN = re.compile(r"__version__\s*=\s*['\"]([^'\"]+)['\"]")
 PYPROJECT_PATTERN = re.compile(r'^version\s*=\s*"([^"]+)"$', re.MULTILINE)
+# Pattern to match SocketDev/socket-basics@vX.X.X or @vX.X.X
+ACTION_VERSION_PATTERN = re.compile(r'(SocketDev/socket-basics|socket-basics)@v\d+\.\d+\.\d+')
+# Pattern to match docker build with version tag
+DOCKER_BUILD_PATTERN = re.compile(r'docker build -t (socketdev/socket-basics|socket-basics)(?::\d+\.\d+\.\d+)?')
 
 # Update this URL to match your actual PyPI package if you publish it
 PYPI_API = "https://pypi.org/pypi/security-wrapper/json"
@@ -71,6 +99,31 @@
     print("❌ Could not find available .devN slot after 100 attempts.")
     sys.exit(1)
 
+def update_readme_versions(version: str):
+    """Update version references in README files"""
+    for readme_file in README_FILES:
+        if not readme_file.exists():
+            print(f"⚠️ {readme_file} not found, skipping")
+            continue
+
+        content = readme_file.read_text()
+        original_content = content
+
+        # Update action version references (SocketDev/socket-basics@vX.X.X)
+        content = ACTION_VERSION_PATTERN.sub(rf'\1@v{version}', content)
+
+        # Update docker build commands to include version tag
+        def docker_replacement(match):
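+            # e.g. with version "1.2.3", 'docker build -t socketdev/socket-basics'
+            # is rewritten to 'docker build -t socketdev/socket-basics:1.2.3'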
+            image_name = match.group(1)
+            return f'docker build -t {image_name}:{version}'
+        content = DOCKER_BUILD_PATTERN.sub(docker_replacement, content)
+
+        if content != original_content:
+            readme_file.write_text(content)
+            print(f"✅ Updated version references in {readme_file}")
+        else:
+            print(f"ℹ️ No version updates needed in {readme_file}")
+
 def inject_version(version: str):
     print(f"🔁 Updating version to: {version}")
@@ -85,6 +138,9 @@
         print(f"✅ Updated {PYPROJECT_FILE}")
     else:
         print(f"⚠️ Could not find version field in {PYPROJECT_FILE}")
+
+    # Update README files with version references
+    update_readme_versions(version)
 
 def check_version_sync():
     """Ensure version.py and pyproject.toml are in sync"""
diff --git a/Dockerfile b/Dockerfile
index ce9d6e0..203174d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,23 +2,17 @@
 FROM python:3.12
 
 # Create application directory
-WORKDIR /socket-security-tools
+WORKDIR /socket-basics
 
 ENV PATH=$PATH:/usr/local/go/bin
 
 # Install uv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 
-# Install Trivy
-RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.18.3
-
-# Install Trufflehog
-RUN curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh | sh -s -- -b /usr/local/bin
-
-
+# Install system dependencies
 RUN apt-get update && apt-get install -y curl git wget
 
 # Install Trivy
-RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.18.3
+RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.67.2
 
 # Install Trufflehog
 RUN curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh | sh -s -- -b /usr/local/bin
@@ -26,23 +20,17 @@
 # Install OpenGrep (connector/runtime dependency)
 RUN curl -fsSL https://raw.githubusercontent.com/opengrep/opengrep/main/install.sh | bash
 
-# Copy socket_basics package so we can install the CLI entrypoint
-COPY socket_basics /socket-security-tools/socket_basics
-# Also copy the project root so editable install has access to all files
-COPY . /socket-security-tools/
-
-COPY pyproject.toml uv.lock LICENSE README.md /scripts/
-# Install Python dependencies using uv
-WORKDIR /scripts
-RUN uv sync --frozen && uv pip install light-s3-client
-ENV PATH="/scripts/.venv/bin:/root/.opengrep/cli/latest:$PATH"
-
-# Install this package so the `socket-basics` CLI entrypoint is available
-WORKDIR /socket-security-tools
-# Ensure python can import package if install doesn't run; prefer installed package
-ENV PYTHONPATH="/socket-security-tools:${PYTHONPATH}"
-# Ensure pyproject is present for editable install; fail loudly if install fails
-RUN uv pip install -e . || pip install -e .
+# Copy the specific files needed for the project
+COPY socket_basics /socket-basics/socket_basics
+COPY pyproject.toml /socket-basics/pyproject.toml
+COPY README.md /socket-basics/README.md
+COPY LICENSE /socket-basics/LICENSE
+COPY uv.lock /socket-basics/uv.lock
+
+# Install Python dependencies using uv from the project root
+WORKDIR /socket-basics
+RUN pip install -e . && uv sync --frozen --no-dev
+ENV PATH="/socket-basics/.venv/bin:/root/.opengrep/cli/latest:$PATH"
 
 # Use socket-basics as the default entrypoint
 ENTRYPOINT ["socket-basics"]
diff --git a/README.md b/README.md
index 80c03f2..57917d3 100644
--- a/README.md
+++ b/README.md
@@ -1,425 +1,271 @@
 # Socket Basics
 
-Socket Basics is a small, extensible CLI tool that orchestrates multiple security scanners (SAST, secret scanning, container scanning), normalizes their outputs into a single consolidated Socket facts JSON format, and delivers results to configured notifiers (console, Slack, Jira, webhooks, Sumo Logic, MS Sentinel, etc.).
+**Comprehensive security scanning with SAST, secrets detection, container scanning, and more — all in one unified tool.**
 
-This README is a first-time, clean-slate guide to installing, running, configuring, and extending the tool.
+Socket Basics orchestrates multiple security scanners, normalizes their outputs into Socket's standardized format, and delivers consolidated results through your preferred notification channels.
 
-## Table of contents
-
-- Overview
-- Installation
-- Quick start
-- CLI reference
-- Environment variables (INPUT_*)
-- Connector architecture
-- Notifiers
-- Output format
-- Docker usage
-- Development & testing
-- Troubleshooting
-- Contributing
-- License
-
-## Overview
-
-Socket Basics provides:
-
-- A unified CLI: `socket-basics`
-- A plugin-style connector system for integrating scanners (OpenGrep, Trivy, TruffleHog, etc.)
-- Configuration via CLI flags, environment variables, and `socket_basics/connectors.yaml`
-- Consolidation of all scanner results into a single `.socket.facts.json` compatible structure
-- Notification hooks to send results to external systems
-
-Design goals:
-
-- Make it easy to run multiple scanners in a single job
-- Normalize outputs for downstream analysis and reporting
-- Keep connectors isolated and pluggable
-
-## Installation
-
-Recommended: use a Python virtual environment and the `uv` tool (used in development here). The package exposes the `socket-basics` CLI through `pyproject.toml`.
-
-On macOS / Linux (zsh):
-
-```sh
-python -m venv .venv
-source .venv/bin/activate
-# install uv if not already available
-curl -LsSf https://astral.sh/uv/install.sh | sh
-uv sync
-# install this package in editable mode
-pip install -e .
-```
-
-After installation you should have the `socket-basics` CLI available in your environment.
+![Socket Basics Example Results](docs/screenshots/socket_basics_example_results.png)
+
+## 🚀 Quick Start - GitHub Actions
+
+The easiest way to use Socket Basics is through GitHub Actions. Add it to your workflow in minutes:
+
+```yaml
+name: Security Scan
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  security-scan:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Run Socket Basics
+        uses: SocketDev/socket-basics@v1.0.2
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          socket_security_api_key: ${{ secrets.SOCKET_SECURITY_API_KEY }}
+```
+
+**That's it!** With just your `SOCKET_SECURITY_API_KEY`, all scanning configurations are managed through the [Socket Dashboard](https://socket.dev/dashboard) — no workflow changes needed.
+
+### What You Get
+
+- ✅ **Zero Configuration Required** — Configure scanning policies in the Socket Dashboard
+- ✅ **All Scanners Included** — SAST, secrets, containers, and dependency analysis
+- ✅ **PR Comments** — Automated security findings on pull requests
+- ✅ **Centralized Management** — Update policies across all repos from one place
+
+📖 **[Complete GitHub Actions Guide →](docs/github-action.md)**
+
+### Other Installation Methods
+
+Socket Basics can also run locally or in other CI/CD environments:
+
+- **[Pre-Commit Hook](docs/pre-commit-hook.md)** — Catch issues before they're committed
+- **[Local Docker Installation](docs/local-install-docker.md)** — Run in Docker with no tool installation required
+- **[Local Installation](docs/local-installation.md)** — Install security tools natively on your machine
 
-## Quick start
-
-Build the container image and run a scan from your current working directory mounted as `/workspace`.
-
-1) Build the Docker image (tagged `socket-basics`):
-
-```sh
-docker build -t socket-basics .
-```
-
-2) Create a `.env` file that enables Jira + Slack and provides Socket credentials. The example below includes the required `SOCKET_ORG` and `SOCKET_SECURITY_API_KEY` variables used in this quick run (replace placeholders with real values or secrets):
-
-```env
-# Socket credentials
-SOCKET_ORG=socketdev-demo
-SOCKET_SECURITY_API_KEY=your-socket-security-api-key
-
-# Enable notifiers
-INPUT_JIRA_ENABLED=true
-INPUT_JIRA_URL=https://your-jira-instance.atlassian.net
-INPUT_JIRA_EMAIL=you@example.com
-INPUT_JIRA_API_TOKEN=your-jira-api-token
-INPUT_JIRA_PROJECT=PROJ
-
-INPUT_SLACK_ENABLED=true
-INPUT_SLACK_WEBHOOK_URL=https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
-
-# Optional: prefer tabular console output
-INPUT_CONSOLE_ENABLED=true
-INPUT_SOCKET_CONSOLE_MODE=tabular
-```
-
-3) Run the container mounting the current directory into `/workspace` and pass the CLI flags you provided. As long as it is a git repo it'll automatically pick up the repo and branch information for the scan. This example runs secrets scanning, JavaScript SAST, requests Socket tier1 reporting, and scans the `trickyhu/sigsci-rule-editor` container image:
-
-```sh
-docker run --rm -v "$PWD:/workspace" --env-file .env socket-basics \
-  --workspace /workspace \
-  --secrets \
-  --console-tabular-enabled \
-  --javascript \
-  --socket-org socketdev-demo \
-  --socket-tier1 \
-  --container-images \
-  --images trickyhu/sigsci-rule-editor
-```
-
-Notes:
-- The container mounts your current project into `/workspace`, so the CLI option `--workspace /workspace` points to that path inside the container.
-- The `.env` file is loaded by `--env-file` to provide credentials and notifier configuration; you can also set secrets via your environment or your CI provider.
-- `SOCKET_ORG` and `SOCKET_SECURITY_API_KEY` in the example are included to show the minimum Socket-related env variables for SCA/Socket integrations. The tool also accepts `INPUT_SOCKET_ORG` / `INPUT_SOCKET_API_KEY` style env vars used elsewhere in this repo if you prefer that naming.
-
-Quick local examples (alternate):
+## ✨ Features
+
+**Built-in Security Scanners:**
+- 🔍 **SAST** — Static analysis for 15+ languages (Python, JavaScript, Go, Java, Ruby, C#, and more)
+- 🔐 **Secret Scanning** — Detect leaked credentials and API keys with TruffleHog
+- 🐳 **Container Scanning** — Vulnerability scanning for Docker images and Dockerfiles with Trivy
+- 📦 **Dependency Analysis** — Socket Tier 1 reachability analysis for supply chain security
+
+**Enterprise Features** (requires [Socket Enterprise](https://socket.dev/enterprise)):
+- 🎛️ **Dashboard Configuration** — Centrally manage scanning policies across your organization
+- 📢 **Notification Integrations** — Send results to Slack, Jira, MS Teams, MS Sentinel, Sumo Logic, and webhooks
+- 🔄 **Unified Reporting** — Consolidated security view across all your projects
+
+**Flexible Configuration:**
+- Configure via CLI flags, environment variables, JSON files, or the Socket Dashboard
+- Auto-enablement for container scanning when images or Dockerfiles are specified
+- Support for both standard and GitHub Actions `INPUT_*` environment variables
+
+## 📖 Documentation
+
+### Getting Started
+- [GitHub Actions Integration](docs/github-action.md) — Complete guide with workflow examples
+- [Pre-Commit Hook Setup](docs/pre-commit-hook.md) — Two installation methods (Docker vs native)
+- [Local Docker Installation](docs/local-install-docker.md) — Run with Docker, no tools to install
+- [Local Installation](docs/local-installation.md) — Install Socket CLI, Trivy, and other tools natively
+
+### Configuration
+All configuration can be managed through:
+1. **Socket Dashboard** (Enterprise) — Centralized policy management
+2. **CLI Arguments** — Direct command-line flags
+3. **Environment Variables** — Standard or `INPUT_*` prefixed for GitHub Actions
+4. **JSON Configuration File** — Structured configuration (see `socket_config_example.json`)
+
+See [Configuration Documentation](docs/configuration.md) for details on all available options.
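+
+As a rough sketch of how these sources are expected to layer (CLI over environment over file; the option name and config-file schema below are illustrative placeholders, not the tool's actual schema):
+
+```python
+import json
+import os
+import pathlib
+
+def resolve_option(name, cli_value=None, config_file="socket_config_example.json"):
+    """Resolve one option: CLI flag, then INPUT_* env var, then JSON config file."""
+    if cli_value is not None:              # 1. explicit CLI argument wins
+        return cli_value
+    env_value = os.environ.get(f"INPUT_{name.upper()}")
+    if env_value is not None:              # 2. INPUT_*-style environment variable
+        return env_value
+    cfg = pathlib.Path(config_file)
+    if cfg.exists():                       # 3. JSON configuration file
+        return json.loads(cfg.read_text()).get(name)
+    return None                            # 4. fall through to built-in defaults
+
+# e.g. INPUT_PYTHON_SAST_ENABLED=true is picked up by resolve_option("python_sast_enabled")
+```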
-Run a basic scan from the repository root and print results to stdout:
-
-```sh
-socket-basics --python --secrets --containers --verbose
-```
-
-Save results to a file:
-
-```sh
-socket-basics --python --secrets --containers --output scan-results.socket.facts.json
-```
-
-Run with console notifications only (no output file):
-
-```sh
-INPUT_CONSOLE_ENABLED=true socket-basics --python --secrets
-```
-
-## CLI reference
-
-Run `socket-basics --help` to see the up-to-date list of options. Below are the most commonly used flags:
-
-- `--python` / `--no-python` — enable/disable Python SAST
-- `--secrets` / `--no-secrets` — enable/disable secret scanning
-- `--containers` / `--no-containers` — enable/disable container scanning
-- `--all-languages` — run SAST for all languages configured by the connectors
-- `--output <path>` — path to write the consolidated Socket facts JSON
-- `--workspace <path>` — path to repository workspace (defaults to current directory)
-- `--repo <repo>` — repository identifier for integrations
-- `--branch <branch>` — repository branch to analyze
-- `--socket-tier1` / `--no-socket-tier1` — enable/disable Socket tier1 reporting
-- `--socket-org <org>` — Socket organization slug (required for Socket API calls)
-- `--console-tabular-enabled` / `--no-console-tabular-enabled` — prefer tabular console output
-- `--verbose` / `--no-verbose` — enable/disable debug logging
-
-Connector-specific CLI flags are declared dynamically in `socket_basics/connectors.yaml` and will appear in `--help` when available.
-
-## Environment variables (INPUT_ prefix)
-
-All environment variables used to configure scanning behavior follow the `INPUT_{PARAM_NAME}` pattern (uppercase). The precedence order is:
-
-1. CLI arguments
-2. Environment variables (`INPUT_*`)
-3. `socket_basics/connectors.yaml`
-4. Built-in defaults
-
-Common environment variables used by the project (examples):
-
-- `INPUT_PYTHON_SAST_ENABLED=true|false`
-- `INPUT_SECRET_SCANNING_ENABLED=true|false`
-- `INPUT_DOCKERFILES=Dockerfile,Dockerfile.prod`
-- `INPUT_DOCKER_IMAGES=org/image:tag,org/other:tag`
-- `INPUT_SOCKET_SCANNING_ENABLED=true|false`
-- `INPUT_SOCKET_ORG=<org>`
-- `INPUT_SOCKET_API_KEY=<key>`
-- `INPUT_CONSOLE_ENABLED=true|false`
-- `INPUT_SOCKET_CONSOLE_MODE=json|tabular`
-- `INPUT_SLACK_ENABLED=true|false`
-- `INPUT_SLACK_WEBHOOK_URL=<url>`
-- `INPUT_JIRA_ENABLED=true|false`
-- `INPUT_JIRA_PROJECT=<project>`
-
-Connector-specific env vars are listed under each connector's `parameters` block in `socket_basics/connectors.yaml` (look for `env_variable` entries).
-
-## Connector architecture
-
-Connectors live under `socket_basics/core/connector/`. Each connector is a small adapter that:
-
-- Implements a `scan()` method that executes the underlying tool and returns raw results
-- Implements a `_process_results(raw_results)` method that converts raw output into the Socket facts structure
-
-Connectors are registered and configured via `socket_basics/connectors.yaml`.
+## 🎯 What Socket Basics Does
+
+1. **Scans** your codebase using multiple security tools in parallel
+2. **Normalizes** all findings into a unified Socket facts JSON format
+3. **Filters** results based on severity thresholds and configured rules
+4. **Reports** consolidated findings through console, files, or notification channels
+
+## 🏢 Enterprise Dashboard Configuration
+
+Socket Enterprise customers can configure Socket Basics directly from the [Socket Dashboard](https://socket.dev/dashboard):
+
+![Socket Basics Settings](docs/screenshots/socket_basics_settings.png)
+
+Configure scanning policies, notification channels, and rule sets for your entire organization in one place. Your settings are automatically synchronized when you provide `SOCKET_SECURITY_API_KEY` and `SOCKET_ORG`.
+
+![Socket Basics Section Config](docs/screenshots/socket_basics_section_config.png)
+
+## 💻 Usage Examples
+
+### GitHub Actions (Recommended)
+
+**Dashboard-Configured (Enterprise):**
+```yaml
+- uses: SocketDev/socket-basics@v1.0.2
+  with:
+    github_token: ${{ secrets.GITHUB_TOKEN }}
+    socket_security_api_key: ${{ secrets.SOCKET_SECURITY_API_KEY }}
+    # All configuration managed in Socket Dashboard
+```
+
+**CLI-Configured:**
+```yaml
+- uses: SocketDev/socket-basics@v1.0.2
+  with:
+    github_token: ${{ secrets.GITHUB_TOKEN }}
+    python_sast_enabled: 'true'
+    secret_scanning_enabled: 'true'
+    container_images: 'myapp:latest'
+```
+
+📖 **[View Complete GitHub Actions Documentation](docs/github-action.md)**
+
+### Docker
+
+```bash
+# Build with version tag
+docker build -t socketdev/socket-basics:1.0.2 .
+
+# Run scan
+docker run --rm -v "$PWD:/workspace" socketdev/socket-basics:1.0.2 \
+  --workspace /workspace \
+  --python-sast-enabled \
+  --secret-scanning-enabled \
+  --console-tabular-enabled
+```
-Typical fields in the YAML mapping:
-
-- `module_path`: Python import path for the connector
-- `class`: connector class name
-- `enabled_by_default`: boolean
-- `parameters`: list of parameter mappings with `name`, `option`, `env_variable`, `type`, and `default`
-
-Add a new connector by creating a directory under `socket_basics/core/connector/<name>/`, implementing the connector class, and adding an entry to `connectors.yaml`.
-
-## Testing connectors (app_tests)
-
-Connector integration tests live in `app_tests/`. This folder is the authoritative place to run connector-level integration tests that exercise scanners against sample repositories or inputs. Do not rely on `local_tests/` or `samples/` for official connector testing — `app_tests/` is maintained for that purpose.
-
-## Notifiers
-
-Notifiers are responsible for delivering the consolidated report to different channels. Built-in notifiers include:
-
-- Console (JSON or tabular)
-- Slack
-- Jira
-- Webhook
-- Sumo Logic
-- MS Sentinel
-
-Notifier behavior is configured via `socket_basics/notifications.yaml` or via connector-specific CLI flags and `INPUT_` environment variables.
-
-## Output format
-
-All scanners' findings are normalized into a consolidated Socket facts JSON structure. High-level shape:
+📖 **[View Docker Installation Guide](docs/local-install-docker.md)**
+
+### CLI
+
+```bash
+socket-basics --python --secrets --containers --verbose
+```
+
+📖 **[View Local Installation Guide](docs/local-installation.md)**
+
+## 🔧 Requirements
+
+**For GitHub Actions & Docker:** No installation needed — all tools are bundled in the container.
+
+**For Local Installation:**
+- Python 3.8+
+- [Socket CLI](https://docs.socket.dev/docs/cli) (for dependency analysis)
+- [Trivy](https://github.com/aquasecurity/trivy) (for container scanning)
+- [OpenGrep/Semgrep](https://semgrep.dev/) (for SAST)
+- [TruffleHog](https://github.com/trufflesecurity/trufflehog) (for secret scanning)
+
+See [Local Installation Guide](docs/local-installation.md) for detailed setup instructions.
+
+## 📊 Output Format
+
+Socket Basics normalizes all scanner findings into a standardized Socket facts JSON structure:
 
 ```json
 {
-  "components": [
-    {
-      "type": "file",
-      "name": "path/to/file",
-      "alerts": [
-        {
-          "type": "sast|secret|container",
-          "severity": "low|medium|high|critical",
-          "message": "description",
-          "location": {"path": "file/path", "line": 42}
-        }
-      ]
-    }
-  ]
+    "components": [
+        {
+            "type": "file",
+            "name": "path/to/file",
+            "alerts": [
+                {
+                    "type": "sast|secret|container",
+                    "severity": "low|medium|high|critical",
+                    "message": "description",
+                    "location": {"path": "file/path", "line": 42}
+                }
+            ]
+        }
+    ]
 }
 ```
 
-If `--output` is specified the JSON is written to that file. If not specified and console notifier is enabled the output is printed to stdout in the selected console mode.
-
-Sample consolidated outputs are provided in the `samples/` directory.
+Results can be:
+- **Printed to console** in tabular or JSON format
+- **Written to file** (`.socket.facts.json`)
+- **Sent to notification channels** (Slack, Jira, webhooks, etc.)
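+
+As a quick sketch of consuming this file from Python (the report filename and severity threshold here are assumptions, not tool defaults):
+
+```python
+import json
+
+SEVERITY_ORDER = {"low": 0, "medium": 1, "high": 2, "critical": 3}
+
+def alerts_at_or_above(facts_path, threshold="high"):
+    """Yield (component name, alert) pairs from a .socket.facts.json report."""
+    with open(facts_path) as fh:
+        facts = json.load(fh)
+    for component in facts.get("components", []):
+        for alert in component.get("alerts", []):
+            if SEVERITY_ORDER.get(alert.get("severity"), -1) >= SEVERITY_ORDER[threshold]:
+                yield component.get("name"), alert
+
+for name, alert in alerts_at_or_above("scan-results.socket.facts.json"):
+    loc = alert.get("location", {})
+    print(f'{alert["severity"].upper()}: {name}:{loc.get("line")} {alert["message"]}')
+```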
-## Docker usage
-
-Build the project Docker image and run a scan inside the container:
-
-```sh
-docker build -t socketdev/security-wrapper .
-
-# Example run (prints to console; replace placeholders as needed)
-docker run --rm -v "$PWD:/code" \
-  -e "INPUT_CONSOLE_ENABLED=true" \
-  -e "INPUT_PYTHON_SAST_ENABLED=true" \
-  -e "INPUT_SECRET_SCANNING_ENABLED=true" \
-  -e "INPUT_SOCKET_SCANNING_ENABLED=true" \
-  -e "INPUT_SOCKET_ORG=your-socket-org" \
-  -e "INPUT_SOCKET_API_KEY=your-api-key" \
-  socketdev/security-wrapper \
-  --python --secrets --containers --output /code/scan-results.socket.facts.json
-```
-
-Notes:
-
-- Image scanning (`INPUT_DOCKER_IMAGES`) requires Docker/DIND access or pre-pulled images inside the container
-- Dockerfile scanning only requires the Dockerfile(s) to be present in the workspace
+## 🔌 Connector Architecture
+
+Socket Basics uses a plugin-style connector system. Each connector:
+- Lives under `socket_basics/core/connector/`
+- Implements `scan()` to execute the underlying tool
+- Implements `_process_results()` to normalize output
+
+Add new connectors by:
+1. Creating a directory under `socket_basics/core/connector/<name>/`
+2. Implementing the connector class
+3. Adding configuration to `socket_basics/connectors.yaml`
+
+See the [Developer Guide](docs/development.md) for details.
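+
+A minimal connector sketch, assuming a new connector mirrors the structure above (the class, tool, and module names are hypothetical; copy the exact base class and wiring from an existing connector such as the Trivy one):
+
+```python
+# Hypothetical socket_basics/core/connector/mytool/connector.py
+import json
+import subprocess
+
+class MyToolConnector:
+    def __init__(self, workspace):
+        self.workspace = workspace
+
+    def scan(self):
+        """Execute the underlying tool and return its raw JSON output."""
+        result = subprocess.run(
+            ["mytool", "--json", self.workspace],
+            capture_output=True, text=True, check=False,
+        )
+        return json.loads(result.stdout or "[]")
+
+    def _process_results(self, raw_results):
+        """Normalize raw findings into the Socket facts structure."""
+        components = []
+        for finding in raw_results:
+            components.append({
+                "type": "file",
+                "name": finding["path"],
+                "alerts": [{
+                    "type": "sast",
+                    "severity": finding.get("severity", "low"),
+                    "message": finding.get("message", ""),
+                    "location": {"path": finding["path"], "line": finding.get("line", 0)},
+                }],
+            })
+        return {"components": components}
+```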
-## GitHub Actions usage
-
-This repository exposes a GitHub Action (see `action.yml`) which runs the Docker image and accepts many inputs to configure scanning and notifications. Below is a comprehensive list of available inputs (names are the action inputs; when using environment variables in workflows they map to the same semantic names under `with:`):
-
-Core inputs:
-
-- `github_token` (required) — GitHub token used to post PR comments
-
-Enable flags (true/false):
-
-- `python_sast_enabled` — enable Python SAST
-- `golang_sast_enabled` — enable Golang SAST
-- `javascript_sast_enabled` — enable JavaScript SAST
-- `dockerfile_enabled` — enable Dockerfile analysis
-- `image_enabled` — enable image scanning
-- `secret_scanning_enabled` — enable secret scanning
-- `socket_scanning_enabled` — enable Socket reachability scanning
-- `socket_sca_enabled` — enable Socket SCA scanning
-
-Docker/trivy inputs:
-
-- `docker_images` — comma-separated Docker images to scan
-- `dockerfiles` — comma-separated Dockerfile paths to scan
-
-Trufflehog inputs:
-
-- `trufflehog_exclude_dir` — comma-separated dirs to exclude
-- `trufflehog_rules` — rules to enable
-- `trufflehog_show_unverified` — show unverified secrets
-
-Socket configuration:
-
-- `socket_org` — Socket organization slug (required for Socket integrations)
-- `socket_api_key` — API key for Socket
-- `socket_security_api_key` — API key for SCA scanning
-- `socket_sca_files` — comma-separated manifest files to include in SCA
-
-SAST and rule controls:
-
-- `all_languages_enabled` — run SAST for all supported languages
-- `all_rules_enabled` — run all bundled SAST rules
-- Per-language enable flags (each accept `true|false`):
-  - `python_sast_enabled`, `javascript_sast_enabled`, `typescript_sast_enabled`, `go_sast_enabled`, `golang_sast_enabled`, `java_sast_enabled`, `php_sast_enabled`, `ruby_sast_enabled`, `csharp_sast_enabled`, `dotnet_sast_enabled`, `c_sast_enabled`, `cpp_sast_enabled`, `kotlin_sast_enabled`, `scala_sast_enabled`, `swift_sast_enabled`, `rust_sast_enabled`, `elixir_sast_enabled`
-- Per-language rule overrides (comma-separated lists):
-  - `<language>_enabled_rules` and `<language>_disabled_rules` for languages such as `python`, `javascript`, `go`, `java`, `php`, `ruby`, `csharp`, `dotnet`, `c`, `cpp`, `kotlin`, `scala`, `swift`, `rust`, `elixir`
-
-Trivy-specific:
-
-- `trivy_exclude_dir` — comma-separated dirs to exclude from Trivy
-- `trivy_rules` — rules to enable in Trivy
-- `trivy_disabled_rules` — comma-separated rules to disable
-- `trivy_image_scanning_disabled` — disable Trivy image scanning
-
-Log forwarding / SIEM:
-
-- `sumo_logic_enabled` — enable Sumo Logic forwarding
-- `sumo_logic_http_source_url` — Sumo Logic HTTP source URL
-- `ms_sentinel_enabled` — enable Microsoft Sentinel forwarding
-- `ms_sentinel_workspace_id` — workspace id
-- `ms_sentinel_shared_key` — shared key
-
-Jira / ticketing:
-
-- `jira_enabled` — enable Jira ticket creation
-- `jira_url` — Jira instance URL
-- `jira_email` — Jira account email
-- `jira_api_token` — Jira API token
-- `jira_project` — Jira project key
-
-Slack / Teams / Webhook:
-
-- `slack_enabled` — enable Slack notifications
-- `slack_webhook_url` — Slack webhook URL
-- `teams_enabled` — enable Teams notifications
-- `teams_webhook_url` — Teams webhook URL
-- `webhook_enabled` — enable generic webhook
-- `webhook_url` — webhook URL
-- `webhook_headers` — JSON string of custom headers for the webhook
-
-Scan scope:
-
-- `scan_all` — if true, scan the entire workspace regardless of git diff
-- `scan_files` — comma-separated list of files to scan (if omitted, action will use git diff or `scan_all`)
-
-Branding:
-
-- The action configures brand icon/color via `branding` in `action.yml` (not user-configurable via inputs)
-
-Example GitHub Actions workflow snippet:
-
-```yaml
-name: Security Scan
-on:
-  pull_request:
-    types: [opened, synchronize, reopened]
-
-jobs:
-  security-scan:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Run Socket Basics
-        uses: ./ # when running from the same repo; replace with org/repo@vX for published action
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          python_sast_enabled: 'true'
-          secret_scanning_enabled: 'true'
-          dockerfile_enabled: 'true'
-          socket_scanning_enabled: 'true'
-          socket_org: 'your-socket-org'
-          socket_api_key: ${{ secrets.SOCKET_API_KEY }}
-```
-
-Make sure to set any secrets (Socket API keys, Jira tokens, Slack webhooks) using repository or organization secrets.
+## 🧪 Testing
+
+Integration tests for connectors live in `app_tests/`. This is the authoritative location for connector-level testing with sample repositories.
+
+```bash
+# Run tests
+python -m pytest app_tests/ -v
+
+# Run specific connector tests
+python -m pytest app_tests/test_trivy.py -v
+```
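+
+A test for a new connector might look like the following sketch (it reuses the hypothetical connector names from the section above; model real tests on the existing files in `app_tests/`):
+
+```python
+# Hypothetical app_tests/test_mytool.py
+from socket_basics.core.connector.mytool.connector import MyToolConnector
+
+def test_mytool_normalizes_findings(tmp_path):
+    connector = MyToolConnector(workspace=str(tmp_path))
+    facts = connector._process_results([
+        {"path": "app.py", "severity": "high", "message": "example finding", "line": 3},
+    ])
+    alert = facts["components"][0]["alerts"][0]
+    assert alert["severity"] == "high"
+    assert alert["location"]["line"] == 3
+```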
-## GitHub PR notifier environment variables
-
-When running in GitHub Actions or other CI, the GitHub PR notifier will attempt to discover repository and branch information from the environment first, then fall back to local `git` and finally any workspace `facts` that were provided. The notifier recognizes the following environment variables and action inputs (use whichever is most convenient in your workflow):
-
-- `GITHUB_REPOSITORY` — owner/repo identifier (e.g., `org/repo`). Automatically provided by GitHub Actions.
-- `GITHUB_EVENT_PATH` — path to the GitHub event JSON file (Actions provides this). The notifier will read the event payload to extract PR/head info when present.
-- `GITHUB_REF` / `GITHUB_HEAD_REF` — branch refs provided by the Actions runner. `GITHUB_HEAD_REF` is set for pull_request workflows; otherwise `GITHUB_REF` may contain `refs/heads/<branch>`.
-- `GITHUB_SHA` — commit SHA; used to build exact blob links when available.
-- `GITHUB_PR_NUMBER` — optional environment variable you can set to force the PR number to use when posting comments.
-- `INPUT_PR_NUMBER` — action input equivalent to `GITHUB_PR_NUMBER` (useful when invoking the action via `with:` in a workflow).
-- `INPUT_GITHUB_API_URL` — override the GitHub API base (useful for GitHub Enterprise). When set, it will be normalized to a full URL if a host-only string is provided.
-
-Priority for discovery is: explicit action inputs / environment variables → event payload → local `git` discovery → `facts` provided via `--workspace`.
-
-If you need to force a PR comment to a specific PR, set `GITHUB_PR_NUMBER` (or `INPUT_PR_NUMBER` in the action `with:` block).
+## 🐛 Troubleshooting
+
+**Connector fails to load:**
+- Verify `module_path` and `class` in `socket_basics/connectors.yaml`
+
+**Socket API errors:**
+- Ensure `SOCKET_SECURITY_API_KEY` and `SOCKET_ORG` are set correctly
+- Verify your Socket Enterprise subscription is active
+
+**Notifier errors:**
+- Check that notification credentials (Slack webhook, Jira token, etc.) are properly configured
+- Remember: Notifiers require Socket Enterprise
+
+**Image scanning failures:**
+- Confirm Docker access in your runtime environment
+- For GitHub Actions, ensure images are publicly accessible or credentials are provided
+
+**Enable verbose logging:**
+```bash
+socket-basics --verbose ...
+# or
+INPUT_VERBOSE=true socket-basics ...
+```
 
-## Development & testing
-
-- Run unit and local tests from `local_tests/` or `app_tests/`.
-- Use `uv run` or `python -m` to execute modules while iterating.
-
-Local quick test example:
-
-```sh
-# activate venv
-source .venv/bin/activate
-# run a subset of local tests
-python -m pytest local_tests/test_simple_scan.py -q
-```
-
-Keep test artifacts under `test_results/` (do not create test files outside that directory).
-
-## Troubleshooting
-
-- If connectors fail to load, verify `module_path` and `class` in `socket_basics/connectors.yaml`.
-- For Socket API or notifier errors, ensure `INPUT_SOCKET_ORG` and `INPUT_SOCKET_API_KEY` (or notifier secrets) are set.
-- Enable `--verbose` (or `INPUT_VERBOSE=true`) to see debug logs.
-- For image scanning failures, confirm Docker access inside the runtime environment.
+## 🤝 Contributing
+
+We welcome contributions! To add new features:
+
+1. **New Connectors:** Implement under `socket_basics/core/connector/`
+2. **New Notifiers:** Implement under `socket_basics/core/notification/`
+3. **Configuration:** Add entries to `socket_basics/connectors.yaml` or `socket_basics/notifications.yaml`
+4. **Tests:** Add test cases to `app_tests/`
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
 
-## Contributing
-
-1. Implement new connectors under `socket_basics/core/connector/`.
-2. Add notifier implementations under `socket_basics/core/notification/` if needed.
-3. Add configuration entries to `socket_basics/connectors.yaml` and `socket_basics/notifications.yaml`.
-4. Add sample test apps to `app_tests/`.
+## 📝 License
+
+This project is licensed under the terms specified in the [LICENSE](LICENSE) file.
+
+---
+
+## 🔗 Resources
+
+- [Socket Security](https://socket.dev/) — Main website
+- [Socket Dashboard](https://socket.dev/dashboard) — Configure Socket Basics (Enterprise)
+- [Socket Documentation](https://docs.socket.dev/) — Complete documentation
+- [Socket CLI](https://docs.socket.dev/docs/cli) — Dependency analysis tool
+- [Socket Enterprise](https://socket.dev/enterprise) — Learn about Enterprise features
+
+---
 
-## License
-
-This project is licensed under the terms in `LICENSE` in the repository root.
+**Need help?** Visit our [documentation](docs/) or contact [Socket Support](https://socket.dev/support). diff --git a/action.yml b/action.yml index dec1d11..3991881 100644 --- a/action.yml +++ b/action.yml @@ -1,365 +1,727 @@ -name: "Security Scan and Comment Action" -description: "Runs various open source security tools and then comments on PRs with results." -author: "Douglas Coburn" +name: "Socket Basics Security Scanner"name: "Security Scan and Comment Action" + +description: "Comprehensive security scanning with SAST, secrets, container scanning, and more. Configure from Socket Dashboard (Enterprise required)."description: "Runs various open source security tools and then comments on PRs with results." + +author: "Socket Security"author: "Douglas Coburn" + runs: - using: "docker" + +runs: using: "docker" + + using: "docker" image: "Dockerfile" + image: "Dockerfile" inputs: - github_token: - description: "GitHub token to post comments on PRs" - required: true - # PR / API overrides - pr_number: - description: "Optional PR number to post comments to (overrides auto-discovery)" - required: false - default: "" +inputs: github_token: + + # Socket Configuration (Required for Enterprise Features) description: "GitHub token to post comments on PRs" + + socket_org: required: true + + description: "Socket organization slug (required for Enterprise features)" + + required: false # PR / API overrides + + default: "" pr_number: + + socket_security_api_key: description: "Optional PR number to post comments to (overrides auto-discovery)" + + description: "Socket Security API key (required for Enterprise features and Dashboard configuration)" required: false + + required: false default: "" - github_api_url: - description: "Optional GitHub API base URL (useful for GitHub Enterprise). Maps to INPUT_GITHUB_API_URL environment variable when set." - required: false default: "" - # Enable Settings - python_sast_enabled: - description: "Enable Python SAST analysis" - required: false - default: "false" + github_api_url: - golang_sast_enabled: - description: "Enable Golang SAST analysis" - required: false - default: "false" + # GitHub Integration description: "Optional GitHub API base URL (useful for GitHub Enterprise). Maps to INPUT_GITHUB_API_URL environment variable when set." 
- javascript_sast_enabled: - description: "Enable JavaScript SAST analysis" - required: false - default: "false" + github_token: required: false - dockerfile_enabled: - description: "Enable Dockerfile analysis" - required: false - default: "false" + description: "GitHub token for PR comments and API access" default: "" - image_enabled: - description: "Enable image scanning" required: false - default: "false" - secret_scanning_enabled: - description: "Enable secret scanning" - required: false - default: "false" + default: "" # Enable Settings - socket_scanning_enabled: - description: "Enable Socket reachability scanning" - required: false - default: "false" + python_sast_enabled: - socket_sca_enabled: - description: "Enable Socket SCA (Software Composition Analysis) scanning" - required: false - default: "false" + # Output Configuration description: "Enable Python SAST analysis" - # Docker Configuration - docker_images: - description: "Comma-separated list of Docker images to scan" - required: false - default: "" - dockerfiles: - description: "Comma-separated list of Dockerfiles to scan" - required: false - default: "" + console_tabular_enabled: required: false - # Trufflehog Configuration - trufflehog_exclude_dir: - description: "Comma-separated list of directories to exclude in Trufflehog" - required: false - default: "" - trufflehog_rules: - description: "Rules to enable in Trufflehog" - required: false - default: "" - trufflehog_show_unverified: - description: "Show unverified secrets in Trufflehog results" - required: false - default: "false" + description: "Enable tabular console output" default: "false" - # Socket Configuration - socket_org: - description: "Socket organization for reachability scanning (required if socket_scanning_enabled is true)" - required: false - default: "" - socket_api_key: - description: "Socket API key for authentication" - required: false - default: "" - socket_security_api_key: - description: "Socket Security API key for SCA scanning (required if socket_sca_enabled is true)" required: false - default: "" - socket_sca_files: - description: "Comma-separated list of manifest files to scan (e.g., package.json,requirements.txt,go.mod)" - required: false - default: "" - # SAST configuration - all_languages_enabled: - description: "Enable SAST for all supported languages" - required: false - default: "false" - all_rules_enabled: - description: "Run all bundled SAST rules regardless of language filters" - required: false + default: "true" golang_sast_enabled: + + console_json_enabled: description: "Enable Golang SAST analysis" + + description: "Enable JSON console output" required: false + + required: false default: "false" + default: "false" - # Per-language SAST enable flags - python_sast_enabled: - description: "Enable Python SAST scanning" - required: false + + verbose: javascript_sast_enabled: + + description: "Enable verbose logging" description: "Enable JavaScript SAST analysis" + + required: false required: false + + default: "false" default: "false" + + + + # SAST Configuration dockerfile_enabled: + + all_languages_enabled: description: "Enable Dockerfile analysis" + + description: "Enable SAST for all supported languages" required: false + + required: false default: "false" + default: "false" - javascript_sast_enabled: - description: "Enable JavaScript/TypeScript SAST scanning" - required: false + + all_rules_enabled: image_enabled: + + description: "Run all bundled SAST rules" description: "Enable image scanning" + + required: false required: false + + 
default: "false" default: "false" + + + + # Language-Specific SAST secret_scanning_enabled: + + python_sast_enabled: description: "Enable secret scanning" + + description: "Enable Python SAST" required: false + + required: false default: "false" + default: "false" + + javascript_sast_enabled: socket_scanning_enabled: + + description: "Enable JavaScript/TypeScript SAST" description: "Enable Socket reachability scanning" + + required: false required: false + + default: "false" default: "false" + typescript_sast_enabled: - description: "Enable TypeScript SAST scanning" - required: false - default: "false" - go_sast_enabled: - description: "Enable Go SAST scanning" - required: false - default: "false" - golang_sast_enabled: - description: "Enable Golang SAST scanning" - required: false - default: "false" - java_sast_enabled: - description: "Enable Java SAST scanning" - required: false - default: "false" - php_sast_enabled: - description: "Enable PHP SAST scanning" - required: false - default: "false" - ruby_sast_enabled: - description: "Enable Ruby SAST scanning" - required: false - default: "false" - csharp_sast_enabled: - description: "Enable C# SAST scanning" - required: false - default: "false" - dotnet_sast_enabled: - description: "Enable .NET SAST scanning" - required: false - default: "false" - c_sast_enabled: - description: "Enable C SAST scanning" - required: false - default: "false" - cpp_sast_enabled: - description: "Enable C++ SAST scanning" - required: false - default: "false" - kotlin_sast_enabled: - description: "Enable Kotlin SAST scanning" - required: false - default: "false" - scala_sast_enabled: - description: "Enable Scala SAST scanning" - required: false - default: "false" - swift_sast_enabled: - description: "Enable Swift SAST scanning" - required: false - default: "false" - rust_sast_enabled: - description: "Enable Rust SAST scanning" - required: false + + description: "Enable TypeScript SAST" socket_sca_enabled: + + required: false description: "Enable Socket SCA (Software Composition Analysis) scanning" + + default: "false" required: false + + go_sast_enabled: default: "false" + + description: "Enable Go SAST" + + required: false # Docker Configuration + + default: "false" docker_images: + + golang_sast_enabled: description: "Comma-separated list of Docker images to scan" + + description: "Enable Golang SAST" required: false + + required: false default: "" + + default: "false" dockerfiles: + + java_sast_enabled: description: "Comma-separated list of Dockerfiles to scan" + + description: "Enable Java SAST" required: false + + required: false default: "" + default: "false" - elixir_sast_enabled: - description: "Enable Elixir SAST scanning" - required: false + + php_sast_enabled: # Trufflehog Configuration + + description: "Enable PHP SAST" trufflehog_exclude_dir: + + required: false description: "Comma-separated list of directories to exclude in Trufflehog" + + default: "false" required: false + + ruby_sast_enabled: default: "" + + description: "Enable Ruby SAST" trufflehog_rules: + + required: false description: "Rules to enable in Trufflehog" + + default: "false" required: false + + csharp_sast_enabled: default: "" + + description: "Enable C# SAST" trufflehog_show_unverified: + + required: false description: "Show unverified secrets in Trufflehog results" + + default: "false" required: false + + dotnet_sast_enabled: default: "false" + + description: "Enable .NET SAST" + + required: false # Socket Configuration + + default: "false" socket_org: + + c_sast_enabled: 
description: "Socket organization for reachability scanning (required if socket_scanning_enabled is true)" + + description: "Enable C SAST" required: false + + required: false default: "" + + default: "false" socket_api_key: + + cpp_sast_enabled: description: "Socket API key for authentication" + + description: "Enable C++ SAST" required: false + + required: false default: "" + + default: "false" socket_security_api_key: + + kotlin_sast_enabled: description: "Socket Security API key for SCA scanning (required if socket_sca_enabled is true)" + + description: "Enable Kotlin SAST" required: false + + required: false default: "" + + default: "false" socket_sca_files: + + scala_sast_enabled: description: "Comma-separated list of manifest files to scan (e.g., package.json,requirements.txt,go.mod)" + + description: "Enable Scala SAST" required: false + + required: false default: "" + default: "false" - # Per-language rule overrides - python_enabled_rules: - description: "Comma-separated list of Python SAST rules to enable" - required: false - default: "" - python_disabled_rules: - description: "Comma-separated list of Python SAST rules to disable" - required: false - default: "" - javascript_enabled_rules: - description: "Comma-separated list of JavaScript/TypeScript SAST rules to enable" - required: false - default: "" - javascript_disabled_rules: - description: "Comma-separated list of JavaScript/TypeScript SAST rules to disable" - required: false - default: "" - go_enabled_rules: - description: "Comma-separated list of Go SAST rules to enable" - required: false - default: "" - go_disabled_rules: - description: "Comma-separated list of Go SAST rules to disable" - required: false - default: "" - java_enabled_rules: - description: "Comma-separated list of Java SAST rules to enable" - required: false - default: "" - java_disabled_rules: - description: "Comma-separated list of Java SAST rules to disable" - required: false - default: "" - php_enabled_rules: - description: "Comma-separated list of PHP SAST rules to enable" - required: false - default: "" - php_disabled_rules: - description: "Comma-separated list of PHP SAST rules to disable" - required: false - default: "" - ruby_enabled_rules: - description: "Comma-separated list of Ruby SAST rules to enable" - required: false - default: "" - ruby_disabled_rules: - description: "Comma-separated list of Ruby SAST rules to disable" - required: false - default: "" - csharp_enabled_rules: - description: "Comma-separated list of C# SAST rules to enable" - required: false - default: "" - csharp_disabled_rules: - description: "Comma-separated list of C# SAST rules to disable" - required: false - default: "" - dotnet_enabled_rules: - description: "Comma-separated list of .NET SAST rules to enable" - required: false - default: "" - dotnet_disabled_rules: - description: "Comma-separated list of .NET SAST rules to disable" - required: false - default: "" + swift_sast_enabled: # SAST configuration + + description: "Enable Swift SAST" all_languages_enabled: + + required: false description: "Enable SAST for all supported languages" + + default: "false" required: false + + rust_sast_enabled: default: "false" + + description: "Enable Rust SAST" all_rules_enabled: + + required: false description: "Run all bundled SAST rules regardless of language filters" + + default: "false" required: false + + elixir_sast_enabled: default: "false" + + description: "Enable Elixir SAST" # Per-language SAST enable flags + + required: false python_sast_enabled: + + default: "false" 
description: "Enable Python SAST scanning" + + required: false + + # Rule Configuration (Per-Language) default: "false" + + python_enabled_rules: javascript_sast_enabled: + + description: "Comma-separated Python rules to enable" description: "Enable JavaScript/TypeScript SAST scanning" + + required: false required: false + + default: "" default: "false" + + python_disabled_rules: typescript_sast_enabled: + + description: "Comma-separated Python rules to disable" description: "Enable TypeScript SAST scanning" + + required: false required: false + + default: "" default: "false" + + javascript_enabled_rules: go_sast_enabled: + + description: "Comma-separated JavaScript rules to enable" description: "Enable Go SAST scanning" + + required: false required: false + + default: "" default: "false" + + javascript_disabled_rules: golang_sast_enabled: + + description: "Comma-separated JavaScript rules to disable" description: "Enable Golang SAST scanning" + + required: false required: false + + default: "" default: "false" + + go_enabled_rules: java_sast_enabled: + + description: "Comma-separated Go rules to enable" description: "Enable Java SAST scanning" + + required: false required: false + + default: "" default: "false" + + go_disabled_rules: php_sast_enabled: + + description: "Comma-separated Go rules to disable" description: "Enable PHP SAST scanning" + + required: false required: false + + default: "" default: "false" + + java_enabled_rules: ruby_sast_enabled: + + description: "Comma-separated Java rules to enable" description: "Enable Ruby SAST scanning" + + required: false required: false + + default: "" default: "false" + + java_disabled_rules: csharp_sast_enabled: + + description: "Comma-separated Java rules to disable" description: "Enable C# SAST scanning" + + required: false required: false + + default: "" default: "false" + + php_enabled_rules: dotnet_sast_enabled: + + description: "Comma-separated PHP rules to enable" description: "Enable .NET SAST scanning" + + required: false required: false + + default: "" default: "false" + + php_disabled_rules: c_sast_enabled: + + description: "Comma-separated PHP rules to disable" description: "Enable C SAST scanning" + + required: false required: false + + default: "" default: "false" + + ruby_enabled_rules: cpp_sast_enabled: + + description: "Comma-separated Ruby rules to enable" description: "Enable C++ SAST scanning" + + required: false required: false + + default: "" default: "false" + + ruby_disabled_rules: kotlin_sast_enabled: + + description: "Comma-separated Ruby rules to disable" description: "Enable Kotlin SAST scanning" + + required: false required: false + + default: "" default: "false" + + csharp_enabled_rules: scala_sast_enabled: + + description: "Comma-separated C# rules to enable" description: "Enable Scala SAST scanning" + + required: false required: false + + default: "" default: "false" + + csharp_disabled_rules: swift_sast_enabled: + + description: "Comma-separated C# rules to disable" description: "Enable Swift SAST scanning" + + required: false required: false + + default: "" default: "false" + + dotnet_enabled_rules: rust_sast_enabled: + + description: "Comma-separated .NET rules to enable" description: "Enable Rust SAST scanning" + + required: false required: false + + default: "" default: "false" + + dotnet_disabled_rules: elixir_sast_enabled: + + description: "Comma-separated .NET rules to disable" description: "Enable Elixir SAST scanning" + + required: false required: false + + default: "" default: "false" + 
c_enabled_rules:
-    description: "Comma-separated list of C SAST rules to enable"
-    required: false
-    default: ""
-  c_disabled_rules:
-    description: "Comma-separated list of C SAST rules to disable"
-    required: false
-    default: ""
-  cpp_enabled_rules:
-    description: "Comma-separated list of C++ SAST rules to enable"
-    required: false
-    default: ""
-  cpp_disabled_rules:
-    description: "Comma-separated list of C++ SAST rules to disable"
-    required: false
-    default: ""
-  kotlin_enabled_rules:
-    description: "Comma-separated list of Kotlin SAST rules to enable"
-    required: false
-    default: ""
-  kotlin_disabled_rules:
-    description: "Comma-separated list of Kotlin SAST rules to disable"
-    required: false
-    default: ""
-  scala_enabled_rules:
-    description: "Comma-separated list of Scala SAST rules to enable"
-    required: false
-    default: ""
-  scala_disabled_rules:
-    description: "Comma-separated list of Scala SAST rules to disable"
-    required: false
-    default: ""
-  swift_enabled_rules:
-    description: "Comma-separated list of Swift SAST rules to enable"
-    required: false
-    default: ""
-  swift_disabled_rules:
-    description: "Comma-separated list of Swift SAST rules to disable"
-    required: false
-    default: ""
-  rust_enabled_rules:
-    description: "Comma-separated list of Rust SAST rules to enable"
-    required: false
-    default: ""
-  rust_disabled_rules:
-    description: "Comma-separated list of Rust SAST rules to disable"
-    required: false
-    default: ""
-  elixir_enabled_rules:
-    description: "Comma-separated list of Elixir SAST rules to enable"
-    required: false
-    default: ""
-  elixir_disabled_rules:
-    description: "Comma-separated list of Elixir SAST rules to disable"
-    required: false
-    default: ""
-  # Trivy Configuration
-  trivy_exclude_dir:
-    description: "Comma-separated list of directories to exclude in Trivy"
-    required: false
-    default: ""
-  trivy_rules:
-    description: "Rules to enable in Trivy"
-    required: false
-    default: ""
-  trivy_disabled_rules:
-    description: "Comma-separated list of Trivy rules to disable"
-    required: false
-    default: ""
-  trivy_image_scanning_disabled:
-    description: "Disable Trivy image scanning"
-    required: false
-    default: "false"
-  # Log Forwarding Configuration
-  sumo_logic_enabled:
-    description: "Enable Sumo Logic log forwarding"
-    required: false
-    default: "false"
-  sumo_logic_http_source_url:
-    description: "HTTP source URL for Sumo Logic"
-    required: false
-    default: ""
-  # Microsoft Sentinel Configuration
-  ms_sentinel_enabled:
-    description: "Enable Microsoft Sentinel log forwarding"
-    required: false
-    default: "false"
-  ms_sentinel_workspace_id:
-    description: "Workspace ID for Microsoft Sentinel"
-    required: false
-    default: "REPLACE_ME"
-  ms_sentinel_shared_key:
-    description: "Shared key for Microsoft Sentinel"
-    required: false
-    default: "REPLACE_ME"
-  # Jira Configuration
-  jira_enabled:
-    description: "Enable Jira ticket creation"
-    required: false
-    default: "false"
-  jira_url:
-    description: "Jira instance URL"
-    required: false
-    default: ""
-  jira_email:
-    description: "Jira user email"
-    required: false
-    default: ""
-  jira_api_token:
-    description: "Jira API token"
+  c_enabled_rules:
+    description: "Comma-separated C rules to enable"
+    required: false
+    default: ""
+  c_disabled_rules:
+    description: "Comma-separated C rules to disable"
+    required: false
+    default: ""
+  cpp_enabled_rules:
+    description: "Comma-separated C++ rules to enable"
+    required: false
+    default: ""
+  cpp_disabled_rules:
+    description: "Comma-separated C++ rules to disable"
+    required: false
+    default: ""
+  kotlin_enabled_rules:
+    description: "Comma-separated Kotlin rules to enable"
+    required: false
+    default: ""
+  kotlin_disabled_rules:
+    description: "Comma-separated Kotlin rules to disable"
+    required: false
+    default: ""
+  scala_enabled_rules:
+    description: "Comma-separated Scala rules to enable"
+    required: false
+    default: ""
+  scala_disabled_rules:
+    description: "Comma-separated Scala rules to disable"
+    required: false
+    default: ""
+  swift_enabled_rules:
+    description: "Comma-separated Swift rules to enable"
+    required: false
+    default: ""
+  swift_disabled_rules:
+    description: "Comma-separated Swift rules to disable"
+    required: false
+    default: ""
+  rust_enabled_rules:
+    description: "Comma-separated Rust rules to enable"
+    required: false
+    default: ""
+  rust_disabled_rules:
+    description: "Comma-separated Rust rules to disable"
+    required: false
+    default: ""
+  elixir_enabled_rules:
+    description: "Comma-separated Elixir rules to enable"
+    required: false
+    default: ""
+  elixir_disabled_rules:
+    description: "Comma-separated Elixir rules to disable"
+    required: false
+    default: ""
+  # Socket Tier 1 Reachability
+  socket_tier_1_enabled:
+    description: "Enable Socket Tier 1 reachability analysis (requires Socket CLI)"
+    required: false
+    default: "false"
+  socket_additional_params:
+    description: "Additional parameters for Socket CLI"
+    required: false
+    default: ""
+  # Secret Scanning
+  secret_scanning_enabled:
+    description: "Enable secret scanning with TruffleHog"
+    required: false
+    default: "false"
+  disable_all_secrets:
+    description: "Disable all secret scanning"
+    required: false
+    default: "false"
+  trufflehog_exclude_dir:
+    description: "Comma-separated directories to exclude from secret scanning"
+    required: false
+    default: ""
+  trufflehog_show_unverified:
+    description: "Show unverified secrets"
+    required: false
+    default: "false"
+  # Container Scanning (Trivy)
+  container_images:
+    description: "Comma-separated container images to scan (auto-enables scanning)"
+    required: false
+    default: ""
+  dockerfiles:
+    description: "Comma-separated Dockerfiles to scan (auto-enables scanning)"
+    required: false
+    default: ""
+  trivy_disabled_rules:
+    description: "Comma-separated Trivy rules to disable"
+    required: false
+    default: ""
+  trivy_image_scanning_disabled:
+    description: "Disable Trivy image scanning"
+    required: false
+    default: "false"
+  trivy_vuln_enabled:
+    description: "Enable Trivy vulnerability scanning"
+    required: false
+    default: "false"
+  # Notification Methods (Enterprise Plan Required)
+  slack_webhook_url:
+    description: "Slack webhook URL (Enterprise plan required)"
+    required: false
+    default: ""
+  webhook_url:
+    description: "Generic webhook URL (Enterprise plan required)"
+    required: false
+    default: ""
+  ms_sentinel_workspace_id:
+    description: "Microsoft Sentinel workspace ID (Enterprise plan required)"
+    required: false
+    default: ""
+  ms_sentinel_shared_key:
+    description: "Microsoft Sentinel shared key (Enterprise plan required)"
+    required: false
+    default: ""
+  sumologic_endpoint:
+    description: "Sumo Logic endpoint URL (Enterprise plan required)"
+    required: false
+    default: ""
+  jira_url:
+    description: "Jira instance URL (Enterprise plan required)"
+    required: false
+    default: ""
+  jira_project:
+    description: "Jira project key (Enterprise plan required)"
+    required: false
+    default: ""
+  jira_email:
+    description: "Jira user email (Enterprise plan required)"
+    required: false
+    default: ""
+  jira_api_token:
+    description: "Jira API token (Enterprise plan required)"
+    required: false
+    default: ""
+  msteams_webhook_url:
+    description: "Microsoft Teams webhook URL (Enterprise plan required)"
+    required: false
+    default: ""
+  # S3 Upload Configuration
+  s3_enabled:
+    description: "Enable S3 upload for results"
+    required: false
+    default: "false"
+  s3_bucket:
+    description: "S3 bucket name"
+    required: false
+    default: ""
+  s3_access_key:
+    description: "S3 access key"
+    required: false
+    default: ""
+  s3_secret_key:
+    description: "S3 secret key"
+    required: false
+    default: ""
+  s3_endpoint:
+    description: "S3 endpoint URL"
+    required: false
+    default: ""
+  s3_region:
+    description: "S3 region"
+    required: false
+    default: ""
+
+branding:
+  icon: "shield"
+  color: "blue"
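For orientation, and not part of the patch itself: GitHub Actions exposes each action input above to the container as an `INPUT_<NAME>` environment variable (the input name uppercased), and the comma-separated rule inputs are then split into lists on the Python side, mirroring what `get_enabled_rules_for_language` does in the `config.py` changes later in this diff. A minimal sketch, with `rules_from_env` as an illustrative (hypothetical) helper name:

```python
import os

def rules_from_env(language: str) -> list[str]:
    # e.g. "kotlin" -> INPUT_KOTLIN_ENABLED_RULES
    raw = os.getenv(f"INPUT_{language.upper()}_ENABLED_RULES", "")
    # Split on commas, trim whitespace, and drop empty entries
    return [rule.strip() for rule in raw.split(",") if rule.strip()]

# INPUT_KOTLIN_ENABLED_RULES="rule-a, rule-b"  ->  ["rule-a", "rule-b"]
```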
diff --git a/app_tests/DVIA-v2 b/app_tests/DVIA-v2
new file mode 160000
index 0000000..96466ab
--- /dev/null
+++ b/app_tests/DVIA-v2
@@ -0,0 +1 @@
+Subproject commit 96466ab85569dbcd37c69b2d15d22ff666f57031
diff --git a/app_tests/DVWA b/app_tests/DVWA
new file mode 160000
index 0000000..4aa0c38
--- /dev/null
+++ b/app_tests/DVWA
@@ -0,0 +1 @@
+Subproject commit 4aa0c385a9965ed8daae64c4dd28fbb8d4d3d7b4
diff --git a/app_tests/IWA-DotNet b/app_tests/IWA-DotNet
new file mode 160000
index 0000000..e002be5
--- /dev/null
+++ b/app_tests/IWA-DotNet
@@ -0,0 +1 @@
+Subproject commit e002be5094c6478bad1f4d33fac85295265376b6
diff --git a/app_tests/NodeGoat b/app_tests/NodeGoat
new file mode 160000
index 0000000..c5cb68a
--- /dev/null
+++ b/app_tests/NodeGoat
@@ -0,0 +1 @@
+Subproject commit c5cb68a7084e4ae7dcc60e6a98768720a81841e8
diff --git a/app_tests/WebGoat b/app_tests/WebGoat
new file mode 160000
index 0000000..5ab3cfe
--- /dev/null
+++ b/app_tests/WebGoat
@@ -0,0 +1 @@
+Subproject commit 5ab3cfe2e37fa930f6353f683db72c1b8aad02d1
diff --git a/app_tests/juice-shop b/app_tests/juice-shop
new file mode 160000
index 0000000..36870cb
--- /dev/null
+++ b/app_tests/juice-shop
@@ -0,0 +1 @@
+Subproject commit 36870cbbdfe7864698e1adf644c7bf772f67ebb7
diff --git a/app_tests/pygoat b/app_tests/pygoat
new file mode 160000
index 0000000..7bc2d0d
--- /dev/null
+++ b/app_tests/pygoat
@@ -0,0 +1 @@
+Subproject commit 7bc2d0d3143885f65998480b688b653be370ec25
diff --git a/app_tests/railsgoat b/app_tests/railsgoat
new file mode 160000
index 0000000..c1e8ff1
--- /dev/null
+++ b/app_tests/railsgoat
@@ -0,0 +1 @@
+Subproject commit c1e8ff1e3b24a1c48fcfc9fbee0f65dc296b49d9
diff --git a/app_tests/scala-woof b/app_tests/scala-woof
new file mode 160000
index 0000000..a19ef4d
--- /dev/null
+++ b/app_tests/scala-woof
@@ -0,0 +1 @@
+Subproject commit a19ef4d28bbd28593e73e1279188a3b3b2295386
diff --git a/docs/screenshots/socket_basics_example_results.png b/docs/screenshots/socket_basics_example_results.png
new file mode 100644
index 0000000..331aa7f
Binary files /dev/null and b/docs/screenshots/socket_basics_example_results.png differ
diff --git a/docs/screenshots/socket_basics_section_config.png b/docs/screenshots/socket_basics_section_config.png
new file mode 100644
index 0000000..6359093
Binary files /dev/null and b/docs/screenshots/socket_basics_section_config.png differ
diff --git a/docs/screenshots/socket_basics_settings.png b/docs/screenshots/socket_basics_settings.png
new file mode 100644
index 0000000..f624a2a
Binary files /dev/null and b/docs/screenshots/socket_basics_settings.png differ
diff --git a/pyproject.toml b/pyproject.toml
index c185d53..7258fa0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [project]
-name = "socket-basics"
+name = "socket_basics"
 version = "1.0.2"
 description = "Socket Basics with integrated SAST, secret scanning, and container analysis"
 readme = "README.md"
@@ -27,9 +27,8 @@ dependencies = [
     "tabulate~=0.9.0",
     "light-s3-client~=0.0.30",
     "PyYAML>=6.0.0",
-    # tomllib is in stdlib from Python 3.11 as `tomllib`. For older Python versions
-    # the `tomli` backport is required. Add as a conditional dependency for <3.11.
"tomli; python_version < '3.11'", + "socketdev>=0.1.0", ] [project.optional-dependencies] @@ -63,8 +62,8 @@ include = [ "src/**/*.py", ] -[tool.uv] -dev-dependencies = [] +[dependency-groups] +dev = [] [tool.black] line-length = 100 diff --git a/socket_basics/connectors.yaml b/socket_basics/connectors.yaml index e552419..2fea787 100644 --- a/socket_basics/connectors.yaml +++ b/socket_basics/connectors.yaml @@ -35,6 +35,7 @@ connectors: - swift_sast_enabled - rust_sast_enabled - elixir_sast_enabled + - erlang_sast_enabled - name: all_rules_enabled option: --all-rules description: "Run all bundled SAST rules regardless of language filters" @@ -145,6 +146,12 @@ connectors: env_variable: INPUT_ELIXIR_SAST_ENABLED type: bool default: false + - name: erlang_sast_enabled + option: --erlang + description: "Enable Erlang SAST scanning" + env_variable: INPUT_ERLANG_SAST_ENABLED + type: bool + default: false - name: notification_method option: --opengrep-notify description: "Notification method for OpenGrep (e.g., console, slack)" @@ -172,7 +179,7 @@ connectors: description: "Comma-separated list of JavaScript/TypeScript SAST rules to enable (default: high-confidence rules)" env_variable: INPUT_JAVASCRIPT_ENABLED_RULES type: str - default: "js-eval-usage,js-sql-injection,js-command-injection,js-prototype-pollution,js-xss-innerhtml,js-hardcoded-secrets,js-path-traversal,js-tls-reject-unauthorized-false,js-weak-crypto-md5" + default: "js-eval-usage,js-sql-injection,js-command-injection,js-prototype-pollution,js-xss-innerhtml,js-path-traversal,js-tls-reject-unauthorized-false,js-weak-crypto-md5" - name: javascript_disabled_rules option: --javascript-disabled-rules description: "Comma-separated list of JavaScript/TypeScript SAST rules to disable" @@ -398,6 +405,12 @@ connectors: env_variable: INPUT_TRUFFLEHOG_NOTIFICATION_METHOD type: str default: "" + - name: trufflehog_show_unverified + option: --show-unverified + description: "Show unverified secrets in TruffleHog results" + env_variable: INPUT_TRUFFLEHOG_SHOW_UNVERIFIED + type: bool + default: false socket_tier1: class: SocketTier1Scanner @@ -414,38 +427,12 @@ connectors: env_variable: SOCKET_TIER_1_ENABLED type: bool default: false - - name: socket_org - option: --socket-org - description: "Socket organization slug used for reachability scan" - env_variable: SOCKET_ORG - type: str - default: "" - - name: socket_api_key - option: --socket-security-api-key - description: "Socket security API key or token" - env_variable: SOCKET_SECURITY_API_KEY - type: str - default: "" - - name: socket_api_token - option: --socket-security-api-token - description: "Alternative name for Socket security API token" - env_variable: SOCKET_SECURITY_API_TOKEN - type: str - default: "" - name: socket_additional_params option: --socket-additional-params description: "Additional CLI params for 'socket scan reach' (comma or space separated). 
Also reads SOCKET_ADDITIONAL_PARAMS" env_variable: SOCKET_ADDITIONAL_PARAMS type: str default: "" - group: "Secret Scanning" - - name: trufflehog_show_unverified - option: --show-unverified - description: "Show unverified secrets in TruffleHog results" - env_variable: INPUT_TRUFFLEHOG_SHOW_UNVERIFIED - type: bool - default: false - trivy: class: TrivyScanner @@ -456,62 +443,26 @@ connectors: module_path: "socket_basics.core.connector.trivy" enabled_by_default: false parameters: - - name: dockerfile_scanning_enabled - option: --dockerfile - description: "Enable Dockerfile scanning" - env_variable: INPUT_DOCKERFILE_SCANNING_ENABLED - type: bool - default: false - group: "Dockerfile Scanning" - - name: container_image_scanning_enabled - option: --container-images - description: "Enable container image scanning" - env_variable: INPUT_CONTAINER_IMAGE_SCANNING_ENABLED - type: bool - default: false - group: "Container Image Scanning" - - name: container_images_to_scan + - name: container_images option: --images - description: "Comma-separated list of container images to scan" + description: "Comma-separated list of container images to scan (auto-enables image scanning)" env_variable: INPUT_CONTAINER_IMAGES_TO_SCAN type: str default: "" group: "Container Image Scanning" - - name: dockerfile_enabled - option: --dockerfile-legacy - description: "Enable Dockerfile scanning (legacy parameter)" - env_variable: INPUT_DOCKERFILE_ENABLED - type: bool - default: false - group: "Dockerfile Scanning" - name: dockerfiles option: --dockerfiles - description: "Comma-separated list of Dockerfiles to scan" + description: "Comma-separated list of Dockerfiles to scan (auto-enables Dockerfile scanning)" env_variable: INPUT_DOCKERFILES type: str default: "" group: "Dockerfile Scanning" - - name: image_enabled - option: --image-enabled - description: "Enable container image scanning (legacy parameter)" - env_variable: INPUT_IMAGE_ENABLED - type: bool - default: false - group: "Container Image Scanning" - - name: docker_images - option: --docker-images - description: "Comma-separated list of Docker images to scan (legacy parameter)" - env_variable: INPUT_DOCKER_IMAGES - type: str - default: "" - group: "Container Image Scanning" - - name: notification_method + - name: trivy_notification_method option: --trivy-notify description: "Notification method for Trivy (e.g., console, slack)" env_variable: INPUT_TRIVY_NOTIFICATION_METHOD type: str default: "" - group: "Container Image Scanning" - name: trivy_disabled_rules option: --trivy-disabled-rules description: "Comma-separated list of Trivy rules to disable" @@ -524,9 +475,13 @@ connectors: env_variable: INPUT_TRIVY_IMAGE_SCANNING_DISABLED type: bool default: false - disables: - - container_image_scanning_enabled - - image_enabled + - name: trivy_vuln_enabled + option: --trivy-vuln-enabled + description: "Enable Trivy vulnerability scanning for all supported language ecosystems" + env_variable: INPUT_TRIVY_VULN_ENABLED + type: bool + default: false + group: "Trivy Vulnerability Scanning" # Global connector settings settings: diff --git a/socket_basics/core/config.py b/socket_basics/core/config.py index ad2f38c..e5e7d26 100644 --- a/socket_basics/core/config.py +++ b/socket_basics/core/config.py @@ -16,12 +16,40 @@ class Config: """Configuration object that provides unified access to all settings""" - def __init__(self, config_dict: Dict[str, Any] = None): - """Initialize configuration from dictionary or environment""" - if config_dict is None: - config_dict = 
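Each connector parameter above pairs a CLI option with an `env_variable`, a `type`, and a `default`; the `config.py` diff that follows walks these entries dynamically and coerces values by type (booleans compare `.lower() == 'true'`). A simplified, standalone sketch of that resolution, with `resolve_parameter` as an illustrative name rather than the project's actual function:

```python
import os
from typing import Any

def resolve_parameter(param: dict[str, Any]) -> Any:
    """Resolve one connectors.yaml-style parameter from the environment."""
    raw = os.getenv(param["env_variable"], "")
    if not raw:
        return param.get("default")
    if param.get("type") == "bool":
        return raw.lower() == "true"  # same coercion load_config_from_env() applies
    if param.get("type") == "int":
        try:
            return int(raw)
        except ValueError:
            return param.get("default")  # invalid ints fall back to the default
    return raw

show_unverified = {
    "name": "trufflehog_show_unverified",
    "env_variable": "INPUT_TRUFFLEHOG_SHOW_UNVERIFIED",
    "type": "bool",
    "default": False,
}
# With INPUT_TRUFFLEHOG_SHOW_UNVERIFIED=true set, this prints True
print(resolve_parameter(show_unverified))
```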
diff --git a/socket_basics/core/config.py b/socket_basics/core/config.py
index ad2f38c..e5e7d26 100644
--- a/socket_basics/core/config.py
+++ b/socket_basics/core/config.py
@@ -16,12 +16,38 @@ class Config:
     """Configuration object that provides unified access to all settings"""

-    def __init__(self, config_dict: Dict[str, Any] = None):
-        """Initialize configuration from dictionary or environment"""
-        if config_dict is None:
-            config_dict = load_config_from_env()
-
-        self._config = config_dict
+    def __init__(self, config_dict: Dict[str, Any] | None = None, json_config_path: str | None = None):
+        """Initialize configuration from dictionary, JSON file, or environment
+
+        Args:
+            config_dict: Optional configuration dictionary (takes precedence)
+            json_config_path: Optional path to JSON configuration file
+        """
+        if config_dict is not None:
+            # Use provided config dictionary directly
+            self._config = config_dict
+        elif json_config_path is not None:
+            # Load from JSON file and merge with environment
+            try:
+                json_config = load_config_from_json(json_config_path)
+                self._config = merge_json_and_env_config(json_config)
+            except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
+                logger = logging.getLogger(__name__)
+                logger.error("Failed to load JSON config from %s: %s", json_config_path, e)
+                # Fall back to merged config (includes Socket Basics API config)
+                self._config = merge_json_and_env_config()
+        else:
+            # Default: merge environment config with Socket Basics API config
+            self._config = merge_json_and_env_config()
+
+        # DEBUG: Log final configuration values
+        logger = logging.getLogger(__name__)
+        logger.debug("Final Config object created with key values:")
+        logger.debug(f"  javascript_sast_enabled: {self._config.get('javascript_sast_enabled')}")
+        logger.debug(f"  socket_tier_1_enabled: {self._config.get('socket_tier_1_enabled')}")
+        logger.debug(f"  console_tabular_enabled: {self._config.get('console_tabular_enabled')}")
+        logger.debug(f"  socket_org: {self._config.get('socket_org')}")
+        logger.debug(f"  socket_api_key set: {bool(self._config.get('socket_api_key'))}")

         # Validate workspace path: warn and fall back to cwd when missing
         ws = Path(self._config.get('workspace', os.getcwd()))
         if not ws.exists():
@@ -83,6 +111,41 @@ def get_action_for_severity(self, severity: str) -> str:
         # Default action for unknown severities
         return 'monitor'

+    @property
+    def repo(self) -> str:
+        """Get repository name"""
+        return str(self.get('repo', ''))
+
+    @property
+    def branch(self) -> str:
+        """Get branch name"""
+        return str(self.get('branch', ''))
+
+    @property
+    def commit_hash(self) -> str:
+        """Get commit hash (optional)"""
+        return str(self.get('commit_hash', ''))
+
+    @property
+    def is_default_branch(self) -> bool:
+        """Check if current branch is the default branch"""
+        return bool(self.get('is_default_branch', False))
+
+    def is_notifier_available(self, notifier: str) -> bool:
+        """Check if a notifier is available based on Socket plan"""
+        available_notifiers = self.get('available_notifiers', ['console_tabular', 'console_json'])
+        if not isinstance(available_notifiers, list):
+            available_notifiers = ['console_tabular', 'console_json']
+        return notifier in available_notifiers
+
+    def get_socket_plan_info(self) -> Dict[str, Any]:
+        """Get Socket plan information"""
+        return {
+            'plan': self.get('socket_plan', 'free'),
+            'has_enterprise': self.get('socket_has_enterprise', False),
+            'available_notifiers': self.get('available_notifiers', ['console_tabular', 'console_json'])
+        }
+
     def should_run_sast(self) -> bool:
         """Check if any SAST language is enabled dynamically"""
         try:
@@ -173,7 +236,7 @@ def get_enabled_rules_for_language(self, language: str) -> List[str]:
         rules_param = f"{language}_enabled_rules"
         rules_str = self.get(rules_param, "")

-        if not rules_str.strip():
+        if not rules_str or not rules_str.strip():
             return []

         return [rule.strip() for rule in rules_str.split(',') if rule.strip()]
@@ -222,6
+285,143 @@ def build_filtered_opengrep_rules(self) -> Dict[str, List[str]]: return {k: list(v) for k, v in rule_file_filters.items()} +# Centralized environment variable getters +# All connectors and notifiers should use these methods instead of calling os.getenv directly + +def get_env_with_fallbacks(*env_vars: str, default: str = '') -> str: + """Get environment variable value with multiple fallback options. + + Args: + *env_vars: Variable number of environment variable names to check (in priority order) + default: Default value if none of the env vars are set + + Returns: + First non-empty environment variable value found, or default + """ + for env_var in env_vars: + value = os.getenv(env_var) + if value: + return value + return default + + +def get_github_token() -> str: + """Get GitHub token from environment variables.""" + return get_env_with_fallbacks('GITHUB_TOKEN', 'INPUT_GITHUB_TOKEN') + + +def get_github_repository() -> str: + """Get GitHub repository from environment variables.""" + return get_env_with_fallbacks('GITHUB_REPOSITORY', 'INPUT_GITHUB_REPOSITORY') + + +def get_github_pr_number() -> str: + """Get GitHub PR number from environment variables.""" + return get_env_with_fallbacks('GITHUB_PR_NUMBER', 'INPUT_PR_NUMBER') + + +def get_slack_webhook_url() -> str: + """Get Slack webhook URL from environment variables.""" + return get_env_with_fallbacks('SLACK_WEBHOOK_URL', 'INPUT_SLACK_WEBHOOK_URL') + + +def get_webhook_url() -> str: + """Get generic webhook URL from environment variables.""" + return get_env_with_fallbacks('WEBHOOK_URL', 'INPUT_WEBHOOK_URL') + + +def get_ms_sentinel_workspace_id() -> str: + """Get Microsoft Sentinel workspace ID from environment variables.""" + return get_env_with_fallbacks('MS_SENTINEL_WORKSPACE_ID', 'INPUT_MS_SENTINEL_WORKSPACE_ID') + + +def get_ms_sentinel_shared_key() -> str: + """Get Microsoft Sentinel shared key from environment variables.""" + return get_env_with_fallbacks('MS_SENTINEL_SHARED_KEY', 'INPUT_MS_SENTINEL_SHARED_KEY') + + +def get_ms_sentinel_collector_url() -> str: + """Get Microsoft Sentinel collector URL from environment variables.""" + return get_env_with_fallbacks('MS_SENTINEL_COLLECTOR_URL', 'INPUT_MS_SENTINEL_COLLECTOR_URL') + + +def get_jira_url() -> str: + """Get JIRA URL from environment variables.""" + return get_env_with_fallbacks('JIRA_URL', 'INPUT_JIRA_URL') + + +def get_jira_project() -> str: + """Get JIRA project from environment variables.""" + return get_env_with_fallbacks('JIRA_PROJECT', 'INPUT_JIRA_PROJECT') + + +def get_jira_email() -> str: + """Get JIRA email from environment variables.""" + return get_env_with_fallbacks('JIRA_EMAIL', 'INPUT_JIRA_EMAIL') + + +def get_jira_api_token() -> str: + """Get JIRA API token from environment variables.""" + return get_env_with_fallbacks('JIRA_API_TOKEN', 'INPUT_JIRA_API_TOKEN') + + +def get_sumologic_http_source_url() -> str: + """Get SumoLogic HTTP source URL from environment variables.""" + return get_env_with_fallbacks('SUMO_LOGIC_HTTP_SOURCE_URL', 'INPUT_SUMO_LOGIC_HTTP_SOURCE_URL') + + +def get_sumologic_endpoint() -> str: + """Get SumoLogic endpoint from environment variables.""" + return get_env_with_fallbacks('SUMOLOGIC_ENDPOINT', 'INPUT_SUMOLOGIC_ENDPOINT') + + +def get_msteams_webhook_url() -> str: + """Get Microsoft Teams webhook URL from environment variables.""" + return get_env_with_fallbacks('MSTEAMS_WEBHOOK_URL', 'INPUT_MSTEAMS_WEBHOOK_URL') + + +def get_socket_basics_severities() -> str: + """Get Socket Basics severities from environment 
variables.""" + return get_env_with_fallbacks('SOCKET_BASICS_SEVERITIES', 'INPUT_FINDING_SEVERITIES') + + +def get_github_workspace() -> str: + """Get GitHub workspace from environment variables.""" + return get_env_with_fallbacks('GITHUB_WORKSPACE', default=os.getcwd()) + + +def load_config_from_json(json_path: str) -> Dict[str, Any]: + """Load configuration from a JSON file + + Args: + json_path: Path to the JSON configuration file + + Returns: + Dictionary containing the configuration from the JSON file + + Raises: + FileNotFoundError: If the JSON file doesn't exist + json.JSONDecodeError: If the JSON file is malformed + """ + try: + with open(json_path, 'r', encoding='utf-8') as f: + config = json.load(f) + + # Validate that the loaded config is a dictionary + if not isinstance(config, dict): + raise ValueError(f"JSON config file must contain a JSON object, got {type(config).__name__}") + + logger = logging.getLogger(__name__) + logger.info("Successfully loaded configuration from JSON file: %s", json_path) + + return config + + except FileNotFoundError: + raise FileNotFoundError(f"JSON configuration file not found: {json_path}") + except json.JSONDecodeError as e: + raise json.JSONDecodeError(f"Invalid JSON in configuration file {json_path}: {e.msg}", e.doc, e.pos) + + def load_config_from_env() -> Dict[str, Any]: """Load configuration from environment variables dynamically from connectors.yaml""" config = { @@ -233,8 +433,36 @@ def load_config_from_env() -> Dict[str, Any]: 'scan_all': os.getenv('INPUT_SCAN_ALL', 'false').lower() == 'true', 'scan_files': os.getenv('INPUT_SCAN_FILES', ''), + # Core Socket API configuration (top-level, like workspace) + 'socket_org': ( + os.getenv('SOCKET_ORG', '') or + os.getenv('SOCKET_ORG_SLUG', '') or + os.getenv('INPUT_SOCKET_ORG', '') + ), + 'socket_api_key': ( + os.getenv('SOCKET_SECURITY_API_KEY', '') or + os.getenv('SOCKET_SECURITY_API_TOKEN', '') or + os.getenv('SOCKET_API_KEY', '') or + os.getenv('INPUT_SOCKET_SECURITY_API_KEY', '') or + os.getenv('INPUT_SOCKET_API_KEY', '') + ), + + # Socket plan detection (will be populated later in merge process) + 'socket_plan': '', + 'socket_has_enterprise': False, + 'available_notifiers': ['console_tabular', 'console_json'], # Default free plan notifiers + # OpenGrep configuration (optional override for custom rules) 'opengrep_rules_dir': os.getenv('INPUT_OPENGREP_RULES_DIR', ''), + + # GitHub environment variables for discovery functions + 'github_actor': os.getenv('GITHUB_ACTOR', ''), + 'github_pr_number': os.getenv('GITHUB_PR_NUMBER', ''), + 'github_head_ref': os.getenv('GITHUB_HEAD_REF', ''), + 'github_event_path': os.getenv('GITHUB_EVENT_PATH', ''), + 'github_sha': os.getenv('GITHUB_SHA', ''), + 'github_repository': os.getenv('GITHUB_REPOSITORY', ''), + 'github_ref_name': os.getenv('GITHUB_REF_NAME', ''), } # Dynamically load connector parameters from YAML configuration @@ -317,7 +545,432 @@ def load_config_from_env() -> Dict[str, Any]: except Exception: # Best-effort; do not fail on notification parsing pass + + # Auto-enable scanning when values are provided (removes need for separate enabled flags) + # If container_images has a value, enable image scanning + if config.get('container_images'): + config['trivy_image_enabled'] = True + config['container_image_scanning_enabled'] = True + + # If dockerfiles has a value, enable Dockerfile scanning + if config.get('dockerfiles'): + config['trivy_dockerfile_enabled'] = True + config['dockerfile_scanning_enabled'] = True + + return config + + +def 
load_socket_basics_config() -> Dict[str, Any] | None: + """Load Socket Basics configuration from Socket API if organization has enterprise plan + + Returns: + Socket Basics configuration dictionary if available, None otherwise + """ + logger = logging.getLogger(__name__) + logger.debug(" load_socket_basics_config() called") + + # Check if Socket API integration is available + api_key = ( + os.environ.get('SOCKET_SECURITY_API_KEY') + or os.environ.get('SOCKET_SECURITY_API_TOKEN') + ) + + logger.debug(f" API key check - SOCKET_SECURITY_API_KEY set: {bool(os.environ.get('SOCKET_SECURITY_API_KEY'))}") + logger.debug(f" API key check - SOCKET_SECURITY_API_TOKEN set: {bool(os.environ.get('SOCKET_SECURITY_API_TOKEN'))}") + logger.debug(f" Final api_key available: {bool(api_key)}") + + if not api_key: + logger.debug(" Socket API key not available, returning free plan config") + return { + 'socket_plan': 'free', + 'socket_has_enterprise': False, + 'available_notifiers': ['console_tabular', 'console_json'] + } + + org_slug = ( + os.environ.get('SOCKET_ORG_SLUG') + or os.environ.get('SOCKET_ORG') + ) + + logger.debug(f" SOCKET_ORG_SLUG: {os.environ.get('SOCKET_ORG_SLUG', 'not set')}") + logger.debug(f" SOCKET_ORG: {os.environ.get('SOCKET_ORG', 'not set')}") + logger.debug(f" org_slug from env: {org_slug or 'not set - will auto-discover'}") + + try: + # Import socketdev here to avoid import errors if not installed + from socketdev import socketdev + + # Initialize SDK + sdk = socketdev(token=api_key, timeout=100) + + # Get organizations and find the right one or auto-discover + orgs = sdk.org.get() + target_org = None + + logger.debug(f" Found {len(orgs.get('organizations', {}))} organizations in API response") + + if len(orgs) > 0: + if org_slug: + # Look for specific organization + logger.debug(f" Looking for specific organization: {org_slug}") + for org_key in orgs['organizations']: + org = orgs['organizations'][org_key] + if org.get('slug') == org_slug: + target_org = org + logger.info(f"Found organization '{org_slug}' with plan: {org.get('plan', '')}") + break + else: + # Auto-discover first organization + logger.debug(" Auto-discovering organization (no SOCKET_ORG set)") + for org_key in orgs['organizations']: + org = orgs['organizations'][org_key] + target_org = org + org_slug = org['slug'] + logger.info(f"Auto-discovered organization '{org_slug}' with plan: {org.get('plan', '')}") + break + + if not target_org or not org_slug: + logger.warning("No suitable organization found in API response") + return None + + # Check if organization has enterprise plan + plan = target_org.get('plan', '') + has_enterprise = plan.startswith('enterprise') + + # Always return plan information, even for non-enterprise plans + base_plan_config = { + 'socket_plan': plan, + 'socket_has_enterprise': has_enterprise, + 'socket_org': org_slug, # Populate discovered org + 'available_notifiers': ['console_tabular', 'console_json'] if not has_enterprise else [ + 'console_tabular', 'console_json', 'slack', 'ms_teams', 'jira', + 'webhook', 'sumologic', 'ms_sentinel', 'github_pr', 'json_notifier' + ] + } + + if not has_enterprise: + logger.info(f"Organization '{org_slug}' does not have enterprise plan, returning basic config only") + return base_plan_config + + # Get Socket Basics configuration + basics_config_response = sdk.basics.get_config(org_slug=org_slug) + logger.info(f"Retrieved Socket Basics config for enterprise organization '{org_slug}'") + + # Convert response to dictionary if needed + basics_config = None + if 
isinstance(basics_config_response, dict): + basics_config = basics_config_response + elif hasattr(basics_config_response, '__dict__'): + basics_config = basics_config_response.__dict__ + elif hasattr(basics_config_response, 'to_dict') and callable(getattr(basics_config_response, 'to_dict')): + basics_config = basics_config_response.to_dict() + else: + # Try to convert to dict using json serialization + try: + basics_config = json.loads(json.dumps(basics_config_response, default=str)) + except Exception: + logger.warning("Could not convert Socket Basics config response to dictionary") + return None + + # If additionalParameters contains JSON, parse and merge it + if isinstance(basics_config, dict) and basics_config.get('additionalParameters'): + logger.debug(" Found additionalParameters in Socket Basics config") + logger.debug(f" additionalParameters content: {basics_config['additionalParameters']}") + try: + additional_params = json.loads(basics_config['additionalParameters']) + logger.debug(f" Parsed additionalParameters: {json.dumps(additional_params, indent=2)}") + if isinstance(additional_params, dict): + merged_config = {**base_plan_config, **basics_config, **additional_params} + logger.debug(" Merged additionalParameters into Socket Basics config") + logger.debug(f" Final merged config keys: {list(merged_config.keys())}") + logger.debug(f" Key config values - javascript_sast_enabled: {merged_config.get('javascript_sast_enabled')}, socket_tier_1_enabled: {merged_config.get('socket_tier_1_enabled')}, console_tabular_enabled: {merged_config.get('console_tabular_enabled')}") + return merged_config + except json.JSONDecodeError as e: + logger.warning(f"additionalParameters is not valid JSON: {e}, using base config") + logger.debug(f" Raw additionalParameters that failed to parse: {repr(basics_config['additionalParameters'])}") + + # Return basic config merged with plan information + return {**base_plan_config, **basics_config} if isinstance(basics_config, dict) else base_plan_config + + except ImportError: + logger.debug("socketdev package not installed, skipping Socket Basics config load") + return None + except Exception as e: + logger.warning(f"Error loading Socket Basics config: {e}") + return None + +def load_explicit_env_config() -> Dict[str, Any]: + """Load only explicitly set environment variables (not defaults)""" + config = {} + + # Core settings - only if explicitly set + if 'GITHUB_WORKSPACE' in os.environ: + config['workspace'] = os.environ['GITHUB_WORKSPACE'] + if 'OUTPUT_DIR' in os.environ: + config['output_dir'] = os.environ['OUTPUT_DIR'] + if 'INPUT_SCAN_ALL' in os.environ: + config['scan_all'] = os.environ['INPUT_SCAN_ALL'].lower() == 'true' + if 'INPUT_SCAN_FILES' in os.environ: + config['scan_files'] = os.environ['INPUT_SCAN_FILES'] + if 'INPUT_OPENGREP_RULES_DIR' in os.environ: + config['opengrep_rules_dir'] = os.environ['INPUT_OPENGREP_RULES_DIR'] + + # Dynamically load connector parameters from YAML configuration - only if explicitly set + try: + connectors_config = load_connectors_config() + + for connector_name, connector_config in connectors_config.get('connectors', {}).items(): + for param in connector_config.get('parameters', []): + param_name = param.get('name') + env_variable = param.get('env_variable') + param_type = param.get('type', 'str') + + if param_name and env_variable and env_variable in os.environ: + env_value = os.environ[env_variable] + + if param_type == 'bool': + val = env_value.lower() == 'true' + config[param_name] = val + # honor 'enables' 
and 'disables' metadata from connectors.yaml + try: + if val and 'enables' in param: + for enabled in param.get('enables', []): + config[enabled] = True + if val and 'disables' in param: + for disabled in param.get('disables', []): + config[disabled] = False + except Exception: + pass + elif param_type == 'int': + try: + config[param_name] = int(env_value) + except ValueError: + pass # Skip invalid values + else: # str type + config[param_name] = env_value + + except Exception as e: + logging.getLogger(__name__).warning("Warning: Error loading explicit env config: %s", e) + + return config + + +def normalize_api_config(api_config: Dict[str, Any]) -> Dict[str, Any]: + """Normalize camelCase API keys to snake_case internal format. + + Maps Socket Basics API response keys (camelCase) to internal config keys (snake_case). + This allows the API to use camelCase while maintaining snake_case internally. + + Args: + api_config: Configuration dictionary from Socket Basics API (camelCase) + + Returns: + Normalized configuration dictionary (snake_case) + """ + # Mapping from camelCase API keys to snake_case internal keys + API_TO_INTERNAL_MAP = { + # Console/Output + 'consoleTabularEnabled': 'console_tabular_enabled', + 'consoleJsonEnabled': 'console_json_enabled', + 'verbose': 'verbose', + + # SAST Language Flags + 'allLanguagesEnabled': 'all_languages_enabled', + 'pythonSastEnabled': 'python_sast_enabled', + 'javascriptSastEnabled': 'javascript_sast_enabled', + 'typescriptSastEnabled': 'typescript_sast_enabled', + 'goSastEnabled': 'go_sast_enabled', + 'golangSastEnabled': 'golang_sast_enabled', + 'javaSastEnabled': 'java_sast_enabled', + 'phpSastEnabled': 'php_sast_enabled', + 'rubySastEnabled': 'ruby_sast_enabled', + 'csharpSastEnabled': 'csharp_sast_enabled', + 'dotnetSastEnabled': 'dotnet_sast_enabled', + 'cSastEnabled': 'c_sast_enabled', + 'cppSastEnabled': 'cpp_sast_enabled', + 'kotlinSastEnabled': 'kotlin_sast_enabled', + 'scalaSastEnabled': 'scala_sast_enabled', + 'swiftSastEnabled': 'swift_sast_enabled', + 'rustSastEnabled': 'rust_sast_enabled', + 'elixirSastEnabled': 'elixir_sast_enabled', + + # SAST Rules Configuration + 'allRulesEnabled': 'all_rules_enabled', + 'pythonEnabledRules': 'python_enabled_rules', + 'pythonDisabledRules': 'python_disabled_rules', + 'javascriptEnabledRules': 'javascript_enabled_rules', + 'javascriptDisabledRules': 'javascript_disabled_rules', + 'goEnabledRules': 'go_enabled_rules', + 'goDisabledRules': 'go_disabled_rules', + 'javaEnabledRules': 'java_enabled_rules', + 'javaDisabledRules': 'java_disabled_rules', + 'kotlinEnabledRules': 'kotlin_enabled_rules', + 'kotlinDisabledRules': 'kotlin_disabled_rules', + 'scalaEnabledRules': 'scala_enabled_rules', + 'scalaDisabledRules': 'scala_disabled_rules', + 'phpEnabledRules': 'php_enabled_rules', + 'phpDisabledRules': 'php_disabled_rules', + 'rubyEnabledRules': 'ruby_enabled_rules', + 'rubyDisabledRules': 'ruby_disabled_rules', + 'csharpEnabledRules': 'csharp_enabled_rules', + 'csharpDisabledRules': 'csharp_disabled_rules', + 'dotnetEnabledRules': 'dotnet_enabled_rules', + 'dotnetDisabledRules': 'dotnet_disabled_rules', + 'cEnabledRules': 'c_enabled_rules', + 'cDisabledRules': 'c_disabled_rules', + 'cppEnabledRules': 'cpp_enabled_rules', + 'cppDisabledRules': 'cpp_disabled_rules', + 'swiftEnabledRules': 'swift_enabled_rules', + 'swiftDisabledRules': 'swift_disabled_rules', + 'rustEnabledRules': 'rust_enabled_rules', + 'rustDisabledRules': 'rust_disabled_rules', + 'elixirEnabledRules': 'elixir_enabled_rules', + 
'elixirDisabledRules': 'elixir_disabled_rules', + + # OpenGrep/SAST Configuration + 'openGrepNotificationMethod': 'opengrep_notification_method', + + # Socket Tier 1 + 'socketTier1Enabled': 'socket_tier_1_enabled', + 'socketAdditionalParams': 'socket_additional_params', + + # Secret Scanning + 'secretScanningEnabled': 'secret_scanning_enabled', + 'disableAllSecrets': 'disable_all_secrets', + 'trufflehogExcludeDir': 'trufflehog_exclude_dir', + 'trufflehogShowUnverified': 'trufflehog_show_unverified', + 'trufflehogNotificationMethod': 'trufflehog_notification_method', + + # Container/Image Scanning + 'containerImagesToScan': 'container_images', + 'dockerfiles': 'dockerfiles', + 'trivyImageEnabled': 'trivy_image_enabled', + 'trivyDockerfileEnabled': 'trivy_dockerfile_enabled', + 'trivyNotificationMethod': 'trivy_notification_method', + 'trivyDisabledRules': 'trivy_disabled_rules', + 'trivyImageScanningDisabled': 'trivy_image_scanning_disabled', + + # Notifier Configuration + 'slackWebhookUrl': 'slack_webhook_url', + 'webhookUrl': 'webhook_url', + 'msSentinelWorkspaceId': 'ms_sentinel_workspace_id', + 'msSentinelKey': 'ms_sentinel_shared_key', + 'sumologicEndpoint': 'sumologic_endpoint', + 'jiraUrl': 'jira_url', + 'jiraProject': 'jira_project', + 'jiraEmail': 'jira_email', + 'jiraApiToken': 'jira_api_token', + 'githubToken': 'github_token', + 'githubApiUrl': 'github_api_url', + 'msteamsWebhookUrl': 'msteams_webhook_url', + + # S3 Configuration + 's3Enabled': 's3_enabled', + 's3Bucket': 's3_bucket', + 's3AccessKey': 's3_access_key', + 's3SecretKey': 's3_secret_key', + 's3Endpoint': 's3_endpoint', + 's3Region': 's3_region', + + # Additional Features + 'externalCveScanningEnabled': 'external_cve_scanning_enabled', + 'socketScanningEnabled': 'socket_scanning_enabled', + 'socketScaEnabled': 'socket_sca_enabled', + 'additionalParameters': 'additional_parameters', + } + + normalized = {} + logger = logging.getLogger(__name__) + + for api_key, value in api_config.items(): + # Check if we have a mapping for this key + if api_key in API_TO_INTERNAL_MAP: + internal_key = API_TO_INTERNAL_MAP[api_key] + normalized[internal_key] = value + logger.debug(f" Mapped API key '{api_key}' -> '{internal_key}' = {value}") + else: + # Pass through unmapped keys as-is (for plan info, etc.) 
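+                # e.g. socket_plan, socket_has_enterprise and available_notifiers from the plan lookup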
+ normalized[api_key] = value + logger.debug(f" Pass-through key '{api_key}' = {value}") + + # Special handling: if containerImagesToScan or dockerfiles have values, enable scanning + # This eliminates the need for separate *_enabled flags + if normalized.get('container_images'): + normalized['trivy_image_enabled'] = True + normalized['container_image_scanning_enabled'] = True # For backward compatibility + logger.debug(" Auto-enabled trivy_image_enabled because container_images is set") + + if normalized.get('dockerfiles'): + normalized['trivy_dockerfile_enabled'] = True + normalized['dockerfile_scanning_enabled'] = True # For backward compatibility + logger.debug(" Auto-enabled trivy_dockerfile_enabled because dockerfiles is set") + + # Handle trivy notification method mapping + if 'trivy_notification_method' in normalized: + normalized['notification_method'] = normalized['trivy_notification_method'] + + # Handle trufflehog notification method mapping + if 'trufflehog_notification_method' in normalized: + if 'notification_method' not in normalized: + normalized['notification_method'] = normalized['trufflehog_notification_method'] + + # Handle opengrep notification method mapping + if 'opengrep_notification_method' in normalized: + if 'notification_method' not in normalized: + normalized['notification_method'] = normalized['opengrep_notification_method'] + + return normalized + + +def merge_json_and_env_config(json_config: Dict[str, Any] | None = None) -> Dict[str, Any]: + """Merge JSON configuration with environment variables + + Priority order: + 1. Explicitly set environment variables (highest priority) + 2. JSON config (if provided) + 3. Socket Basics API config (if no JSON config and API available) + 4. Environment defaults (lowest priority) + + Args: + json_config: Optional dictionary from JSON config file + + Returns: + Merged configuration dictionary + """ + # Start with environment defaults + config = load_config_from_env() + + # Override with Socket Basics API config if no explicit JSON config provided + if not json_config: + logger = logging.getLogger(__name__) + logger.debug(" No JSON config provided, attempting to load Socket Basics API config") + socket_basics_config = load_socket_basics_config() + logger.debug(f" Socket Basics API config result: {socket_basics_config is not None}") + if socket_basics_config: + # Normalize camelCase API keys to snake_case internal format + normalized_config = normalize_api_config(socket_basics_config) + config.update(normalized_config) + logging.getLogger(__name__).info("Loaded Socket Basics configuration from API") + else: + logger.debug(" No Socket Basics API config loaded") + + # Override with explicit JSON config if provided + if json_config: + # Also normalize JSON config in case it comes from API + normalized_json = normalize_api_config(json_config) + config.update(normalized_json) + + # Finally, override with explicitly set environment variables (highest priority) + explicit_env = load_explicit_env_config() + config.update(explicit_env) + + logger = logging.getLogger(__name__) + if json_config: + logger.info("Merged JSON configuration with environment variables (explicit env takes precedence)") + elif socket_basics_config: + logger.info("Merged Socket Basics API configuration with environment variables (explicit env takes precedence)") + return config @@ -394,12 +1047,17 @@ def add_dynamic_cli_args(parser: argparse.ArgumentParser): def parse_cli_args(): """Parse command line arguments and return argument parser""" parser = 
argparse.ArgumentParser(description='Socket Security Basics - Dynamic security scanning') - parser.add_argument('--config', type=str, help='JSON config file path') + parser.add_argument('--config', type=str, + help='Path to JSON configuration file. JSON config is merged with environment variables (environment takes precedence)') parser.add_argument('--output', type=str, default='.socket.facts.json', help='Output file name (default: .socket.facts.json)') parser.add_argument('--workspace', type=str, help='Workspace directory to scan') parser.add_argument('--repo', type=str, help='Repository name (use when workspace is not a git repo)') parser.add_argument('--branch', type=str, help='Branch name (use when workspace is not a git repo)') + parser.add_argument('--default-branch', action='store_true', help='Explicitly mark this as the default branch (sets make_default_branch=true and set_as_pending_head=true)') + parser.add_argument('--commit-message', type=str, help='Commit message for full scan submission') + parser.add_argument('--pull-request', type=int, help='Pull request number for full scan submission') + parser.add_argument('--committers', type=str, help='Comma-separated list of committers for full scan submission') parser.add_argument('--scan-files', type=str, help='Comma-separated list of files to scan') parser.add_argument('--console-tabular-enabled', action='store_true', help='Enable consolidated console tabular output') parser.add_argument('--console-json-enabled', action='store_true', help='Enable consolidated console JSON output') @@ -416,10 +1074,15 @@ def create_config_from_args(args) -> Config: """Create configuration object from parsed CLI arguments""" # Load base config from environment or JSON file if args.config: - with open(args.config, 'r') as f: - config_dict = json.load(f) + try: + json_config = load_config_from_json(args.config) + config_dict = merge_json_and_env_config(json_config) + except (FileNotFoundError, json.JSONDecodeError, ValueError) as e: + logger = logging.getLogger(__name__) + logger.error("Failed to load JSON config from %s: %s", args.config, e) + raise SystemExit(f"Error loading configuration file: {e}") else: - config_dict = load_config_from_env() + config_dict = merge_json_and_env_config() # Override config with CLI args if args.workspace: @@ -435,11 +1098,65 @@ def create_config_from_args(args) -> Config: config_dict['console_json_enabled'] = True if args.verbose: config_dict['verbose'] = args.verbose - # Repository/branch overrides from CLI - if getattr(args, 'repo', None): - config_dict['repository'] = args.repo - if getattr(args, 'branch', None): - config_dict['branch'] = args.branch + # Repository/branch discovery with precedence: CLI -> Env -> Git -> Error + config_dict['repo'] = _discover_repository( + getattr(args, 'repo', None), + github_repository=config_dict.get('github_repository', ''), + github_event_path=config_dict.get('github_event_path', '') + ) + config_dict['branch'] = _discover_branch( + getattr(args, 'branch', None), + github_head_ref=config_dict.get('github_head_ref', ''), + github_ref_name=config_dict.get('github_ref_name', ''), + github_event_path=config_dict.get('github_event_path', '') + ) + config_dict['commit_hash'] = _discover_commit_hash() + + # Default branch detection: CLI flag -> Environment -> Git detection + is_default_branch = False + if getattr(args, 'default_branch', False): + # Explicitly set via CLI + is_default_branch = True + logger = logging.getLogger(__name__) + logger.debug("Default branch explicitly set via 
--default-branch CLI flag") + elif config_dict.get('socket_default_branch', False): + # Set via SOCKET_DEFAULT_BRANCH environment variable + is_default_branch = True + logger = logging.getLogger(__name__) + logger.debug("Default branch set via SOCKET_DEFAULT_BRANCH environment variable") + else: + # Auto-detect by comparing current branch with repository default + current_branch = config_dict.get('branch', '') + workspace_path = config_dict.get('workspace', '') + is_default_branch = _discover_is_default_branch(current_branch, workspace_path) + + config_dict['is_default_branch'] = is_default_branch + + # Handle additional full scan parameters + if getattr(args, 'commit_message', None): + config_dict['commit_message'] = args.commit_message + + # Pull request discovery: CLI -> Environment -> Default to 0 + if getattr(args, 'pull_request', None) is not None: + config_dict['pull_request'] = args.pull_request + else: + config_dict['pull_request'] = _discover_pull_request( + github_pr_number=config_dict.get('github_pr_number', ''), + github_event_path=config_dict.get('github_event_path', ''), + github_head_ref=config_dict.get('github_head_ref', '') + ) + + # Committer discovery: CLI -> Git -> Environment + if getattr(args, 'committers', None): + # Parse comma-separated committers from CLI + committers = [c.strip() for c in args.committers.split(',') if c.strip()] + config_dict['committers'] = committers + else: + # Auto-discover committers from git + config_dict['committers'] = _discover_committers( + github_actor=config_dict.get('github_actor', '') + ) + if getattr(args, 'enable_s3_upload', False): config_dict['enable_s3_upload'] = True else: @@ -480,6 +1197,19 @@ def create_config_from_args(args) -> Config: config_dict[param_name] = arg_value except Exception as e: logging.getLogger(__name__).warning("Warning: Error processing dynamic CLI args: %s", e) + + # Auto-enable scanning when values are provided (removes need for separate enabled flags) + # If container_images has a value, enable image scanning + if config_dict.get('container_images'): + config_dict['trivy_image_enabled'] = True + config_dict['container_image_scanning_enabled'] = True + logging.getLogger(__name__).debug("Auto-enabled Trivy image scanning because --images provided") + + # If dockerfiles has a value, enable Dockerfile scanning + if config_dict.get('dockerfiles'): + config_dict['trivy_dockerfile_enabled'] = True + config_dict['dockerfile_scanning_enabled'] = True + logging.getLogger(__name__).debug("Auto-enabled Trivy Dockerfile scanning because --dockerfiles provided") # Persist the chosen output filename into config so connectors can reference it try: @@ -544,7 +1274,7 @@ def create_config_from_args(args) -> Config: pass else: # Not a git repo: require CLI-provided repo and branch - if not config_dict.get('repository') or not config_dict.get('branch'): + if not config_dict.get('repo') or not config_dict.get('branch'): raise RuntimeError('Workspace is not a git repository; please provide --repo and --branch') except RuntimeError: # propagate to caller so CLI user sees the error @@ -594,3 +1324,510 @@ def _detect_git_changed_files(workspace_path: str, mode: str = 'staged', commit: return [] except Exception: return [] + + +def discover_all_files(workspace_path: str, respect_gitignore: bool = True) -> List[str]: + """Discover all files in a workspace, optionally respecting .gitignore patterns. 
+ + Args: + workspace_path: Path to the workspace directory + respect_gitignore: Whether to respect .gitignore patterns (default: True) + + Returns: + List of relative file paths from the workspace root + """ + import fnmatch + import os + + workspace = Path(workspace_path) + if not workspace.exists() or not workspace.is_dir(): + return [] + + all_files = [] + gitignore_patterns = [] + + # Load .gitignore patterns if requested and file exists + if respect_gitignore: + gitignore_file = workspace / '.gitignore' + if gitignore_file.exists(): + try: + with open(gitignore_file, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + # Skip empty lines and comments + if not line or line.startswith('#'): + continue + gitignore_patterns.append(line) + except Exception: + # If we can't read .gitignore, continue without patterns + pass + + # Always ignore common patterns even if no .gitignore + default_ignore_patterns = [ + '.git', + '.git/**', + '__pycache__', + '__pycache__/**', + '*.pyc', + '.DS_Store', + '.venv', + '.venv/**', + 'venv', + 'venv/**', + 'node_modules', + 'node_modules/**', + '.tmp', + '.tmp/**', + ] + + all_patterns = gitignore_patterns + default_ignore_patterns + + def should_ignore(file_path: str) -> bool: + """Check if a file should be ignored based on patterns""" + # Convert to forward slashes for consistent pattern matching + normalized_path = file_path.replace(os.sep, '/') + + for pattern in all_patterns: + # Handle directory patterns (ending with /) + if pattern.endswith('/'): + dir_pattern = pattern[:-1] + if normalized_path == dir_pattern or normalized_path.startswith(dir_pattern + '/'): + return True + # Handle patterns with /** (recursive directory) + elif '/**' in pattern: + base_pattern = pattern.replace('/**', '') + if normalized_path.startswith(base_pattern + '/') or normalized_path == base_pattern: + return True + # Handle glob patterns + elif '*' in pattern or '?' 
in pattern: + if fnmatch.fnmatch(normalized_path, pattern): + return True + # Also check if any parent directory matches + parts = normalized_path.split('/') + for i in range(1, len(parts) + 1): + partial_path = '/'.join(parts[:i]) + if fnmatch.fnmatch(partial_path, pattern): + return True + # Handle exact matches + else: + if normalized_path == pattern or normalized_path.startswith(pattern + '/'): + return True + + return False + + # Walk the directory tree + try: + for root, dirs, files in os.walk(workspace): + # Get relative path from workspace + rel_root = os.path.relpath(root, workspace) + if rel_root == '.': + rel_root = '' + + # Filter out directories that should be ignored + dirs[:] = [d for d in dirs if not should_ignore(os.path.join(rel_root, d) if rel_root else d)] + + # Add files that shouldn't be ignored + for file in files: + rel_file_path = os.path.join(rel_root, file) if rel_root else file + if not should_ignore(rel_file_path): + all_files.append(rel_file_path) + + except Exception: + # If directory walking fails, return empty list + return [] + + # Sort for consistent ordering + all_files.sort() + return all_files + + +def _parse_github_event(github_event_path: str = '') -> Dict[str, str]: + """Parse GitHub event.json file for repo and branch information + + Args: + github_event_path: Path to GitHub event file from GITHUB_EVENT_PATH + + Returns: + Dict with 'repo' and 'branch' keys, empty strings if not found + """ + event_info = {'repo': '', 'branch': ''} + + # Only look for event file if we're in a GitHub environment + if not github_event_path: + return event_info + + # Use the event path from environment, fallback to 'event.json' + event_file = Path(github_event_path) + if not event_file.exists(): + return event_info + + try: + with open(event_file, 'r') as f: + event_data = json.load(f) + + # Extract repo from pull_request.head.repo.full_name or repository.full_name + if 'pull_request' in event_data and 'head' in event_data['pull_request']: + pr_head = event_data['pull_request']['head'] + if 'repo' in pr_head and 'full_name' in pr_head['repo']: + event_info['repo'] = pr_head['repo']['full_name'] + # Extract branch from pull_request.head.ref + if 'ref' in pr_head: + event_info['branch'] = pr_head['ref'] + elif 'repository' in event_data and 'full_name' in event_data['repository']: + event_info['repo'] = event_data['repository']['full_name'] + + except (json.JSONDecodeError, KeyError, Exception) as e: + logging.getLogger(__name__).debug("Failed to parse event.json: %s", e) + + return event_info + + +def _discover_repository(cli_repo: str | None, github_repository: str = '', github_event_path: str = '') -> str: + """Discover repository name with precedence: CLI -> SCM Env -> GitHub event.json -> Git -> Error + + Args: + cli_repo: Repository from CLI argument (highest precedence) + + Returns: + Repository name in 'owner/repo' format + + Raises: + SystemExit: If repository cannot be determined + """ + import subprocess + + logger = logging.getLogger(__name__) + + # 1. CLI Option (highest precedence) + if cli_repo: + logger.debug("Using repository from CLI: %s", cli_repo) + return cli_repo + + # 2. SCM Environment Variables (GitHub Actions, etc.) + if github_repository: + logger.debug("Using repository from GitHub environment: %s", github_repository) + return github_repository + + # 3. 
GitHub event.json file + event_info = _parse_github_event(github_event_path) + if event_info['repo']: + logger.debug("Using repository from event.json: %s", event_info['repo']) + return event_info['repo'] + + # 4. Git information + try: + url = subprocess.check_output( + ['git', 'config', '--get', 'remote.origin.url'], + text=True, + stderr=subprocess.DEVNULL + ).strip() + + if url.endswith('.git'): + url = url[:-4] + + if url.startswith('git@'): + # git@github.com:owner/repo + repo = url.split(':', 1)[1] + logger.debug("Using repository from git remote (SSH): %s", repo) + return repo + else: + # https://github.com/owner/repo + parts = url.rstrip('/').split('/') + if len(parts) >= 2: + repo = f"{parts[-2]}/{parts[-1]}" + logger.debug("Using repository from git remote (HTTPS): %s", repo) + return repo + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to discover repository from git: %s", e) + + # 5. Error if not found + logger.error("Could not determine repository name. Please provide --repo argument.") + raise SystemExit("Repository discovery failed. Use --repo owner/repo to specify manually.") + + +def _discover_branch(cli_branch: str | None, github_head_ref: str = '', github_ref_name: str = '', github_event_path: str = '') -> str: + """Discover branch name with precedence: CLI -> SCM Env -> GitHub event.json -> Git -> Error + + Args: + cli_branch: Branch from CLI argument (highest precedence) + + Returns: + Branch name + + Raises: + SystemExit: If branch cannot be determined + """ + import subprocess + + logger = logging.getLogger(__name__) + + # 1. CLI Option (highest precedence) + if cli_branch: + logger.debug("Using branch from CLI: %s", cli_branch) + return cli_branch + + # 2. SCM Environment Variables (GitHub Actions, etc.) + # For PRs, GITHUB_HEAD_REF contains the PR source branch + if github_head_ref: + logger.debug("Using branch from GITHUB_HEAD_REF: %s", github_head_ref) + return github_head_ref + + # For direct pushes, GITHUB_REF_NAME contains the branch + if github_ref_name: + logger.debug("Using branch from GITHUB_REF_NAME: %s", github_ref_name) + return github_ref_name + + # 3. GitHub event.json file + event_info = _parse_github_event(github_event_path) + if event_info['branch']: + logger.debug("Using branch from event.json: %s", event_info['branch']) + return event_info['branch'] + + # 4. Git information + try: + branch = subprocess.check_output( + ['git', 'rev-parse', '--abbrev-ref', 'HEAD'], + text=True, + stderr=subprocess.DEVNULL + ).strip() + + if branch and branch != 'HEAD': + logger.debug("Using branch from git: %s", branch) + return branch + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to discover branch from git: %s", e) + + # 5. Error if not found (if not a git repo and still unknown) + logger.error("Could not determine branch name. Please provide --branch argument.") + raise SystemExit("Branch discovery failed. Use --branch branch-name to specify manually.") + + +def _discover_commit_hash() -> str: + """Discover current commit hash from git + + Returns: + Commit hash (short form) or empty string if not available + """ + import subprocess + + logger = logging.getLogger(__name__) + + # 1. 
Environment Variable (GitHub Actions) + commit = os.getenv('GITHUB_SHA') + if commit: + # Return short form (first 7 characters) + short_commit = commit[:7] if len(commit) >= 7 else commit + logger.debug("Using commit hash from environment: %s", short_commit) + return short_commit + + # 2. Git information + try: + commit = subprocess.check_output( + ['git', 'rev-parse', '--short', 'HEAD'], + text=True, + stderr=subprocess.DEVNULL + ).strip() + + if commit: + logger.debug("Using commit hash from git: %s", commit) + return commit + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to discover commit hash from git: %s", e) + + # 3. Return empty string if not found (commit hash is optional) + logger.debug("Could not determine commit hash - this is optional") + return "" + + +def _discover_is_default_branch(current_branch: str, workspace_path: str = '') -> bool: + """Discover if the current branch is the default branch for the repository + + Args: + current_branch: The current branch name + workspace_path: The workspace directory path (defaults to current directory) + + Returns: + True if current branch is the default branch, False otherwise + """ + import subprocess + + logger = logging.getLogger(__name__) + + if not current_branch: + logger.debug("No current branch provided, cannot determine if it's default") + return False + + # 1. Try to get the default branch from git remote (most reliable) + try: + # Change to the workspace directory for git commands + cwd = workspace_path if workspace_path else None + + # Get the default branch from the remote origin + result = subprocess.check_output( + ['git', 'symbolic-ref', 'refs/remotes/origin/HEAD'], + text=True, + stderr=subprocess.DEVNULL, + cwd=cwd + ).strip() + + # Extract branch name from refs/remotes/origin/branch-name + if result and result.startswith('refs/remotes/origin/'): + default_branch = result.replace('refs/remotes/origin/', '') + is_default = current_branch == default_branch + logger.debug("Default branch from git remote: %s, current: %s, is_default: %s", + default_branch, current_branch, is_default) + return is_default + + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to get default branch from git remote: %s", e) + + # 2. Fallback: try to get default branch via git ls-remote + try: + # Get the default branch by checking what HEAD points to on the remote + result = subprocess.check_output( + ['git', 'ls-remote', '--symref', 'origin', 'HEAD'], + text=True, + stderr=subprocess.DEVNULL, + cwd=cwd + ).strip() + + # Parse the output: "ref: refs/heads/main\tHEAD" + for line in result.split('\n'): + if line.startswith('ref: refs/heads/'): + default_branch = line.split('ref: refs/heads/')[1].split('\t')[0] + is_default = current_branch == default_branch + logger.debug("Default branch from ls-remote: %s, current: %s, is_default: %s", + default_branch, current_branch, is_default) + return is_default + + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to get default branch from ls-remote: %s", e) + + # 3. Fallback: check common default branch names + common_defaults = ['main', 'master', 'develop', 'trunk'] + if current_branch in common_defaults: + logger.debug("Current branch '%s' matches common default branch pattern", current_branch) + return True + + # 4. 
Final fallback: not a default branch + logger.debug("Could not determine if '%s' is the default branch, assuming it's not", current_branch) + return False + + +def _discover_pull_request(github_pr_number: str = '', github_event_path: str = '', github_head_ref: str = '') -> int: + """Discover pull request number from environment + + Args: + github_pr_number: GITHUB_PR_NUMBER environment variable + github_event_path: GITHUB_EVENT_PATH environment variable + github_head_ref: GITHUB_HEAD_REF environment variable + + Returns: + Pull request number or 0 if not found/not a PR + """ + import subprocess + + logger = logging.getLogger(__name__) + + # 1. Environment Variable (GitHub Actions) + if github_pr_number: + try: + pr_num = int(github_pr_number) + logger.debug("Using PR number from GITHUB_PR_NUMBER: %s", pr_num) + return pr_num + except ValueError: + logger.debug("Invalid PR number in GITHUB_PR_NUMBER: %s", github_pr_number) + + # 2. GitHub event.json file (only if in GitHub environment) + if github_event_path: + event_file = Path(github_event_path) + if event_file.exists(): + try: + with open(event_file, 'r') as f: + event_data = json.load(f) + + # Extract PR number from pull_request.number + if 'pull_request' in event_data and 'number' in event_data['pull_request']: + pr_num = event_data['pull_request']['number'] + logger.debug("Using PR number from event.json: %s", pr_num) + return pr_num + except (json.JSONDecodeError, KeyError, Exception) as e: + logger.debug("Failed to parse PR number from event.json: %s", e) + + # 3. Check if we're in a GitHub PR context + if github_head_ref: + # We're in a PR context but don't have the number + logger.debug("In GitHub PR context but no PR number found, defaulting to 0") + return 0 + + # 4. Not a PR context + logger.debug("Not in a PR context, returning 0") + return 0 + + +def _discover_committers(github_actor: str = '') -> List[str]: + """Discover committer emails from git with GitHub user ID preference + + Args: + github_actor: GITHUB_ACTOR environment variable + + Returns: + List of committer email addresses, preferring GitHub user IDs when available + """ + import subprocess + + logger = logging.getLogger(__name__) + committers = [] + + # 1. Environment Variables (GitHub Actions) - prefer GitHub user ID + if github_actor: + # Use the GitHub username directly as the committer + committers.append(github_actor) + logger.debug("Using GitHub user ID from GITHUB_ACTOR: %s", github_actor) + return committers # Return early since we have the preferred GitHub user ID + + # 2. Git information - get the current commit author email + git_email = None + try: + git_email = subprocess.check_output( + ['git', 'log', '-1', '--pretty=format:%ae'], + text=True, + stderr=subprocess.DEVNULL + ).strip() + + if git_email: + logger.debug("Found git commit author email: %s", git_email) + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to discover committer email from git: %s", e) + + # 3. Git config user.email as fallback + if not git_email: + try: + git_email = subprocess.check_output( + ['git', 'config', 'user.email'], + text=True, + stderr=subprocess.DEVNULL + ).strip() + + if git_email: + logger.debug("Found git config user.email: %s", git_email) + except (subprocess.CalledProcessError, FileNotFoundError, Exception) as e: + logger.debug("Failed to discover committer email from git config: %s", e) + + # 4. 
Extract GitHub user ID from email if it's a GitHub noreply email + if git_email: + # Check if it's a GitHub noreply email pattern: username@users.noreply.github.com + if git_email.endswith('@users.noreply.github.com'): + github_username = git_email.split('@')[0] + committers.append(github_username) + logger.debug("Extracted GitHub user ID from noreply email: %s", github_username) + else: + # Use the configured email as-is + committers.append(git_email) + logger.debug("Using configured email: %s", git_email) + + # 5. Return empty list if no committers found + if not committers: + logger.debug("Could not determine committer information") + + return committers diff --git a/socket_basics/core/connector/manager.py b/socket_basics/core/connector/manager.py index 7ebd0b5..2d6039d 100644 --- a/socket_basics/core/connector/manager.py +++ b/socket_basics/core/connector/manager.py @@ -14,6 +14,7 @@ from .base import BaseConnector, ConnectorError, ConnectorConfigError from ..validator import SocketFactsValidator +from ..config import get_socket_basics_severities logger = logging.getLogger(__name__) @@ -33,8 +34,8 @@ def __init__(self, config, connectors_config_path: Optional[str] = None): self.loaded_connectors: Dict[str, BaseConnector] = {} # Determine allowed severities from environment or config; used by connectors try: - sev_env = os.getenv('SOCKET_BASICS_SEVERITIES') or os.getenv('INPUT_FINDING_SEVERITIES') - if sev_env is None: + sev_env = get_socket_basics_severities() + if not sev_env: self.allowed_severities = {"critical", "high"} else: self.allowed_severities = {s.strip().lower() for s in str(sev_env).split(',') if s.strip()} @@ -271,11 +272,22 @@ def run_all_scans(self) -> Dict[str, Any]: aggregated_components: Dict[str, Any] = {} # aggregated notifications keyed by title -> {'headers': ..., 'rows': [...]} notifications_by_title: Dict[str, Dict[str, Any]] = {} + # per-notifier notifications from connectors: {connector_name: {notifier_name: {title, headers, rows}}} + per_notifier_notifications: Dict[str, Dict[str, Any]] = {} for name, connector in connectors.items(): - logger.info(f"Running scan with connector: {name}") + logger.debug(f" Running scan with connector: {name} ({type(connector).__name__})") + logger.debug(f" Connector {name} is_enabled(): {connector.is_enabled()}") try: results = connector.scan() + logger.debug(f" Connector {name} scan completed, results type: {type(results)}") + if isinstance(results, dict): + components = results.get('components', []) + logger.debug(f" Connector {name} returned {len(components) if isinstance(components, list) else 'non-list'} components") + if components and isinstance(components, list): + logger.debug(f" First component from {name}: {components[0] if components else 'none'}") + else: + logger.debug(f" Connector {name} returned non-dict result: {results}") except Exception as e: logger.error(f"Error running connector {name}: {e}") if self.connectors_config.get('settings', {}).get('fail_fast', False): @@ -290,7 +302,8 @@ def run_all_scans(self) -> Dict[str, Any]: if isinstance(results, dict): for k, v in results.items(): if isinstance(v, dict) and isinstance(v.get('data'), list): - logger.debug("Connector %s raw wrapper '%s' contains %d inner items", name, k, len(v.get('data'))) + data_list = v.get('data', []) + logger.debug("Connector %s raw wrapper '%s' contains %d inner items", name, k, len(data_list)) except Exception: logger.debug('Failed to log raw wrapper details for connector %s', name) @@ -329,8 +342,20 @@ def run_all_scans(self) -> 
Dict[str, Any]: if not isinstance(comps, list): logger.warning("Connector %s returned invalid 'components' shape; expected list", name) comps = [] - if not isinstance(notifs, list): - logger.warning("Connector %s returned invalid 'notifications' shape; expected list", name) + + # Handle new per-notifier notifications format or legacy list format + if isinstance(notifs, dict): + # New format: {notifier_name: {title, headers, rows}, ...} + logger.debug("Connector %s returned new per-notifier notifications format", name) + # Store per-notifier format for notification manager to process + per_notifier_notifications[name] = notifs + notifs = [] # Don't process as legacy format + elif isinstance(notifs, list): + # Legacy format: [{title, headers, rows}, ...] + logger.debug("Connector %s returned legacy notifications format", name) + # Will be processed below + else: + logger.warning("Connector %s returned invalid 'notifications' shape; expected dict or list", name) notifs = [] # Merge components @@ -338,7 +363,14 @@ def run_all_scans(self) -> Dict[str, Any]: for c in comps: try: cid = c.get('id') or c.get('name') or str(id(c)) - aggregated_components[cid] = c + + # Set "direct": true by default for all connectors except socket_tier1 + # Connectors can override this in their results if needed + component = deepcopy(c) if c else {} + if name != 'socket_tier1' and 'direct' not in component: + component['direct'] = True + + aggregated_components[cid] = component except Exception: logger.debug('Skipping malformed component from connector %s', name) @@ -422,7 +454,33 @@ def run_all_scans(self) -> Dict[str, Any]: components_list = [] notifications_list = [] - return {'components': components_list, 'notifications': notifications_list} + # If we have per-notifier notifications, use those instead of legacy format + if per_notifier_notifications: + # Merge all connector per-notifier notifications + merged_per_notifier: Dict[str, Any] = {} + for connector_name, notifier_data in per_notifier_notifications.items(): + for notifier_name, notification_payload in notifier_data.items(): + if notifier_name not in merged_per_notifier: + # Initialize with the first connector's data + merged_per_notifier[notifier_name] = notification_payload + else: + # Merge lists of tables from multiple connectors + existing = merged_per_notifier[notifier_name] + if isinstance(existing, list) and isinstance(notification_payload, list): + # Both are lists of tables - extend the list + existing.extend(notification_payload) + elif isinstance(existing, dict) and isinstance(notification_payload, dict): + # Legacy single-table format - merge rows + if ('rows' in existing and 'rows' in notification_payload and + isinstance(existing['rows'], list) and isinstance(notification_payload['rows'], list)): + existing['rows'].extend(notification_payload['rows']) + else: + # Mixed formats or incompatible - log and skip + logger.warning(f"Cannot merge notifications for {notifier_name}: existing={type(existing)}, new={type(notification_payload)} from connector {connector_name}") + + return {'components': components_list, 'notifications': merged_per_notifier} + else: + return {'components': components_list, 'notifications': notifications_list} def get_connector_info(self) -> Dict[str, Any]: """Get information about available connectors diff --git a/socket_basics/core/connector/opengrep/__init__.py b/socket_basics/core/connector/opengrep/__init__.py index e9dd189..f97db76 100644 --- a/socket_basics/core/connector/opengrep/__init__.py +++ 
b/socket_basics/core/connector/opengrep/__init__.py @@ -19,6 +19,12 @@ from ..base import BaseConnector # Opengrep produces canonical components/notifications directly +# Import individual notifier modules +from . import github_pr, slack, ms_teams, ms_sentinel, sumologic, console, jira, webhook, json_notifier + +# Import shared formatters +from ...formatters import get_all_formatters + logger = logging.getLogger(__name__) @@ -186,44 +192,14 @@ def scan(self) -> Dict[str, Any]: wrapper = {'components': socket_facts.get('components', [])} try: comps_map: Dict[str, Dict[str, Any]] = {c.get('id') or c.get('name'): c for c in socket_facts.get('components', [])} - tables: List[Dict[str, Any]] = [] - groups: Dict[str, List[Dict[str, Any]]] = {} - # Group all alerts by their subType - for c in comps_map.values(): - comp_name = c.get('name') or c.get('id') or '' - for a in c.get('alerts', []): - st = a.get('subType') or a.get('subtype') or 'sast-generic' - groups.setdefault(st, []).append({'component': c, 'alert': a}) - - for subtype, items in groups.items(): - rows: List[List[str]] = [] - for it in items: - c = it['component'] - a = it['alert'] - props = a.get('props', {}) or {} - full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' - try: - from pathlib import Path as _P - file_name = _P(full_path).name - except Exception: - file_name = full_path - # include severity as second column so notifiers (Jira) can display it - rows.append([ - props.get('ruleId', a.get('title', '')), - a.get('severity', ''), - file_name, - full_path, - f"{props.get('startLine','')}-{props.get('endLine','')}", - props.get('codeSnippet','') or '' - ]) - # Provide headers so downstream notifiers preserve column names - headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code'] - tables.append({'title': subtype, 'headers': headers, 'rows': rows}) - - if tables: - wrapper['notifications'] = tables + + # Build notifications using new shared formatters + notifications_by_notifier = self.generate_notifications(wrapper.get('components', [])) + + if notifications_by_notifier: + wrapper['notifications'] = notifications_by_notifier elif notifications: - # adopt normalize_components notifications into canonical tables + # fallback to old format if needed wrapper['notifications'] = [{'title': 'results', 'rows': notifications}] except Exception: # if grouping fails, fall back to raw notifications @@ -295,9 +271,6 @@ def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: 'description': message, 'severity': sev_label, 'type': 'generic', - # Keep location focused on line information only; do NOT - # include the path here so callers rely on props['filePath'] - # for the file path (schema requirement). 
'location': { 'start': start, 'end': end @@ -388,38 +361,18 @@ def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: comp_id = path if comp_id not in comps: - # initialize component with qualifiers; connectors may populate - # a component-level 'type' qualifier to mirror alert subType comps[comp_id] = { 'id': comp_id, - # Use generic top-level type (keep specific scanner type in qualifiers) 'type': 'generic', + 'subPath': detected_subtype, 'name': path, "internal": True, - 'alerts': [], - 'qualifiers': { - 'scanner': 'opengrep' - } + 'alerts': [] } comps[comp_id]['alerts'].append(alert) except Exception: logger.debug('Failed to convert single opengrep result to alert', exc_info=True) - - # Filter out components with no alerts (shouldn't happen) and return - for k, v in comps.items(): - # set component-level qualifier 'type' from the first alert subType if present - alerts = v.get('alerts') or [] - if not alerts: - continue - first_sub = alerts[0].get('subType') or alerts[0].get('subtype') - if first_sub: - try: - v.setdefault('qualifiers', {}) - v['qualifiers']['type'] = first_sub - except Exception: - pass - out[k] = v - return out + return comps # If it's already a mapping of component_id -> component, filter empty alerts if all(isinstance(v, dict) for v in raw_results.values()): @@ -431,13 +384,15 @@ def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: return {} + + def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str]]: - # Build canonical list-of-table dicts grouped by filename or rule - tables: List[Dict[str, Any]] = [] + # Legacy method - returns flat list of rows (not grouped tables) + # This is kept for backward compatibility + rows: List[List[str]] = [] if not processed_results: - return tables - groups: Dict[str, List[List[str]]] = {} - headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code'] + return rows + for comp in processed_results.values(): for a in comp.get('alerts', []): props = a.get('props', {}) or {} @@ -445,7 +400,6 @@ def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str] full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' try: from pathlib import Path - file_name = Path(full_path).name except Exception: file_name = full_path @@ -458,11 +412,51 @@ def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str] f"{props.get('startLine','')}-{props.get('endLine','')}", props.get('codeSnippet','') or '' ] - group_key = props.get('ruleId') or file_name - groups.setdefault(group_key, []).append(row) - - for title, rows in groups.items(): - tables.append({'title': title, 'headers': headers, 'rows': rows}) - - return tables + rows.append(row) + + return rows + + def generate_notifications(self, components: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, str]]]: + """Generate pre-formatted notifications for all notifier types. 
+ + Args: + components: List of component dictionaries with alerts + + Returns: + Dictionary mapping notifier keys to lists of notification dictionaries + """ + if not components: + return {} + + # Create component mapping for compatibility with connector-specific formatters + comps_map = {c.get('id') or c.get('name') or str(id(c)): c for c in components} + + # Get all alerts grouped by subtype, with severity filtering + groups: Dict[str, List[Dict[str, Any]]] = {} + for c in comps_map.values(): + for a in c.get('alerts', []): + # Filter by severity - only include alerts that match allowed severities + alert_severity = (a.get('severity') or '').strip().lower() + if alert_severity and hasattr(self, 'allowed_severities') and alert_severity not in self.allowed_severities: + continue # Skip this alert - severity not enabled + + st = a.get('subType') or a.get('subtype') or 'sast-generic' + groups.setdefault(st, []).append({'component': c, 'alert': a}) + + if not groups: + return {} + + # Build notifications for each notifier type using OpenGrep-specific modules + notifications_by_notifier = {} + notifications_by_notifier['github_pr'] = github_pr.format_notifications(groups) + notifications_by_notifier['slack'] = slack.format_notifications(groups) + notifications_by_notifier['msteams'] = ms_teams.format_notifications(groups) + notifications_by_notifier['ms_sentinel'] = ms_sentinel.format_notifications(groups) + notifications_by_notifier['sumologic'] = sumologic.format_notifications(groups) + notifications_by_notifier['json'] = json_notifier.format_notifications(groups) + notifications_by_notifier['console'] = console.format_notifications(groups) + notifications_by_notifier['jira'] = jira.format_notifications(groups) + notifications_by_notifier['webhook'] = webhook.format_notifications(groups) + + return notifications_by_notifier diff --git a/socket_basics/core/connector/opengrep/console.py b/socket_basics/core/connector/opengrep/console.py new file mode 100644 index 0000000..00b96a0 --- /dev/null +++ b/socket_basics/core/connector/opengrep/console.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Console notifier for OpenGrep results. +Formats results for human-readable console output with truncated content. 
+""" + +from pathlib import Path +from typing import Dict, Any, List + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for console output - return multiple tables grouped by subtype.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + # Truncate for console readability + code_snippet = props.get('codeSnippet', '') or '' + if len(code_snippet) > 80: + code_snippet = code_snippet[:77] + '...' + + rows.append([ + props.get('ruleId', a.get('title', '')), + a.get('severity', ''), + file_name, + full_path, + f"{props.get('startLine','')}-{props.get('endLine','')}", + code_snippet + ]) + + # Create a separate table for each subtype/language group + from tabulate import tabulate + + display_name = subtype_names.get(subtype, subtype.upper()) + headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code'] + table_content = tabulate(rows, headers=headers, tablefmt='grid') if rows else f"No {display_name} issues found." + + tables.append({ + 'title': display_name, + 'content': table_content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/github_pr.py b/socket_basics/core/connector/opengrep/github_pr.py new file mode 100644 index 0000000..2bb0e72 --- /dev/null +++ b/socket_basics/core/connector/opengrep/github_pr.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +""" +GitHub PR notifier for OpenGrep results. +Formats results with markdown for better GitHub display. 
+""" + +from pathlib import Path +from typing import Dict, Any, List +import logging +import yaml + +logger = logging.getLogger(__name__) + + +def _get_github_pr_result_limit() -> int: + """Get the result limit for GitHub PR notifications.""" + try: + notifications_yaml = Path(__file__).parent.parent.parent / 'notifications.yaml' + with open(notifications_yaml, 'r') as f: + config = yaml.safe_load(f) + return config.get('settings', {}).get('result_limits', {}).get('github_pr', 100) + except Exception as e: + logger.warning(f"Could not load GitHub PR result limit from notifications.yaml: {e}, using default 100") + return 100 + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]], config=None) -> List[Dict[str, Any]]: + """Format for GitHub PR comments - detailed with markdown formatting.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + } + + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + for subtype, items in groups.items(): + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + # Format code snippets with
<pre> tags and <br/> for line breaks + code_snippet = props.get('codeSnippet', '') or '' + if code_snippet: + # Use <pre> tags for better code formatting as requested
+                code_formatted = code_snippet.replace('\n', '<br/>')
+ if len(code_formatted) > 200: + code_formatted = code_formatted[:200] + '...' + code_snippet = f"<pre>{code_formatted}</pre>
" + else: + code_snippet = '-' + + severity = a.get('severity', '').lower() + rows.append(( + severity_order.get(severity, 4), + [ + f"**{props.get('ruleId', a.get('title', ''))}**", + f"*{a.get('severity', '')}*", + f"`{file_name}`", + f"`{full_path}`", + f"Lines {props.get('startLine','')}-{props.get('endLine','')}", + code_snippet + ] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # Apply truncation + # result_limit = _get_github_pr_result_limit() + total_results = len(rows) + was_truncated = False + # + # if total_results > result_limit: + # logger.info(f"Truncating GitHub PR OpenGrep results from {total_results} to {result_limit} (prioritized by severity)") + # rows = rows[:result_limit] + # was_truncated = True + + # Create markdown table for this subtype + display_name = subtype_names.get(subtype, subtype.upper()) + if not rows: + content = f"No {display_name} issues found." + else: + headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code'] + header_row = '| ' + ' | '.join(headers) + ' |' + separator_row = '| ' + ' | '.join(['---'] * len(headers)) + ' |' + content_rows = [] + for row in rows: + content_rows.append('| ' + ' | '.join(str(cell) for cell in row) + ' |') + + content = '\n'.join([header_row, separator_row] + content_rows) + + # Add truncation notice if needed + # if was_truncated: + # content += f"\n\n⚠️ **Results truncated to {result_limit} highest severity findings** (total: {total_results}). See full scan URL for complete results." + + # Build title with repo/branch/commit info from config + title_parts = ["Socket Security Results"] + if config: + if config.repo: + title_parts.append(config.repo) + if config.branch: + title_parts.append(config.branch) + if config.commit_hash: + title_parts.append(config.commit_hash) + + title = " - ".join(title_parts) + + # Count total findings for summary + total_findings = total_results if not was_truncated else total_results + + # Add summary section with scanner findings + summary_content = f"""## Summary + +| Scanner | Findings | +|---------|----------| +| {display_name} | {total_findings} | + +## Details + +{content}""" + + # Wrap content with HTML comment markers for section updates + wrapped_content = f""" +# {title} + +{summary_content} +""" + + tables.append({ + 'title': title, + 'content': wrapped_content + }) + + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/jira.py b/socket_basics/core/connector/opengrep/jira.py new file mode 100644 index 0000000..fcd2669 --- /dev/null +++ b/socket_basics/core/connector/opengrep/jira.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +""" +Jira notifier for OpenGrep results. +Formats results for Jira tickets with priority mapping and detailed descriptions. 
+""" + +from pathlib import Path +from typing import Dict, Any, List +import logging + +logger = logging.getLogger(__name__) + + +def _get_jira_result_limit() -> int: + """Get the Jira result limit from notifications config, with fallback to default.""" + try: + import yaml + + # Try to load notifications.yaml to get the limit + base_dir = Path(__file__).parent.parent.parent + notifications_path = base_dir / "notifications.yaml" + + if notifications_path.exists(): + with open(notifications_path, 'r') as f: + config = yaml.safe_load(f) + result_limits = config.get('settings', {}).get('result_limits', {}) + return result_limits.get('jira', result_limits.get('default', 30)) + except Exception as e: + logger.debug(f"Could not load Jira result limit from config: {e}") + + # Fallback to conservative default + return 30 + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]], config=None) -> List[Dict[str, Any]]: + """Format for Jira tickets - using panels instead of tables for better control.""" + results = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + } + + # Define severity ranking for sorting + severity_rank = { + 'critical': 0, + 'high': 1, + 'medium': 2, + 'low': 3 + } + + for subtype, items in groups.items(): + display_name = subtype_names.get(subtype, subtype.upper()) + + if not items: + content = { + "type": "doc", + "version": 1, + "content": [ + { + "type": "paragraph", + "content": [{"type": "text", "text": f"No {display_name} issues found."}] + } + ] + } + else: + # Sort items by severity (Critical -> High -> Medium -> Low) + sorted_items = sorted( + items, + key=lambda x: severity_rank.get( + x['alert'].get('severity', '').lower(), + 999 # Unknown severities go to the end + ) + ) + + # Get Jira-specific result limit and truncate if needed + max_items = _get_jira_result_limit() + truncated_count = 0 + if len(sorted_items) > max_items: + truncated_count = len(sorted_items) - max_items + sorted_items = sorted_items[:max_items] + logger.info(f"Truncated {display_name} results from {len(items)} to {max_items} for Jira") + + panels = [] + + for item in sorted_items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + # Map severity to Jira priority + severity = a.get('severity', '').lower() + jira_priority = { + 'critical': 'Highest', + 'high': 'High', + 'medium': 'Medium', + 'low': 'Low' + }.get(severity, 'Medium') + + rule_id = props.get('ruleId', a.get('title', '')) + description = a.get('description', '') + + # Determine panel color based on priority + panel_type = { + 'Highest': 'error', + 'High': 'warning', + 'Medium': 'note', + 'Low': 'info' + }.get(jira_priority, 'note') + + # Build panel content + panel_content = [ + { + "type": "heading", + "attrs": {"level": 3}, + "content": [{"type": "text", "text": f"🔍 {rule_id}", "marks": [{"type": "strong"}]}] + }, + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "File: ", "marks": 
[{"type": "strong"}]}, + {"type": "text", "text": full_path, "marks": [{"type": "code"}]} + ] + }, + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "Description: ", "marks": [{"type": "strong"}]}, + {"type": "text", "text": description} + ] + } + ] + + # Add code snippet if available + code_snippet = props.get('codeSnippet', '') or '' + if code_snippet: + # Determine language from subtype + language_map = { + 'sast-python': 'python', + 'sast-javascript': 'javascript', + 'sast-golang': 'go', + 'sast-java': 'java', + 'sast-php': 'php', + 'sast-ruby': 'ruby', + 'sast-csharp': 'c#', + 'sast-dotnet': 'c#', + 'sast-c': 'c', + 'sast-cpp': 'c++', + 'sast-kotlin': 'scala', + 'sast-scala': 'scala', + 'sast-swift': 'swift', + 'sast-rust': 'javascript', + } + language = language_map.get(subtype, 'javascript') + + start_line = props.get('startLine', '') + end_line = props.get('endLine', '') + + panel_content.extend([ + { + "type": "paragraph", + "content": [ + {"type": "text", "text": f"Code (Lines {start_line}-{end_line}):", "marks": [{"type": "strong"}]} + ] + }, + { + "type": "codeBlock", + "attrs": {"language": language}, + "content": [{"type": "text", "text": code_snippet}] + } + ]) + + # Create the panel + panels.append({ + "type": "panel", + "attrs": {"panelType": panel_type}, + "content": panel_content + }) + + # Add a rule/divider between issues + panels.append({ + "type": "rule" + }) + + # Remove the last rule + if panels and panels[-1]["type"] == "rule": + panels.pop() + + # Add truncation notice if results were truncated + if truncated_count > 0: + truncation_panel = { + "type": "panel", + "attrs": {"panelType": "info"}, + "content": [ + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "⚠️ ", "marks": [{"type": "strong"}]}, + {"type": "text", "text": f"Showing top {max_items} results (by severity). "}, + {"type": "text", "text": f"{truncated_count} additional results truncated. "}, + {"type": "text", "text": "View full results at the scan URL below."} + ] + } + ] + } + panels.append(truncation_panel) + + content = { + "type": "doc", + "version": 1, + "content": panels + } + + results.append({ + 'title': display_name, + 'content': content + }) + + return results \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/json_notifier.py b/socket_basics/core/connector/opengrep/json_notifier.py new file mode 100644 index 0000000..d900a2c --- /dev/null +++ b/socket_basics/core/connector/opengrep/json_notifier.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +""" +JSON notifier for OpenGrep results. +Formats results with complete structured data for programmatic consumption. 
+""" + +from pathlib import Path +from typing import Dict, Any, List + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for JSON output - return multiple structured datasets grouped by subtype.""" + import json + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + structured_data = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + structured_data.append({ + 'rule': props.get('ruleId', a.get('title', '')), + 'severity': a.get('severity', ''), + 'file_name': file_name, + 'file_path': full_path, + 'lines': f"{props.get('startLine','')}-{props.get('endLine','')}", + 'code_snippet': props.get('codeSnippet', '') or '', + 'subtype': subtype, + 'description': a.get('description', ''), + 'confidence': props.get('confidence', ''), + 'fingerprint': props.get('fingerprint', '') + }) + + # Create JSON content for this subtype + display_name = subtype_names.get(subtype, subtype.upper()) + content = json.dumps({ + 'results': structured_data, + 'metadata': { + 'subtype': subtype, + 'display_name': display_name, + 'total_issues': len(structured_data) + } + }, indent=2) + + tables.append({ + 'title': display_name, + 'content': content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/ms_sentinel.py b/socket_basics/core/connector/opengrep/ms_sentinel.py new file mode 100644 index 0000000..6608b5c --- /dev/null +++ b/socket_basics/core/connector/opengrep/ms_sentinel.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Microsoft Sentinel notifier for OpenGrep results. +Formats results structured for SIEM ingestion. 
+""" + +from typing import Dict, Any, List +import logging +import yaml +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def _get_ms_sentinel_result_limit() -> int: + """Get the result limit for MS Sentinel notifications.""" + try: + notifications_yaml = Path(__file__).parent.parent.parent / 'notifications.yaml' + with open(notifications_yaml, 'r') as f: + config = yaml.safe_load(f) + return config.get('settings', {}).get('result_limits', {}).get('ms_sentinel', 500) + except Exception as e: + logger.warning(f"Could not load MS Sentinel result limit from notifications.yaml: {e}, using default 500") + return 500 + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for Microsoft Sentinel - return multiple structured datasets grouped by subtype.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + + severity = a.get('severity', '').lower() + # More structured format for SIEM + rows.append(( + severity_order.get(severity, 4), + [ + props.get('ruleId', a.get('title', '')), + a.get('severity', ''), + props.get('filePath', ''), + f"{props.get('startLine','')}-{props.get('endLine','')}", + subtype, + a.get('description', ''), + props.get('confidence', ''), + props.get('fingerprint', '') + ] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # Apply truncation + result_limit = _get_ms_sentinel_result_limit() + total_results = len(rows) + was_truncated = False + + if total_results > result_limit: + logger.info(f"Truncating MS Sentinel OpenGrep results from {total_results} to {result_limit} (prioritized by severity)") + rows = rows[:result_limit] + was_truncated = True + + # Create a separate table for each subtype/language group + display_name = subtype_names.get(subtype, subtype.upper()) + headers = ['RuleId', 'Severity', 'FilePath', 'LineRange', 'SubType', 'Description', 'Confidence', 'Fingerprint'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) if rows else f"No {display_name} issues found." + + # Add truncation notice if needed + if was_truncated: + content += f"\n\nResults truncated to {result_limit} highest severity findings (total: {total_results}). See full scan URL for complete results." 
+ + tables.append({ + 'title': display_name, + 'content': content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/ms_teams.py b/socket_basics/core/connector/opengrep/ms_teams.py new file mode 100644 index 0000000..3af3db4 --- /dev/null +++ b/socket_basics/core/connector/opengrep/ms_teams.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +""" +Microsoft Teams notifier for OpenGrep results. +Formats results in clean tabular format suitable for Teams. +""" + +from pathlib import Path +from typing import Dict, Any, List +import logging +import yaml + +logger = logging.getLogger(__name__) + + +def _get_ms_teams_result_limit() -> int: + """Get the result limit for MS Teams notifications.""" + try: + notifications_yaml = Path(__file__).parent.parent.parent / 'notifications.yaml' + with open(notifications_yaml, 'r') as f: + config = yaml.safe_load(f) + return config.get('settings', {}).get('result_limits', {}).get('ms_teams', 50) + except Exception as e: + logger.warning(f"Could not load MS Teams result limit from notifications.yaml: {e}, using default 50") + return 50 + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for Microsoft Teams - return multiple tables grouped by subtype.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + severity = a.get('severity', '').lower() + rows.append(( + severity_order.get(severity, 4), + [ + props.get('ruleId', a.get('title', '')), + a.get('severity', ''), + file_name, + full_path, + f"{props.get('startLine','')}-{props.get('endLine','')}", + (props.get('codeSnippet', '') or '')[:150] # Truncate for Teams + ] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # Apply truncation + result_limit = _get_ms_teams_result_limit() + total_results = len(rows) + was_truncated = False + + if total_results > result_limit: + logger.info(f"Truncating MS Teams OpenGrep results from {total_results} to {result_limit} (prioritized by severity)") + rows = rows[:result_limit] + was_truncated = True + + # Create a separate table for each subtype/language group + display_name = subtype_names.get(subtype, subtype.upper()) + headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + 
content_rows) if rows else f"No {display_name} issues found." + + # Add truncation notice if needed + if was_truncated: + content += f"\n\n⚠️ Results truncated to {result_limit} highest severity findings (total: {total_results}). See full scan URL for complete results." + + tables.append({ + 'title': display_name, + 'content': content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/slack.py b/socket_basics/core/connector/opengrep/slack.py new file mode 100644 index 0000000..9409bfe --- /dev/null +++ b/socket_basics/core/connector/opengrep/slack.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +Slack notifier for OpenGrep results. +Formats results concisely with emojis for visual appeal. +""" + +from pathlib import Path +from typing import Dict, Any, List +import logging +import yaml + +logger = logging.getLogger(__name__) + + +def _get_slack_result_limit() -> int: + """Get the result limit for Slack notifications.""" + try: + notifications_yaml = Path(__file__).parent.parent.parent / 'notifications.yaml' + with open(notifications_yaml, 'r') as f: + config = yaml.safe_load(f) + return config.get('settings', {}).get('result_limits', {}).get('slack', 50) + except Exception as e: + logger.warning(f"Could not load Slack result limit from notifications.yaml: {e}, using default 50") + return 50 + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for Slack notifications - return multiple tables grouped by subtype.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + # Add severity emojis for Slack + severity = a.get('severity', '').lower() + severity_emoji = { + 'critical': '🔴', + 'high': '🟠', + 'medium': '🟡', + 'low': '🟢' + }.get(severity, '⚪') + + rows.append(( + severity_order.get(severity, 4), + [ + props.get('ruleId', a.get('title', '')), + f"{severity_emoji} {a.get('severity', '')}", + file_name, + full_path, + f"{props.get('startLine','')}-{props.get('endLine','')}", + (props.get('codeSnippet', '') or '')[:100] + ('...' 
if len(props.get('codeSnippet', '') or '') > 100 else '') + ] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # Apply truncation + result_limit = _get_slack_result_limit() + total_results = len(rows) + was_truncated = False + + if total_results > result_limit: + logger.info(f"Truncating Slack OpenGrep results from {total_results} to {result_limit} (prioritized by severity)") + rows = rows[:result_limit] + was_truncated = True + + # Create a separate table for each subtype/language group + from tabulate import tabulate + + display_name = subtype_names.get(subtype, subtype.upper()) + headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code'] + table_content = tabulate(rows, headers=headers, tablefmt='pipe') if rows else f"No {display_name} issues found." + + # Add truncation notice if needed + if was_truncated: + table_content += f"\n\n⚠️ *Results truncated to {result_limit} highest severity findings (total: {total_results}). See full scan URL for complete results.*" + + tables.append({ + 'title': display_name, + 'content': table_content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/sumologic.py b/socket_basics/core/connector/opengrep/sumologic.py new file mode 100644 index 0000000..2935acf --- /dev/null +++ b/socket_basics/core/connector/opengrep/sumologic.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +""" +SumoLogic notifier for OpenGrep results. +Formats results in structured logging format suitable for log parsing. +""" + +from typing import Dict, Any, List +import logging +import yaml +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def _get_sumologic_result_limit() -> int: + """Get the result limit for SumoLogic notifications.""" + try: + notifications_yaml = Path(__file__).parent.parent.parent / 'notifications.yaml' + with open(notifications_yaml, 'r') as f: + config = yaml.safe_load(f) + return config.get('settings', {}).get('result_limits', {}).get('sumologic', 500) + except Exception as e: + logger.warning(f"Could not load SumoLogic result limit from notifications.yaml: {e}, using default 500") + return 500 + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for SumoLogic - return multiple structured log datasets grouped by subtype.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + + severity = a.get('severity', '').lower() + # Key-value format suitable for log parsing + rows.append(( + severity_order.get(severity, 4), + [ + f"rule={props.get('ruleId', a.get('title', ''))}", + f"severity={a.get('severity', '')}", + f"file={props.get('filePath', '')}", + 
f"lines={props.get('startLine','')}-{props.get('endLine','')}", + f"type={subtype}", + f"scanner=opengrep" + ] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # Apply truncation + result_limit = _get_sumologic_result_limit() + total_results = len(rows) + was_truncated = False + + if total_results > result_limit: + logger.info(f"Truncating SumoLogic OpenGrep results from {total_results} to {result_limit} (prioritized by severity)") + rows = rows[:result_limit] + was_truncated = True + + # Create a separate log dataset for each subtype/language group + display_name = subtype_names.get(subtype, subtype.upper()) + headers = ['Rule', 'Severity', 'File', 'Lines', 'Type', 'Scanner'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) if rows else f"No {display_name} issues found." + + # Add truncation notice if needed + if was_truncated: + content += f"\n\nresults_truncated=true total_results={total_results} displayed_results={result_limit} note=\"Results truncated to {result_limit} highest severity findings. See full scan URL for complete results.\"" + + tables.append({ + 'title': display_name, + 'content': content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/opengrep/webhook.py b/socket_basics/core/connector/opengrep/webhook.py new file mode 100644 index 0000000..8ba5d25 --- /dev/null +++ b/socket_basics/core/connector/opengrep/webhook.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +""" +Webhook notifier for OpenGrep results. +Formats results for generic webhook consumption with flexible structured format. 
+""" + +from pathlib import Path +from typing import Dict, Any, List + + +def format_notifications(groups: Dict[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]: + """Format for generic webhook - return multiple flexible structured datasets grouped by subtype.""" + tables = [] + + # Map subtypes to friendly display names + subtype_names = { + 'sast-python': 'SAST Python', + 'sast-javascript': 'SAST JavaScript', + 'sast-golang': 'SAST Go', + 'sast-java': 'SAST Java', + 'sast-php': 'SAST PHP', + 'sast-ruby': 'SAST Ruby', + 'sast-csharp': 'SAST C#', + 'sast-dotnet': 'SAST .NET', + 'sast-c': 'SAST C', + 'sast-cpp': 'SAST C++', + 'sast-kotlin': 'SAST Kotlin', + 'sast-scala': 'SAST Scala', + 'sast-swift': 'SAST Swift', + 'sast-rust': 'SAST Rust', + 'sast-elixir': 'SAST Elixir', + 'sast-generic': 'SAST Generic' + } + + for subtype, items in groups.items(): + if not items: # Skip empty groups + continue + + rows = [] + for item in items: + c = item['component'] + a = item['alert'] + props = a.get('props', {}) or {} + full_path = props.get('filePath', a.get('location', {}).get('path')) or '-' + + try: + file_name = Path(full_path).name + except Exception: + file_name = full_path + + rows.append([ + props.get('ruleId', a.get('title', '')), + a.get('severity', ''), + file_name, + full_path, + f"{props.get('startLine','')}-{props.get('endLine','')}", + props.get('codeSnippet', '') or '', + subtype, + 'opengrep' + ]) + + # Create a separate dataset for each subtype/language group + display_name = subtype_names.get(subtype, subtype.upper()) + headers = ['Rule', 'Severity', 'File', 'Path', 'Lines', 'Code', 'SubType', 'Scanner'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) if rows else f"No {display_name} issues found." 
+ + tables.append({ + 'title': display_name, + 'content': content + }) + + # Return list of tables - one per language group + return tables \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/console.py b/socket_basics/core/connector/socket_tier1/console.py new file mode 100644 index 0000000..a6e8e78 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/console.py @@ -0,0 +1,68 @@ +"""Console notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for console output - human readable with full trace information.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '') + + # Truncate for console readability + short_purl = purl[:40] + '...' if len(purl) > 40 else purl + + # Show actual trace data for reachable vulnerabilities + trace_data = str(props.get('trace') or '') + if reachability == 'reachable' and trace_data.strip(): + # Format full trace with proper line breaks + trace_lines = trace_data.strip().split('\n') + formatted_trace = '\n'.join(trace_lines) + else: + formatted_trace = '' + + rows.append([ + cve_id, + severity, + reachability, + short_purl, + formatted_trace + ]) + + # Format as a table using tabulate + from tabulate import tabulate + + headers = ['CVE/GHSA', 'Severity', 'Reachability', 'Package', 'Trace'] + table_content = tabulate(rows, headers=headers, tablefmt='grid') if rows else "No Socket Tier1 vulnerabilities found." 
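# Worked example of the _make_purl logic defined earlier in this module, as a
# condensed runnable restatement (the standalone name `make_purl` and the
# sample package are illustrative; the original also falls back to comp['id']):

def make_purl(comp):
    ns = (comp.get('namespace') or '').replace('@', '%40')
    base = f"pkg:{comp['type']}/{ns + '/' if ns else ''}{comp['name']}"
    version = comp.get('version')
    return f"{base}@{version}" if version else base

# A scoped npm package percent-encodes the '@' in its namespace:
assert make_purl({'type': 'npm', 'namespace': '@babel', 'name': 'core',
                  'version': '7.24.0'}) == 'pkg:npm/%40babel/core@7.24.0'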
+ + return [{ + 'title': 'Socket Tier1 Reachability Analysis', + 'content': table_content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/github_pr.py b/socket_basics/core/connector/socket_tier1/github_pr.py new file mode 100644 index 0000000..769919c --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/github_pr.py @@ -0,0 +1,114 @@ +"""GitHub PR notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]], config=None) -> List[Dict[str, Any]]: + """Format for GitHub PR comments - detailed with markdown formatting.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '').lower() + + # Format with markdown for better GitHub display + trace_raw = props.get('trace') or '' + trace_str = '' + if isinstance(trace_raw, list): + trace_str = '\n'.join(str(x) for x in trace_raw) + elif isinstance(trace_raw, str): + trace_str = trace_raw + + if reachability == 'reachable' and trace_str: + # Convert newlines to
<br> tags for GitHub markdown tables
+                trace_formatted = trace_str.replace('\n', '<br>')
+                # Use <pre> tags for better code formatting as requested
+                if len(trace_formatted) > 300:
+                    trace_formatted = trace_formatted[:300] + '...'
+                trace_formatted = f"<pre>{trace_formatted}</pre>
" + else: + trace_formatted = f"`{purl}`" + + rows.append([ + f"**{cve_id}**", + f"*{severity}*", + f"**{reachability.upper()}**" if reachability == 'reachable' else reachability, + f"`{purl}`", + trace_formatted + ]) + + # Create markdown table + if not rows: + content = "No reachability issues found." + else: + headers = ['CVE/GHSA', 'Severity', 'Reachability', 'PURL', 'Trace'] + header_row = '| ' + ' | '.join(headers) + ' |' + separator_row = '| ' + ' | '.join(['---'] * len(headers)) + ' |' + content_rows = [] + for row in rows: + content_rows.append('| ' + ' | '.join(str(cell) for cell in row) + ' |') + + content = '\n'.join([header_row, separator_row] + content_rows) + + # Build title with repo/branch/commit info from config + title_parts = ["Socket Security Tier 1 Results"] + if config: + if config.repo: + title_parts.append(config.repo) + if config.branch: + title_parts.append(config.branch) + if config.commit_hash: + title_parts.append(config.commit_hash) + + title = " - ".join(title_parts) + + # Count total findings for summary + total_findings = len(rows) + + # Add summary section with scanner findings + summary_content = f"""## Summary + +| Scanner | Findings | +|---------|----------| +| Socket Tier1 | {total_findings} | + +## Details + +{content}""" + + # Wrap content with HTML comment markers for section updates + wrapped_content = f""" +# {title} + +{summary_content} +""" + + return [{ + 'title': title, + 'content': wrapped_content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/jira.py b/socket_basics/core/connector/socket_tier1/jira.py new file mode 100644 index 0000000..5fc5972 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/jira.py @@ -0,0 +1,228 @@ +"""Jira notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _detect_language_from_purl(purl: str) -> str: + """Detect programming language from PURL (Package URL).""" + # Extract the package type from PURL format: pkg:type/namespace/name@version + if not purl or not purl.startswith('pkg:'): + return 'JavaScript' # Default fallback for most npm packages + + try: + # Split by : to get the type part + parts = purl.split(':', 2) # ['pkg', 'type', 'rest'] + if len(parts) >= 2: + package_type = parts[1].split('/')[0] # Get type before any / + + # Map package types to Jira-supported languages + type_to_language = { + 'npm': 'JavaScript', + 'pypi': 'Python', + 'maven': 'Java', + 'gradle': 'Java', + 'nuget': 'C#', + 'gem': 'Ruby', + 'go': 'Go', + 'cargo': 'Rust', # Not in Jira supported list, will fallback + 'composer': 'PHP', + 'swift': 'Swift', + 'cocoapods': 'Swift', + 'hackage': 'Haskell', + 'hex': 'Erlang', + 'cran': 'R', + 'cpan': 'Perl', + } + + detected_lang = type_to_language.get(package_type.lower(), 'JavaScript') + + # Ensure the language is in Jira's supported list + jira_supported = [ + 'ActionScript', 'Ada', 'AppleScript', 'bash', 'C', 'C#', 'C++', + 'CSS', 'Erlang', 'Go', 'Groovy', 'Haskell', 'HTML', 'JavaScript', + 'JSON', 'Lua', 'Nyan', 'Objc', 'Perl', 'PHP', 'Python', 'R', + 'Ruby', 'Scala', 'SQL', 'Swift', 'VisualBasic', 'XML', 'YAML' + ] + + if detected_lang in jira_supported: + return detected_lang + else: + return 'JavaScript' # Safe fallback + + except Exception: + pass + + return 'JavaScript' # Default fallback + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = 
comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]], config=None) -> List[Dict[str, Any]]: + """Format for Jira tickets - using panels for better layout control.""" + + # Define severity ranking for sorting + severity_rank = { + 'critical': 0, + 'high': 1, + 'medium': 2, + 'low': 3 + } + + # Collect all alerts with component info + all_alerts = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '').lower() + reachability = str(props.get('reachability') or '').lower() + + # Format trace data + trace_raw = props.get('trace') or '' + trace_str = '' + if isinstance(trace_raw, list): + trace_str = '\n'.join(str(x) for x in trace_raw) + elif isinstance(trace_raw, str): + trace_str = trace_raw + + all_alerts.append({ + 'cve_id': cve_id, + 'severity': severity, + 'reachability': reachability, + 'purl': purl, + 'trace_str': trace_str + }) + + if not all_alerts: + content = { + "type": "doc", + "version": 1, + "content": [ + { + "type": "paragraph", + "content": [{"type": "text", "text": "No reachability issues found."}] + } + ] + } + else: + # Sort alerts by severity (Critical -> High -> Medium -> Low) + sorted_alerts = sorted( + all_alerts, + key=lambda x: severity_rank.get(x['severity'], 999) + ) + + panels = [] + + for alert in sorted_alerts: + # Map severity to Jira priority + jira_priority = { + 'critical': 'Highest', + 'high': 'High', + 'medium': 'Medium', + 'low': 'Low' + }.get(alert['severity'], 'Medium') + + # Determine panel color based on priority + panel_type = { + 'Highest': 'error', + 'High': 'warning', + 'Medium': 'note', + 'Low': 'info' + }.get(jira_priority, 'note') + + # Build panel content + panel_content = [ + { + "type": "heading", + "attrs": {"level": 3}, + "content": [{"type": "text", "text": f"🔒 {alert['cve_id']}", "marks": [{"type": "strong"}]}] + }, + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "Severity: ", "marks": [{"type": "strong"}]}, + {"type": "text", "text": jira_priority} + ] + }, + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "Reachability: ", "marks": [{"type": "strong"}]}, + {"type": "text", "text": alert['reachability'].upper() if alert['reachability'] == 'reachable' else alert['reachability']} + ] + }, + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "Package: ", "marks": [{"type": "strong"}]}, + {"type": "text", "text": alert['purl'], "marks": [{"type": "code"}]} + ] + } + ] + + # Add trace if reachable and trace exists + if alert['reachability'] == 'reachable' and alert['trace_str']: + # Dynamically determine language from PURL + language = _detect_language_from_purl(alert['purl']) + + panel_content.extend([ + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "Call Trace:", "marks": [{"type": "strong"}]} + ] + }, + { + "type": 
"codeBlock", + "attrs": {"language": language.lower()}, + "content": [{"type": "text", "text": alert['trace_str']}] + } + ]) + + # Create the panel + panels.append({ + "type": "panel", + "attrs": {"panelType": panel_type}, + "content": panel_content + }) + + # Add a rule/divider between issues + panels.append({ + "type": "rule" + }) + + # Remove the last rule + if panels and panels[-1]["type"] == "rule": + panels.pop() + + content = { + "type": "doc", + "version": 1, + "content": panels + } + + return [{ + 'title': 'Socket Tier1 Reachability Analysis', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/json_notifier.py b/socket_basics/core/connector/socket_tier1/json_notifier.py new file mode 100644 index 0000000..277c381 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/json_notifier.py @@ -0,0 +1,80 @@ +"""JSON notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for JSON output - complete structured data.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '') + + # Include trace data for JSON + trace_raw = props.get('trace') or '' + trace_str = '' + if isinstance(trace_raw, list): + trace_str = '\n'.join(str(x) for x in trace_raw) + elif isinstance(trace_raw, str): + trace_str = trace_raw + + rows.append([ + cve_id, + severity, + reachability, + purl, + str(props.get('ghsaId', '')), + str(props.get('cveId', '')), + comp_name, + str(comp.get('version', '')), + trace_str, + str(props.get('undeterminableReachability', False)) + ]) + + # Format as JSON data structure + if not rows: + content = "No Socket Tier1 vulnerabilities found." 
+ else: + import json + # For JSON, create a structured array of objects + structured_data = [] + headers = ['ID', 'Severity', 'Reachability', 'PURL', 'GHSA', 'CVE', 'Component', 'Version', 'Trace', 'Undeterminable'] + for row in rows: + obj = {} + for i, header in enumerate(headers): + if i < len(row): + obj[header] = row[i] + structured_data.append(obj) + + content = json.dumps(structured_data, indent=2) + + return [{ + 'title': 'Socket Tier1 Reachability Analysis', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/ms_sentinel.py b/socket_basics/core/connector/socket_tier1/ms_sentinel.py new file mode 100644 index 0000000..325d775 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/ms_sentinel.py @@ -0,0 +1,69 @@ +"""Microsoft Sentinel notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for Microsoft Sentinel - structured for SIEM ingestion.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + ghsa_id = str(props.get('ghsaId', '')) + cve_only = str(props.get('cveId', '')) + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '') + + # More structured format for SIEM + rows.append([ + cve_id, + severity, + reachability, + purl, + ghsa_id, + cve_only, + comp_name, + str(props.get('undeterminableReachability', False)) + ]) + + # Format as structured data for MS Sentinel + if not rows: + content = "No Socket Tier1 vulnerabilities found." 
+ else: + headers = ['ID', 'Severity', 'Reachability', 'PURL', 'GHSA', 'CVE', 'Component', 'Undeterminable'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'Socket Tier1 Vulnerability Findings', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/ms_teams.py b/socket_basics/core/connector/socket_tier1/ms_teams.py new file mode 100644 index 0000000..a112e30 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/ms_teams.py @@ -0,0 +1,64 @@ +"""Microsoft Teams notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for Microsoft Teams - clean tabular format.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '').lower() + + # Clean format for Teams + rows.append([ + cve_id, + severity, + reachability.upper(), + purl[:60] + '...' if len(purl) > 60 else purl, # Truncate for Teams + 'Reachable' if reachability == 'reachable' else 'Not Reachable' + ]) + + # Format as markdown table for MS Teams + if not rows: + content = "No Socket Tier1 vulnerabilities found." + else: + headers = ['CVE/GHSA', 'Severity', 'Reachability', 'Package', 'Status'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'Socket Tier1 Reachability Analysis', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/scanner.py b/socket_basics/core/connector/socket_tier1/scanner.py index 47ce8b5..b7e764e 100644 --- a/socket_basics/core/connector/socket_tier1/scanner.py +++ b/socket_basics/core/connector/socket_tier1/scanner.py @@ -11,6 +11,20 @@ from ..base import BaseConnector, ConnectorExecutionError +# Import individual notifier modules +from . import github_pr +from . import slack +from . import ms_teams +from . import ms_sentinel +from . import sumologic +from . import json_notifier +from . import console +from . import jira +from . 
import webhook + +# Import shared formatters +from ...formatters import get_all_formatters + logger = logging.getLogger(__name__) @@ -28,11 +42,7 @@ class SocketTier1Scanner(BaseConnector): FACTS_FILENAME = ".socket.facts.json" def is_enabled(self) -> bool: - # Allow explicit enable via env var - val = os.environ.get('SOCKET_TIER_1_ENABLED', '').lower() - if val in ('1', 'true', 'yes', 'on'): - return True - # Also allow enabling via config object if present + # Check config object first (which already handles environment variables) # connectors.yaml defines the parameter as 'socket_tier_1_enabled' try: if hasattr(self.config, 'get'): @@ -49,15 +59,22 @@ def is_enabled(self) -> bool: def _get_auth_env(self) -> Dict[str, str]: env = {} - # Prefer explicit environment variables, then fall back to values populated - # into the Config object by CLI parsing (create_config_from_args) - org = os.environ.get('SOCKET_ORG') or (self.config.get('socket_org') if hasattr(self.config, 'get') else getattr(self.config, 'socket_org', '')) + # Use Config object exclusively - it already handles environment variables + # and Socket API auto-discovery with proper precedence + + # Get organization from config (which already handles SOCKET_ORG, SOCKET_ORG_SLUG, etc.) + org = ( + self.config.get('socket_org') if hasattr(self.config, 'get') + else getattr(self.config, 'socket_org', None) + ) + + # Get API key from config (which already handles SOCKET_SECURITY_API_KEY, SOCKET_SECURITY_API_TOKEN, etc.) api_key = ( - os.environ.get('SOCKET_SECURITY_API_KEY') - or os.environ.get('SOCKET_SECURITY_API_TOKEN') - or (self.config.get('socket_api_key') if hasattr(self.config, 'get') else getattr(self.config, 'socket_api_key', '')) - or (self.config.get('socket_api_token') if hasattr(self.config, 'get') else getattr(self.config, 'socket_api_token', '')) + self.config.get('socket_api_key') if hasattr(self.config, 'get') + else getattr(self.config, 'socket_api_key', None) ) + logger.debug(f" Socket Tier1 auth - org: '{org}', api_key_set: {bool(api_key)}") + if org: env['SOCKET_ORG'] = org if api_key: @@ -65,8 +82,11 @@ def _get_auth_env(self) -> Dict[str, str]: return env def _parse_additional_params(self) -> List[str]: - raw = os.environ.get('SOCKET_ADDITIONAL_PARAMS', '') - raw = raw or (self.config.get('socket_additional_params') if hasattr(self.config, 'get') else getattr(self.config, 'socket_additional_params', '')) + # Use config object exclusively - it already handles SOCKET_ADDITIONAL_PARAMS env var + raw = ( + self.config.get('socket_additional_params') if hasattr(self.config, 'get') + else getattr(self.config, 'socket_additional_params', '') + ) if not raw: return [] # Allow comma-separated or regular shell splitting @@ -186,7 +206,8 @@ def _determine_reachability(self, vuln: Dict[str, Any], comp: Dict[str, Any]) -> for entry in comp_reach: if not target_id: continue - if entry.get('ghsa_id') and str(entry.get('ghsa_id')).lower() == str(target_id).lower(): + entry_id = entry.get('ghsa_id') + if entry_id and str(entry_id).lower() == str(target_id).lower(): matched = entry break @@ -201,6 +222,9 @@ def _determine_reachability(self, vuln: Dict[str, Any], comp: Dict[str, Any]) -> out['type'] = 'reachable' elif t == 'unreachable' and out['type'] != 'reachable': out['type'] = 'unreachable' + elif t == 'missing_support' and out['type'] not in ('reachable', 'unreachable'): + # missing_support means analysis couldn't determine reachability, treat as unknown + out['type'] = 'unknown' # For reachable entries, build a structured 
trace from 'matches' if t == 'reachable': @@ -246,7 +270,6 @@ def _determine_reachability(self, vuln: Dict[str, Any], comp: Dict[str, Any]) -> else: out['trace'].append(f" -> {comp_name}") - # Do not include pattern lines in the trace output; only include # the formatted match lines and the final '-> component@version' line. @@ -254,6 +277,11 @@ def _determine_reachability(self, vuln: Dict[str, Any], comp: Dict[str, Any]) -> if out['type'] != 'reachable': out['trace'] = [] + # If no matched reachability entry was found, this vulnerability is not applicable to this component version + if matched is None: + out['type'] = 'not_applicable' + return out + # If patterns exist but no matched reachability and not undeterminable, leave as unknown return out @@ -368,26 +396,29 @@ def _format_match_groups(self, matches_or_details: Any) -> str: except Exception: return '' + + def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: - """Convert Socket CLI .socket.facts.json into a Socket facts wrapper with alerts + """Convert Socket CLI .socket.facts.json into a Socket facts wrapper with notifications - - Keeps components list as-is (adds 'alerts' list per component) - - For each vulnerability in a component, emits an alert with CVE/GHSA, severity, reachability, purl, and trace + - Keeps components list as-is (NO MODIFICATIONS to components from .socket.facts.json) + - For each vulnerability in a component, generates alerts for notifications only + - Returns original components unchanged and puts processed notifications in notifications section """ - out: Dict[str, Any] = {"components": raw_results.get('components', [])} - comps = raw_results.get('components') if isinstance(raw_results, dict) else None - if not comps: - # unknown shape, return raw - return raw_results - - # Keep a map of generated alerts per component key so we can build - # notifications without injecting those alerts back into the component - generated_alerts_map: Dict[str, List[Dict[str, Any]]] = {} - - for c in comps: - comp = deepcopy(c) + # Return original components unchanged - no modifications allowed per requirement + original_components = raw_results.get('components', []) if isinstance(raw_results, dict) else [] + + if not original_components: + # No socket components found, return empty structure + return {"components": [], "notifications": {}} + + # Generate alerts for notifications only - do NOT modify original components + components_with_alerts_for_notifications = [] + + for c in original_components: alerts: List[Dict[str, Any]] = [] vulns = c.get('vulnerabilities') or [] + for v in vulns: vid = v.get('ghsaId') or v.get('cveId') or v.get('id') or v.get('vulnId') # severity heuristics (pull from multiple possible fields) @@ -424,6 +455,10 @@ def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: sev = 'unknown' reach = self._determine_reachability(v, c) + + # Skip vulnerabilities that are not applicable to this component version + if reach.get('type') == 'not_applicable': + continue purl = self._make_purl(c) @@ -459,165 +494,113 @@ def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: } alerts.append(alert) - orig_had_alerts = bool(c.get('alerts')) - if orig_had_alerts: - # preserve original alerts as provided by the Socket CLI - try: - comp['alerts'] = deepcopy(c.get('alerts') or []) - except Exception: - comp['alerts'] = c.get('alerts') or [] - else: - # Ensure we do not leave an empty 'alerts' list on components - if 'alerts' in comp: - try: - del comp['alerts'] - 
except Exception: - comp.pop('alerts', None) - - # Store generated alerts in a separate map keyed by component name/id - try: - comp_key = comp.get('name') or comp.get('id') or '-' - generated_alerts_map[comp_key] = alerts - except Exception: - # ignore mapping errors - pass - + # Create a copy of the component with alerts for notifications only + # This is only used for generating notifications, NOT returned in components + if alerts: + comp_with_alerts = deepcopy(c) + comp_with_alerts['alerts'] = alerts + components_with_alerts_for_notifications.append(comp_with_alerts) - # Build notifications mapping so the notification manager uses our exact columns + # Build notifications for each notifier type using components with alerts + notifications_by_notifier = {} try: - # Build 5-column rows with explicit headers - rows_5col: List[List[str]] = [] - comps = out.get('components') or [] - for comp in comps: - comp_name = comp.get('name') or comp.get('id') or '-' - purl = self._make_purl(comp) or comp_name - # Include any alerts that already exist on the component - for a in comp.get('alerts', []): - props = a.get('props', {}) or {} - # first column: CVE/GHSA id or alert title - id_col = props.get('ghsaId') or props.get('cveId') or a.get('title') or '' - sev = a.get('severity') or props.get('severity') or '' - reach = str(props.get('reachability') or '').lower() - # trace: only include for reachable - trace_raw = props.get('trace') or '' - trace_str = '' - if isinstance(trace_raw, list): - trace_str = '\n'.join(str(x) for x in trace_raw) - elif isinstance(trace_raw, str): - trace_str = trace_raw - - if reach != 'reachable': - # per requirement: only reachable items have traces - trace_str = '' - - rows_5col.append([ - str(id_col), - str(sev), - str(reach), - str(purl), - str(trace_str), - ]) - - # If component did not have original alerts, include generated ones - comp_key = comp.get('name') or comp.get('id') or '-' - gen_alerts = generated_alerts_map.get(comp_key, []) - if gen_alerts and not comp.get('alerts'): - for a in gen_alerts: - props = a.get('props', {}) or {} - id_col = props.get('ghsaId') or props.get('cveId') or a.get('title') or '' - sev = a.get('severity') or props.get('severity') or '' - reach = str(props.get('reachability') or '').lower() - trace_raw = props.get('trace') or '' - trace_str = '' - if isinstance(trace_raw, list): - trace_str = '\n'.join(str(x) for x in trace_raw) - elif isinstance(trace_raw, str): - trace_str = trace_raw - if reach != 'reachable': - trace_str = '' - purl = props.get('purl') or self._make_purl(comp) or comp_key - rows_5col.append([ - str(id_col), - str(sev), - str(reach), - str(purl), - str(trace_str), - ]) - - if rows_5col: - # Attach connector-provided notifications with explicit headers - # Include a generatedBy column so attribution travels with each row - headers = ['CVE/GHSA', 'severity', 'reachability', 'purl', 'trace', 'generatedBy'] - out['notifications'] = [ - { - 'title': 'Socket Tier 1 Reachability', - 'headers': headers, - 'rows': [r + ['socket-tier1'] for r in rows_5col], - } - ] + if components_with_alerts_for_notifications: + notifications_by_notifier = self.generate_notifications(components_with_alerts_for_notifications) except Exception: # best-effort: do not fail conversion if notifications building errors logger.exception('Failed to build notifications for socket_tier1') - return {'components': out.get('components', []), 'notifications': out.get('notifications', [])} + # Return ORIGINAL components unchanged and notifications 
separately + return { + 'components': original_components, # Original components with NO modifications + 'notifications': notifications_by_notifier + } + + - # Note: consolidated `notification_rows` implementation follows below. def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str]]: """Produce consolidated notification rows compatible with the central notifier. Return canonical rows in the shape used by other connectors and the `normalize_components` helper: [file/component, severity, message/title, location/details]. - This method accepts either the processed wrapper (with 'components') or - the full `facts` dict that contains a `socket_tier1` key. + For Socket Tier1, since components are returned unchanged (without alerts), + we need to reconstruct the alert information from the notifications. """ rows: List[List[str]] = [] - # Resolve components from multiple possible shapes - comps = [] - if isinstance(processed_results, dict): - if 'components' in processed_results and isinstance(processed_results.get('components'), list): - comps = processed_results.get('components', []) - elif 'socket_tier1' in processed_results and isinstance(processed_results['socket_tier1'], dict): - comps = processed_results['socket_tier1'].get('components', []) - elif 'socket_tier1' in processed_results and isinstance(processed_results['socket_tier1'], list): - comps = processed_results['socket_tier1'] - else: - comps = processed_results.get('components', []) - - for comp in comps: - comp_name = comp.get('name') or comp.get('id') or '-' - for a in comp.get('alerts', []): - props = a.get('props', {}) or {} - # File/component column: prefer purl, fall back to component name - purl = props.get('purl') or '' - file_col = purl or comp_name - - # Severity - sev = a.get('severity') or props.get('severity') or '' - - # Message/title: use GHSA/CVE id if present, else alert title - title = props.get('ghsaId') or props.get('cveId') or a.get('title') or a.get('message') or '' - - # Location/details: for reachable include formatted trace (multi-line), otherwise include purl or component - trace_raw = props.get('trace') or '' - trace_str = '' - if isinstance(trace_raw, list): - trace_str = '\n'.join(str(x) for x in trace_raw) - elif isinstance(trace_raw, str): - trace_str = trace_raw - - if str(props.get('reachability') or '').lower() == 'reachable': - # prepend patterns if present for context - patterns = props.get('reachabilityPatterns') or props.get('patterns') or [] - pat_str = '\n'.join(str(p) for p in patterns) if patterns else '' - loc = '' - if pat_str: - loc = pat_str + ('\n' + trace_str if trace_str else '') - else: - loc = trace_str or purl or comp_name - else: - # non-reachable: location should be purl (or component name) and no trace - loc = purl or comp_name - + + # For Socket Tier1, alerts are not in components but in notifications + # We need to build rows from the notification data + notifications = processed_results.get('notifications', {}) + + # Extract alert information from any notification format that has structured data + # Priority: use console notifications if available as they're most direct + console_notifications = notifications.get('console', []) + if console_notifications: + for notif in console_notifications: + # Console notifications should have the alert data we need + file_col = notif.get('component') or notif.get('file') or '-' + sev = notif.get('severity') or '' + title = notif.get('title') or notif.get('message') or '' + loc = notif.get('location') or 
notif.get('details') or '' rows.append([str(file_col), str(sev), str(title), str(loc)]) + else: + # Fallback: try to extract from any other notification format + for notifier_type, notifier_data in notifications.items(): + if isinstance(notifier_data, list): + for notif in notifier_data: + if isinstance(notif, dict): + file_col = notif.get('component') or notif.get('file') or notif.get('purl') or '-' + sev = notif.get('severity') or '' + title = notif.get('title') or notif.get('message') or notif.get('vulnerability') or '' + loc = notif.get('location') or notif.get('details') or notif.get('trace') or '' + rows.append([str(file_col), str(sev), str(title), str(loc)]) + break # Only use first available notifier data to avoid duplicates + return rows + + def generate_notifications(self, components: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, str]]]: + """Generate pre-formatted notifications for all notifier types. + + Args: + components: List of component dictionaries with alerts + + Returns: + Dictionary mapping notifier keys to lists of notification dictionaries + """ + if not components: + return {} + + # Filter components by severity before formatting + filtered_components = [] + for component in components: + filtered_alerts = [] + for alert in component.get('alerts', []): + # Filter by severity - only include alerts that match allowed severities + alert_severity = (alert.get('severity') or '').strip().lower() + if alert_severity and hasattr(self, 'allowed_severities') and alert_severity not in self.allowed_severities: + continue # Skip this alert - severity not enabled + filtered_alerts.append(alert) + + # Only include component if it has filtered alerts + if filtered_alerts: + filtered_component = component.copy() + filtered_component['alerts'] = filtered_alerts + filtered_components.append(filtered_component) + + if not filtered_components: + return {} + + # Build notifications for each notifier type using Socket Tier1-specific modules + notifications_by_notifier = {} + notifications_by_notifier['github_pr'] = github_pr.format_notifications(filtered_components) + notifications_by_notifier['slack'] = slack.format_notifications(filtered_components) + notifications_by_notifier['msteams'] = ms_teams.format_notifications(filtered_components) + notifications_by_notifier['ms_sentinel'] = ms_sentinel.format_notifications(filtered_components) + notifications_by_notifier['sumologic'] = sumologic.format_notifications(filtered_components) + notifications_by_notifier['json'] = json_notifier.format_notifications(filtered_components) + notifications_by_notifier['console'] = console.format_notifications(filtered_components) + notifications_by_notifier['jira'] = jira.format_notifications(filtered_components) + notifications_by_notifier['webhook'] = webhook.format_notifications(filtered_components) + + return notifications_by_notifier diff --git a/socket_basics/core/connector/socket_tier1/slack.py b/socket_basics/core/connector/socket_tier1/slack.py new file mode 100644 index 0000000..35ca933 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/slack.py @@ -0,0 +1,81 @@ +"""Slack notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: 
pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for Slack notifications - concise with emojis.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '').lower() + + # Add severity emojis and reachability emojis for Slack + severity_lower = severity.lower() + severity_emoji = { + 'critical': '🔴', + 'high': '🟠', + 'medium': '🟡', + 'low': '🟢' + }.get(severity_lower, '⚪') + + reach_emoji = { + 'reachable': '🔥', + 'unreachable': '✅', + 'unknown': '❓' + }.get(reachability, '⚪') + + # Truncate PURL for Slack readability + short_purl = purl[:50] + '...' if len(purl) > 50 else purl + + rows.append([ + cve_id, + f"{severity_emoji} {severity}", + f"{reach_emoji} {reachability}", + short_purl, + 'Yes' if reachability == 'reachable' else 'No' + ]) + + # Format as markdown table for Slack + if not rows: + content = "No Socket Tier1 vulnerabilities found." + else: + headers = ['CVE/GHSA', 'Severity', 'Reachability', 'Package', 'Has Trace'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'Socket Tier1 Reachability Analysis', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/sumologic.py b/socket_basics/core/connector/socket_tier1/sumologic.py new file mode 100644 index 0000000..434e059 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/sumologic.py @@ -0,0 +1,65 @@ +"""SumoLogic notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for SumoLogic - structured logging format.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or 
props.get('severity') or '') + reachability = str(props.get('reachability') or '') + + # Key-value format suitable for log parsing + rows.append([ + f"vuln={cve_id}", + f"severity={severity}", + f"reachability={reachability}", + f"purl={purl}", + f"component={comp_name}", + f"scanner=socket-tier1" + ]) + + # Format as structured data for SumoLogic + if not rows: + content = "No Socket Tier1 vulnerabilities found." + else: + headers = ['Vulnerability', 'Severity', 'Reachability', 'Package', 'Component', 'Scanner'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'Socket Tier1 Vulnerability Events', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/socket_tier1/webhook.py b/socket_basics/core/connector/socket_tier1/webhook.py new file mode 100644 index 0000000..df5ed20 --- /dev/null +++ b/socket_basics/core/connector/socket_tier1/webhook.py @@ -0,0 +1,66 @@ +"""Webhook notifier formatting for Socket Tier1 reachability analysis.""" + +from typing import Dict, Any, List + + +def _make_purl(comp: Dict[str, Any]) -> str: + """Construct a best-effort purl from a component entry.""" + typ = comp.get('type') + namespace = comp.get('namespace') + name = comp.get('name') or comp.get('id') + version = comp.get('version') + if not name: + return '' + # Basic purl: pkg:type/namespace/name@version (percent-encode @ in namespace if needed) + if namespace: + # If namespace already contains @ (scoped npm), percent-encode + ns = namespace.replace('@', '%40') + p = f"pkg:{typ}/{ns}/{name}" + else: + p = f"pkg:{typ}/{name}" + if version: + p = p + f"@{version}" + return p + + +def format_notifications(components_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Format for generic webhook - flexible structured format.""" + rows = [] + for comp in components_list: + comp_name = str(comp.get('name') or comp.get('id') or '-') + + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + purl = str(props.get('purl') or _make_purl(comp) or comp_name) + cve_id = str(props.get('ghsaId') or props.get('cveId') or a.get('title') or '') + severity = str(a.get('severity') or props.get('severity') or '') + reachability = str(props.get('reachability') or '') + + rows.append([ + cve_id, + severity, + reachability, + purl, + comp_name, + str(comp.get('version', '')), + str(props.get('ghsaId', '')), + 'socket-tier1' + ]) + + # Format as structured data for webhook + if not rows: + content = "No Socket Tier1 vulnerabilities found." 
+ else: + headers = ['ID', 'Severity', 'Reachability', 'PURL', 'Component', 'Version', 'GHSA', 'Scanner'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'Socket Tier1 Reachability Analysis', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/__init__.py b/socket_basics/core/connector/trivy/__init__.py index e423c71..d925dd9 100644 --- a/socket_basics/core/connector/trivy/__init__.py +++ b/socket_basics/core/connector/trivy/__init__.py @@ -3,676 +3,6 @@ Trivy Container Scanner Connector Handles Trivy Dockerfile and Image scanning with result processing """ +from .trivy import TrivyScanner -import json -import logging -import subprocess -import tempfile -import os -from pathlib import Path -from typing import Dict, List, Any -from ..base import BaseConnector -# Trivy builds canonical components/notifications directly - -logger = logging.getLogger(__name__) - - -class TrivyScanner(BaseConnector): - """Trivy container scanner implementation""" - - def __init__(self, config): - super().__init__(config) - - def is_enabled(self) -> bool: - """Check if container scanning should be enabled. - - Returns True if either Dockerfile or container image scanning is enabled. - This method supports both the new parameter names and legacy ones. - """ - dockerfile_flag = bool(self.config.get('dockerfile_scanning_enabled', False) or self.config.get('dockerfile_enabled', False)) - image_flag = bool(self.config.get('container_image_scanning_enabled', False) or self.config.get('image_enabled', False)) - return dockerfile_flag or image_flag - - def scan(self) -> Dict[str, Any]: - """Run both Dockerfile and Image scanning""" - if not self.is_enabled(): - logger.info("Container scanning disabled, skipping Trivy") - return {} - - results_map: Dict[str, Any] = {} - all_notifications: List[List[str]] = [] - - # Run Dockerfile scanning - try: - dockerfile_results = self.scan_dockerfiles() or {} - if isinstance(dockerfile_results, dict): - results_map.update(dockerfile_results) - except Exception: - logger.exception('Trivy: dockerfile scan failed') - - # Run Image scanning - try: - image_results = self.scan_images() or {} - if isinstance(image_results, dict): - results_map.update(image_results) - except Exception: - logger.exception('Trivy: image scan failed') - try: - # The connector produces a mapping of id->component in results_map. - # Convert to canonical components list and let connector build - # presentation-ready notification tables via notification_rows(). 
- components_list: List[Dict[str, Any]] = [] - mapping: Dict[str, Any] = {} - if isinstance(results_map, dict): - # results_map may already be mapping id->component - all_vals = [] - for k, v in results_map.items(): - if isinstance(v, dict): - mapping[k] = v - all_vals.append(v) - components_list = all_vals - - # Build notification tables from mapping - try: - tables = self.notification_rows(mapping) - except Exception: - tables = [] - - # Fallback: synthesize a simple results table from alerts if needed - if not tables: - notifications = [] - for c in components_list: - cid = c.get('id') or c.get('name') or '' - for a in c.get('alerts', []) or []: - path = (a.get('props') or {}).get('filePath') or (a.get('location') or {}).get('path') or c.get('name') or cid - line = (a.get('props') or {}).get('startLine') or (a.get('location') or {}).get('line') or '' - notifications.append([cid, str(a.get('severity') or ''), a.get('title') or a.get('description') or '', f"{path}:{line}" if line else (path or '')]) - if notifications: - tables = [{'title': 'results', 'headers': ['component','severity','title','location'], 'rows': notifications}] - - return {'components': components_list, 'notifications': tables} - except Exception: - logger.exception('Trivy: normalization failed') - return {'components': list(results_map.values()), 'notifications': []} - - def scan_dockerfiles(self) -> Dict[str, Any]: - """Run Trivy Dockerfile scanning""" - # Consider both new and legacy dockerfile flags - dockerfile_enabled = self.config.get('dockerfile_scanning_enabled', False) or self.config.get('dockerfile_enabled', False) - if not dockerfile_enabled: - logger.info("Dockerfile scanning disabled, skipping Trivy Dockerfile") - return {} - - dockerfiles = self.config.get('dockerfiles', '') - if isinstance(dockerfiles, str): - dockerfiles = [f.strip() for f in dockerfiles.split(',') if f.strip()] - elif isinstance(dockerfiles, list): - dockerfiles = [str(f).strip() for f in dockerfiles if str(f).strip()] - else: - dockerfiles = [] - - # Try to detect changed Dockerfiles even if none explicitly configured - changed_files = self.config.get('changed_files', []) if hasattr(self.config, '_config') else [] - if not changed_files: - try: - from socket_basics.core.config import _detect_git_changed_files - changed_files = _detect_git_changed_files(str(self.config.workspace), mode='staged') - except Exception: - changed_files = [] - - # If explicit dockerfiles are not set, but changed Dockerfiles exist, use them - if not dockerfiles and changed_files: - # Filter changed files for Dockerfile candidates - possible = [] - for cf in changed_files: - base = Path(cf).name - if base == 'Dockerfile' or 'dockerfile' in base.lower() or base.lower().endswith('.dockerfile'): - if (self.config.workspace / cf).exists(): - possible.append(cf) - if possible: - dockerfiles = possible - - if not dockerfiles: - logger.info("No Dockerfiles specified, skipping Trivy Dockerfile scanning") - return {} - - logger.info("Running Trivy Dockerfile scanning") - results = {} - - # If changed_files is provided, prefer scanning only changed Dockerfiles - changed_files = self.config.get('changed_files', []) if hasattr(self.config, '_config') else [] - # Fallback: attempt to detect staged changed files if none present - if not changed_files: - try: - # import helper from config module - from socket_basics.core.config import _detect_git_changed_files - changed_files = _detect_git_changed_files(str(self.config.workspace), mode='staged') - except Exception: - 
changed_files = [] - if changed_files: - # Filter changed files down to ones that are Dockerfiles or named 'Dockerfile' - changed_dockerfiles = [] - for cf in changed_files: - cf_path = Path(cf) - base = cf_path.name - if base == 'Dockerfile' or base.lower().endswith('dockerfile') or base.lower().endswith('.dockerfile') or 'dockerfile' in base.lower(): - # Ensure the file exists in workspace - full = self.config.workspace / cf - if full.exists(): - changed_dockerfiles.append(cf) - - if changed_dockerfiles: - logger.info(f"Detected {len(changed_dockerfiles)} changed Dockerfile(s); restricting Trivy to them") - dockerfiles = changed_dockerfiles - - for dockerfile in dockerfiles: - # Resolve dockerfile path: prefer given path if it exists, otherwise join with workspace - candidate_a = Path(dockerfile) - candidate_b = self.config.workspace / dockerfile - if candidate_a.exists(): - dockerfile_path = candidate_a - elif candidate_b.exists(): - dockerfile_path = candidate_b - else: - logger.warning(f"Dockerfile not found: {candidate_a} or {candidate_b}") - continue - - try: - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: - cmd = [ - 'trivy', - 'config', - '--format', 'json', - '--output', temp_file.name, - str(dockerfile_path) - ] - - logger.info(f"Running: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode != 0: - logger.error(f"Trivy dockerfile scan failed for {dockerfile}: {result.stderr}") - continue - - with open(temp_file.name, 'r') as f: - trivy_output = json.load(f) - - dockerfile_results = self._process_dockerfile_results(trivy_output, dockerfile) - # dockerfile_results should already be a canonical wrapper - if isinstance(dockerfile_results, dict) and isinstance(dockerfile_results.get('components'), list): - for c in dockerfile_results.get('components', []): - cid = c.get('id') or c.get('name') or '' - if cid: - results[cid] = c - - except FileNotFoundError: - logger.error("Trivy not found. 
Please install Trivy") - except Exception as e: - logger.error(f"Error running Trivy on {dockerfile}: {e}") - finally: - if 'temp_file' in locals(): - try: - os.unlink(temp_file.name) - except: - pass - - return results - - def scan_images(self) -> Dict[str, Any]: - """Run Trivy image scanning""" - # Consider both new and legacy image flags - image_enabled = self.config.get('container_image_scanning_enabled', False) or self.config.get('image_enabled', False) - if not image_enabled: - logger.info("Image scanning disabled, skipping Trivy Image") - return {} - - # Check both new and legacy parameter names for images - images_str = self.config.get('container_images_to_scan', '') or self.config.get('docker_images', '') - # Also accept list types if provided programmatically - if isinstance(images_str, list): - images = [img for img in images_str if img] - else: - images = [img.strip() for img in str(images_str).split(',') if img.strip()] - - if not images: - logger.info("No Docker images specified, skipping Trivy Image scanning") - return {} - - logger.info("Running Trivy Image scanning") - results: Dict[str, Any] = {} - - for image in images: - try: - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: - cmd = [ - 'trivy', - 'image', - '--format', 'json', - '--output', temp_file.name, - image - ] - - logger.info(f"Running: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode != 0: - logger.error(f"Trivy image scan failed for {image}: {result.stderr}") - continue - - with open(temp_file.name, 'r') as f: - trivy_output = json.load(f) - - image_results = self._process_image_results(trivy_output, image) - if isinstance(image_results, dict): - results.update(image_results) - - except FileNotFoundError: - logger.error("Trivy not found. Please install Trivy") - except Exception as e: - logger.error(f"Error running Trivy on {image}: {e}") - finally: - if 'temp_file' in locals(): - try: - os.unlink(temp_file.name) - except: - pass - - return results - - def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: - """Convert raw Trivy results to Socket facts format - - This method implements the BaseConnector interface. - Since Trivy has multiple scan types, this method delegates to the appropriate - processing methods based on the result structure. 
- """ - # This is a unified method that can handle both dockerfile and image results - # The scan() method already processes results appropriately - if isinstance(raw_results, dict): - return raw_results - return {} - - def _process_dockerfile_results(self, trivy_output: Dict[str, Any], dockerfile: str) -> Dict[str, Any]: - """Convert Trivy Dockerfile results to Socket facts format""" - results = trivy_output.get('Results', []) - - if not results: - return {} - - import hashlib - # Create a single component per Dockerfile and append all misconfiguration alerts - try: - from pathlib import Path as _P - p = _P(dockerfile) - try: - ws = getattr(self.config, 'workspace', None) - ws_root = getattr(ws, 'path', None) or getattr(ws, 'root', None) or ws - if ws and not p.is_absolute(): - p = _P(ws) / dockerfile - # If path includes workspace prefix like '../NodeGoat' or 'NodeGoat', strip it - if ws_root: - try: - ws_name = os.path.basename(str(ws_root)) - parts = str(p).split(os.sep) - if parts and (parts[0] == ws_name or (len(parts) >= 2 and parts[0] in ('.', '..') and parts[1] == ws_name)): - if parts[0] == ws_name: - parts = parts[1:] - else: - parts = parts[2:] - p = _P(os.path.join(*parts)) if parts else _P('') - except Exception: - pass - except Exception: - pass - norm = str(p.as_posix()) - cid = hashlib.sha256(norm.encode('utf-8')).hexdigest() - except Exception: - import hashlib as _hash - cid = _hash.sha256(str(dockerfile).encode('utf-8')).hexdigest() - - component = { - "id": cid, - "type": "generic", - "name": f"{dockerfile}", - "internal": True, - "version": "", - "direct": True, - "dev": False, - "dead": False, - "dependencies": [], - "manifestFiles": [{"file": dockerfile}] if dockerfile else [], - "alerts": [] - } - - for result in results: - misconfigurations = result.get('Misconfigurations', []) - for misconfig in misconfigurations: - alert = self._create_dockerfile_alert(misconfig, dockerfile) - if alert: - component["alerts"].append(alert) - - return {cid: component} - - def _process_image_results(self, trivy_output: Dict[str, Any], image: str) -> Dict[str, Any]: - """Convert Trivy Image results to Socket facts format""" - results = trivy_output.get('Results', []) - - if not results: - return {} - - import hashlib - components: Dict[str, Any] = {} - - # For image vulnerabilities, create a component per vulnerability - # using sha256(image + vuln_id + purl_if_present) so identical - # vuln hits map across runs and images with purls are distinguished. 
- for result in results: - vulnerabilities = result.get('Vulnerabilities', []) - for vuln in vulnerabilities: - vuln_id = vuln.get('VulnerabilityID', 'unknown') - # build purl from created alert if possible - purl = None - try: - installed_version = vuln.get('InstalledVersion') or vuln.get('FixedVersion') or 'unknown' - pkg_name = vuln.get('PkgName') or vuln.get('Package') or 'unknown' - if pkg_name: - purl = f"pkg:deb/{pkg_name}@{installed_version}" - except Exception: - purl = None - - seed = f"{image}:{vuln_id}:{purl or ''}" - cid = hashlib.sha256(seed.encode('utf-8')).hexdigest() - - if cid not in components: - components[cid] = { - "id": cid, - "type": "generic", - "name": f"{image}", - "internal": True, - "version": "", - "subpath": f"image:{image}", - "direct": True, - "dev": False, - "dead": False, - "dependencies": [], - "manifestFiles": [{"file": image}], - "alerts": [] - } - - alert = self._create_image_alert(vuln, image) - if alert: - components[cid]["alerts"].append(alert) - - return components - - def _create_dockerfile_alert(self, misconfig: Dict[str, Any], dockerfile: str) -> Dict[str, Any]: - """Create a generic alert from a Trivy Dockerfile misconfiguration""" - severity_map = { - 'CRITICAL': 'critical', - 'HIGH': 'high', - 'MEDIUM': 'medium', - 'LOW': 'low' - } - - severity = severity_map.get(misconfig.get('Severity', 'LOW'), 'low') - rule_id = misconfig.get('ID', 'unknown') - try: - if isinstance(rule_id, str) and rule_id.startswith('socket_basics.rules.'): - rule_id = rule_id.replace('socket_basics.rules.', '', 1) - except Exception: - pass - title = misconfig.get('Title', 'Configuration issue') - description = misconfig.get('Description', 'Dockerfile configuration issue detected') - - markdown_content = f"""## Dockerfile Configuration Issue: {rule_id} - -### Description -{description} - -### File Location -- **Dockerfile**: `{dockerfile}` -- **Rule ID**: {rule_id} - -### Issue Details -{misconfig.get('Message', 'No additional details available')} - -### Resolution -{misconfig.get('Resolution', 'Review Dockerfile configuration and apply security best practices')} - -### References -{chr(10).join([f"- [{ref}]({ref})" for ref in misconfig.get('References', [])])} - -### Security Impact -Dockerfile misconfigurations can lead to: -- Privilege escalation vulnerabilities -- Information disclosure -- Increased attack surface -- Compliance violations -""" - - return { - "type": "generic", - "severity": severity, - "title": f"Dockerfile: {title}", - "description": description, - "category": "vulnerability", - "subType": "dockerfile", - "generatedBy": "trivy-dockerfile", - "action": self.config.get_action_for_severity(severity), - "props": { - "ruleId": rule_id, - "dockerfile": dockerfile, - "tool": "trivy", - "scanType": "dockerfile", - "impact": severity, - "resolution": misconfig.get('Resolution', ''), - "references": misconfig.get('References', []), - "detailedReport": { - "content-type": "text/markdown", - "content": markdown_content - } - } - } - - def _get_cve_alert_type(self, severity: str, vuln_id: str) -> str: - """Get the appropriate alert type for CVE findings based on severity""" - # Only use CVE-specific types for actual CVE identifiers - if not vuln_id.startswith('CVE-'): - return "generic" - - severity_to_cve_type = { - 'critical': 'criticalCVE', - 'high': 'cve', - 'medium': 'mediumCVE', - 'low': 'mildCVE' - } - return severity_to_cve_type.get(severity, 'generic') - - def _create_image_alert(self, vuln: Dict[str, Any], image: str) -> Dict[str, Any]: - """Create a 
CVE alert from a Trivy image vulnerability""" - severity_map = { - 'CRITICAL': 'critical', - 'HIGH': 'high', - 'MEDIUM': 'medium', - 'LOW': 'low' - } - - severity = severity_map.get(vuln.get('Severity', 'LOW'), 'low') - vuln_id = vuln.get('VulnerabilityID', 'unknown') - try: - if isinstance(vuln_id, str) and vuln_id.startswith('socket_basics.rules.'): - vuln_id = vuln_id.replace('socket_basics.rules.', '', 1) - except Exception: - pass - title = vuln.get('Title', 'Vulnerability detected') - description = vuln.get('Description', 'Container image vulnerability detected') - - # Get the appropriate alert type for CVE findings - alert_type = self._get_cve_alert_type(severity, vuln_id) - - # Get package info - pkg_name = vuln.get('PkgName', 'unknown') - installed_version = vuln.get('InstalledVersion', 'unknown') - fixed_version = vuln.get('FixedVersion', 'Not available') - - markdown_content = f"""## Container Image Vulnerability: {vuln_id} - -### Vulnerability Details -- **CVE ID**: {vuln_id} -- **Package**: {pkg_name} -- **Installed Version**: {installed_version} -- **Fixed Version**: {fixed_version} -- **Severity**: {severity.upper()} - -### Description -{description} - -### Image Details -- **Image**: `{image}` -- **Package Path**: {vuln.get('PkgPath', 'N/A')} - -### CVSS Score -{vuln.get('CVSS', {}).get('nvd', {}).get('V3Score', 'Not available')} - -### References -{chr(10).join([f"- [{ref}]({ref})" for ref in vuln.get('References', [])])} - -### Remediation -{"Update to version " + fixed_version if fixed_version != "Not available" else "No fix available yet. Consider using alternative packages or implementing additional security controls."} - -### Impact Assessment -Container vulnerabilities can lead to: -- Container escape -- Privilege escalation -- Data exfiltration -- Denial of service -""" - - # Build purl-like locator when possible (best-effort) - purl = None - try: - # prefer explicit installed version, then fixed version, then 'unknown' - installed_version = vuln.get('InstalledVersion') or vuln.get('FixedVersion') or 'unknown' - pkg_name = pkg_name or vuln.get('Package') or 'unknown' - if pkg_name: - # assume deb-style purl by default; this is best-effort and may be adjusted later - purl = f"pkg:deb/{pkg_name}@{installed_version}" - except Exception: - purl = None - - title_text = f"{vuln_id} in {pkg_name}" - - return { - "type": alert_type, - "severity": severity, - "title": title_text, - "description": f"{title} in package {pkg_name}", - "category": "vulnerability", - "subType": "container-image", - "generatedBy": "trivy-image", - "action": self.config.get_action_for_severity(severity), - "props": { - "vulnerabilityId": vuln_id, - "packageName": pkg_name, - "installedVersion": installed_version, - "fixedVersion": fixed_version, - "image": image, - "purl": purl, - "tool": "trivy", - "scanType": "image", - "cvssScore": vuln.get('CVSS', {}).get('nvd', {}).get('V3Score'), - "references": vuln.get('References', []), - "impact": severity, - "detailedReport": { - "content-type": "text/markdown", - "content": markdown_content - } - } - } - - # Notification processor for Trivy - def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str]]: - # Build canonical list of tables: images and dockerfiles - tables: List[Dict[str, Any]] = [] - - image_groups: Dict[str, List[List[str]]] = {} - dockerfile_groups: Dict[str, List[List[str]]] = {} - - for comp in processed_results.values(): - comp_name = comp.get('name') or comp.get('id') or '-' - ctype = q.get('type') or 
comp.get('type') - if ctype == 'image' or str(comp.get('subpath', '')).startswith('image:'): - # treat as image; gather rows - for a in comp.get('alerts', []): - title = a.get('title', '') - sev = a.get('severity', '') - props = a.get('props', {}) or {} - locator = props.get('image') or props.get('dockerImage') or comp_name - if props.get('purl'): - loc = props.get('purl') - elif props.get('packageName'): - loc = f"pkg:deb/{props.get('packageName')}@{props.get('installedVersion', '')}" - else: - loc = '' - title_key = props.get('image') or props.get('dockerImage') or comp_name - image_groups.setdefault(title_key, []).append([title, sev, locator, loc]) - elif ctype == 'dockerfile' or any('dockerfile' in (mf.get('file') or '').lower() for mf in (comp.get('manifestFiles') or [])): - for a in comp.get('alerts', []): - props = a.get('props', {}) or {} - title = props.get('ruleId') or a.get('title') or '' - impact = a.get('severity') or '' - file_loc = props.get('dockerfile') or comp.get('name') or comp_name - dockerfile_groups.setdefault(comp_name, []).append([title, impact, file_loc, props.get('resolution','') or '']) - - # Consolidate image rows per image by (locator, purl, severity) and merge titles - def _merge_titles(titles: List[str]) -> str: - if not titles: - return '' - suffix = None - if all(' in ' in t for t in titles): - cand = titles[0] - idx = cand.rfind(' in ') - if idx != -1: - s = cand[idx:] - if all(t.endswith(s) for t in titles): - suffix = s - if suffix: - stripped = [t[: t.rfind(' in ')] if ' in ' in t else t for t in titles] - return ', '.join(stripped) + suffix - uniq = [] - for t in titles: - if t not in uniq: - uniq.append(t) - if len(uniq) > 10: - return ', '.join(uniq[:10]) + f' (+{len(uniq)-10} more)' - return ', '.join(uniq) - - image_headers = ['Title', 'Severity', 'Image', 'Location'] - for title, rows in image_groups.items(): - keyed: Dict[tuple, List[str]] = {} - others: List[List[str]] = [] - for r in rows: - if not isinstance(r, (list, tuple)) or len(r) < 4: - others.append(r) - continue - t = str(r[0] or '') - severity = str(r[1] or '') - locator = str(r[2] or '') - purl = str(r[3] or '') - key = (locator, purl, severity) - keyed.setdefault(key, []).append(t) - - new_rows: List[List[str]] = [] - for (locator, purl, severity), titles in keyed.items(): - merged = _merge_titles(titles) - new_rows.append([merged, severity, locator, purl]) - new_rows.extend(others) - tables.append({'title': title, 'headers': image_headers, 'rows': new_rows}) - - # Append dockerfile tables (no special consolidation required beyond grouping) - dockerfile_headers = ['Title', 'Severity', 'Dockerfile', 'Resolution'] - for df_name, rows in dockerfile_groups.items(): - tables.append({'title': df_name, 'headers': dockerfile_headers, 'rows': rows}) - - return tables diff --git a/socket_basics/core/connector/trivy/console.py b/socket_basics/core/connector/trivy/console.py new file mode 100644 index 0000000..3213c89 --- /dev/null +++ b/socket_basics/core/connector/trivy/console.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Console notifier for Trivy results. +Formats results for human-readable console output with truncated content. +""" + +from typing import Dict, Any, List +from collections import defaultdict + + +def format_notifications(mapping: Dict[str, Any], item_name: str, scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for console output - human readable with truncated content. 
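Every notifier module added in this PR leans on the same aggregation idiom: findings are keyed by a purl-style package identifier, then by severity, with a `set` deduplicating repeated CVE ids before rows are emitted in severity order. A minimal self-contained sketch of that idiom (the sample findings and variable names here are illustrative, not taken from the connector):

```python
from collections import defaultdict
from tabulate import tabulate

SEVERITY_ORDER = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}

# Illustrative findings shaped like the alerts this module iterates over
findings = [
    ('pkg:deb/openssl@1.1.1w', 'CVE-2024-0727', 'high'),
    ('pkg:deb/openssl@1.1.1w', 'CVE-2024-0727', 'high'),   # duplicate, dropped by the set
    ('pkg:deb/openssl@1.1.1w', 'CVE-2023-5678', 'high'),
    ('pkg:deb/zlib@1.2.13', 'CVE-2023-45853', 'critical'),
]

# package -> severity -> {cve ids}
groups = defaultdict(lambda: defaultdict(set))
for package, cve, severity in findings:
    groups[package][severity].add(cve)

rows = []
for package, by_severity in groups.items():
    for severity in sorted(by_severity, key=lambda s: SEVERITY_ORDER.get(s, 4)):
        bullets = '\n'.join(f"- {cve}" for cve in sorted(by_severity[severity]))
        rows.append([package, bullets, severity])

print(tabulate(rows, headers=['Package', 'CVEs', 'Severity'], tablefmt='grid'))
```

Grouping into a set keyed by (package, severity) means the same CVE reported for multiple layers of an image collapses into a single row.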
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + package_groups = defaultdict(lambda: defaultdict(set)) + + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + rows.append([rule_id, severity, message, resolution]) + + headers = ['Rule ID', 'Severity', 'Message', 'Resolution'] + + elif scan_type == 'image': + # Image format: Package | CVEs | Severity + package_groups = defaultdict(lambda: defaultdict(set)) + + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + for package_name, severity_dict in package_groups.items(): + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) + cve_bullets = '\n'.join([f"- {cve}" for cve in cves]) + display_package = package_name[:40] + '...' if len(package_name) > 40 else package_name + + rows.append([display_package, cve_bullets, severity]) + + headers = ['Package', 'CVEs', 'Severity'] + + elif scan_type == 'vuln': + # Vuln format: Package | CVEs | Severity + package_groups = defaultdict(lambda: defaultdict(set)) + + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + for package_name, severity_dict in package_groups.items(): + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) + cve_bullets = '\n'.join([f"- {cve}" for cve in cves]) + display_package = package_name[:40] + '...' 
if len(package_name) > 40 else package_name + + rows.append([display_package, cve_bullets, severity]) + + headers = ['Package', 'CVEs', 'Severity'] + + else: + return [] + + # Format as a table using tabulate + from tabulate import tabulate + + table_content = tabulate(rows, headers=headers, tablefmt='grid') if rows else "No vulnerabilities found." + + # Create title based on scan type + if scan_type == 'vuln': + title = f"Socket CVE Scanning Results: {item_name}" + elif scan_type == 'dockerfile': + title = f"Socket Dockerfile Results: {item_name}" + else: # image + title = f"Socket Image Scanning Results: {item_name}" + + return [{ + 'title': title, + 'content': table_content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/github_pr.py b/socket_basics/core/connector/trivy/github_pr.py new file mode 100644 index 0000000..fcee974 --- /dev/null +++ b/socket_basics/core/connector/trivy/github_pr.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +""" +GitHub PR notifier for Trivy results. +Formats results with markdown for better GitHub display using the new grouped format. +""" + +from typing import Dict, Any, List +from collections import defaultdict +from .utils import logger, get_notifier_result_limit + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for GitHub PR comments - grouped format with markdown formatting. + + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + # Check for code snippets or detailed content + code_snippet = "" + detailed_report = props.get('detailedReport', {}) + if detailed_report and detailed_report.get('content'): + code_snippet = str(detailed_report.get('content', '')) + + # For dockerfile, store complete rule info including code snippet + rule_info = f"{rule_id}|{message}|{resolution}|{code_snippet}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + # Create purl format: pkg:ecosystem/name@version + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '')) + severity = str(alert.get('severity', '')) + + # Group CVEs by package and severity, use set to avoid duplicates + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + # Collect 
and sort by severity first + unsorted_rows = [] + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 3) + if len(parts) >= 3: + # parts[0] = rule_id, parts[1] = message, parts[2] = resolution, parts[3] = code_snippet + message = parts[1] + resolution = parts[2] + code_snippet = parts[3] if len(parts) > 3 else "" + + # Format code snippets for GitHub PR comments + if code_snippet and code_snippet.strip(): + # Extract actual code from markdown if present + if '```' in code_snippet: + # Extract code between markdown code blocks + import re + code_matches = re.findall(r'```[\w]*\n?(.*?)\n?```', code_snippet, re.DOTALL) + if code_matches: + actual_code = code_matches[0].strip() + # Format with
<pre> tags and <br> for line breaks + formatted_code = f"<pre>{actual_code.replace(chr(10), '<br>')}</pre>" + message = f"{message}<br><br>
{formatted_code}" + + unsorted_rows.append(( + severity_order.get(severity, 4), + [f"**{rule_id}**", f"*{severity}*", message, resolution] + )) + + # Sort by severity and extract rows + unsorted_rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in unsorted_rows] + + headers = ['Rule ID', 'Severity', 'Message', 'Resolution'] + + elif scan_type == 'image': + # Image format: Expandable panels for each CVE + # Store full alert details from original mapping for panels + vuln_details = [] + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '')) + severity = str(alert.get('severity', '')).lower() + description = str(alert.get('description', 'No description available')) + + # Build package identifier + if comp_version: + package = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package = f"pkg:{ecosystem}/{comp_name}" + + # Get additional metadata + fixed_version = str(props.get('fixedVersion', 'Not available')) + installed_version = comp_version or 'Unknown' + + vuln_details.append({ + 'cve_id': cve_id, + 'severity': severity, + 'severity_order': severity_order.get(severity, 4), + 'package': package, + 'comp_name': comp_name, + 'ecosystem': ecosystem, + 'installed_version': installed_version, + 'fixed_version': fixed_version, + 'description': description + }) + + # Sort by severity + vuln_details.sort(key=lambda x: x['severity_order']) + rows = vuln_details # Store for panel rendering + headers = [] # No table headers for panel format + + elif scan_type == 'vuln': + # Vuln format: Expandable panels for each CVE + # Store full alert details from original mapping for panels + vuln_details = [] + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '')) + severity = str(alert.get('severity', '')).lower() + description = str(alert.get('description', 'No description available')) + + # Build package identifier + if comp_version: + package = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package = f"pkg:{ecosystem}/{comp_name}" + + # Get additional metadata + fixed_version = str(props.get('fixedVersion', 'Not available')) + installed_version = comp_version or 'Unknown' + + vuln_details.append({ + 'cve_id': cve_id, + 'severity': severity, + 'severity_order': severity_order.get(severity, 4), + 'package': package, + 'comp_name': comp_name, + 'ecosystem': ecosystem, + 'installed_version': installed_version, + 'fixed_version': fixed_version, + 'description': description + }) + + # Sort by severity + vuln_details.sort(key=lambda x: x['severity_order']) + rows = vuln_details # Store for panel rendering + headers = [] # No table headers for panel format + + else: + rows = [] + headers = [] + + # Apply truncation for GitHub PR + max_rows = get_notifier_result_limit('github_pr') + original_count = len(rows) + truncated = False + if len(rows) > max_rows: + rows = rows[:max_rows] + truncated = True + logger.info(f"Truncated GitHub PR results from {original_count} to {max_rows}") + + # Format content based on scan_type + if not rows: + content = 
"No vulnerabilities found." + elif scan_type in ['image', 'vuln']: + # Panel format for vulnerability scanning + panels = [] + for vuln in rows: + # Determine panel color based on severity + severity_icons = { + 'critical': '🔴', + 'high': '🟠', + 'medium': '🟡', + 'low': '🟢' + } + icon = severity_icons.get(vuln['severity'], '⚪') + severity_label = vuln['severity'].upper() + + # Create expandable panel for each CVE + panel = f"""
<details> +<summary>{icon} {vuln['cve_id']}</summary> + +**Severity:** {severity_label} + +**Package:** `{vuln['package']}` + +**Installed Version:** {vuln['installed_version']} + +**Fixed Version:** {vuln['fixed_version']} + +**Ecosystem:** {vuln['ecosystem']} + +### Description +{vuln['description']} + +</details>
+ +""" + panels.append(panel) + + content = '\n'.join(panels) + + # Add truncation notice if needed + if truncated: + content += f"\n> ⚠️ **Showing top {max_rows} results (by severity).** {original_count - max_rows} additional results truncated.\n" + + else: + # Table format for dockerfile scanning + header_row = '| ' + ' | '.join(headers) + ' |' + separator_row = '| ' + ' | '.join(['---'] * len(headers)) + ' |' + content_rows = [] + for row in rows: + content_rows.append('| ' + ' | '.join(str(cell) for cell in row) + ' |') + + content = '\n'.join([header_row, separator_row] + content_rows) + + # Add truncation notice if needed + if truncated: + content += f"\n\n> ⚠️ **Showing top {max_rows} results (by severity).** {original_count - max_rows} additional results truncated.\n" + + # Build title based on scan type + if scan_type == 'vuln': + title_base = "Socket CVE Scanning Results" + scanner_name = "Trivy Vuln Scanning" + elif scan_type == 'dockerfile': + title_base = "Socket Dockerfile Results" + scanner_name = "Trivy Dockerfile" + else: # image + title_base = "Socket Image Scanning Results" + scanner_name = "Trivy Container" + + title = f"{title_base}: {item_name}" + + # Count total findings for summary + total_findings = len(rows) + + # Add summary section with scanner findings + summary_content = f"""## Summary + +| Scanner | Findings | +|---------|----------| +| {scanner_name} | {total_findings} | + +## Details + +{content}""" + + # Wrap content with HTML comment markers for section updates + wrapped_content = f""" +# {title} + +{summary_content} +""" + + return [{ + 'title': title, + 'content': wrapped_content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/jira.py b/socket_basics/core/connector/trivy/jira.py new file mode 100644 index 0000000..b744626 --- /dev/null +++ b/socket_basics/core/connector/trivy/jira.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 +""" +Jira notifier for Trivy results. +Formats results using the new grouped format with priority mapping and detailed descriptions. +""" + +from typing import Dict, Any, List +from collections import defaultdict +from .utils import get_notifier_result_limit, logger + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for Jira tickets - grouped format with priority mapping. 
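The Jira notifier below emits Atlassian Document Format (ADF) rather than markdown: every paragraph, panel, and table cell is a typed JSON node. As a rough, hand-written illustration of the payload shape this module assembles for image/vuln scans (the text values are invented):

```python
import json

# A minimal ADF document containing one warning panel, mirroring the node
# structure assembled below (text values are invented for illustration)
adf_doc = {
    "type": "doc",
    "version": 1,
    "content": [
        {
            "type": "panel",
            "attrs": {"panelType": "warning"},
            "content": [
                {"type": "paragraph", "content": [
                    {"type": "text", "text": "Package Vulnerability: CVE-2023-45853",
                     "marks": [{"type": "strong"}]},
                ]},
                {"type": "paragraph", "content": [
                    {"type": "text", "text": "• Package: "},
                    {"type": "text", "text": "pkg:deb/zlib@1.2.13",
                     "marks": [{"type": "code"}]},
                ]},
                {"type": "paragraph", "content": [
                    {"type": "text", "text": "• Severity: Highest"},
                ]},
            ],
        },
    ],
}

print(json.dumps(adf_doc, indent=2))
```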
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + # Check for code snippets or detailed content + code_snippet = "" + detailed_report = props.get('detailedReport', {}) + if detailed_report and detailed_report.get('content'): + code_snippet = str(detailed_report.get('content', '')) + + # Store complete rule info including code snippet + rule_info = f"{rule_id}|{message}|{resolution}|{code_snippet}" + package_groups[rule_id][severity].add(rule_info) + + elif scan_type == 'image': + # Process container image vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + elif scan_type == 'vuln': + # Process filesystem vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 3) + if len(parts) >= 3: + # parts[0] = rule_id, parts[1] = message, parts[2] = resolution, parts[3] = code_snippet + message = parts[1] + resolution = parts[2] + code_snippet = parts[3] if len(parts) > 3 else "" + + # Extract code snippets for ADF format + code_content = None + if code_snippet and code_snippet.strip(): + # Extract actual code from markdown if present + if '```' in code_snippet: + import re + code_matches = re.findall(r'```[\w]*\n?(.*?)\n?```', code_snippet, re.DOTALL) + if code_matches: + actual_code = code_matches[0].strip() + # Create ADF code block for Dockerfile + code_content = { + "type": "codeBlock", + "attrs": 
{"language": "dockerfile"}, + "content": [{"type": "text", "text": actual_code}] + } + + # Map severity to Jira priority + severity_lower = severity.lower() + jira_priority = { + 'critical': 'Highest', + 'high': 'High', + 'medium': 'Medium', + 'low': 'Low' + }.get(severity_lower, 'Medium') + + # Create message content with optional code block + message_content = [{"type": "paragraph", "content": [{"type": "text", "text": message}]}] + if code_content: + message_content.append(code_content) + + rows.append([ + {"type": "paragraph", "content": [{"type": "text", "text": rule_id}]}, + {"type": "paragraph", "content": [{"type": "text", "text": jira_priority}]}, + {"type": "div", "content": message_content}, + {"type": "paragraph", "content": [{"type": "text", "text": resolution}]} + ]) + + headers = ['Rule ID', 'Priority', 'Message', 'Resolution'] + + elif scan_type == 'image': + # Container image format: Package | CVEs | Priority + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as compact list for Jira (limit content length) + if len(cves) > 10: + # Truncate to avoid content limits + cve_list = ', '.join(cves[:10]) + f' ... (+{len(cves)-10} more)' + else: + cve_list = ', '.join(cves) + + # Map severity to Jira priority + severity_lower = severity.lower() + jira_priority = { + 'critical': 'Highest', + 'high': 'High', + 'medium': 'Medium', + 'low': 'Low' + }.get(severity_lower, 'Medium') + + # Truncate package name if too long + display_package = package_name[:80] + '...' if len(package_name) > 80 else package_name + + rows.append([ + {"type": "paragraph", "content": [{"type": "text", "text": display_package}]}, + {"type": "paragraph", "content": [{"type": "text", "text": cve_list}]}, + {"type": "paragraph", "content": [{"type": "text", "text": jira_priority}]} + ]) + + headers = ['Package', 'CVEs', 'Priority'] + + elif scan_type == 'vuln': + # Filesystem vulnerability format: Package | CVEs | Priority + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as compact list for Jira (limit content length) + if len(cves) > 10: + # Truncate to avoid content limits + cve_list = ', '.join(cves[:10]) + f' ... (+{len(cves)-10} more)' + else: + cve_list = ', '.join(cves) + + # Map severity to Jira priority + severity_lower = severity.lower() + jira_priority = { + 'critical': 'Highest', + 'high': 'High', + 'medium': 'Medium', + 'low': 'Low' + }.get(severity_lower, 'Medium') + + # Truncate package name if too long + display_package = package_name[:80] + '...' 
if len(package_name) > 80 else package_name + + rows.append([ + {"type": "paragraph", "content": [{"type": "text", "text": display_package}]}, + {"type": "paragraph", "content": [{"type": "text", "text": cve_list}]}, + {"type": "paragraph", "content": [{"type": "text", "text": jira_priority}]} + ]) + + headers = ['Package', 'CVEs', 'Priority'] + + # Create ADF content + if not rows: + content = { + "type": "doc", + "version": 1, + "content": [ + { + "type": "paragraph", + "content": [{"type": "text", "text": "No vulnerabilities found."}] + } + ] + } + else: + # Get Jira-specific result limit from config + max_items = get_notifier_result_limit('jira') + + # Sort rows by severity (highest first) before truncating + def get_row_severity_order(row): + try: + if scan_type == 'dockerfile': + priority_text = row[1].get('content', [{}])[0].get('text', 'Medium') + else: + priority_text = row[2].get('content', [{}])[0].get('text', 'Medium') + priority_to_order = {'Highest': 0, 'High': 1, 'Medium': 2, 'Low': 3} + return priority_to_order.get(priority_text, 4) + except Exception: + return 4 + + sorted_rows = sorted(rows, key=get_row_severity_order) + display_rows = sorted_rows[:max_items] if len(sorted_rows) > max_items else sorted_rows + truncated_count = len(rows) - len(display_rows) + + # Build panels for image and vuln scan types + panels = [] + if scan_type in ('image', 'vuln'): + # Each row corresponds to [Package, CVEs, Priority] + for row in display_rows: + try: + pkg_cell = row[0] + cves_cell = row[1] + priority_cell = row[2] + pkg_text = pkg_cell.get('content', [{}])[0].get('text', '-') if isinstance(pkg_cell, dict) else str(pkg_cell) + cves_text = cves_cell.get('content', [{}])[0].get('text', '-') if isinstance(cves_cell, dict) else str(cves_cell) + priority_text = priority_cell.get('content', [{}])[0].get('text', 'Medium') if isinstance(priority_cell, dict) else str(priority_cell) + except Exception: + pkg_text = str(row[0]) if len(row) > 0 else '-' + cves_text = str(row[1]) if len(row) > 1 else '-' + priority_text = str(row[2]) if len(row) > 2 else 'Medium' + + # Build panel content similar to Tier1 + panel_type = 'warning' if priority_text in ('High', 'Highest') else 'info' + panel_content = [ + {"type": "paragraph", "content": [{"type": "text", "text": f"Package Vulnerability: {cves_text.split(',')[0].strip()}", "marks": [{"type": "strong"}]}]}, + {"type": "paragraph", "content": [{"type": "text", "text": "Vulnerability Details", "marks": [{"type": "strong"}]}]}, + {"type": "paragraph", "content": [{"type": "text", "text": "• CVE ID: "}, {"type": "text", "text": cves_text.split(',')[0].strip()}]}, + {"type": "paragraph", "content": [{"type": "text", "text": "• Package: "}, {"type": "text", "text": pkg_text, "marks": [{"type": "code"}]}]}, + {"type": "paragraph", "content": [{"type": "text", "text": "• Severity: "}, {"type": "text", "text": priority_text}]} + ] + + # Optional description/recommendation extraction not available from compact rows + # Add panel and a rule separator + panels.append({"type": "panel", "attrs": {"panelType": panel_type}, "content": panel_content}) + panels.append({"type": "rule"}) + + # Remove trailing rule + if panels and panels[-1].get('type') == 'rule': + panels.pop() + + # Add truncation panel if needed + if truncated_count > 0: + truncation_panel = { + "type": "panel", + "attrs": {"panelType": "info"}, + "content": [ + {"type": "paragraph", "content": [{"type": "text", "text": "⚠️ ", "marks": [{"type": "strong"}]}, {"type": "text", "text": f"Showing top 
{max_items} results (by severity). "}, {"type": "text", "text": f"{truncated_count} additional results truncated. View full results at the scan URL."} ]} + ] + } + panels.append(truncation_panel) + + content = {"type": "doc", "version": 1, "content": panels} + + else: + # Keep original table behavior for dockerfile + # Rebuild table rows from display_rows + header_cells = [] + for header in headers: + header_cells.append({ + "type": "tableHeader", + "attrs": {}, + "content": [{"type": "paragraph", "content": [{"type": "text", "text": header}]}] + }) + + table_rows = [{"type": "tableRow", "content": header_cells}] + for row in display_rows: + data_cells = [] + for cell_content in row: + data_cells.append({ + "type": "tableCell", + "attrs": {}, + "content": [cell_content] if isinstance(cell_content, dict) else [{"type": "paragraph", "content": [{"type": "text", "text": str(cell_content)}]}] + }) + table_rows.append({"type": "tableRow", "content": data_cells}) + + content = {"type": "doc", "version": 1, "content": [{"type": "table", "attrs": {"isNumberColumnEnabled": False, "layout": "default"}, "content": table_rows}]} + + # Create title based on scan type + if scan_type == 'vuln': + title = f'Trivy Vuln Scanning Results: {item_name}' + elif scan_type == 'dockerfile': + title = f'Trivy Dockerfile Results: {item_name}' + else: # image + title = f'Trivy Container Results: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/json_notifier.py b/socket_basics/core/connector/trivy/json_notifier.py new file mode 100644 index 0000000..2e7672a --- /dev/null +++ b/socket_basics/core/connector/trivy/json_notifier.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +""" +JSON notifier for Trivy results. +Formats results using the new grouped format with complete structured data for programmatic consumption. +""" + +from typing import Dict, Any, List +from collections import defaultdict +import json + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for JSON output - grouped format with complete structured data. 
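Because this notifier's `content` is a JSON string with a stable `results` + `metadata` envelope, downstream tooling can consume it directly. A small sketch of such a consumer, using hand-written sample data in the same shape:

```python
import json

# Hand-written sample payload in the 'results' + 'metadata' envelope
payload = json.loads("""
{
  "results": [
    {"package": "pkg:deb/zlib@1.2.13", "cves": ["CVE-2023-45853"],
     "severity": "critical", "type": "container-image", "cve_count": 1},
    {"package": "pkg:deb/openssl@1.1.1w", "cves": ["CVE-2024-0727"],
     "severity": "high", "type": "container-image", "cve_count": 1}
  ],
  "metadata": {"item_name": "alpine:3.18", "total_groups": 2,
               "format_type": "container-image", "scan_type": "image"}
}
""")

# Example consumer: gate a pipeline on critical package groups
criticals = [r for r in payload["results"] if r["severity"] == "critical"]
print(f"{payload['metadata']['item_name']}: {len(criticals)} critical group(s)")
```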
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create structured data with proper formatting + structured_data = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + structured_data.append({ + 'rule_id': rule_id, + 'severity': severity, + 'message': message, + 'resolution': resolution, + 'type': 'dockerfile' + }) + format_type = 'dockerfile' + + elif scan_type == 'image': + # Image format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) + + structured_data.append({ + 'package': package_name, + 'cves': cves, + 'severity': severity, + 'type': 'container-image', + 'cve_count': len(cves) + }) + format_type = 'container-image' + + elif scan_type == 'vuln': + # Vuln format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) + + structured_data.append({ + 'package': package_name, + 'cves': cves, + 'severity': severity, + 'type': 'vulnerability-scan', + 'cve_count': len(cves) + }) + format_type = 'vulnerability-scan' + + else: + structured_data = [] + format_type = 'unknown' + + # Format as JSON for structured output + content = json.dumps({ + 'results': structured_data, + 'metadata': { + 'item_name': item_name, + 'total_groups': len(structured_data), + 'format_type': format_type, + 'scan_type': scan_type + } + }, indent=2) + + # Create title based on scan type + if scan_type 
== 'vuln': + title = f'Socket CVE Scanning Results: {item_name}' + elif scan_type == 'dockerfile': + title = f'Socket Dockerfile Results: {item_name}' + else: # image + title = f'Socket Image Scanning Results: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/ms_sentinel.py b/socket_basics/core/connector/trivy/ms_sentinel.py new file mode 100644 index 0000000..b98ff7c --- /dev/null +++ b/socket_basics/core/connector/trivy/ms_sentinel.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +""" +Microsoft Sentinel notifier for Trivy results. +Formats results using the new grouped format structured for SIEM ingestion. +""" + +from typing import Dict, Any, List +from collections import defaultdict +from .utils import get_notifier_result_limit, logger + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for Microsoft Sentinel - grouped format structured for SIEM ingestion. + + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + rows.append(( + severity_order.get(severity, 4), + [rule_id, severity, message, resolution] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + headers = ['Rule ID', 'Severity', 'Message', 'Resolution'] + else: + # Image format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for 
severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as list for SIEM parsing + cve_list = '; '.join(cves) + + rows.append(( + severity_order.get(severity, 4), + [package_name, cve_list, severity] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + headers = ['Package', 'CVEs', 'Severity'] + + # Apply truncation for MS Sentinel + max_rows = get_notifier_result_limit('ms_sentinel') + original_count = len(rows) + truncated = False + if len(rows) > max_rows: + rows = rows[:max_rows] + truncated = True + logger.info(f"Truncated MS Sentinel results from {original_count} to {max_rows}") + + # Format for MS Sentinel - structured data + if not rows: + content = "No vulnerabilities found." + else: + content_lines = [' | '.join(headers)] + content_lines.append(' | '.join(['---'] * len(headers))) + for row in rows: + content_lines.append(' | '.join(str(cell) for cell in row)) + + # Add truncation notice if needed + if truncated: + content_lines.append('') + content_lines.append(f"⚠️ Showing top {max_rows} results (by severity). {original_count - max_rows} additional results truncated. View full results at the scan URL.") + + content = '\n'.join(content_lines) + + # Create title based on scan type + if scan_type == 'vuln': + title = f'Socket CVE Scanning Findings: {item_name}' + elif scan_type == 'dockerfile': + title = f'Socket Dockerfile Findings: {item_name}' + else: # image + title = f'Socket Image Scanning Findings: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/ms_teams.py b/socket_basics/core/connector/trivy/ms_teams.py new file mode 100644 index 0000000..bff59f9 --- /dev/null +++ b/socket_basics/core/connector/trivy/ms_teams.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Microsoft Teams notifier for Trivy results. +Formats results using the new grouped format suitable for Teams. +""" + +from typing import Dict, Any, List +from collections import defaultdict +from .utils import get_notifier_result_limit, logger + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for Microsoft Teams - grouped format. 
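Several of these notifiers share the same decorate-sort-truncate idiom: rows are built as `(severity_rank, row)` pairs, sorted, and then cut to a per-notifier limit so truncation always drops the least severe findings first. A standalone sketch (the hard-coded limit stands in for the value returned by `get_notifier_result_limit`):

```python
SEVERITY_ORDER = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}

def truncate_by_severity(ranked_rows, limit):
    """ranked_rows: (severity, row) pairs; keep the `limit` most severe rows."""
    ordered = sorted(ranked_rows, key=lambda pair: SEVERITY_ORDER.get(pair[0], 4))
    kept = [row for _, row in ordered[:limit]]
    return kept, max(0, len(ranked_rows) - limit)

ranked = [
    ('low', ['pkg:npm/lodash@4.17.20', 'CVE-2021-23337']),
    ('critical', ['pkg:deb/zlib@1.2.13', 'CVE-2023-45853']),
    ('medium', ['pkg:pypi/requests@2.30.0', 'CVE-2023-32681']),
]
kept, dropped = truncate_by_severity(ranked, limit=2)
print(kept)     # the critical and medium rows survive; the low row is dropped
print(dropped)  # 1
```

Because Python's sort is stable, rows with equal severity keep their original relative order.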
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + rows.append(( + severity_order.get(severity, 4), + [rule_id, severity, message, resolution[:150] + '...' if len(resolution) > 150 else resolution] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + headers = ['Rule ID', 'Severity', 'Message', 'Resolution'] + else: + # Image format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as compact list for Teams + if len(cves) > 3: + cve_list = ', '.join(cves[:3]) + f', ... (+{len(cves)-3} more)' + else: + cve_list = ', '.join(cves) + + # Truncate package name for readability if needed + display_package = package_name[:40] + '...' 
if len(package_name) > 40 else package_name + + rows.append(( + severity_order.get(severity, 4), + [display_package, cve_list, severity] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + headers = ['Package', 'CVEs', 'Severity'] + + # Apply truncation for MS Teams + max_rows = get_notifier_result_limit('msteams') + original_count = len(rows) + truncated = False + if len(rows) > max_rows: + rows = rows[:max_rows] + truncated = True + logger.info(f"Truncated MS Teams results from {original_count} to {max_rows}") + + # Format as simple table for MS Teams + if not rows: + content = "No vulnerabilities found." + else: + content_lines = [' | '.join(headers)] + content_lines.append(' | '.join(['---'] * len(headers))) + for row in rows: + content_lines.append(' | '.join(str(cell) for cell in row)) + + # Add truncation notice if needed + if truncated: + content_lines.append('') + content_lines.append(f"⚠️ **Showing top {max_rows} results (by severity).** {original_count - max_rows} additional results truncated. View full results at the scan URL below.") + + content = '\n'.join(content_lines) + + # Create title based on scan type + if scan_type == 'vuln': + title = f'Socket CVE Scanning Results: {item_name}' + elif scan_type == 'dockerfile': + title = f'Socket Dockerfile Results: {item_name}' + else: # image + title = f'Socket Image Scanning Results: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/slack.py b/socket_basics/core/connector/trivy/slack.py new file mode 100644 index 0000000..ee217a4 --- /dev/null +++ b/socket_basics/core/connector/trivy/slack.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +""" +Slack notifier for Trivy results. +Formats results using the new grouped format with emojis for visual appeal. +""" + +from typing import Dict, Any, List +from collections import defaultdict +from .utils import get_notifier_result_limit, logger + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for Slack notifications - grouped format with emojis. 
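The severity-to-emoji map is spelled out twice in this module (and the severity-to-priority map similarly recurs in the Jira notifier); if this is revisited, a tiny shared helper would keep the rendering consistent. A sketch of one possible shape, not code from this PR:

```python
SEVERITY_EMOJI = {'critical': '🔴', 'high': '🟠', 'medium': '🟡', 'low': '🟢'}

def label_severity(severity: str) -> str:
    """Prefix a severity with its emoji; unknown values fall back to ⚪."""
    return f"{SEVERITY_EMOJI.get(severity.lower(), '⚪')} {severity}"

print(label_severity('critical'))  # 🔴 critical
print(label_severity('info'))      # ⚪ info
```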
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + + # Add severity emojis for Slack + severity_lower = severity.lower() + severity_emoji = { + 'critical': '🔴', + 'high': '🟠', + 'medium': '🟡', + 'low': '🟢' + }.get(severity_lower, '⚪') + + rows.append(( + severity_order.get(severity, 4), + [rule_id, f"{severity_emoji} {severity}", message, resolution[:100] + '...' if len(resolution) > 100 else resolution] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + headers = ['Rule ID', 'Severity', 'Message', 'Resolution'] + else: + # Image format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as bullet points - limit for Slack readability + if len(cves) > 5: + cve_bullets = '\n'.join([f"• {cve}" for cve in cves[:5]]) + f"\n• ... and {len(cves)-5} more" + else: + cve_bullets = '\n'.join([f"• {cve}" for cve in cves]) + + # Add severity emojis for Slack + severity_lower = severity.lower() + severity_emoji = { + 'critical': '🔴', + 'high': '🟠', + 'medium': '🟡', + 'low': '🟢' + }.get(severity_lower, '⚪') + + # Truncate package name for readability if needed + display_package = package_name[:40] + '...' 
if len(package_name) > 40 else package_name + + rows.append(( + severity_order.get(severity, 4), + [display_package, cve_bullets, f"{severity_emoji} {severity}"] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + headers = ['Package', 'CVEs', 'Severity'] + + # Apply truncation for Slack + max_rows = get_notifier_result_limit('slack') + original_count = len(rows) + truncated = False + if len(rows) > max_rows: + rows = rows[:max_rows] + truncated = True + logger.info(f"Truncated Slack results from {original_count} to {max_rows}") + + # Format rows as markdown table for Slack + if not rows: + content = "No vulnerabilities found." + else: + # Create markdown table using the correct headers + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + # Add truncation notice if needed + if truncated: + content_rows.append('') + content_rows.append(f"⚠️ *Showing top {max_rows} results (by severity).* {original_count - max_rows} additional results truncated. View full results at the scan URL below.") + + content = '\n'.join([header_row, separator_row] + content_rows) + + # Create title based on scan type + if scan_type == 'vuln': + title = f'Socket CVE Scanning Results: {item_name}' + elif scan_type == 'dockerfile': + title = f'Socket Dockerfile Results: {item_name}' + else: # image + title = f'Socket Image Scanning Results: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/sumologic.py b/socket_basics/core/connector/trivy/sumologic.py new file mode 100644 index 0000000..3b84935 --- /dev/null +++ b/socket_basics/core/connector/trivy/sumologic.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +SumoLogic notifier for Trivy results. +Formats results using the new grouped format in structured logging format suitable for log parsing. +""" + +from typing import Dict, Any, List +from collections import defaultdict +from .utils import get_notifier_result_limit, logger + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for Sumo Logic - grouped format structured for log analysis. 
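One caveat with the key=value lines built below: values such as `message=` contain spaces, which makes naive key=value field extraction ambiguous on the SIEM side. A hedged sketch of a quoting helper that would keep the pairs parseable (my addition, not part of this PR; the rule id shown is illustrative):

```python
def kv(key: str, value: str) -> str:
    """Emit key=value, quoting values containing spaces, '=' or quotes so
    they survive automatic key=value field extraction."""
    value = str(value)
    if any(ch in value for ch in ' ="'):
        value = '"' + value.replace('"', '\\"') + '"'
    return f"{key}={value}"

line = ' '.join([
    kv('rule_id', 'DS002'),
    kv('severity', 'high'),
    kv('message', 'Image should not run as root'),
    kv('scanner', 'trivy'),
])
print(line)
# rule_id=DS002 severity=high message="Image should not run as root" scanner=trivy
```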
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + # Key-value format for SumoLogic + rows.append(( + severity_order.get(severity, 4), + [f"rule_id={rule_id}", f"severity={severity}", f"message={message}", + f"resolution={resolution}", "scanner=trivy", "type=dockerfile"] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # No headers needed for log format + headers = [] + else: + # Image format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as comma-separated for log parsing + cve_list = ','.join(cves) + + # Key-value format for SumoLogic + rows.append(( + severity_order.get(severity, 4), + [f"package={package_name}", f"cves={cve_list}", f"severity={severity}", + "scanner=trivy", "type=container-image"] + )) + + # Sort by severity and extract rows + rows.sort(key=lambda x: x[0]) + rows = [row[1] for row in rows] + + # No headers needed for log format + headers = [] + + # Apply truncation + result_limit = get_notifier_result_limit('sumologic') + total_results = len(rows) + was_truncated = False + + if total_results > result_limit: + logger.info(f"Truncating SumoLogic results from {total_results} to {result_limit} (prioritized by severity)") + rows = rows[:result_limit] + was_truncated = True + + # Format for SumoLogic - key=value pairs, one 
per line + if not rows: + content = "No vulnerabilities found." + else: + content_lines = [] + for row in rows: + content_lines.append(' '.join(str(cell) for cell in row)) + + # Add truncation notice if needed + if was_truncated: + content_lines.append('') + content_lines.append(f"results_truncated=true total_results={total_results} displayed_results={result_limit} note=\"Results truncated to {result_limit} highest severity findings. See full scan URL for complete results.\"") + + content = '\n'.join(content_lines) + + # Create title based on scan type + if scan_type == 'vuln': + title = f'Socket CVE Scanning Events: {item_name}' + elif scan_type == 'dockerfile': + title = f'Socket Dockerfile Events: {item_name}' + else: # image + title = f'Socket Image Scanning Events: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/trivy.py b/socket_basics/core/connector/trivy/trivy.py new file mode 100644 index 0000000..26bf2b5 --- /dev/null +++ b/socket_basics/core/connector/trivy/trivy.py @@ -0,0 +1,1099 @@ +import json +import subprocess +import tempfile +import os +from pathlib import Path +from typing import Dict, List, Any + +from .utils import logger +from ..base import BaseConnector +from ...utils import make_json_safe +# Trivy builds canonical components/notifications directly +# Import individual notifier modules +from . import github_pr, slack, ms_teams, ms_sentinel, sumologic, console, jira, webhook, json_notifier + +# Import shared formatters +from ...formatters import get_all_formatters + +class TrivyScanner(BaseConnector): + """Trivy container scanner implementation""" + + def __init__(self, config): + super().__init__(config) + + def is_enabled(self) -> bool: + """Check if container scanning should be enabled. + + Returns True if either Dockerfile, container image, or vulnerability scanning is enabled. + This method supports both the new parameter names and legacy ones. + """ + dockerfile_flag = bool( + self.config.get('dockerfile_scanning_enabled', False) or self.config.get('dockerfile_enabled', False)) + image_flag = bool( + self.config.get('container_image_scanning_enabled', False) or self.config.get('image_enabled', False)) + + # Check if Trivy vulnerability scanning is enabled + vuln_flag = bool(self.config.get('trivy_vuln_enabled', False)) + + return dockerfile_flag or image_flag or vuln_flag + + def scan(self) -> Dict[str, Any]: + """Run both Dockerfile and Image scanning""" + if not self.is_enabled(): + logger.info("Container scanning disabled, skipping Trivy") + return {} + + results_map: Dict[str, Any] = {} + all_notifications: List[List[str]] = [] + + # Run Dockerfile scanning + try: + dockerfile_results = self.scan_dockerfiles() or {} + if isinstance(dockerfile_results, dict): + results_map.update(dockerfile_results) + except Exception: + logger.exception('Trivy: dockerfile scan failed') + + # Run Image scanning + try: + image_results = self.scan_images() or {} + if isinstance(image_results, dict): + results_map.update(image_results) + except Exception: + logger.exception('Trivy: image scan failed') + + # Run Trivy vulnerability scanning + try: + vuln_results = self.scan_vulnerabilities() or {} + if isinstance(vuln_results, dict): + results_map.update(vuln_results) + except Exception: + logger.exception('Trivy: vulnerability scan failed') + + try: + # The connector produces a mapping of id->component in results_map. 
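+            # Illustrative shape of results_map (keys abridged, values made up):
+            #   {"<sha256-id>": {"id": "<sha256-id>", "name": "openssl",
+            #    "version": "1.1.1w", "qualifiers": {"ecosystem": "deb"}, "alerts": [...]}}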
+ # Convert to canonical components list and let connector build + # presentation-ready notification tables via notification_rows(). + components_list: List[Dict[str, Any]] = [] + mapping: Dict[str, Any] = {} + if isinstance(results_map, dict): + # results_map may already be mapping id->component + all_vals = [] + for k, v in results_map.items(): + if isinstance(v, dict): + mapping[k] = v + all_vals.append(v) + components_list = all_vals + + # Build notifications using new shared formatters + # Determine scan type based on component properties + scan_type = self._detect_scan_type(components_list) + + # Get the first image/dockerfile name from config for the title + item_name = "Unknown" + images_str = self.config.get('container_images', '') or self.config.get('container_images_to_scan', '') or self.config.get('docker_images', '') + if images_str: + if isinstance(images_str, list): + item_name = images_str[0] if images_str else "Unknown" + else: + images = [img.strip() for img in str(images_str).split(',') if img.strip()] + item_name = images[0] if images else "Unknown" + else: + dockerfiles = self.config.get('dockerfiles', '') + if dockerfiles: + if isinstance(dockerfiles, list): + item_name = dockerfiles[0] if dockerfiles else "Unknown" + else: + docker_list = [df.strip() for df in str(dockerfiles).split(',') if df.strip()] + item_name = docker_list[0] if docker_list else "Unknown" + + # For vuln scanning, use workspace name if item_name is still Unknown + if scan_type == 'vuln' and item_name == "Unknown": + try: + workspace = self.config.workspace + item_name = os.path.basename(str(workspace)) + except Exception: + item_name = "Workspace" + + notifications_by_notifier = self.generate_notifications(components_list, item_name, scan_type) + + return {'components': components_list, 'notifications': notifications_by_notifier} + except Exception: + logger.exception('Trivy: normalization failed') + return {'components': list(results_map.values()), 'notifications': {}} + + def scan_dockerfiles(self) -> Dict[str, Any]: + """Run Trivy Dockerfile scanning""" + # Consider both new and legacy dockerfile flags + dockerfile_enabled = self.config.get('dockerfile_scanning_enabled', False) or self.config.get( + 'dockerfile_enabled', False) + if not dockerfile_enabled: + logger.info("Dockerfile scanning disabled, skipping Trivy Dockerfile") + return {} + + dockerfiles = self.config.get('dockerfiles', '') + if isinstance(dockerfiles, str): + dockerfiles = [f.strip() for f in dockerfiles.split(',') if f.strip()] + elif isinstance(dockerfiles, list): + dockerfiles = [str(f).strip() for f in dockerfiles if str(f).strip()] + else: + dockerfiles = [] + + # Try to detect changed Dockerfiles even if none explicitly configured + changed_files = self.config.get('changed_files', []) if hasattr(self.config, '_config') else [] + if not changed_files: + try: + from socket_basics.core.config import _detect_git_changed_files + changed_files = _detect_git_changed_files(str(self.config.workspace), mode='staged') + except Exception: + changed_files = [] + + # If explicit dockerfiles are not set, but changed Dockerfiles exist, use them + if not dockerfiles and changed_files: + # Filter changed files for Dockerfile candidates + possible = [] + for cf in changed_files: + base = Path(cf).name + if base == 'Dockerfile' or 'dockerfile' in base.lower() or base.lower().endswith('.dockerfile'): + if (self.config.workspace / cf).exists(): + possible.append(cf) + if possible: + dockerfiles = possible + + if not dockerfiles: + 
logger.info("No Dockerfiles specified, skipping Trivy Dockerfile scanning") + return {} + + logger.info("Running Trivy Dockerfile scanning") + results = {} + + # If changed_files is provided, prefer scanning only changed Dockerfiles + changed_files = self.config.get('changed_files', []) if hasattr(self.config, '_config') else [] + # Fallback: attempt to detect staged changed files if none present + if not changed_files: + try: + # import helper from config module + from socket_basics.core.config import _detect_git_changed_files + changed_files = _detect_git_changed_files(str(self.config.workspace), mode='staged') + except Exception: + changed_files = [] + if changed_files: + # Filter changed files down to ones that are Dockerfiles or named 'Dockerfile' + changed_dockerfiles = [] + for cf in changed_files: + cf_path = Path(cf) + base = cf_path.name + if base == 'Dockerfile' or base.lower().endswith('dockerfile') or base.lower().endswith( + '.dockerfile') or 'dockerfile' in base.lower(): + # Ensure the file exists in workspace + full = self.config.workspace / cf + if full.exists(): + changed_dockerfiles.append(cf) + + if changed_dockerfiles: + logger.info(f"Detected {len(changed_dockerfiles)} changed Dockerfile(s); restricting Trivy to them") + dockerfiles = changed_dockerfiles + + for dockerfile in dockerfiles: + # Resolve dockerfile path: prefer given path if it exists, otherwise join with workspace + candidate_a = Path(dockerfile) + candidate_b = self.config.workspace / dockerfile + if candidate_a.exists(): + dockerfile_path = candidate_a + elif candidate_b.exists(): + dockerfile_path = candidate_b + else: + logger.warning(f"Dockerfile not found: {candidate_a} or {candidate_b}") + continue + + try: + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: + cmd = [ + 'trivy', + 'config', + '--format', 'json', + '--output', temp_file.name, + str(dockerfile_path) + ] + + logger.info(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + logger.error(f"Trivy dockerfile scan failed for {dockerfile}: {result.stderr}") + continue + + with open(temp_file.name, 'r') as f: + trivy_output = json.load(f) + + dockerfile_results = self._process_dockerfile_results(trivy_output, dockerfile) + # dockerfile_results is a dict mapping id->component + if isinstance(dockerfile_results, dict): + results.update(dockerfile_results) + + except FileNotFoundError: + logger.error("Trivy not found. 
Please install Trivy") + except Exception as e: + logger.error(f"Error running Trivy on {dockerfile}: {e}") + finally: + if 'temp_file' in locals(): + try: + os.unlink(temp_file.name) + except: + pass + + return results + + def scan_images(self) -> Dict[str, Any]: + """Run Trivy image scanning""" + # Check both new and legacy parameter names for images + images_str = self.config.get('container_images', '') or self.config.get('container_images_to_scan', '') or self.config.get('docker_images', '') + + # Also accept list types if provided programmatically + if isinstance(images_str, list): + images = [img for img in images_str if img] + else: + images = [img.strip() for img in str(images_str).split(',') if img.strip()] + + if not images: + logger.debug("No Docker images specified, skipping Trivy Image scanning") + return {} + + # Consider both new and legacy image flags (auto-enabled if images provided) + image_enabled = self.config.get('container_image_scanning_enabled', False) or self.config.get('image_enabled', False) or bool(images) + if not image_enabled: + logger.info("Image scanning disabled, skipping Trivy Image") + return {} + + logger.info("Running Trivy Image scanning") + results: Dict[str, Any] = {} + + for image in images: + try: + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: + cmd = [ + 'trivy', + 'image', + '--format', 'json', + '--output', temp_file.name, + image + ] + + logger.info(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + logger.error(f"Trivy image scan failed for {image}: {result.stderr}") + continue + + with open(temp_file.name, 'r') as f: + trivy_output = json.load(f) + + image_results = self._process_image_results(trivy_output, image) + if isinstance(image_results, dict): + results.update(image_results) + + except FileNotFoundError: + logger.error("Trivy not found. 
Please install Trivy") + except Exception as e: + logger.error(f"Error running Trivy on {image}: {e}") + finally: + if 'temp_file' in locals(): + try: + os.unlink(temp_file.name) + except: + pass + + return results + + def scan_vulnerabilities(self) -> Dict[str, Any]: + """Run Trivy filesystem scanning for vulnerabilities""" + vuln_enabled = self.config.get('trivy_vuln_enabled', False) + + if not vuln_enabled: + logger.info("Trivy vulnerability scanning disabled, skipping") + return {} + + logger.info("Running Trivy filesystem vulnerability scanning") + results: Dict[str, Any] = {} + + # Get the workspace path + workspace_path = self.config.workspace + + # Check for changed files to restrict scanning + changed_files = self.config.get('changed_files', []) if hasattr(self.config, '_config') else [] + if not changed_files: + try: + from socket_basics.core.config import _detect_git_changed_files + changed_files = _detect_git_changed_files(str(self.config.workspace), mode='staged') + except Exception: + changed_files = [] + + # If we have changed files, scan only those directories + scan_paths = [] + if changed_files: + # Extract unique directories from changed files + dirs = set() + for cf in changed_files: + cf_path = Path(cf) + # Get the parent directory or the file itself if it's a manifest + if cf_path.suffix in ['.json', '.lock', '.toml', '.txt', '.gradle', '.xml', '.podspec', '.swift']: + dirs.add(str(cf_path.parent) if cf_path.parent != Path('.') else '.') + else: + dirs.add(str(cf_path.parent) if cf_path.parent != Path('.') else '.') + + # Convert relative paths to absolute + for d in dirs: + abs_path = workspace_path / d if not Path(d).is_absolute() else Path(d) + if abs_path.exists(): + scan_paths.append(abs_path) + + if scan_paths: + logger.info(f"Restricting Trivy scan to {len(scan_paths)} changed directory(ies)") + + # If no changed files or no valid paths, scan entire workspace + if not scan_paths: + scan_paths = [workspace_path] + + for scan_path in scan_paths: + try: + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: + cmd = [ + 'trivy', + 'fs', + '--format', 'json', + '--output', temp_file.name, + '--scanners', 'vuln', + str(scan_path) + ] + + logger.info(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + logger.error(f"Trivy vulnerability scan failed for {scan_path}: {result.stderr}") + continue + + with open(temp_file.name, 'r') as f: + trivy_output = json.load(f) + + vuln_results = self._process_vulnerability_results(trivy_output) + if isinstance(vuln_results, dict): + results.update(vuln_results) + + except FileNotFoundError: + logger.error("Trivy not found. Please install Trivy") + except Exception as e: + logger.error(f"Error running Trivy vulnerability scan for {scan_path}: {e}") + finally: + if 'temp_file' in locals(): + try: + os.unlink(temp_file.name) + except: + pass + + return results + + def _convert_to_socket_facts(self, raw_results: Any) -> Dict[str, Any]: + """Convert raw Trivy results to Socket facts format + + This method implements the BaseConnector interface. + Since Trivy has multiple scan types, this method delegates to the appropriate + processing methods based on the result structure. 
+ """ + # This is a unified method that can handle both dockerfile and image results + # The scan() method already processes results appropriately + if isinstance(raw_results, dict): + return raw_results + return {} + + def _process_dockerfile_results(self, trivy_output: Dict[str, Any], dockerfile: str) -> Dict[str, Any]: + """Convert Trivy Dockerfile results to Socket facts format""" + results = trivy_output.get('Results', []) + + if not results: + return {} + + import hashlib + # Create a single component per Dockerfile and append all misconfiguration alerts + try: + from pathlib import Path as _P + p = _P(dockerfile) + try: + ws = getattr(self.config, 'workspace', None) + ws_root = getattr(ws, 'path', None) or getattr(ws, 'root', None) or ws + if ws and not p.is_absolute(): + p = _P(ws) / dockerfile + # If path includes workspace prefix like '../NodeGoat' or 'NodeGoat', strip it + if ws_root: + try: + ws_name = os.path.basename(str(ws_root)) + parts = str(p).split(os.sep) + if parts and (parts[0] == ws_name or ( + len(parts) >= 2 and parts[0] in ('.', '..') and parts[1] == ws_name)): + if parts[0] == ws_name: + parts = parts[1:] + else: + parts = parts[2:] + p = _P(os.path.join(*parts)) if parts else _P('') + except Exception: + pass + except Exception: + pass + norm = str(p.as_posix()) + cid = hashlib.sha256(norm.encode('utf-8')).hexdigest() + except Exception: + import hashlib as _hash + cid = _hash.sha256(str(dockerfile).encode('utf-8')).hexdigest() + + component = { + "id": cid, + "type": "generic", + "name": f"{dockerfile}", + "internal": True, + "version": "", + "direct": True, + "dev": False, + "dead": False, + "dependencies": [], + "manifestFiles": [{"file": dockerfile}] if dockerfile else [], + "alerts": [], + "qualifers": { + "ecosystem": "dockerfile" + } + } + + for result in results: + misconfigurations = result.get('Misconfigurations', []) + for misconfig in misconfigurations: + alert = self._create_dockerfile_alert(misconfig, dockerfile) + if alert: + component["alerts"].append(alert) + + return {cid: component} + + def _process_image_results(self, trivy_output: Dict[str, Any], image: str) -> Dict[str, Any]: + """Convert Trivy Image results to Socket facts format""" + results = trivy_output.get('Results', []) + + if not results: + return {} + + import hashlib + components: Dict[str, Any] = {} + package_components: Dict[str, Any] = {} + package_ids: List[str] = [] + + # Group vulnerabilities by package to create package components + for result in results: + result_type = result.get('Type', 'unknown') + result_class = result.get('Class', 'unknown') + + # Map Trivy type to Socket ecosystem and component type + ecosystem, component_type = self._get_socket_ecosystem(result_type) + + vulnerabilities = result.get('Vulnerabilities', []) + for vuln in vulnerabilities: + pkg_name = vuln.get('PkgName') or vuln.get('Package') or 'unknown' + installed_version = vuln.get('InstalledVersion') or vuln.get('FixedVersion') or 'unknown' + + # Create unique package component ID + package_seed = f"{image}:{pkg_name}@{installed_version}:{ecosystem}" + package_id = hashlib.sha256(package_seed.encode('utf-8')).hexdigest() + + # Create package component if it doesn't exist + if package_id not in package_components: + package_components[package_id] = { + "id": package_id, + # "type": component_type, # Use Socket ecosystem type if available + "type": "generic", + "name": pkg_name, + "internal": True, + "version": installed_version, + "direct": True, + "dev": False, + "dead": False, + "dependencies": 
[], + "manifestFiles": [{"file": image}], + "alerts": [], + "qualifiers": { + "ecosystem": ecosystem + }, + # "topLevelAncestors": [] # Will be set later + } + package_ids.append(package_id) + + # Create alert and add to package component + alert = self._create_image_alert(vuln, image, ecosystem or 'deb') + if alert: + package_components[package_id]["alerts"].append(alert) + + # Create top-level image component + image_version = "latest" # Default version + if ":" in image and not image.split(":")[-1].startswith("sha256"): + image_parts = image.split(":") + image_version = image_parts[-1] + image_name = ":".join(image_parts[:-1]) + else: + image_name = image + + image_seed = f"image:{image}" + image_id = hashlib.sha256(image_seed.encode('utf-8')).hexdigest() + + image_component = { + "id": image_id, + "type": "generic", + "name": image_name, + "internal": True, + "version": image_version, + "subPath": f"image:{image}", + "direct": True, + "dev": False, + "dead": False, + "dependencies": package_ids, + "manifestFiles": [{"file": image}], + "alerts": [] + } + + # Set topLevelAncestors for all package components + # for package_id in package_ids: + # package_components[package_id]["topLevelAncestors"] = [image_id] + + # Combine all components (packages first, then image) + components.update(package_components) + # components[image_id] = image_component + + return components + + def _get_socket_ecosystem(self, trivy_type: str) -> tuple[str, str]: + """Map Trivy type to Socket ecosystem and component type. + + Returns: + tuple: (ecosystem, component_type) where component_type is the Socket supported type + or 'generic' if not a Socket supported ecosystem + """ + # Supported Socket ecosystems + socket_ecosystems = { + 'npm', 'pypi', 'maven', 'cargo', 'gem', 'golang', + 'nuget', 'github', 'chrome', 'huggingface', 'vscode' + } + + # Map Trivy types to Socket ecosystems + trivy_to_socket = { + # Node.js + 'node-pkg': ('npm', 'npm'), + 'npm': ('npm', 'npm'), + 'yarn': ('npm', 'npm'), + 'pnpm': ('npm', 'npm'), + + # Python + 'python-pkg': ('pypi', 'pypi'), + 'pip': ('pypi', 'pypi'), + 'pipenv': ('pypi', 'pypi'), + 'poetry': ('pypi', 'pypi'), + + # Java + 'java-archive': ('maven', 'maven'), + 'jar': ('maven', 'maven'), + 'pom': ('maven', 'maven'), + 'gradle': ('maven', 'maven'), + + # Rust + 'rust-crate': ('cargo', 'cargo'), + 'cargo': ('cargo', 'cargo'), + 'cargo-lock': ('cargo', 'cargo'), + + # Ruby + 'ruby-gem': ('gem', 'gem'), + 'bundler': ('gem', 'gem'), + 'gemspec': ('gem', 'gem'), + + # Go + 'golang': ('golang', 'golang'), + 'gomod': ('golang', 'golang'), + 'go-module': ('golang', 'golang'), + 'go.mod': ('golang', 'golang'), + + # .NET + 'nuget': ('nuget', 'nuget'), + 'dotnet-core': ('nuget', 'nuget'), + 'packages-lock': ('nuget', 'nuget'), + + # Others that aren't Socket ecosystems + 'composer': ('composer', 'generic'), + 'php-composer': ('composer', 'generic'), + 'conan': ('conan', 'generic'), + 'cocoapods': ('cocoapods', 'generic'), + 'swift': ('cocoapods', 'generic'), + 'hex': ('hex', 'generic'), + 'apk': ('apk', 'generic'), + 'deb': ('deb', 'generic'), + 'debian': ('deb', 'generic'), + 'ubuntu': ('deb', 'generic'), + 'rpm': ('rpm', 'generic'), + 'redhat': ('rpm', 'generic'), + 'centos': ('rpm', 'generic'), + 'fedora': ('rpm', 'generic'), + 'amazon': ('rpm', 'generic'), + 'oracle': ('rpm', 'generic'), + 'photon': ('rpm', 'generic'), + 'suse': ('rpm', 'generic'), + } + + # Normalize the trivy_type to lowercase + trivy_type_lower = trivy_type.lower() + + if trivy_type_lower in 
trivy_to_socket: + ecosystem, component_type = trivy_to_socket[trivy_type_lower] + return ecosystem, component_type + + # Default fallback + return trivy_type, 'generic' + + def _process_vulnerability_results(self, trivy_output: Dict[str, Any]) -> Dict[str, Any]: + """Convert Trivy vulnerability results to Socket facts format""" + results = trivy_output.get('Results', []) + + if not results: + return {} + + import hashlib + components: Dict[str, Any] = {} + + # Process each result (which may contain multiple vulnerabilities) + for result in results: + result_type = result.get('Type', 'unknown') + result_class = result.get('Class', 'unknown') + target = result.get('Target', 'unknown') + + # Map Trivy type to Socket ecosystem + ecosystem, component_type = self._get_socket_ecosystem(result_type) + + vulnerabilities = result.get('Vulnerabilities', []) + for vuln in vulnerabilities: + pkg_name = vuln.get('PkgName') or vuln.get('PkgID') or 'unknown' + installed_version = vuln.get('InstalledVersion') or 'unknown' + + # Create unique package component ID + package_seed = f"trivy:{pkg_name}@{installed_version}:{ecosystem}" + package_id = hashlib.sha256(package_seed.encode('utf-8')).hexdigest() + + # Create package component if it doesn't exist + if package_id not in components: + components[package_id] = { + "id": package_id, + # "type": component_type, # Use Socket ecosystem type if available + "type": "generic", + "name": pkg_name, + "internal": True, + "version": installed_version, + "direct": True, + "dev": False, + "dead": False, + "dependencies": [], + "manifestFiles": [{"file": target}] if target != 'unknown' else [], + "alerts": [], + "subPath": f"trivy:{ecosystem}" + } + + # Create alert and add to package component + alert = self._create_vulnerability_alert(vuln, ecosystem, target, result_type) + if alert: + components[package_id]["alerts"].append(alert) + + return components + + def _create_dockerfile_alert(self, misconfig: Dict[str, Any], dockerfile: str) -> Dict[str, Any]: + """Create a generic alert from a Trivy Dockerfile misconfiguration""" + severity_map = { + 'CRITICAL': 'critical', + 'HIGH': 'high', + 'MEDIUM': 'medium', + 'LOW': 'low' + } + + severity = severity_map.get(misconfig.get('Severity', 'LOW'), 'low') + rule_id = misconfig.get('ID', 'unknown') + try: + if isinstance(rule_id, str) and rule_id.startswith('socket_basics.rules.'): + rule_id = rule_id.replace('socket_basics.rules.', '', 1) + except Exception: + pass + title = misconfig.get('Title', 'Configuration issue') + description = misconfig.get('Message', 'Dockerfile configuration issue detected') + + markdown_content = f"""## Dockerfile Configuration Issue: {rule_id} + +### Description +{description} + +### File Location +- **Dockerfile**: `{dockerfile}` +- **Rule ID**: {rule_id} + +### Issue Details +{misconfig.get('Message', 'No additional details available')} + +### Resolution +{misconfig.get('Resolution', 'Review Dockerfile configuration and apply security best practices')} + +### Security Impact +Dockerfile misconfigurations can lead to: +- Privilege escalation vulnerabilities +- Information disclosure +- Increased attack surface +- Compliance violations +""" + + return { + "type": "generic", + "severity": severity, + "title": f"Dockerfile: {title}", + "description": description, + "category": "vulnerability", + "subType": "dockerfile", + "generatedBy": "trivy-dockerfile", + "action": self.config.get_action_for_severity(severity), + "props": { + "ruleId": rule_id, + "dockerfile": dockerfile, + "connector": "Trivy 
Dockerfile Scanning", + "scanType": "dockerfile", + "impact": severity, + "resolution": misconfig.get('Resolution', ''), + # "references": misconfig.get('References', []), + "detailedReport": { + "content-type": "text/markdown", + "content": markdown_content + } + } + } + + def _get_cve_alert_type(self, severity: str, vuln_id: str) -> str: + """Get the appropriate alert type for CVE findings based on severity""" + # Only use CVE-specific types for actual CVE identifiers + if not vuln_id.startswith('CVE-'): + return "generic" + + severity_to_cve_type = { + 'critical': 'criticalCVE', + 'high': 'cve', + 'medium': 'mediumCVE', + 'low': 'mildCVE' + } + return severity_to_cve_type.get(severity, 'generic') + + def _create_image_alert(self, vuln: Dict[str, Any], image: str, ecosystem: str = 'deb') -> Dict[str, Any]: + """Create a CVE alert from a Trivy image vulnerability""" + severity_map = { + 'CRITICAL': 'critical', + 'HIGH': 'high', + 'MEDIUM': 'medium', + 'LOW': 'low' + } + + severity = severity_map.get(vuln.get('Severity', 'LOW'), 'low') + vuln_id = vuln.get('VulnerabilityID', 'unknown') + try: + if isinstance(vuln_id, str) and vuln_id.startswith('socket_basics.rules.'): + vuln_id = vuln_id.replace('socket_basics.rules.', '', 1) + except Exception: + pass + title = vuln.get('Title', 'Vulnerability detected') + description = vuln.get('Description', 'Container image vulnerability detected') + + # Get the appropriate alert type for CVE findings using the severity mapping + alert_type = self._get_cve_alert_type(severity, vuln_id) + + # Get package info + pkg_name = vuln.get('PkgName', 'unknown') + installed_version = vuln.get('InstalledVersion', 'unknown') + fixed_version = vuln.get('FixedVersion', 'Not available') + + markdown_content = f"""## Container Image Vulnerability: {vuln_id} + +### Vulnerability Details +- **CVE ID**: {vuln_id} +- **Package**: {pkg_name} +- **Installed Version**: {installed_version} +- **Fixed Version**: {fixed_version} +- **Severity**: {severity.upper()} + +### Description +{description} + +### Image Details +- **Image**: `{image}` +- **Package Path**: {vuln.get('PkgPath', 'N/A')} + +### CVSS Score +{vuln.get('CVSS', {}).get('nvd', {}).get('V3Score', 'Not available')} + +### Remediation +{"Update to version " + fixed_version if fixed_version != "Not available" else "No fix available yet. 
Consider using alternative packages or implementing additional security controls."} + +### Impact Assessment +Container vulnerabilities can lead to: +- Container escape +- Privilege escalation +- Data exfiltration +- Denial of service +""" + + # Build purl-like locator when possible (best-effort) + purl = None + try: + # prefer explicit installed version, then fixed version, then 'unknown' + installed_version = vuln.get('InstalledVersion') or vuln.get('FixedVersion') or 'unknown' + pkg_name = pkg_name or vuln.get('Package') or 'unknown' + if pkg_name: + # Use the ecosystem from the Trivy result type + purl = f"pkg:{ecosystem}/{pkg_name}@{installed_version}" + except Exception: + purl = None + + # title_text = f"{vuln_id} in {pkg_name}" + title_text = severity.capitalize() + " CVE" + + return { + # "type": alert_type, # Use the CVE type mapping + "type": "generic", + "severity": severity, + "title": title_text, + "description": f"{title} in package {pkg_name}", + "category": "vulnerability", + "subType": "container-image", + "generatedBy": "trivy-image", + "action": self.config.get_action_for_severity(severity), + "props": { + "vulnerabilityId": vuln_id, + "packageName": pkg_name, + "installedVersion": installed_version, + "fixedVersion": fixed_version, + "image": image, + "connector": "Trivy Image Scanning", + "scanType": "image", + "cvssScore": vuln.get('CVSS', {}).get('nvd', {}).get('V3Score'), + "impact": severity, + "detailedReport": { + "content-type": "text/markdown", + "content": markdown_content + } + } + } + + def _create_vulnerability_alert(self, vuln: Dict[str, Any], ecosystem: str, target: str, trivy_type: str) -> Dict[str, Any]: + """Create a CVE alert from a Trivy vulnerability""" + severity_map = { + 'CRITICAL': 'critical', + 'HIGH': 'high', + 'MEDIUM': 'medium', + 'LOW': 'low' + } + + severity = severity_map.get(vuln.get('Severity', 'LOW'), 'low') + vuln_id = vuln.get('VulnerabilityID', 'unknown') + try: + if isinstance(vuln_id, str) and vuln_id.startswith('socket_basics.rules.'): + vuln_id = vuln_id.replace('socket_basics.rules.', '', 1) + except Exception: + pass + title = vuln.get('Title', 'Vulnerability detected') + description = vuln.get('Description', 'Package vulnerability detected') + + # Get the appropriate alert type for CVE findings using the severity mapping + alert_type = self._get_cve_alert_type(severity, vuln_id) + + # Get package info + pkg_name = vuln.get('PkgName') or vuln.get('PkgID') or 'unknown' + installed_version = vuln.get('InstalledVersion', 'unknown') + fixed_version = vuln.get('FixedVersion', 'Not available') + + markdown_content = f"""## Package Vulnerability: {vuln_id} + +### Vulnerability Details +- **CVE ID**: {vuln_id} +- **Package**: {pkg_name} +- **Installed Version**: {installed_version} +- **Fixed Version**: {fixed_version} +- **Severity**: {severity.upper()} +- **Ecosystem**: {ecosystem} + +### Description +{description} + +### File Details +- **Target**: `{target}` + +### CVSS Score +{vuln.get('CVSS', {}).get('nvd', {}).get('V3Score', 'Not available')} + +### Remediation +{"Update to version " + fixed_version if fixed_version != "Not available" else "No fix available yet. 
Consider using alternative packages or implementing additional security controls."}
+
+### Impact Assessment
+Package vulnerabilities can lead to:
+- Remote code execution
+- Privilege escalation
+- Data exfiltration
+- Denial of service
+"""
+
+        # Build purl-like locator
+        purl = None
+        try:
+            if pkg_name and pkg_name != 'unknown':
+                purl = f"pkg:{ecosystem}/{pkg_name}@{installed_version}"
+        except Exception:
+            purl = None
+
+        title_text = severity.capitalize() + " CVE"
+
+        return {
+            "type": alert_type,  # Use the CVE type mapping
+            "severity": severity,
+            "title": title_text,
+            "description": f"{title} in package {pkg_name}",
+            "category": "vulnerability",
+            "subType": f"trivy-vuln-{ecosystem}",
+            "generatedBy": f"trivy-{ecosystem}",
+            "action": self.config.get_action_for_severity(severity),
+            "props": {
+                "vulnerabilityId": vuln_id,
+                "packageName": pkg_name,
+                "installedVersion": installed_version,
+                "fixedVersion": fixed_version,
+                "ecosystem": ecosystem,
+                "target": target,
+                "connector": "Trivy Vulnerability Scanning",
+                "scanType": "trivy-vuln",
+                "cvssScore": vuln.get('CVSS', {}).get('nvd', {}).get('V3Score'),
+                "impact": severity,
+                "purl": purl,
+                "detailedReport": {
+                    "content-type": "text/markdown",
+                    "content": markdown_content
+                }
+            }
+        }
+
+    # Notification processor for Trivy
+    def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str]]:
+        # Legacy method - returns flat list of rows (not grouped tables)
+        # This is kept for backward compatibility
+        rows: List[List[str]] = []
+        if not processed_results:
+            return rows
+
+        for comp in processed_results.values():
+            comp_name = str(comp.get('name') or comp.get('id') or '-')
+            ctype = comp.get('type')
+
+            for a in comp.get('alerts', []):
+                props = a.get('props', {}) or {}
+                title = str(a.get('title', '') or props.get('ruleId', ''))
+                severity = str(a.get('severity', ''))
+
+                if ctype == 'image' or str(comp.get('subPath', '')).startswith('image:'):
+                    # Image vulnerability
+                    locator = str(props.get('image') or props.get('dockerImage') or comp_name)
+                    if props.get('purl'):
+                        location = str(props.get('purl'))
+                    elif props.get('packageName'):
+                        location = f"pkg:deb/{props.get('packageName')}@{props.get('installedVersion', '')}"
+                    else:
+                        location = locator
+                    rows.append([title, severity, locator, location])
+                else:
+                    # Dockerfile or other
+                    file_loc = str(props.get('dockerfile') or comp_name)
+                    resolution = str(props.get('resolution', ''))
+                    rows.append([title, severity, file_loc, resolution])
+
+        return rows
+
+    def _detect_scan_type(self, components: List[Dict[str, Any]]) -> str:
+        """Detect the type of scan based on component properties.
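+
+        Detection keys off markers set by the processing methods above: a
+        subPath starting with "trivy:" indicates filesystem vulnerability
+        results, qualifiers.ecosystem == "dockerfile" indicates Dockerfile
+        results, and a subPath starting with "image:" indicates image results.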
+ + Returns: + 'vuln' for vulnerability scanning, 'image' for image scanning, 'dockerfile' for dockerfile scanning + """ + if not components: + return 'unknown' + + # Check first component for indicators + for comp in components: + subpath = comp.get('subPath', '') + qualifiers = comp.get('qualifiers', {}) + ecosystem = qualifiers.get('ecosystem', '') + + # Vuln scanning has subPath starting with "trivy:" + if subpath and subpath.startswith('trivy:'): + return 'vuln' + + # Dockerfile scanning has ecosystem "dockerfile" + if ecosystem == 'dockerfile': + return 'dockerfile' + + # Image scanning has subPath starting with "image:" + if subpath and subpath.startswith('image:'): + return 'image' + + # Default to image if we have components with qualifiers/ecosystem + return 'image' + + def generate_notifications(self, components: List[Dict[str, Any]], item_name: str = "Unknown", + scan_type: str = "image") -> Dict[str, List[Dict[str, str]]]: + """Generate pre-formatted notifications for all notifier types. + + Args: + components: List of component dictionaries with alerts + item_name: Name of the item being scanned + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + + Returns: + Dictionary mapping notifier keys to lists of notification dictionaries + """ + if not components: + return {} + + # Create component mapping and apply severity filtering + comps_map = {} + for component in components: + comp_id = component.get('id') or component.get('name') or str(id(component)) + filtered_alerts = [] + + for alert in component.get('alerts', []): + # Filter by severity - only include alerts that match allowed severities + alert_severity = (alert.get('severity') or '').strip().lower() + if alert_severity and hasattr(self, + 'allowed_severities') and alert_severity not in self.allowed_severities: + continue # Skip this alert - severity not enabled + filtered_alerts.append(alert) + + # Only include component if it has filtered alerts + if filtered_alerts: + filtered_component = component.copy() + filtered_component['alerts'] = filtered_alerts + comps_map[comp_id] = filtered_component + + if not comps_map: + return {} + + # Build notifications for each notifier type using Trivy-specific modules + notifications_by_notifier = {} + notifications_by_notifier['github_pr'] = github_pr.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['slack'] = slack.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['msteams'] = ms_teams.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['ms_sentinel'] = ms_sentinel.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['sumologic'] = sumologic.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['json'] = json_notifier.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['console'] = console.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['jira'] = jira.format_notifications(comps_map, item_name, scan_type) + notifications_by_notifier['webhook'] = webhook.format_notifications(comps_map, item_name, scan_type) + + return notifications_by_notifier + + def get_name(self) -> str: + """Return the display name for this connector""" + return "Trivy" \ No newline at end of file diff --git a/socket_basics/core/connector/trivy/trivy_jira_backup.py b/socket_basics/core/connector/trivy/trivy_jira_backup.py new file mode 100644 index 0000000..e69de29 diff --git 
a/socket_basics/core/connector/trivy/utils.py b/socket_basics/core/connector/trivy/utils.py new file mode 100644 index 0000000..f31d5e0 --- /dev/null +++ b/socket_basics/core/connector/trivy/utils.py @@ -0,0 +1,43 @@ +import logging +from pathlib import Path +import yaml + +logger = logging.getLogger("trivy-scanner") + + +def get_notifier_result_limit(notifier_name: str) -> int: + """Get the result limit for a specific notifier from notifications config. + + Args: + notifier_name: Name of the notifier (e.g., 'jira', 'slack', 'github_pr') + + Returns: + Maximum number of results for this notifier + """ + try: + # Try to load notifications.yaml to get the limit + base_dir = Path(__file__).parent.parent.parent + notifications_path = base_dir / "notifications.yaml" + + if notifications_path.exists(): + with open(notifications_path, 'r') as f: + config = yaml.safe_load(f) + result_limits = config.get('settings', {}).get('result_limits', {}) + return result_limits.get(notifier_name, result_limits.get('default', 50)) + except Exception as e: + logger.debug(f"Could not load {notifier_name} result limit from config: {e}") + + # Fallback defaults by notifier type + defaults = { + 'jira': 30, + 'slack': 50, + 'msteams': 50, + 'github_pr': 100, + 'webhook': 100, + 'console': 1000, + 'json': 10000, + 'sumologic': 500, + 'ms_sentinel': 500 + } + return defaults.get(notifier_name, 50) + diff --git a/socket_basics/core/connector/trivy/webhook.py b/socket_basics/core/connector/trivy/webhook.py new file mode 100644 index 0000000..d71eb0e --- /dev/null +++ b/socket_basics/core/connector/trivy/webhook.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +""" +Webhook notifier for Trivy results. +Formats results using the new grouped format for generic webhook consumption. +""" + +from typing import Dict, Any, List +from collections import defaultdict + + +def format_notifications(mapping: Dict[str, Any], item_name: str = "Unknown", scan_type: str = "image") -> List[Dict[str, Any]]: + """Format for generic webhook - grouped format. 
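+
+    The content is a plain pipe-delimited table; an image-scan row looks
+    roughly like (values illustrative):
+    pkg:npm/lodash@4.17.20 | CVE-2020-8203 | high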
+ + Args: + mapping: Component mapping with alerts + item_name: Name of the scanned item + scan_type: Type of scan - 'vuln', 'image', or 'dockerfile' + """ + # Group vulnerabilities by package and severity + package_groups = defaultdict(lambda: defaultdict(set)) # Use set to avoid duplicates + + if scan_type == 'dockerfile': + # Process dockerfile components + for comp in mapping.values(): + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + rule_id = str(props.get('ruleId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + message = str(alert.get('description', '')) + resolution = str(props.get('resolution', '')) + + rule_info = f"{rule_id}|{message}|{resolution}" + package_groups[rule_id][severity].add(rule_info) + + else: # image or vuln + # Process package vulnerability components + for comp in mapping.values(): + comp_name = str(comp.get('name') or comp.get('id') or '-') + comp_version = str(comp.get('version', '')) + ecosystem = comp.get('qualifiers', {}).get('ecosystem', 'unknown') + + if comp_version: + package_key = f"pkg:{ecosystem}/{comp_name}@{comp_version}" + else: + package_key = f"pkg:{ecosystem}/{comp_name}" + + for alert in comp.get('alerts', []): + props = alert.get('props', {}) or {} + cve_id = str(props.get('vulnerabilityId', '') or alert.get('title', '')) + severity = str(alert.get('severity', '')) + package_groups[package_key][severity].add(cve_id) + + # Create rows with proper formatting + rows = [] + severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + + if scan_type == 'dockerfile': + # Dockerfile format: Rule ID | Severity | Message | Resolution + for rule_id, severity_dict in package_groups.items(): + for severity in sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)): + rule_infos = list(severity_dict[severity]) + for rule_info in rule_infos: + parts = rule_info.split('|', 2) + if len(parts) >= 3: + _, message, resolution = parts + rows.append([ + rule_id, + severity, + message, + resolution + ]) + + headers = ['Rule ID', 'Severity', 'Message', 'Resolution'] + else: + # Image format: Package | CVEs | Severity + for package_name, severity_dict in package_groups.items(): + # Sort severities by criticality + sorted_severities = sorted(severity_dict.keys(), key=lambda s: severity_order.get(s, 4)) + + for severity in sorted_severities: + cves = sorted(list(severity_dict[severity])) # Convert set to sorted list + + # Format CVEs as comma-separated list for webhook + cve_list = ', '.join(cves) + + rows.append([ + package_name, + cve_list, + severity + ]) + + headers = ['Package', 'CVEs', 'Severity'] + + # Format for webhook - simple structure + if not rows: + content = "No vulnerabilities found." 
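+    # Render headers, a separator row, then one pipe-joined line per finding.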
+ else: + content_lines = [' | '.join(headers)] + content_lines.append(' | '.join(['---'] * len(headers))) + for row in rows: + content_lines.append(' | '.join(str(cell) for cell in row)) + content = '\n'.join(content_lines) + + # Create title based on scan type + if scan_type == 'vuln': + title = f'Socket CVE Scanning Results: {item_name}' + elif scan_type == 'dockerfile': + title = f'Socket Dockerfile Results: {item_name}' + else: # image + title = f'Socket Image Scanning Results: {item_name}' + + return [{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/__init__.py b/socket_basics/core/connector/trufflehog/__init__.py index 2e96445..9cba07d 100644 --- a/socket_basics/core/connector/trufflehog/__init__.py +++ b/socket_basics/core/connector/trufflehog/__init__.py @@ -12,6 +12,12 @@ from ..base import BaseConnector +# Import individual notifier modules +from . import github_pr, slack, ms_teams, ms_sentinel, sumologic, console, jira, webhook, json_notifier + +# Import shared formatters +from ...formatters import get_all_formatters + logger = logging.getLogger(__name__) @@ -119,7 +125,14 @@ def scan(self) -> Dict[str, Any]: if notifications: tables = [{'title': 'results', 'headers': ['component','severity','title','location'], 'rows': notifications}] - return {'components': components_list, 'notifications': tables} + # Build notifications using new shared formatters + notifications_by_notifier = self.generate_notifications(components_list) + + # return Socket facts format + return { + 'components': components_list, + 'notifications': notifications_by_notifier + } except FileNotFoundError: logger.error("Trufflehog not found. Please install Trufflehog") @@ -239,10 +252,7 @@ def _hash_file_or_path(file_path: str) -> str: "dead": False, "dependencies": [], "manifestFiles": [{"file": fp}] if fp else [], - "qualifiers": { - "scanner": "secrets", - "type": inferred_type - }, + "subPath": "secret-scanning", "alerts": [] } @@ -254,7 +264,16 @@ def _hash_file_or_path(file_path: str) -> str: alert.setdefault('category', 'supplyChainRisk') comps[comp_id]['alerts'].append(alert) - return comps + # Convert to Socket facts format + components_list = list(comps.values()) + + # Build notifications for all notifier types + notifications_by_notifier = self.generate_notifications(components_list) + + return { + 'components': components_list, + 'notifications': notifications_by_notifier + } def _create_alert(self, finding: Dict[str, Any]) -> Dict[str, Any]: """Create a generic alert from a Trufflehog finding""" @@ -348,13 +367,12 @@ def _create_alert(self, finding: Dict[str, Any]) -> Dict[str, Any]: "subType": "secrets", "action": self.config.get_action_for_severity(severity), "props": { - "detectorName": detector_name, + "ruleId": detector_name, "verified": verified, "filePath": file_path, "lineNumber": line, "secretType": detector_name.lower(), "redactedValue": redacted_secret, - "isActive": verified, "riskLevel": "critical" if verified else "low", "exposureType": "source-code", "detailedReport": { @@ -364,31 +382,93 @@ def _create_alert(self, finding: Dict[str, Any]) -> Dict[str, Any]: } } - # Notification processor for TruffleHog secrets - def notification_rows(self, processed_results: Dict[str, Any]) -> List[Dict[str, Any]]: - """Return a list of canonical notification table dicts. - Each table dict must include a `title`, optional `headers`, and `rows`. 
- Example: - [{"title": "Secrets", "headers": [...], "rows": [[...], ...]}] + + + # Notification processor for TruffleHog secrets + def notification_rows(self, processed_results: Dict[str, Any]) -> List[List[str]]: + """Legacy method - returns flat list of rows (not grouped tables). + + This is kept for backward compatibility. """ rows: List[List[str]] = [] - for comp in processed_results.values(): - for a in comp.get('alerts', []): - props = a.get('props', {}) or {} - detection = props.get('detectorName', '') or a.get('title') or '' - sev = a.get('severity', '') - file_path = props.get('filePath', '-') - line = props.get('lineNumber', '') - redacted = props.get('redactedValue', '') - # Build row as: Detection, Severity, File, Line, Secrets (redacted) - rows.append([detection, sev, file_path, f"{line}" if line else '-', redacted]) - - table = { - 'title': 'Secrets', - 'headers': ['Detection', 'Severity', 'File', 'Line', 'Secrets'], - 'rows': rows, - } - - # Return empty list when there are no rows to make callers skip it cleanly - return [table] if rows else [] + if not processed_results: + return rows + + # Handle new Socket facts format (with 'components' key) + components = processed_results.get('components', []) + if components and isinstance(components, list): + for comp in components: + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detection = str(props.get('detectorName', '') or a.get('title') or '') + sev = str(a.get('severity', '')) + file_path = str(props.get('filePath', '-')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + # Build row as: Detection, Severity, File, Line, Secrets (redacted) + rows.append([detection, sev, file_path, f"{line}" if line else '-', redacted]) + else: + # Handle old format (direct component mapping) + for comp in processed_results.values(): + if hasattr(comp, 'get'): # It's a dict + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detection = str(props.get('detectorName', '') or a.get('title') or '') + sev = str(a.get('severity', '')) + file_path = str(props.get('filePath', '-')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + # Build row as: Detection, Severity, File, Line, Secrets (redacted) + rows.append([detection, sev, file_path, f"{line}" if line else '-', redacted]) + + return rows + + def generate_notifications(self, components: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, str]]]: + """Generate pre-formatted notifications for all notifier types. 
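+
+        A caller might consume the result roughly like this (send_to_slack is
+        an illustrative stand-in, not part of this codebase):
+
+            notes = scanner.generate_notifications(components)
+            for note in notes.get('slack', []):
+                send_to_slack(note['title'], note['content'])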
+ + Args: + components: List of component dictionaries with alerts + + Returns: + Dictionary mapping notifier keys to lists of notification dictionaries + """ + if not components: + return {} + + # Create component mapping for compatibility with TruffleHog-specific formatters + comps_map = {c.get('id') or c.get('name') or str(id(c)): c for c in components} + + # Filter components by severity + filtered_comps_map = {} + for comp_id, comp in comps_map.items(): + filtered_alerts = [] + for alert in comp.get('alerts', []): + # Filter by severity - only include alerts that match allowed severities + alert_severity = (alert.get('severity') or '').strip().lower() + if alert_severity and hasattr(self, 'allowed_severities') and alert_severity not in self.allowed_severities: + continue # Skip this alert - severity not enabled + filtered_alerts.append(alert) + + # Only include component if it has filtered alerts + if filtered_alerts: + filtered_comp = comp.copy() + filtered_comp['alerts'] = filtered_alerts + filtered_comps_map[comp_id] = filtered_comp + + if not filtered_comps_map: + return {} + + # Build notifications for each notifier type using TruffleHog-specific modules + notifications_by_notifier = {} + notifications_by_notifier['github_pr'] = github_pr.format_notifications(filtered_comps_map) + notifications_by_notifier['slack'] = slack.format_notifications(filtered_comps_map) + notifications_by_notifier['msteams'] = ms_teams.format_notifications(filtered_comps_map) + notifications_by_notifier['ms_sentinel'] = ms_sentinel.format_notifications(filtered_comps_map) + notifications_by_notifier['sumologic'] = sumologic.format_notifications(filtered_comps_map) + notifications_by_notifier['json'] = json_notifier.format_notifications(filtered_comps_map) + notifications_by_notifier['console'] = console.format_notifications(filtered_comps_map) + notifications_by_notifier['jira'] = jira.format_notifications(filtered_comps_map) + notifications_by_notifier['webhook'] = webhook.format_notifications(filtered_comps_map) + + return notifications_by_notifier diff --git a/socket_basics/core/connector/trufflehog/console.py b/socket_basics/core/connector/trufflehog/console.py new file mode 100644 index 0000000..55f8bde --- /dev/null +++ b/socket_basics/core/connector/trufflehog/console.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" +Console notifier for TruffleHog results. +Formats results for human-readable console output with truncated content for secret detection. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for console output - human readable with truncated content.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '-')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + verified = props.get('verified', False) + + # Truncate for console readability + short_path = file_path[:30] + '...' if len(file_path) > 30 else file_path + location = f"{short_path}:{line}" if line else short_path + + short_secret = redacted[:15] + '...' 
if len(redacted) > 15 else redacted + status = 'OK' if verified else 'WARN' + + rows.append([ + detector, + severity, + status, + location, + short_secret + ]) + + # Format as a table using tabulate + from tabulate import tabulate + + headers = ['Detector', 'Severity', 'Status', 'Location', 'Secret'] + table_content = tabulate(rows, headers=headers, tablefmt='grid') if rows else "No secrets found." + + return [{ + 'title': 'TruffleHog Secret Detection Results', + 'content': table_content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/github_pr.py b/socket_basics/core/connector/trufflehog/github_pr.py new file mode 100644 index 0000000..7ea99d3 --- /dev/null +++ b/socket_basics/core/connector/trufflehog/github_pr.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +""" +GitHub PR notifier for TruffleHog results. +Formats results with markdown for better GitHub display of secret detection findings. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any], config=None) -> List[Dict[str, Any]]: + """Format for GitHub PR comments - detailed with markdown formatting.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '-')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + verified = props.get('verified', False) + + # Format with markdown for better GitHub display + status = '✅ **VERIFIED**' if verified else '⚠️ *Unverified*' + file_display = f"`{file_path}`" + if line: + file_display += f":{line}" + + rows.append([ + f"**{detector}**", + f"*{severity}*", + status, + file_display, + f"`{redacted}`" if redacted else '-' + ]) + + # Create markdown table + if not rows: + content = "No secrets detected." + else: + headers = ['Detector', 'Severity', 'Status', 'Location', 'Secret'] + header_row = '| ' + ' | '.join(headers) + ' |' + separator_row = '| ' + ' | '.join(['---'] * len(headers)) + ' |' + content_rows = [] + for row in rows: + content_rows.append('| ' + ' | '.join(str(cell) for cell in row) + ' |') + + content = '\n'.join([header_row, separator_row] + content_rows) + + # Build title with repo/branch/commit info from config + title_parts = ["Socket Security Results"] + if config: + if config.repo: + title_parts.append(config.repo) + if config.branch: + title_parts.append(config.branch) + if config.commit_hash: + title_parts.append(config.commit_hash) + + title = " - ".join(title_parts) + + # Count total findings for summary + total_findings = len(rows) + + # Add summary section with scanner findings + summary_content = f"""## Summary + +| Scanner | Findings | +|---------|----------| +| TruffleHog Secrets | {total_findings} | + +## Details + +{content}""" + + # Wrap content with HTML comment markers for section updates + wrapped_content = f""" +# {title} + +{summary_content} +""" + + return [{ + 'title': title, + 'content': wrapped_content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/jira.py b/socket_basics/core/connector/trufflehog/jira.py new file mode 100644 index 0000000..4006192 --- /dev/null +++ b/socket_basics/core/connector/trufflehog/jira.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +""" +Jira notifier for TruffleHog results. 
+Formats results for Jira tickets with priority mapping and detailed descriptions for secret detection. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any], config=None) -> List[Dict[str, Any]]: + """Format for Jira tickets - generate ADF format directly for proper formatting.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '')) + line = str(props.get('lineNumber', '')) + verified = props.get('verified', False) + + # Map severity to Jira priority + severity_lower = severity.lower() + jira_priority = { + 'critical': 'Highest', + 'high': 'High', + 'medium': 'Medium', + 'low': 'Low' + }.get(severity_lower, 'Medium') + + # Enhanced priority for verified secrets + if verified and jira_priority != 'Highest': + jira_priority = 'High' + + # Risk assessment + risk_level = 'CRITICAL' if verified else 'Medium' + + # Action needed + action = 'URGENT: Rotate credentials immediately' if verified else 'Review and validate' + + location = f"{file_path}:{line}" if line else file_path + + rows.append([ + {"type": "paragraph", "content": [{"type": "text", "text": detector}]}, + {"type": "paragraph", "content": [{"type": "text", "text": jira_priority}]}, + {"type": "paragraph", "content": [{"type": "text", "text": 'Verified' if verified else 'Unverified'}]}, + {"type": "paragraph", "content": [{"type": "text", "text": risk_level}]}, + {"type": "paragraph", "content": [{"type": "text", "text": location}]}, + {"type": "paragraph", "content": [{"type": "text", "text": action}]}, + {"type": "paragraph", "content": [{"type": "text", "text": str(a.get('description', ''))}]} + ]) + + # Build simple title with repo/branch/commit info from config + title_parts = ["Socket Security Issues found for"] + if config: + if config.repo: + title_parts.append(config.repo) + if config.branch: + title_parts.append(config.branch) + if config.commit_hash: + title_parts.append(config.commit_hash) + + title = " - ".join(title_parts) + + # Create ADF table format + if not rows: + content = { + "type": "doc", + "version": 1, + "content": [ + { + "type": "heading", + "attrs": {"level": 2}, + "content": [{"type": "text", "text": "TruffleHog Secret Detection"}] + }, + { + "type": "paragraph", + "content": [{"type": "text", "text": "No secrets detected."}] + } + ] + } + else: + # Create table headers + headers = ['Detector', 'Priority', 'Status', 'Risk', 'Location', 'Action', 'Description'] + header_cells = [] + for header in headers: + header_cells.append({ + "type": "tableHeader", + "attrs": {}, + "content": [ + { + "type": "paragraph", + "content": [{"type": "text", "text": header}] + } + ] + }) + + # Create table rows + table_rows = [{ + "type": "tableRow", + "content": header_cells + }] + + for row in rows: + data_cells = [] + for cell_content in row: + data_cells.append({ + "type": "tableCell", + "attrs": {}, + "content": [cell_content] + }) + table_rows.append({ + "type": "tableRow", + "content": data_cells + }) + + # Create complete ADF document + content = { + "type": "doc", + "version": 1, + "content": [ + { + "type": "heading", + "attrs": {"level": 2}, + "content": [{"type": "text", "text": "TruffleHog Secret Detection"}] + }, + { + "type": "table", + "attrs": { + "isNumberColumnEnabled": False, + "layout": "default" + }, + "content": table_rows + } + ] + } + + return 
[{ + 'title': title, + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/json_notifier.py b/socket_basics/core/connector/trufflehog/json_notifier.py new file mode 100644 index 0000000..f8966d9 --- /dev/null +++ b/socket_basics/core/connector/trufflehog/json_notifier.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +JSON notifier for TruffleHog results. +Formats results with complete structured data for programmatic consumption of secret detection. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for JSON output - complete structured data.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + verified = props.get('verified', False) + secret_type = str(props.get('secretType', '')) + risk_level = str(props.get('riskLevel', '')) + exposure_type = str(props.get('exposureType', '')) + + rows.append([ + detector, + severity, + secret_type, + file_path, + line, + redacted, + str(verified), + risk_level, + exposure_type, + str(a.get('description', '')) + ]) + + # Format as structured data + if not rows: + content = "No secrets found." + else: + headers = ['Detector', 'Severity', 'SecretType', 'FilePath', 'Line', 'RedactedValue', 'Verified', 'RiskLevel', 'ExposureType', 'Description'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'TruffleHog Secret Detection Results', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/ms_sentinel.py b/socket_basics/core/connector/trufflehog/ms_sentinel.py new file mode 100644 index 0000000..59e316f --- /dev/null +++ b/socket_basics/core/connector/trufflehog/ms_sentinel.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +""" +Microsoft Sentinel notifier for TruffleHog results. +Formats results structured for SIEM ingestion of secret detection findings. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for Microsoft Sentinel - structured for SIEM ingestion.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '')) + line = str(props.get('lineNumber', '')) + verified = props.get('verified', False) + secret_type = str(props.get('secretType', '')) + risk_level = str(props.get('riskLevel', '')) + + # More structured format for SIEM + rows.append([ + detector, + severity, + secret_type, + file_path, + line, + str(verified), + risk_level, + 'source-code' + ]) + + # Format as structured data + if not rows: + content = "No secrets found." 
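For reference, the `mapping` argument shared by these TruffleHog notifier modules is the connector's component map. A minimal sketch of the expected shape, inferred from the keys read above (`alerts`, `props.detectorName`, `props.filePath`, and so on); the real structure is produced by the TruffleHog connector and may carry additional fields:

```python
from socket_basics.core.connector.trufflehog import json_notifier

# Hypothetical input, shaped after the props keys the notifiers read.
mapping = {
    "comp-1": {
        "alerts": [
            {
                "title": "AWS key detected",
                "severity": "high",
                "description": "AWS access key found in source",
                "props": {
                    "detectorName": "AWS",
                    "filePath": "src/settings.py",
                    "lineNumber": 42,
                    "redactedValue": "AKIA****",
                    "verified": True,
                    "secretType": "access-key",
                    "riskLevel": "high",
                    "exposureType": "source-code",
                },
            }
        ]
    }
}

result = json_notifier.format_notifications(mapping)
print(result[0]["title"])    # TruffleHog Secret Detection Results
print(result[0]["content"])  # pipe-delimited table, one row per alert
```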
+ else: + headers = ['Detector', 'Severity', 'SecretType', 'FilePath', 'LineNumber', 'Verified', 'RiskLevel', 'ExposureType'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'TruffleHog Secret Findings', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/ms_teams.py b/socket_basics/core/connector/trufflehog/ms_teams.py new file mode 100644 index 0000000..b53a1cf --- /dev/null +++ b/socket_basics/core/connector/trufflehog/ms_teams.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +""" +Microsoft Teams notifier for TruffleHog results. +Formats results in clean tabular format suitable for Teams display of secret detection. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for Microsoft Teams - clean tabular format.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '-')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + verified = props.get('verified', False) + + location = f"{file_path}:{line}" if line else file_path + # Truncate for Teams + if len(location) > 60: + location = location[:57] + '...' + + rows.append([ + detector, + severity, + 'Verified' if verified else 'Unverified', + location, + redacted[:30] + '...' if len(redacted) > 30 else redacted # Truncate for Teams + ]) + + # Format as structured data + if not rows: + content = "No secrets found." + else: + headers = ['Detector', 'Severity', 'Status', 'Location', 'Secret'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'TruffleHog Secret Detection Results', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/slack.py b/socket_basics/core/connector/trufflehog/slack.py new file mode 100644 index 0000000..7743df0 --- /dev/null +++ b/socket_basics/core/connector/trufflehog/slack.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +""" +Slack notifier for TruffleHog results. +Formats results concisely with emojis for visual appeal in secret detection. 
+""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for Slack notifications - concise with emojis.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '-')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + verified = props.get('verified', False) + + # Add emojis for Slack + severity_lower = severity.lower() + severity_emoji = { + 'critical': '🔴', + 'high': '🟠', + 'medium': '🟡', + 'low': '🟢' + }.get(severity_lower, '⚪') + + status_emoji = '✅' if verified else '⚠️' + + # Truncate file path for Slack + short_path = file_path[:40] + '...' if len(file_path) > 40 else file_path + location = f"{short_path}:{line}" if line else short_path + + rows.append([ + detector, + f"{severity_emoji} {severity}", + f"{status_emoji} {'Verified' if verified else 'Unverified'}", + location, + redacted[:20] + '...' if len(redacted) > 20 else redacted + ]) + + # Format as markdown table for Slack + if not rows: + content = "No secrets found." + else: + headers = ['Detector', 'Severity', 'Status', 'Location', 'Secret'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'TruffleHog Secret Detection Results', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/sumologic.py b/socket_basics/core/connector/trufflehog/sumologic.py new file mode 100644 index 0000000..8b3666e --- /dev/null +++ b/socket_basics/core/connector/trufflehog/sumologic.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +""" +SumoLogic notifier for TruffleHog results. +Formats results in structured logging format suitable for log parsing of secret detection. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for SumoLogic - structured logging format.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '')) + line = str(props.get('lineNumber', '')) + verified = props.get('verified', False) + secret_type = str(props.get('secretType', '')) + + # Key-value format suitable for log parsing + rows.append([ + f"detector={detector}", + f"severity={severity}", + f"secret_type={secret_type}", + f"file={file_path}", + f"line={line}", + f"verified={verified}", + f"scanner=trufflehog" + ]) + + # Format as structured data + if not rows: + content = "No secrets found." 
+ else: + headers = ['Detector', 'Severity', 'Type', 'File', 'Line', 'Verified', 'Scanner'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'TruffleHog Secret Events', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/connector/trufflehog/webhook.py b/socket_basics/core/connector/trufflehog/webhook.py new file mode 100644 index 0000000..9bb4ffc --- /dev/null +++ b/socket_basics/core/connector/trufflehog/webhook.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +""" +Webhook notifier for TruffleHog results. +Formats results for generic webhook consumption with flexible structured format for secret detection. +""" + +from typing import Dict, Any, List + + +def format_notifications(mapping: Dict[str, Any]) -> List[Dict[str, Any]]: + """Format for generic webhook - flexible structured format.""" + rows = [] + for comp in mapping.values(): + for a in comp.get('alerts', []): + props = a.get('props', {}) or {} + detector = str(props.get('detectorName', '') or a.get('title') or '') + severity = str(a.get('severity', '')) + file_path = str(props.get('filePath', '')) + line = str(props.get('lineNumber', '')) + redacted = str(props.get('redactedValue', '')) + verified = props.get('verified', False) + secret_type = str(props.get('secretType', '')) + + rows.append([ + detector, + severity, + secret_type, + file_path, + line, + redacted, + str(verified), + 'trufflehog' + ]) + + # Format as structured data + if not rows: + content = "No secrets found." + else: + headers = ['Detector', 'Severity', 'SecretType', 'FilePath', 'Line', 'RedactedValue', 'Verified', 'Scanner'] + header_row = ' | '.join(headers) + separator_row = ' | '.join(['---'] * len(headers)) + content_rows = [] + for row in rows: + content_rows.append(' | '.join(str(cell) for cell in row)) + + content = '\n'.join([header_row, separator_row] + content_rows) + + return [{ + 'title': 'TruffleHog Secret Detection Results', + 'content': content + }] \ No newline at end of file diff --git a/socket_basics/core/formatters/__init__.py b/socket_basics/core/formatters/__init__.py new file mode 100644 index 0000000..f1700ad --- /dev/null +++ b/socket_basics/core/formatters/__init__.py @@ -0,0 +1,50 @@ +""" +Shared formatting library for Socket Security Basics. + +This module provides consistent formatting interfaces for converting security findings +into pre-formatted content for different notifier types. All formatters convert +raw findings data into text-based representations suitable for their target platforms. +""" + +from .console import ConsoleFormatter +from .markdown import MarkdownFormatter +from .json import JsonFormatter +from .slack import SlackFormatter +from .teams import TeamsFormatter +from .jira import JiraFormatter +from .sentinel import SentinelFormatter +from .sumologic import SumologicFormatter +from .webhook import WebhookFormatter + +# Convenience function to get all formatters +def get_all_formatters(): + """Get instances of all available formatters. 
+ + Returns: + Dictionary mapping formatter names to instances + """ + return { + 'console': ConsoleFormatter(), + 'markdown': MarkdownFormatter(), + 'json': JsonFormatter(), + 'slack': SlackFormatter(), + 'teams': TeamsFormatter(), + 'jira': JiraFormatter(), + 'sentinel': SentinelFormatter(), + 'sumologic': SumologicFormatter(), + 'webhook': WebhookFormatter() + } + +# Export commonly used classes +__all__ = [ + 'ConsoleFormatter', + 'MarkdownFormatter', + 'JsonFormatter', + 'SlackFormatter', + 'TeamsFormatter', + 'JiraFormatter', + 'SentinelFormatter', + 'SumologicFormatter', + 'WebhookFormatter', + 'get_all_formatters' +] \ No newline at end of file diff --git a/socket_basics/core/formatters/base.py b/socket_basics/core/formatters/base.py new file mode 100644 index 0000000..cffda84 --- /dev/null +++ b/socket_basics/core/formatters/base.py @@ -0,0 +1,158 @@ +""" +Base formatter interface for Socket Security Basics. + +This module defines the abstract base class and common patterns for all formatters. +Each formatter converts raw security findings into text-based content suitable +for specific notification channels. +""" + +import abc +from typing import Any, Dict, List, Optional +import logging + +logger = logging.getLogger(__name__) + + +class BaseFormatter(abc.ABC): + """Abstract base class for security findings formatters.""" + + def __init__(self, max_content_length: Optional[int] = None): + """Initialize the formatter. + + Args: + max_content_length: Maximum content length before truncation + """ + self.max_content_length = max_content_length + + @abc.abstractmethod + def format_findings(self, findings: List[Dict[str, Any]], title: str = "Security Findings") -> str: + """Format security findings into text content. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + Formatted text content suitable for the target platform + """ + pass + + @abc.abstractmethod + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data into text content. + + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + Formatted table as text content + """ + pass + + def truncate_content(self, content: str, suffix: str = "...") -> str: + """Truncate content if it exceeds maximum length. + + Args: + content: Content to potentially truncate + suffix: Suffix to append when truncating + + Returns: + Original content or truncated content with suffix + """ + if not self.max_content_length or len(content) <= self.max_content_length: + return content + + truncate_at = self.max_content_length - len(suffix) + return content[:truncate_at] + suffix + + def sanitize_text(self, text: Any) -> str: + """Sanitize and normalize text values. + + Args: + text: Text value to sanitize + + Returns: + Sanitized string + """ + if text is None: + return "" + + text_str = str(text) + # Remove or replace problematic characters for text display + return text_str.replace('\r\n', '\n').replace('\r', '\n') + + +class TableFormatter(BaseFormatter): + """Base class for table-based formatters.""" + + def format_findings_as_table( + self, + findings: List[Dict[str, Any]], + columns: List[str], + title: str = "Security Findings" + ) -> str: + """Format findings as a table using specified columns. 
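To make the abstract contract concrete: a hypothetical minimal subclass (not part of this changeset) only has to supply the two abstract methods and gets truncation and sanitization from the base class:

```python
from typing import Any, Dict, List
from socket_basics.core.formatters.base import BaseFormatter


class PlainFormatter(BaseFormatter):
    """Bare-bones formatter used here only to illustrate the interface."""

    def format_findings(self, findings: List[Dict[str, Any]], title: str = "Security Findings") -> str:
        lines = [f"{title}: {len(findings)} finding(s)"]
        for finding in findings:
            lines.append(f"- {self.sanitize_text(finding.get('title'))}")
        return self.truncate_content("\n".join(lines))

    def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str:
        body = "\n".join(", ".join(self.sanitize_text(cell) for cell in row) for row in rows)
        return self.truncate_content(f"{title}\n{body}")


fmt = PlainFormatter(max_content_length=80)
print(fmt.format_findings([{"title": "Hardcoded secret"}]))
# Security Findings: 1 finding(s)
# - Hardcoded secret
```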
+ + Args: + findings: List of finding dictionaries + columns: Column names to extract from findings + title: Title for the table + + Returns: + Formatted table content + """ + if not findings: + return f"{title}: No findings to report." + + # Extract rows from findings + rows = [] + for finding in findings: + row = [] + for col in columns: + value = self._extract_column_value(finding, col) + row.append(value) + rows.append(row) + + return self.format_table(columns, rows, title) + + def _extract_column_value(self, finding: Dict[str, Any], column: str) -> Any: + """Extract a column value from a finding dictionary. + + Args: + finding: Finding dictionary + column: Column name to extract + + Returns: + Extracted value or empty string if not found + """ + # Handle common column mappings + column_mappings = { + 'rule': ['rule', 'rule_id', 'check', 'check_name'], + 'file': ['file', 'filename', 'path', 'location'], + 'line': ['line', 'line_number', 'start_line'], + 'severity': ['severity', 'level'], + 'title': ['title', 'description', 'message'], + 'component': ['component', 'id', 'name'] + } + + # Try direct access first + if column in finding: + return finding[column] + + # Try mapped alternatives + for mapped_column in column_mappings.get(column.lower(), [column]): + if mapped_column in finding: + return finding[mapped_column] + + # Try nested access for common patterns + if 'props' in finding and isinstance(finding['props'], dict): + if column in finding['props']: + return finding['props'][column] + + if 'location' in finding and isinstance(finding['location'], dict): + if column in finding['location']: + return finding['location'][column] + + return "" \ No newline at end of file diff --git a/socket_basics/core/formatters/console.py b/socket_basics/core/formatters/console.py new file mode 100644 index 0000000..c74b3b7 --- /dev/null +++ b/socket_basics/core/formatters/console.py @@ -0,0 +1,186 @@ +""" +Console table formatter for Socket Security Basics. + +This module provides console/terminal table formatting functionality, +converting security findings into readable text tables. +""" + +from typing import Any, List +from .base import TableFormatter + +# Console display limits +MAX_CELL_LENGTH = 200 +DEFAULT_TRUNCATION_SUFFIX = "..." + + +class ConsoleFormatter(TableFormatter): + """Formatter for console/terminal table display.""" + + def __init__(self): + super().__init__(max_content_length=None) # No global limit for console + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as a console table. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + Formatted console table as text + """ + if not findings: + return f"{title}: No findings to report." + + # Standard columns for security findings + columns = ['component', 'severity', 'title', 'file', 'line'] + return self.format_findings_as_table(findings, columns, title) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as a console table. + + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + Formatted console table as text + """ + if not rows: + return f"{title}: No data to display." 
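The fallback lookup in `_extract_column_value` resolves in three stages: a direct key, the aliases in `column_mappings`, then the nested `props` and `location` probes. A small illustration using the concrete `ConsoleFormatter` defined next:

```python
from socket_basics.core.formatters.console import ConsoleFormatter

# Illustrative finding exercising each lookup stage.
finding = {
    "check_name": "detect-hardcoded-password",  # alias for 'rule'
    "severity": "high",                          # direct key
    "props": {"component": "auth-service"},      # nested props probe
    "location": {"line": 12},                    # nested location probe
}

fmt = ConsoleFormatter()
fmt._extract_column_value(finding, "rule")       # 'detect-hardcoded-password'
fmt._extract_column_value(finding, "component")  # 'auth-service'
fmt._extract_column_value(finding, "line")       # 12
fmt._extract_column_value(finding, "missing")    # ''
```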
+ + # Try to use tabulate if available, fallback to simple formatting + try: + from tabulate import tabulate + + # Sanitize all cells for console display + sanitized_rows = [] + for row in rows: + sanitized_row = [self._sanitize_cell_for_console(cell) for cell in row] + sanitized_rows.append(sanitized_row) + + table_content = tabulate(sanitized_rows, headers=headers, tablefmt="grid") + + # Add title + output_lines = [f"\n{title.upper()}", "-" * len(title), "", table_content, ""] + return "\n".join(output_lines) + + except ImportError: + # Fallback to simple table formatting + return self._simple_table_format(headers, rows, title) + + def _simple_table_format(self, headers: List[str], rows: List[List[Any]], title: str) -> str: + """Simple table formatting fallback when tabulate is not available. + + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + Simple formatted table as text + """ + # Sanitize all cells for console display + sanitized_rows = [] + for row in rows: + sanitized_row = [self._sanitize_cell_for_console(cell) for cell in row] + sanitized_rows.append(sanitized_row) + + # Calculate column widths + col_widths = [len(header) for header in headers] + for row in sanitized_rows: + for i, cell in enumerate(row): + if i < len(col_widths): + col_widths[i] = max(col_widths[i], len(str(cell))) + + # Build table + output_lines = [] + + # Title + output_lines.append(f"\n{title.upper()}") + output_lines.append("-" * len(title)) + output_lines.append("") + + # Header row + header_row = " | ".join( + header.ljust(col_widths[i]) for i, header in enumerate(headers) + ) + output_lines.append(header_row) + + # Header separator + separator = "-+-".join("-" * width for width in col_widths) + output_lines.append(separator) + + # Data rows + for row in sanitized_rows: + data_row = " | ".join( + str(cell).ljust(col_widths[i]) for i, cell in enumerate(row) + ) + output_lines.append(data_row) + + output_lines.append("") + return "\n".join(output_lines) + + def _sanitize_cell_for_console(self, cell: Any, max_length: int = MAX_CELL_LENGTH) -> str: + """Sanitize table cell values for console display. + + Args: + cell: The cell value to sanitize + max_length: Maximum length before truncation + + Returns: + Sanitized string suitable for console display + """ + cell_str = self.sanitize_text(cell) + + # Truncate very long strings + if len(cell_str) > max_length: + truncate_at = max_length - len(DEFAULT_TRUNCATION_SUFFIX) + cell_str = cell_str[:truncate_at] + DEFAULT_TRUNCATION_SUFFIX + + # Replace newlines with spaces for table display + return " ".join(cell_str.split()) + + +def format_console_section(title: str, content: str, separator_char: str = "-") -> str: + """Format a titled section for console display. + + Args: + title: Section title + content: Section content + separator_char: Character to use for title underline + + Returns: + Formatted section string with title, separator, and content + """ + formatted_title = title.upper() + separator = separator_char * len(formatted_title) + + return f"\n{formatted_title}\n{separator}\n{content}\n" + + +def build_console_output_from_notifications(notifications: List[dict]) -> str: + """Build formatted console output from notification data. + + Args: + notifications: List of dicts with 'title' and 'content' keys + + Returns: + Formatted string ready for console display + """ + if not notifications: + return "No notifications to display." 
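`format_console_section` gives plain-text sections the same underlined-title look as the tables; for example:

```python
print(format_console_section("Secret Scanning", "2 verified findings"))
#
# SECRET SCANNING
# ---------------
# 2 verified findings
```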
+
+    output_sections = []
+    for notification in notifications:
+        if not isinstance(notification, dict):
+            continue
+
+        title = notification.get('title', 'Untitled')
+        content = notification.get('content', 'No content')
+
+        section = format_console_section(title, content)
+        output_sections.append(section)
+
+    return "".join(output_sections)
\ No newline at end of file
diff --git a/socket_basics/core/formatters/jira.py b/socket_basics/core/formatters/jira.py
new file mode 100644
index 0000000..2a9df24
--- /dev/null
+++ b/socket_basics/core/formatters/jira.py
@@ -0,0 +1,129 @@
+"""
+JIRA text formatter for Socket Security Basics.
+
+This module provides text formatting for JIRA notifications, converting
+security findings into readable text format suitable for JIRA comments.
+Note: This converts findings to text rather than Atlassian Document Format (ADF).
+"""
+
+from typing import Any, List
+from .base import TableFormatter
+
+# JIRA limits
+MAX_JIRA_COMMENT_LENGTH = 32767  # JIRA comment length limit
+
+
+class JiraFormatter(TableFormatter):
+    """Formatter for JIRA text comments."""
+
+    def __init__(self):
+        super().__init__(max_content_length=MAX_JIRA_COMMENT_LENGTH)
+
+    def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str:
+        """Format security findings as JIRA-friendly text.
+
+        Args:
+            findings: List of finding dictionaries
+            title: Title for the formatted content
+
+        Returns:
+            Formatted text suitable for JIRA
+        """
+        if not findings:
+            return f"h3. {title}\n\nNo findings to report."
+
+        # Format using JIRA text formatting
+        output_lines = [f"h3. {title}", ""]
+
+        for i, finding in enumerate(findings, 1):
+            component = self._extract_column_value(finding, 'component') or 'Unknown'
+            severity = self._extract_column_value(finding, 'severity') or 'Unknown'
+            title_text = self._extract_column_value(finding, 'title') or 'No description'
+            file_path = self._extract_column_value(finding, 'file') or ''
+            line_num = self._extract_column_value(finding, 'line') or ''
+
+            # Format as JIRA-style text
+            location = f" in {{code}}{file_path}{{code}}" if file_path else ""
+            if line_num:
+                location += f" (line {line_num})"
+
+            severity_color = self._get_severity_color(severity)
+
+            output_lines.append(f"{i}. {severity_color}*{severity.upper()}*{{color}} - {component}")
+            output_lines.append(f" {title_text}{location}")
+            output_lines.append("")
+
+        content = "\n".join(output_lines)
+        return self.truncate_content(content)
+
+    def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str:
+        """Format tabular data as JIRA-friendly text.
+
+        Args:
+            headers: Column headers
+            rows: Table rows as lists of values
+            title: Title for the formatted table
+
+        Returns:
+            Formatted text suitable for JIRA
+        """
+        if not rows:
+            return f"h3. {title}\n\nNo data to display."
+
+        # Format as JIRA table
+        output_lines = [f"h3. {title}", ""]
+
+        # JIRA table header
+        header_row = "|| " + " || ".join(headers) + " ||"
+        output_lines.append(header_row)
+
+        # JIRA table rows
+        for row in rows:
+            sanitized_row = [self._sanitize_cell_for_jira(cell) for cell in row]
+            data_row = "| " + " | ".join(sanitized_row) + " |"
+            output_lines.append(data_row)
+
+        output_lines.append("")
+        content = "\n".join(output_lines)
+        return self.truncate_content(content)
+
+    def _sanitize_cell_for_jira(self, cell: Any) -> str:
+        """Sanitize table cell values for JIRA display.
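One finding rendered through `JiraFormatter` comes out as JIRA wiki markup like the following (the finding dict is illustrative); note that a `{color:...}` span is closed with a bare `{color}`:

```python
from socket_basics.core.formatters.jira import JiraFormatter

text = JiraFormatter().format_findings(
    [{"component": "auth-service", "severity": "high",
      "title": "Hardcoded password", "file": "src/auth.py", "line": 12}],
    title="SAST",
)
print(text)
# h3. SAST
#
# 1. {color:red}*HIGH*{color} - auth-service
#  Hardcoded password in {code}src/auth.py{code} (line 12)
```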
+ + Args: + cell: The cell value to sanitize + + Returns: + Sanitized string suitable for JIRA table + """ + cell_str = self.sanitize_text(cell) + + # Escape JIRA special characters + cell_str = cell_str.replace("|", "\\|") + cell_str = cell_str.replace("\n", " ") + + # Limit cell length for readability + if len(cell_str) > 200: + cell_str = cell_str[:197] + "..." + + return cell_str + + def _get_severity_color(self, severity: str) -> str: + """Get JIRA color markup for severity level. + + Args: + severity: Severity level string + + Returns: + Appropriate JIRA color markup for severity + """ + severity_lower = str(severity).lower() + + if severity_lower in ['critical', 'high']: + return "{color:red}" + elif severity_lower in ['medium', 'moderate']: + return "{color:orange}" + elif severity_lower in ['low', 'info']: + return "{color:blue}" + else: + return "{color:gray}" \ No newline at end of file diff --git a/socket_basics/core/formatters/json.py b/socket_basics/core/formatters/json.py new file mode 100644 index 0000000..26405a7 --- /dev/null +++ b/socket_basics/core/formatters/json.py @@ -0,0 +1,64 @@ +""" +JSON formatter for Socket Security Basics. + +This module provides JSON formatting functionality, converting security findings +into structured JSON suitable for JSON-based notifications. +""" + +import json +from typing import Any, List +from .base import BaseFormatter + + +class JsonFormatter(BaseFormatter): + """Formatter for JSON-based content.""" + + def __init__(self): + super().__init__(max_content_length=None) # No limit for JSON + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as JSON. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + JSON string representation of findings + """ + output = { + "title": title, + "findings": findings, + "count": len(findings) + } + + return json.dumps(output, indent=2, ensure_ascii=False) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as JSON. + + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + JSON string representation of table data + """ + # Convert rows to list of dictionaries + table_data = [] + for row in rows: + row_dict = {} + for i, header in enumerate(headers): + value = row[i] if i < len(row) else "" + row_dict[header] = value + table_data.append(row_dict) + + output = { + "title": title, + "headers": headers, + "data": table_data, + "count": len(table_data) + } + + return json.dumps(output, indent=2, ensure_ascii=False) \ No newline at end of file diff --git a/socket_basics/core/formatters/markdown.py b/socket_basics/core/formatters/markdown.py new file mode 100644 index 0000000..9711db7 --- /dev/null +++ b/socket_basics/core/formatters/markdown.py @@ -0,0 +1,146 @@ +""" +GitHub-flavored markdown formatter for Socket Security Basics. + +This module provides markdown formatting functionality for GitHub PR comments, +converting security findings into markdown tables and sections. 
+""" + +from typing import Any, List +from .base import TableFormatter + +# GitHub comment limits +MAX_COMMENT_LENGTH = 60000 # Conservative limit (GitHub allows ~65KB) + + +class MarkdownFormatter(TableFormatter): + """Formatter for GitHub-flavored markdown content.""" + + def __init__(self): + super().__init__(max_content_length=MAX_COMMENT_LENGTH) + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as a markdown table. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + Formatted markdown table + """ + if not findings: + return f"## {title}\n\nNo findings to report." + + # Standard columns for security findings + columns = ['Component', 'Severity', 'Issue', 'File', 'Line'] + table_content = self.format_findings_as_table(findings, columns, title) + + return self.truncate_content(table_content) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as a markdown table. + + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + Formatted markdown table + """ + if not rows: + return f"## {title}\n\nNo data to display." + + # Sanitize cells for markdown + sanitized_rows = [] + for row in rows: + sanitized_row = [self._sanitize_cell_for_markdown(cell) for cell in row] + sanitized_rows.append(sanitized_row) + + # Build markdown table + output_lines = [] + + # Title as H2 + output_lines.append(f"## {title}") + output_lines.append("") + + # Header row + header_row = "| " + " | ".join(headers) + " |" + output_lines.append(header_row) + + # Header separator + separator = "|" + "|".join([" --- " for _ in headers]) + "|" + output_lines.append(separator) + + # Data rows + for row in sanitized_rows: + data_row = "| " + " | ".join(str(cell) for cell in row) + " |" + output_lines.append(data_row) + + return "\n".join(output_lines) + + def _sanitize_cell_for_markdown(self, cell: Any) -> str: + """Sanitize table cell values for markdown display. + + Args: + cell: The cell value to sanitize + + Returns: + Sanitized string suitable for markdown table + """ + cell_str = self.sanitize_text(cell) + + # Escape markdown special characters in table cells + cell_str = cell_str.replace("|", "\\|") + cell_str = cell_str.replace("\n", "
") + + # Limit cell length for readability + if len(cell_str) > 200: + cell_str = cell_str[:197] + "..." + + return cell_str + + +def build_markdown_from_notifications(notifications: List[dict]) -> str: + """Build GitHub-flavored markdown from notification data. + + Args: + notifications: List of dicts with 'title' and 'content' keys + + Returns: + Complete markdown string with sections separated by horizontal rules + """ + if not notifications: + return "No security findings to report." + + markdown_sections = [] + for item in notifications: + if not isinstance(item, dict): + continue + + title = item.get('title', 'Untitled') + content = item.get('content', 'No content') + + # Format as markdown section with H3 header + markdown_sections.append(f"### {title}\n\n{content}") + + return '\n\n---\n\n'.join(markdown_sections) + + +def create_pr_comment_body(markdown_content: str, uid: str, footer: str = "Generated by Socket Security") -> str: + """Create a complete PR comment body with marker and footer. + + Args: + markdown_content: Main markdown content + uid: Unique identifier for comment tracking + footer: Footer text + + Returns: + Complete PR comment body with tracking markers + """ + return f""" +{markdown_content} + +--- +*{footer}* +""" \ No newline at end of file diff --git a/socket_basics/core/formatters/sentinel.py b/socket_basics/core/formatters/sentinel.py new file mode 100644 index 0000000..f907a3a --- /dev/null +++ b/socket_basics/core/formatters/sentinel.py @@ -0,0 +1,100 @@ +""" +Microsoft Sentinel log formatter for Socket Security Basics. + +This module provides log formatting for Microsoft Sentinel/Azure Log Analytics, +converting security findings into structured log entries. +""" + +import json +from datetime import datetime +from typing import Any, List +from .base import TableFormatter + + +class SentinelFormatter(TableFormatter): + """Formatter for Microsoft Sentinel log events.""" + + def __init__(self): + super().__init__(max_content_length=None) # No strict limit for logs + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as Sentinel log events. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + JSON log entries suitable for Sentinel + """ + if not findings: + empty_event = { + "TimeGenerated": datetime.utcnow().isoformat() + "Z", + "EventType": "SecurityScan", + "Category": title, + "Message": "No findings to report", + "FindingsCount": 0 + } + return json.dumps(empty_event, indent=2) + + # Create individual log events for each finding + log_events = [] + for finding in findings: + event = { + "TimeGenerated": datetime.utcnow().isoformat() + "Z", + "EventType": "SecurityFinding", + "Category": title, + "Component": self._extract_column_value(finding, 'component') or 'Unknown', + "Severity": self._extract_column_value(finding, 'severity') or 'Unknown', + "Title": self._extract_column_value(finding, 'title') or 'No description', + "FilePath": self._extract_column_value(finding, 'file') or '', + "LineNumber": self._extract_column_value(finding, 'line') or '', + "RawFinding": finding + } + log_events.append(event) + + # Return as newline-delimited JSON (NDJSON) for log ingestion + return "\n".join(json.dumps(event, ensure_ascii=False) for event in log_events) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as Sentinel log events. 
+ + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + JSON log entries suitable for Sentinel + """ + if not rows: + empty_event = { + "TimeGenerated": datetime.utcnow().isoformat() + "Z", + "EventType": "SecurityScan", + "Category": title, + "Message": "No data to display", + "RowCount": 0 + } + return json.dumps(empty_event, indent=2) + + # Create log events for table data + log_events = [] + for i, row in enumerate(rows): + event = { + "TimeGenerated": datetime.utcnow().isoformat() + "Z", + "EventType": "SecurityTableRow", + "Category": title, + "RowIndex": i + 1 + } + + # Add column data as event properties + for j, header in enumerate(headers): + if j < len(row): + # Clean header name for property key + prop_key = header.replace(" ", "").replace("-", "").replace("_", "") + event[prop_key] = self.sanitize_text(row[j]) + + log_events.append(event) + + # Return as newline-delimited JSON (NDJSON) + return "\n".join(json.dumps(event, ensure_ascii=False) for event in log_events) \ No newline at end of file diff --git a/socket_basics/core/formatters/slack.py b/socket_basics/core/formatters/slack.py new file mode 100644 index 0000000..8425ac4 --- /dev/null +++ b/socket_basics/core/formatters/slack.py @@ -0,0 +1,108 @@ +""" +Slack text formatter for Socket Security Basics. + +This module provides text formatting for Slack notifications, converting +security findings into readable text format suitable for Slack messages. +Note: This converts findings to text rather than Slack Block Kit format. +""" + +from typing import Any, List +from .base import TableFormatter + +# Slack limits +MAX_SLACK_MESSAGE_SIZE = 40000 # Conservative limit for Slack messages + + +class SlackFormatter(TableFormatter): + """Formatter for Slack text messages.""" + + def __init__(self): + super().__init__(max_content_length=MAX_SLACK_MESSAGE_SIZE) + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as Slack-friendly text. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + Formatted text suitable for Slack + """ + if not findings: + return f"*{title}*\nNo findings to report." + + # Format as text blocks rather than table + output_lines = [f"*{title}*", ""] + + for i, finding in enumerate(findings, 1): + component = self._extract_column_value(finding, 'component') or 'Unknown' + severity = self._extract_column_value(finding, 'severity') or 'Unknown' + title_text = self._extract_column_value(finding, 'title') or 'No description' + file_path = self._extract_column_value(finding, 'file') or '' + line_num = self._extract_column_value(finding, 'line') or '' + + # Format as Slack-style message + location = f" in `{file_path}`" if file_path else "" + if line_num: + location += f" (line {line_num})" + + severity_emoji = self._get_severity_emoji(severity) + + output_lines.append(f"{i}. {severity_emoji} *{severity.upper()}* - {component}") + output_lines.append(f" {title_text}{location}") + output_lines.append("") + + content = "\n".join(output_lines) + return self.truncate_content(content) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as Slack-friendly text. 
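Each finding becomes one compact JSON object per line (no `indent` argument, so the output is NDJSON-friendly). A representative event with illustrative values:

```python
# One line of format_findings output, pretty-printed here for readability.
event = {
    "TimeGenerated": "2025-01-01T12:00:00.000000Z",
    "EventType": "SecurityFinding",
    "Category": "Security Findings",
    "Component": "auth-service",
    "Severity": "high",
    "Title": "Hardcoded password",
    "FilePath": "src/auth.py",
    "LineNumber": 12,
    "RawFinding": {"component": "auth-service", "severity": "high",
                   "title": "Hardcoded password", "file": "src/auth.py",
                   "line": 12},
}
```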
+ + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + Formatted text suitable for Slack + """ + if not rows: + return f"*{title}*\nNo data to display." + + output_lines = [f"*{title}*", ""] + + # Format as list items rather than table + for i, row in enumerate(rows, 1): + row_parts = [] + for j, cell in enumerate(row): + if j < len(headers): + header = headers[j] + cell_str = self.sanitize_text(cell) + row_parts.append(f"*{header}:* {cell_str}") + + output_lines.append(f"{i}. {' | '.join(row_parts)}") + + output_lines.append("") + content = "\n".join(output_lines) + return self.truncate_content(content) + + def _get_severity_emoji(self, severity: str) -> str: + """Get emoji for severity level. + + Args: + severity: Severity level string + + Returns: + Appropriate emoji for severity + """ + severity_lower = str(severity).lower() + + if severity_lower in ['critical', 'high']: + return "🚨" + elif severity_lower in ['medium', 'moderate']: + return "⚠️" + elif severity_lower in ['low', 'info']: + return "ℹ️" + else: + return "🔍" \ No newline at end of file diff --git a/socket_basics/core/formatters/sumologic.py b/socket_basics/core/formatters/sumologic.py new file mode 100644 index 0000000..59e68e2 --- /dev/null +++ b/socket_basics/core/formatters/sumologic.py @@ -0,0 +1,126 @@ +""" +SumoLogic log formatter for Socket Security Basics. + +This module provides log formatting for SumoLogic, converting security findings +into structured log entries suitable for SumoLogic ingestion. +""" + +import json +from datetime import datetime +from typing import Any, List +from .base import TableFormatter + + +class SumologicFormatter(TableFormatter): + """Formatter for SumoLogic log events.""" + + def __init__(self): + super().__init__(max_content_length=None) # No strict limit for logs + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as SumoLogic log events. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + JSON log entries suitable for SumoLogic + """ + if not findings: + empty_event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "level": "INFO", + "source": "socket-security", + "category": title, + "message": "No findings to report", + "findings_count": 0 + } + return json.dumps(empty_event, indent=2) + + # Create individual log events for each finding + log_events = [] + for finding in findings: + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "level": self._get_log_level(self._extract_column_value(finding, 'severity')), + "source": "socket-security", + "category": title, + "component": self._extract_column_value(finding, 'component') or 'Unknown', + "severity": self._extract_column_value(finding, 'severity') or 'Unknown', + "title": self._extract_column_value(finding, 'title') or 'No description', + "file_path": self._extract_column_value(finding, 'file') or '', + "line_number": self._extract_column_value(finding, 'line') or '', + "raw_finding": finding + } + log_events.append(event) + + # Return as newline-delimited JSON (NDJSON) for log ingestion + return "\n".join(json.dumps(event, ensure_ascii=False) for event in log_events) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as SumoLogic log events. 
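The `level` field is derived from the finding severity by `_get_log_level`, defined a little further down; the mapping collapses to:

```python
from socket_basics.core.formatters.sumologic import SumologicFormatter

fmt = SumologicFormatter()
[fmt._get_log_level(s) for s in ("critical", "high", "medium", "low", "other")]
# ['FATAL', 'ERROR', 'WARN', 'INFO', 'DEBUG']
```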
+ + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + JSON log entries suitable for SumoLogic + """ + if not rows: + empty_event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "level": "INFO", + "source": "socket-security", + "category": title, + "message": "No data to display", + "row_count": 0 + } + return json.dumps(empty_event, indent=2) + + # Create log events for table data + log_events = [] + for i, row in enumerate(rows): + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "level": "INFO", + "source": "socket-security", + "category": title, + "row_index": i + 1 + } + + # Add column data as event properties + for j, header in enumerate(headers): + if j < len(row): + # Clean header name for property key + prop_key = header.lower().replace(" ", "_").replace("-", "_") + event[prop_key] = self.sanitize_text(row[j]) + + log_events.append(event) + + # Return as newline-delimited JSON (NDJSON) + return "\n".join(json.dumps(event, ensure_ascii=False) for event in log_events) + + def _get_log_level(self, severity: str) -> str: + """Convert security severity to log level. + + Args: + severity: Security severity level + + Returns: + Appropriate log level for SumoLogic + """ + severity_lower = str(severity).lower() + + if severity_lower in ['critical']: + return "FATAL" + elif severity_lower in ['high']: + return "ERROR" + elif severity_lower in ['medium', 'moderate']: + return "WARN" + elif severity_lower in ['low', 'info']: + return "INFO" + else: + return "DEBUG" \ No newline at end of file diff --git a/socket_basics/core/formatters/teams.py b/socket_basics/core/formatters/teams.py new file mode 100644 index 0000000..fe9501d --- /dev/null +++ b/socket_basics/core/formatters/teams.py @@ -0,0 +1,108 @@ +""" +Microsoft Teams text formatter for Socket Security Basics. + +This module provides text formatting for Microsoft Teams notifications, +converting security findings into readable text format suitable for Teams messages. +Note: This converts findings to text rather than MessageCard format. +""" + +from typing import Any, List +from .base import TableFormatter + +# Teams limits (similar to Slack) +MAX_TEAMS_MESSAGE_SIZE = 28000 # Conservative limit for Teams messages + + +class TeamsFormatter(TableFormatter): + """Formatter for Microsoft Teams text messages.""" + + def __init__(self): + super().__init__(max_content_length=MAX_TEAMS_MESSAGE_SIZE) + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as Teams-friendly text. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + Formatted text suitable for Teams + """ + if not findings: + return f"**{title}**\n\nNo findings to report." + + # Format as text blocks + output_lines = [f"**{title}**", ""] + + for i, finding in enumerate(findings, 1): + component = self._extract_column_value(finding, 'component') or 'Unknown' + severity = self._extract_column_value(finding, 'severity') or 'Unknown' + title_text = self._extract_column_value(finding, 'title') or 'No description' + file_path = self._extract_column_value(finding, 'file') or '' + line_num = self._extract_column_value(finding, 'line') or '' + + # Format as Teams-style message + location = f" in `{file_path}`" if file_path else "" + if line_num: + location += f" (line {line_num})" + + severity_icon = self._get_severity_icon(severity) + + output_lines.append(f"{i}. 
{severity_icon} **{severity.upper()}** - {component}") + output_lines.append(f" {title_text}{location}") + output_lines.append("") + + content = "\n".join(output_lines) + return self.truncate_content(content) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as Teams-friendly text. + + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + Formatted text suitable for Teams + """ + if not rows: + return f"**{title}**\n\nNo data to display." + + output_lines = [f"**{title}**", ""] + + # Format as list items + for i, row in enumerate(rows, 1): + row_parts = [] + for j, cell in enumerate(row): + if j < len(headers): + header = headers[j] + cell_str = self.sanitize_text(cell) + row_parts.append(f"**{header}:** {cell_str}") + + output_lines.append(f"{i}. {' | '.join(row_parts)}") + + output_lines.append("") + content = "\n".join(output_lines) + return self.truncate_content(content) + + def _get_severity_icon(self, severity: str) -> str: + """Get icon for severity level. + + Args: + severity: Severity level string + + Returns: + Appropriate icon for severity + """ + severity_lower = str(severity).lower() + + if severity_lower in ['critical', 'high']: + return "🚨" + elif severity_lower in ['medium', 'moderate']: + return "⚠️" + elif severity_lower in ['low', 'info']: + return "ℹ️" + else: + return "🔍" \ No newline at end of file diff --git a/socket_basics/core/formatters/webhook.py b/socket_basics/core/formatters/webhook.py new file mode 100644 index 0000000..4bdc5ec --- /dev/null +++ b/socket_basics/core/formatters/webhook.py @@ -0,0 +1,102 @@ +""" +Webhook payload formatter for Socket Security Basics. + +This module provides JSON formatting for webhook notifications, converting +security findings into structured payloads suitable for webhook endpoints. +""" + +import json +from datetime import datetime +from typing import Any, List +from .base import TableFormatter + + +class WebhookFormatter(TableFormatter): + """Formatter for webhook JSON payloads.""" + + def __init__(self): + super().__init__(max_content_length=None) # No strict limit for webhooks + + def format_findings(self, findings: List[dict], title: str = "Security Findings") -> str: + """Format security findings as webhook payload. + + Args: + findings: List of finding dictionaries + title: Title for the formatted content + + Returns: + JSON payload suitable for webhook endpoints + """ + payload = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "event_type": "security_scan_completed", + "title": title, + "summary": { + "total_findings": len(findings), + "severities": self._count_severities(findings) + }, + "findings": [] + } + + # Format each finding + for finding in findings: + formatted_finding = { + "component": self._extract_column_value(finding, 'component') or 'Unknown', + "severity": self._extract_column_value(finding, 'severity') or 'Unknown', + "title": self._extract_column_value(finding, 'title') or 'No description', + "file_path": self._extract_column_value(finding, 'file') or '', + "line_number": self._extract_column_value(finding, 'line') or '', + "raw_data": finding + } + payload["findings"].append(formatted_finding) + + return json.dumps(payload, indent=2, ensure_ascii=False) + + def format_table(self, headers: List[str], rows: List[List[Any]], title: str = "Results") -> str: + """Format tabular data as webhook payload. 
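The resulting webhook payload keeps a stable envelope around the findings; a sketch with illustrative values:

```python
payload = {
    "timestamp": "2025-01-01T12:00:00.000000Z",
    "event_type": "security_scan_completed",
    "title": "Security Findings",
    "summary": {"total_findings": 1, "severities": {"high": 1}},
    "findings": [
        {"component": "auth-service", "severity": "high",
         "title": "Hardcoded password", "file_path": "src/auth.py",
         "line_number": 12,
         "raw_data": {"component": "auth-service", "severity": "high",
                      "title": "Hardcoded password", "file": "src/auth.py",
                      "line": 12}},
    ],
}
```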
+ + Args: + headers: Column headers + rows: Table rows as lists of values + title: Title for the formatted table + + Returns: + JSON payload suitable for webhook endpoints + """ + payload = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "event_type": "security_table_data", + "title": title, + "summary": { + "total_rows": len(rows), + "columns": headers + }, + "data": [] + } + + # Convert rows to list of dictionaries + for row in rows: + row_dict = {} + for i, header in enumerate(headers): + value = self.sanitize_text(row[i]) if i < len(row) else "" + row_dict[header] = value + payload["data"].append(row_dict) + + return json.dumps(payload, indent=2, ensure_ascii=False) + + def _count_severities(self, findings: List[dict]) -> dict: + """Count findings by severity level. + + Args: + findings: List of finding dictionaries + + Returns: + Dictionary with severity counts + """ + severity_counts = {} + for finding in findings: + severity = self._extract_column_value(finding, 'severity') or 'Unknown' + severity_lower = str(severity).lower() + severity_counts[severity_lower] = severity_counts.get(severity_lower, 0) + 1 + + return severity_counts \ No newline at end of file diff --git a/socket_basics/core/notification/console.py b/socket_basics/core/notification/console.py index 9d9744b..94d07c0 100644 --- a/socket_basics/core/notification/console.py +++ b/socket_basics/core/notification/console.py @@ -10,298 +10,36 @@ class ConsoleNotifier(BaseNotifier): name = "console" - def _preview_snippet(self, text: str, max_lines: int = 2, max_chars: int = 140) -> str: - """Return a compact single-line preview for potentially multi-line snippets. - - - Collapse consecutive whitespace and newlines. - - Join up to `max_lines` with a visible separator and append ellipsis if truncated. - - Truncate to `max_chars` characters with ellipsis. - """ - if not isinstance(text, str) or not text: - return text or "" - - # Split into logical lines and trim - lines = [l.strip() for l in text.splitlines() if l.strip()] - if not lines: - return "" - - preview_lines = lines[:max_lines] - preview = " ⏎ ".join(preview_lines) - if len(lines) > max_lines: - preview = preview + " ⏎ ..." - - # Collapse repeated whitespace inside the preview - preview = " ".join(preview.split()) - - if len(preview) > max_chars: - return preview[: max_chars - 3] + "..." - return preview - def _sanitize_cell(self, cell: Any) -> Any: """Normalize table cell values: shorten long strings and collapse newlines.""" - # If connector provided a raw match-group structure (list of frames), - # try to convert it into a human-readable nested arrow string using - # the SocketTier1Scanner formatter. This is defensive: some connectors - # historically returned structured objects which tabulate would print - # as raw Python reprs. 
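`_count_severities` lower-cases before counting, so mixed-case severities collapse into one bucket and findings without a severity land under `unknown`:

```python
from socket_basics.core.formatters.webhook import WebhookFormatter

WebhookFormatter()._count_severities(
    [{"severity": "High"}, {"severity": "high"}, {"severity": "low"}, {}]
)
# {'high': 2, 'low': 1, 'unknown': 1}
```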
- if not isinstance(cell, str) and isinstance(cell, list): - try: - # local import to avoid circular module-level dependencies - from socket_basics.core.connector.socket_tier1.scanner import SocketTier1Scanner - - formatted = SocketTier1Scanner(config=None)._format_match_groups(cell) - if formatted: - cell = "```\n" + formatted + "\n```" - else: - cell = str(cell) - except Exception: - # Fall back to stringifying the cell - try: - cell = str(cell) - except Exception: - pass - - if isinstance(cell, str): - # If it contains newlines or is long, create a preview - if "\n" in cell or len(cell) > 200: - return self._preview_snippet(cell) - # Otherwise collapse excessive internal whitespace - return " ".join(cell.split()) - return cell - - def _format_location(self, loc: Dict[str, Any]) -> str: - if not loc: - return "-" - path = loc.get("path", "-") - line = loc.get("line") - if line is not None: - return f"{path}:{line}" - return path + if not isinstance(cell, str): + return str(cell) if cell is not None else "" + + # Truncate very long strings + if len(cell) > 200: + return cell[:197] + "..." + + # Replace newlines with spaces for table display + return " ".join(cell.split()) def notify(self, facts: Dict[str, Any]) -> None: - components = facts.get("components", []) - - # Diagnostic: log the notifications payload to help debug why console - # might be falling back to the 'ALL FINDINGS' table. - try: - notif_payload = facts.get('notifications') - if notif_payload is not None: - try: - # keep debug concise - logger_payload = {k: (type(v).__name__) for k, v in (notif_payload.items() if isinstance(notif_payload, dict) else [])} - except Exception: - logger_payload = str(type(notif_payload)) - try: - import logging - - logging.getLogger(__name__).debug('ConsoleNotifier received facts.notifications: %s', logger_payload) - except Exception: - pass - except Exception: - pass - - # Console notifier should not decide which severities to show; the - # NotificationManager is responsible for filtering notifications by - # severity. Notifier only formats and presents whatever is attached - # to `facts['notifications']` or raw `facts['components']`. - - # If global consolidated console output is enabled, show a single table for all components - consolidated = False - try: - app_cfg = getattr(self, 'app_config', {}) or {} - # Only enable consolidated tabular output when console_tabular_enabled is explicitly set - consolidated = bool(app_cfg.get('console_tabular_enabled')) - except Exception: - consolidated = False - - # Only show tabular consolidated output when explicitly requested - if consolidated: - # If connectors provided pre-built notification rows, use them per category - notifications = facts.get('notifications', {}) or {} - - # mapping of connector -> (display name, headers) (reserved for future use) - categories = { - 'opengrep': ('SAST', ['Rule', 'File', 'Location', 'Lines', 'Snippet']), - 'trufflehog': ('Secret Scanning', ['Detector', 'Severity', 'File', 'Line', 'Redacted']), - 'trivy': ('Image/Dockerfile', ['Title', 'Severity', 'Image', 'Package']) - } - - printed_any = False - - # If the connector produced socket_tier1 data, prefer the - # connector-provided notifications when present. Connectors may - # now supply the canonical list-of-table-dicts format or the older - # dict-mapping format. Normalize either into an internal mapping - # of {title -> {headers, rows}} for consistent processing below. 
- provided = facts.get('notifications', {}) or {} - - # Normalize list-of-table-dicts into mapping {title: {headers, rows}} - normalized: Dict[str, Dict[str, Any]] = {} - try: - if isinstance(provided, dict): - normalized = provided - elif isinstance(provided, list): - for item in provided: - if not isinstance(item, dict): - continue - title = item.get('title') or 'results' - headers = item.get('headers') - rows = item.get('rows') or [] - normalized.setdefault(title, {'headers': headers, 'rows': []}) - if rows: - normalized[title]['rows'].extend(rows) - except Exception: - normalized = provided if isinstance(provided, dict) else {} - - if 'socket_tier1' in facts: - # Prefer the connector-provided normalized payload when present - if normalized.get('Socket Tier 1 Reachability'): - notifications = normalized + # New simplified format: expect notifications to be a list of {title, content} dicts + notifications = facts.get('notifications', []) or [] + + if isinstance(notifications, list) and notifications: + # Display each notification separately + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + title = item['title'] + content = item['content'] + + print(f"\n{title.upper()}") + print("-" * len(title)) + print(content) + print() else: - try: - from socket_basics.core.connector.socket_tier1.scanner import SocketTier1Scanner - rows_from_scanner = SocketTier1Scanner(config=None).notification_rows(facts) - if rows_from_scanner: - # rows_from_scanner expected to be list of table dicts - for item in rows_from_scanner: - if isinstance(item, dict) and 'title' in item and 'rows' in item: - title = item.get('title') - headers = item.get('headers') - rows = item.get('rows') or [] - normalized.setdefault(title, {'headers': headers, 'rows': []}) - normalized[title]['rows'].extend(rows) - notifications = normalized - else: - notifications = normalized or {} - except Exception: - notifications = normalized or {} - else: - notifications = normalized or {} - - # Iterate and print all notification groups (connector-provided or otherwise) - for group_label, payload in (notifications.items() if isinstance(notifications, dict) else []): - if not payload: - continue - # payload must be dict with 'headers' and 'rows' per Manager contract - if not isinstance(payload, dict) or 'rows' not in payload: - logger.warning('ConsoleNotifier: skipping notification group %s due to unexpected payload shape', group_label) - continue - headers = payload.get('headers') - rows = payload.get('rows') or [] - - # Require connector-provided headers; do not infer or override - if not headers or not isinstance(headers, list): - logger.warning('ConsoleNotifier: skipping notification group %s because headers missing or invalid; Manager should filter these', group_label) - continue - - display = group_label - display_headers = headers - - # Sanitize rows for printing - sanitized_input_rows = [] - for r in rows: - # If headers indicate SAST-like shape or group label suggests SAST, - # try to map common legacy shapes into a reasonable presentation. 
- sanitized_input_rows.append(r if isinstance(r, (list, tuple)) else [str(r)]) - - sanitized_rows = [[self._sanitize_cell(cell) for cell in row] for row in sanitized_input_rows] - print(display.upper()) - print(tabulate(sanitized_rows, headers=display_headers, tablefmt='github')) - print() - printed_any = True - - if printed_any: - return - - # If `facts['notifications']` existed but was empty (no groups), do not - # fall back to printing ALL FINDINGS; this likely indicates connectors - # intentionally suppressed notifications for current severity filter. - if notif_payload is not None and (not normalized): - # Nothing to print and notifications were intentionally empty - return - - # If no connector-specific rows were provided, fallback to grouped tables by inferred tool - rows: List[List[str]] = [] - for c in components: - comp_name = c.get('name') or c.get('id') or '-' - for a in c.get('alerts', []): - path = comp_name or a.get('location', {}).get('path', '-') - sev = a.get('severity', '') - msg = a.get('message') or a.get('title') or a.get('description', '') - loc_str = self._format_location(a.get('location', {}) or {}) - rows.append([path, sev, self._sanitize_cell(msg), loc_str]) - - if rows: - print("ALL FINDINGS") - print(tabulate(rows, headers=["File", "Severity", "Message", "Location"], tablefmt="github")) - return - - # summary - total_components = len(components) - total_alerts = 0 - per_type: Dict[str, int] = {} - for c in components: - alerts = c.get("alerts", []) - total_alerts += len(alerts) - for a in alerts: - # Prefer connector qualifier or alert props.tool over the generic alert type - t = ( - c.get('qualifiers', {}).get('scanner') or - a.get('props', {}).get('tool') or - a.get('type', 'unknown') - ) - per_type[t] = per_type.get(t, 0) + 1 - - print("Socket Basics Scan Summary") - print("--------------------------") - print(f"Components: {total_components}") - print(f"Total alerts: {total_alerts}") - for t, cnt in per_type.items(): - print(f" - {t}: {cnt}") - print() - - # Group by connector/tool using component qualifiers or alert props when available - grouped: Dict[str, List[List[str]]] = {} - for c in components: - # Determine logical tool/scanner name - # Prefer explicit qualifiers (scanner) when present so components typed as 'generic' - # but qualified as sast/secrets/dockerfile/image are grouped correctly. - tool = (c.get('qualifiers', {}) or {}).get('scanner') or c.get('tool') or c.get('source') or c.get('name') or 'unknown' - # Note: above uses qualifiers first. We'll fallback per-alert if needed. 
- alerts = c.get('alerts', []) - for a in alerts: - # Per-alert override if component-level scanner missing - alert_tool = ( - c.get('qualifiers', {}).get('scanner') or - a.get('props', {}).get('tool') or - a.get('type') or - c.get('type') or - c.get('name') or - 'unknown' - ) - path = c.get('name') or a.get('location', {}).get('path', "-") - sev = a.get('severity', '') - # Prefer title/description/message - msg = a.get('message') or a.get('title') or a.get('description', '') - loc_str = self._format_location(a.get('location', {}) or {}) - grouped.setdefault(alert_tool, []).append([path, sev, self._sanitize_cell(msg), loc_str]) - - # Friendly display mapping for known scanner keys - display_map = { - 'sast': 'SAST', - 'secret': 'SECRET SCANNING', - 'secrets': 'SECRET SCANNING', - 'trufflehog': 'SECRET SCANNING', - 'trivy': 'IMAGE/DOCKERFILE', - 'dockerfile': 'DOCKERFILE', - 'image': 'IMAGE' - } - - for tool_key in sorted(grouped.keys()): - rows = grouped[tool_key] - if not rows: - continue - display = display_map.get(str(tool_key).lower(), str(tool_key).upper()) - print(display) - print(tabulate(rows, headers=["File", "Severity", "Message", "Location"], tablefmt="github")) - print() + logger.warning("ConsoleNotifier: skipping invalid notification item: %s", type(item)) + return + + # No console data available + logger.debug("No console notifications found") \ No newline at end of file diff --git a/socket_basics/core/notification/github_pr_notifier.py b/socket_basics/core/notification/github_pr_notifier.py index 7bce4ae..2e91e58 100644 --- a/socket_basics/core/notification/github_pr_notifier.py +++ b/socket_basics/core/notification/github_pr_notifier.py @@ -1,508 +1,308 @@ -import json +from typing import Any, Dict, List, Optional import logging -import os -import re -import subprocess -from typing import Any, Dict, List, Optional, Tuple -from .base import BaseNotifier +from socket_basics.core.notification.base import BaseNotifier +from socket_basics.core.config import get_github_token, get_github_repository, get_github_pr_number logger = logging.getLogger(__name__) class GithubPRNotifier(BaseNotifier): - """Post per-group comments to a GitHub PR. Works in GH Actions (GITHUB_* envs) - or locally/CI by using git to discover repo and branch. Uses GITHUB_TOKEN - (or INPUT_GITHUB_TOKEN) to authenticate. Attempts to find an open PR for - the branch; if found, it will fetch existing comments and update them per - group. If a previous comment exists, it will merge items using checkboxes - and mark resolved items as checked. + """GitHub PR notifier: posts security findings as PR comments. + + Simplified version that works with pre-formatted content from connectors. 
""" name = "github_pr" def __init__(self, params: Dict[str, Any] | None = None): super().__init__(params or {}) + # GitHub token from params, env variable, or app config self.token = ( - self.config.get('token') - or os.getenv('GITHUB_TOKEN') - or os.getenv('INPUT_GITHUB_TOKEN') + self.config.get('token') or + get_github_token() + ) + self.api_base = "https://api.github.com" + + # Get repository from GitHub environment + self.repository = ( + self.config.get('repository') or + get_github_repository() ) - # Helpers to discover repo, branch, and PR number - def _discover_repo(self, facts: Dict[str, Any]) -> Optional[str]: - # priority: GitHub event/env, git remote, facts (facts often come from --workspace) - # Prefer explicit GitHub environment or event payload first - repo = os.getenv('GITHUB_REPOSITORY') - try: - # Prefer structured event file path - event_path = os.getenv('GITHUB_EVENT_PATH') - ev = None - if event_path and os.path.exists(event_path): - with open(event_path, 'r') as fh: - ev = json.load(fh) - else: - # Also allow inline JSON in GITHUB_EVENT - ev_raw = os.getenv('GITHUB_EVENT') - if ev_raw: - try: - ev = json.loads(ev_raw) - except Exception: - ev = None + def notify(self, facts: Dict[str, Any]) -> None: + notifications = facts.get('notifications', []) or [] + + if not isinstance(notifications, list): + logger.error('GithubPRNotifier: only supports new format - list of dicts with title/content') + return + + if not notifications: + logger.info('GithubPRNotifier: no notifications present; skipping') + return - if ev: - pr = ev.get('pull_request') or ev.get('pullRequest') - if pr: - head = pr.get('head') or {} - repo_info = head.get('repo') or pr.get('base', {}).get('repo') - if repo_info and repo_info.get('full_name'): - repo = repo_info.get('full_name') - except Exception: - pass + # Get full scan URL if available + full_scan_url = facts.get('full_scan_html_url') + + # Validate format + valid_notifications = [] + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + # Append full scan URL to content if available + content = item['content'] + if full_scan_url: + content += f"\n\n---\n\n🔗 [View complete scan results]({full_scan_url})\n" + item = {'title': item['title'], 'content': content} + valid_notifications.append(item) + else: + logger.warning('GithubPRNotifier: skipping invalid notification item: %s', type(item)) + + if not valid_notifications: + return - # If not found via env/event, try local git remote - if not repo: - try: - url = subprocess.check_output(['git', 'config', '--get', 'remote.origin.url'], text=True).strip() - if url.endswith('.git'): - url = url[:-4] - if url.startswith('git@'): - repo = url.split(':', 1)[1] + # Get PR number for current branch + pr_number = self._get_pr_number() + if not pr_number: + logger.warning('GithubPRNotifier: unable to determine PR number for current branch') + return + + # Get existing comments to check for sections to update + existing_comments = self._get_pr_comments(pr_number) + + # Group notifications by comment (find existing sections) + comment_updates = {} + new_sections = [] + + for notification in valid_notifications: + content = notification['content'] + section_match = self._extract_section_markers(content) + + if section_match: + section_type = section_match['type'] + section_content = section_match['content'] + + # Find existing comment with this section + existing_comment = self._find_comment_with_section(existing_comments, section_type) + + if existing_comment: + # 
Update existing comment + comment_id = existing_comment['id'] + if comment_id not in comment_updates: + comment_updates[comment_id] = existing_comment['body'] + comment_updates[comment_id] = self._update_section_in_comment( + comment_updates[comment_id], section_type, content + ) else: - parts = url.rstrip('/').split('/') - if len(parts) >= 2: - repo = f"{parts[-2]}/{parts[-1]}" - except Exception: - pass - - # Finally, fall back to facts (often populated from --workspace) - if not repo: - repo = facts.get('repository') or None - - return repo - - def _discover_branch(self, facts: Dict[str, Any]) -> Optional[str]: - # Prefer GitHub env or event payload first, then local git, then facts - branch = os.getenv('GITHUB_REF') or os.getenv('GITHUB_HEAD_REF') - try: - event_path = os.getenv('GITHUB_EVENT_PATH') - ev = None - if event_path and os.path.exists(event_path): - with open(event_path, 'r') as fh: - ev = json.load(fh) + # New section to add + new_sections.append(content) else: - ev_raw = os.getenv('GITHUB_EVENT') - if ev_raw: - try: - ev = json.loads(ev_raw) - except Exception: - ev = None - - if ev: - pr = ev.get('pull_request') or ev.get('pullRequest') - if pr: - head = pr.get('head') or {} - branch = head.get('ref') or ev.get('ref') or branch - except Exception: - pass - - # If not available from env/event, try local git - if not branch: - try: - branch = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], text=True).strip() - except Exception: - branch = None - - # Finally, fall back to facts (often populated from --workspace) - if not branch: - branch = facts.get('branch') or None - - if branch and branch.startswith('refs/heads/'): - branch = branch.split('refs/heads/')[-1] - - return branch - - def _api_headers(self) -> Dict[str, str]: - headers = {'Accept': 'application/vnd.github+json'} - if self.token: - headers['Authorization'] = f"token {self.token}" - return headers - - def _api_base(self) -> str: - # Allow overriding API base for GH Enterprise or custom endpoints. - # Priority: config, INPUT_GITHUB_API_URL, GITHUB_API_URL, GITHUB_SERVER_URL, default api.github.com - api = self.config.get('api_base') or os.getenv('INPUT_GITHUB_API_URL') or os.getenv('GITHUB_API_URL') or os.getenv('GITHUB_SERVER_URL') - if api: - # normalize to host only or full url - api = api.rstrip('/') - if not api.startswith('http'): - api = f"https://{api}" - else: - api = 'https://api.github.com' - return api - - def _split_owner_repo(self, owner_repo: str) -> Tuple[Optional[str], Optional[str]]: - """Safely split an owner/repo string into (owner, repo). - - Returns (None, None) if the format is invalid. - """ - try: - owner, repo = owner_repo.split('/') - return owner, repo - except Exception: - return None, None - - def _render_location_and_snippet(self, repo_rel: str, ref: str, start: str, end: str, snippet: str) -> List[str]: - """Return lines for the location and optional snippet in the requested format. 
- - Output example: - " * location: [path/to/file](relative_link) - `start`-`end`" - - ``` - CODE - ``` - """ - out: List[str] = [] - start_s = str(start) if start is not None else '' - end_s = str(end) if end is not None else '' - if start_s and end_s: - display_range = f"`{start_s}`-`{end_s}`" - elif start_s: - display_range = f"`{start_s}`-`{start_s}`" - else: - display_range = '' - - anchor = '' - if start_s and end_s: - anchor = f"L{start_s}-L{end_s}" - elif start_s: - anchor = f"L{start_s}" - - if anchor: - rel = f"/blob/{ref}/{repo_rel}#{anchor}" - if display_range: - out.append(f" * location: [{repo_rel}]({rel}) - {display_range}") + # No section markers, treat as new section + new_sections.append(content) + + # Update existing comments with new section content + for comment_id, updated_body in comment_updates.items(): + success = self._update_comment(pr_number, comment_id, updated_body) + if success: + logger.info('GithubPRNotifier: updated existing comment %s', comment_id) else: - out.append(f" * location: [{repo_rel}]({rel})") - else: - if display_range: - out.append(f" * location: [{repo_rel}] - {display_range}") + logger.error('GithubPRNotifier: failed to update comment %s', comment_id) + + # Create separate comments for each new section + # Each scanner should get its own comment to avoid merging issues + for section_content in new_sections: + success = self._post_comment(pr_number, section_content) + if success: + logger.info('GithubPRNotifier: posted individual comment for section') else: - out.append(f" * location: [{repo_rel}]") - - if snippet: - sn = snippet if len(snippet) <= 800 else snippet[:800] + '...' - indented_block = ['', ' ```'] + [f' {l}' for l in sn.splitlines()] + [' ```'] - out.extend(indented_block) + logger.error('GithubPRNotifier: failed to post individual comment') - return out + def _send_pr_comment(self, facts: Dict[str, Any], title: str, content: str) -> None: + """Send a single PR comment with title and content.""" + if not self.token: + logger.warning('GithubPRNotifier: no GitHub token available') + return - def _parse_checklist(self, body: str) -> List[Dict[str, Any]]: - """Parse a markdown checklist in the comment body. + # Get repository and branch info from config (discovered by main logic) + owner_repo = self.repository + branch = self.config.get('branch') + + if not self.repository or not branch: + logger.warning('GithubPRNotifier: repository (%s) or branch (%s) not available in config', + self.repository, branch) + return - Returns a list of dicts: {'text': , 'checked': } - Recognizes lines like: - - [ ] some text - - [x] some text - Also tolerates leading spaces and other list markers. 
- """ - items: List[Dict[str, Any]] = [] - if not body: - return items + # Find PR number + pr_number = self._get_pr_number() + if not pr_number: + logger.info('GithubPRNotifier: no PR found for branch %s in %s', branch, self.repository) + return - for line in body.splitlines(): - line = line.strip() - m = re.match(r"^[-*+]\s*\[( |x|X)\]\s*(.*)$", line) - if m: - checked = (m.group(1).lower() == 'x') - text = m.group(2).strip() - items.append({'text': text, 'checked': checked}) - return items + # Create comment body with pre-formatted content + uid = f"socket-security:{self.repository}:{branch}:{title.lower().replace(' ', '-')}" + marker = f"" + comment_body = f"{marker}\n\n### {title}\n\n{content}\n\n---\n*Generated by Socket Security*" + + # Post the comment + success = self._post_comment(pr_number, comment_body) + if success: + logger.info('GithubPRNotifier: posted comment for "%s"', title) + else: + logger.error('GithubPRNotifier: failed to post comment for "%s"', title) - def _find_pr_for_branch(self, owner_repo: str, branch: str) -> Optional[int]: - # Query PRs matching head branch via GitHub API + def _get_pr_number(self) -> Optional[int]: + """Get PR number from environment or API.""" + # Try environment variables first + pr_env = get_github_pr_number() + if pr_env and pr_env.isdigit(): + return int(pr_env) + + # Try to find via API + return self._find_pr_for_branch() + + def _find_pr_for_branch(self) -> Optional[int]: + """Find PR number for the given branch using API.""" + owner_repo = self.repository + branch = self.config.get('branch') + + if not self.repository or not branch: + return None + try: import requests - except Exception: - logger.error('requests library required for GithubPRNotifier') - return None - owner, repo = self._split_owner_repo(owner_repo) - if not owner or not repo: - return None - base = self._api_base() - url = f"{base}/repos/{owner}/{repo}/pulls?state=open&head={owner}:{branch}" - resp = requests.get(url, headers=self._api_headers()) - if resp.status_code != 200: - logger.debug('Failed to list PRs: %s %s', resp.status_code, resp.text) - return None - prs = resp.json() or [] - if prs: - return prs[0].get('number') + headers = { + 'Authorization': f'token {self.token}', + 'Accept': 'application/vnd.github.v3+json' + } + + url = f"{self.api_base}/repos/{self.repository}/pulls" + params = {'head': f"{self.repository.split('/')[0]}:{branch}", 'state': 'open'} + + resp = requests.get(url, headers=headers, params=params, timeout=10) + if resp.status_code == 200: + prs = resp.json() + if prs: + return prs[0]['number'] + except Exception as e: + logger.debug('GithubPRNotifier: failed to find PR for branch %s: %s', branch, e) + return None - def _list_comments(self, owner_repo: str, pr_number: int) -> List[Dict[str, Any]]: + def _get_pr_comments(self, pr_number: int) -> List[Dict[str, Any]]: + """Get all comments for a PR.""" + owner_repo = self.repository + + if not self.repository: + return [] + try: import requests - except Exception: - logger.error('requests library required for GithubPRNotifier') - return [] - owner, repo = self._split_owner_repo(owner_repo) - if not owner or not repo: - logger.error('Invalid owner/repo format for pr comment: %s', owner_repo) - return [] - base = self._api_base() - url = f"{base}/repos/{owner}/{repo}/issues/{pr_number}/comments" - resp = requests.get(url, headers=self._api_headers()) - if resp.status_code != 200: - logger.debug('Failed to list comments: %s %s', resp.status_code, resp.text) + headers = { + 'Authorization': 
+                'Accept': 'application/vnd.github.v3+json'
+            }
+
+            url = f"{self.api_base}/repos/{self.repository}/issues/{pr_number}/comments"
+
+            resp = requests.get(url, headers=headers, timeout=10)
+            if resp.status_code == 200:
+                return resp.json()
+            else:
+                logger.warning('GithubPRNotifier: failed to get comments: %s', resp.status_code)
+                return []
+        except Exception as e:
+            logger.error('GithubPRNotifier: exception getting comments: %s', e)
             return []
-        return resp.json() or []
 
-    def _post_comment(self, owner_repo: str, pr_number: int, body: str) -> Optional[Dict[str, Any]]:
-        try:
-            import requests
-        except Exception:
-            logger.error('requests library required for GithubPRNotifier')
-            return None
-        owner, repo = self._split_owner_repo(owner_repo)
-        if not owner or not repo:
-            logger.error('Invalid owner/repo format for posting comment: %s', owner_repo)
-            return None
-        base = self._api_base()
-        url = f"{base}/repos/{owner}/{repo}/issues/{pr_number}/comments"
-        resp = requests.post(url, headers=self._api_headers(), json={'body': body})
-        if resp.status_code not in (200, 201):
-            logger.error('Failed to post comment: %s %s', resp.status_code, resp.text)
-            return None
-        return resp.json()
+    def _extract_section_markers(self, content: str) -> Optional[Dict[str, str]]:
+        """Extract section type and content from HTML comment markers."""
+        import re
+
+        # Look for <!-- socket-section:TYPE --> ... <!-- /socket-section:TYPE --> pairs
+        pattern = r'<!-- socket-section:([\w-]+) -->(.*?)<!-- /socket-section:[\w-]+ -->'
+        match = re.search(pattern, content, re.DOTALL)
+
+        if match:
+            section_type = match.group(1)
+            section_content = content  # Keep full content with markers
+            return {'type': section_type, 'content': section_content}
+
+        return None
 
-    def _update_comment(self, owner_repo: str, comment_id: int, body: str) -> Optional[Dict[str, Any]]:
-        """Update an existing PR/issue comment by id.
+    def _find_comment_with_section(self, comments: List[Dict[str, Any]], section_type: str) -> Optional[Dict[str, Any]]:
+        """Find an existing comment that contains the given section type."""
+        import re
+
+        pattern = f'<!-- socket-section:{section_type} -->'
+
+        for comment in comments:
+            if re.search(pattern, comment.get('body', '')):
+                return comment
+
+        return None
 
-    Uses the issues comments endpoint: PATCH /repos/{owner}/{repo}/issues/comments/{comment_id}
-    """
+    def _update_section_in_comment(self, comment_body: str, section_type: str, new_section_content: str) -> str:
+        """Update a specific section within a comment body."""
+        import re
+
+        # Pattern to match the existing section, including its markers
+        pattern = f'<!-- socket-section:{section_type} -->.*?<!-- /socket-section:{section_type} -->'
+
+        # Replace the existing section with new content
+        updated_body = re.sub(pattern, new_section_content, comment_body, flags=re.DOTALL)
+
+        return updated_body
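
A minimal round-trip of the section-marker helpers above. The `<!-- socket-section:... -->` literal is an assumption; the code only establishes that the markers are typed HTML-comment delimiters:

```python
import re

def wrap_section(section_type: str, body: str) -> str:
    # Assumed marker format, consistent with the patterns used above
    return (f"<!-- socket-section:{section_type} -->\n"
            f"{body}\n"
            f"<!-- /socket-section:{section_type} -->")

old_comment = wrap_section("sast", "1 finding")
new_section = wrap_section("sast", "2 findings")

# Same replacement the notifier performs in _update_section_in_comment
pattern = r'<!-- socket-section:sast -->.*?<!-- /socket-section:sast -->'
updated = re.sub(pattern, new_section, old_comment, flags=re.DOTALL)
assert "2 findings" in updated and "1 finding" not in updated
```
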
+    def _update_comment(self, pr_number: int, comment_id: int, comment_body: str) -> bool:
+        """Update an existing comment."""
+        owner_repo = self.repository
+
+        if not self.repository:
+            return False
+
         try:
             import requests
-        except Exception:
-            logger.error('requests library required for GithubPRNotifier')
-            return None
-        owner, repo = self._split_owner_repo(owner_repo)
-        if not owner or not repo:
-            logger.error('Invalid owner/repo format for updating comment: %s', owner_repo)
-            return None
-        base = self._api_base()
-        url = f"{base}/repos/{owner}/{repo}/issues/comments/{comment_id}"
-        resp = requests.patch(url, headers=self._api_headers(), json={'body': body})
-        if resp.status_code != 200:
-            logger.error('Failed to update comment: %s %s', resp.status_code, resp.text)
-            return None
-        return resp.json()
 
-    def notify(self, facts: Dict[str, Any]) -> None:
-        # Require canonical notifications provided by NotificationManager.
-        notifications = facts.get('notifications')
-        if not notifications:
-            logger.info('GithubPRNotifier: no notifications present in facts; skipping')
-            return
-
-        # Normalize into mapping title -> rows
-        normalized: Dict[str, List[Any]] = {}
-        if isinstance(notifications, list):
-            for item in notifications:
-                if not isinstance(item, dict):
-                    continue
-                title = item.get('title') or 'results'
-                rows = item.get('rows') or []
-                normalized[title] = rows
-        elif isinstance(notifications, dict):
-            for title, payload in notifications.items():
-                if isinstance(payload, dict):
-                    normalized[title] = payload.get('rows') or []
-                elif isinstance(payload, list):
-                    normalized[title] = payload
-
-        owner_repo = self._discover_repo(facts)
-        branch = self._discover_branch(facts)
-
-        if not owner_repo or not branch:
-            logger.info('Could not determine repository or branch for PR notifier')
-            return
-
-        pr_number = None
-        pr_env = os.getenv('GITHUB_PR_NUMBER') or os.getenv('INPUT_PR_NUMBER')
-        if pr_env and pr_env.isdigit():
-            pr_number = int(pr_env)
-        else:
-            pr_number = self._find_pr_for_branch(owner_repo, branch)
-
-        if not pr_number:
-            logger.info('No pull request found for branch %s in %s', branch, owner_repo)
-            return
-
-        existing_comments = self._list_comments(owner_repo, pr_number)
-
-        bot_comments = []
-        uid = f"socket-security:{owner_repo}:{branch}"
-        marker = f"<!-- {uid} -->"
-        for c in existing_comments:
-            if not c:
-                continue
-            body = c.get('body', '') or ''
-            if marker in body:
-                bot_comments.append(c)
-
-        for group_label, rows in normalized.items():
-            items: List[Dict[str, Any]] = []
-            for r in rows:
-                try:
-                    # Expected canonical OpenGrep row shape: [Rule, Severity, File, Path, Lines, Code]
-                    if group_label.lower().startswith('sast') and len(r) >= 3:
-                        rule = str(r[0]) if len(r) > 0 else ''
-                        severity = str(r[1]) if len(r) > 1 else ''
-                        file_name = str(r[2]) if len(r) > 2 else ''
-                        file_path = str(r[3]) if len(r) > 3 else (str(r[2]) if len(r) > 2 else '')
-                        loc = str(r[4]) if len(r) > 4 else ''
-                        snippet = str(r[5]) if len(r) > 5 else str(r[4]) if len(r) > 4 else ''
-                        # Store both display file name and the full path (used for links)
-                        items.append({'rule': rule, 'severity': severity, 'file': file_name, 'path': file_path, 'loc': loc, 'snippet': snippet, 'checked': False})
-                    else:
-                        desc = ' | '.join([str(x) for x in r if x is not None and x != ''])
-                        items.append({'text': desc, 'checked': False})
-                except Exception:
-                    desc = ' | 
'.join([str(x) for x in r if x is not None and x != '']) - items.append({'text': desc, 'checked': False}) - - prev_comment = None - prev_items: List[Dict[str, Any]] = [] - for c in bot_comments: - body = c.get('body', '') or '' - if f"### {group_label}" in body: - prev_comment = c - prev_items = self._parse_checklist(body) - break - - prev_texts = {p['text']: p.get('checked', False) for p in prev_items} - - merged: List[Dict[str, Any]] = [] - if group_label.lower().startswith('sast'): - grouped: Dict[str, Dict[str, List[Dict[str, Any]]]] = {} - for it in items: - rule = it.get('rule', '') - # Use the full path (path) for link generation and grouping - file_path = it.get('path', '') - loc = it.get('loc', '') - snippet = it.get('snippet', '') - - checked = False - cand1 = rule - cand2 = f"{rule} | {file_path} | {loc}" - if cand1 in prev_texts: - checked = prev_texts.get(cand1, False) - elif cand2 in prev_texts: - checked = prev_texts.get(cand2, False) - - grouped.setdefault(rule, {}).setdefault(file_path, []).append({'loc': loc, 'snippet': snippet, 'checked': checked}) - - for rule, files in grouped.items(): - merged.append({'rule': rule, 'files': files}) - else: - for it in items: - text = it.get('text', '') - checked = prev_texts.get(text, False) - merged.append({'text': text, 'checked': checked}) - - total = 0 - if group_label.lower().startswith('sast'): - for m in merged: - files_map = m.get('files') or {} - if isinstance(files_map, dict) and files_map: - for file_list in files_map.values(): - total += len(file_list) - else: - for mf in (m.get('manifestFiles') or []): - total += 1 - else: - total = len(merged) - - if total == 0: - if prev_comment and prev_items: - updated_items = [] - for it in prev_items: - updated_items.append({'checked': True, 'text': it.get('text', '')}) - new_body = marker + '\n\n' + f"### {group_label}\n\nSocket Security findings for *{owner_repo}* on branch `{branch}` — *0 alert(s)*." + '\n\n' + self._render_checklist(updated_items) + '\n\n_This comment is managed by Socket Security notifier._' - logger.info('Marking existing PR comment as resolved for group %s', group_label) - self._update_comment(owner_repo, prev_comment.get('id'), new_body) - else: - logger.info('No alerts and no previous comment for group %s — nothing to post', group_label) - continue - - header = f"### {group_label}\n\nSocket Security findings for *{owner_repo}* on branch `{branch}` — *{total} alert(s)*." 
- lines: List[str] = [marker, '', header, ''] - - if group_label.lower().startswith('sast'): - for m in merged: - rule = m.get('rule', '') - files = m.get('files', {}) or {} - - def render_file_occurrences(file_path: str, occs: List[Dict[str, Any]]): - file_key = f"{rule} | {file_path}" - any_occ_checked = any(occ.get('checked', False) for occ in occs) - checked = prev_texts.get(file_key, False) or any_occ_checked - box = 'x' if checked else ' ' - lines.append(f"- [{box}] {rule}") - - repo_rel = file_path - try: - cwd = facts.get('cwd') - if not cwd: - git_root = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'], text=True).strip() - cwd = git_root - except Exception: - cwd = None - try: - if repo_rel and cwd and repo_rel.startswith(cwd): - repo_rel = repo_rel[len(cwd):].lstrip('/').lstrip('./') - except Exception: - pass - - basename = os.path.basename(repo_rel) or repo_rel - lines.append(f" File: `{basename}`") - - for occ in occs: - loc = occ.get('loc', '') - snippet = occ.get('snippet', '') - start = '' - end = '' - if loc and '-' in str(loc): - parts = str(loc).split('-', 1) - start = parts[0] - end = parts[1] - else: - start = str(loc) - - ref = os.getenv('GITHUB_SHA') or branch - lines.extend(self._render_location_and_snippet(repo_rel, ref, start, end, snippet)) - - if isinstance(files, dict) and files: - for file_path, occs in files.items(): - render_file_occurrences(file_path, occs) - else: - for mf in (m.get('manifestFiles') or []): - fpath = mf.get('file', '') - occs = [{'loc': mf.get('start') or '', 'snippet': mf.get('snippet') or ''}] - render_file_occurrences(fpath, occs) + headers = { + 'Authorization': f'token {self.token}', + 'Accept': 'application/vnd.github.v3+json' + } + + url = f"{self.api_base}/repos/{self.repository}/issues/comments/{comment_id}" + payload = {'body': comment_body} + + resp = requests.patch(url, headers=headers, json=payload, timeout=10) + if resp.status_code == 200: + logger.debug('GithubPRNotifier: comment updated successfully') + return True else: - for it in merged: - checked = it.get('checked', False) - box = 'x' if checked else ' ' - text = it.get('text', '') - lines.append(f"- [{box}] {text}") - - lines.append('_This comment is provided by Socket Security._') - body = '\n'.join(lines) - - if prev_comment: - logger.info('Updating existing PR comment for group %s', group_label) - self._update_comment(owner_repo, prev_comment.get('id'), body) + logger.warning('GithubPRNotifier: API error updating comment %s: %s', resp.status_code, resp.text[:200]) + return False + except Exception as e: + logger.error('GithubPRNotifier: exception updating comment: %s', e) + return False + + def _post_comment(self, pr_number: int, comment_body: str) -> bool: + """Post a comment to the PR.""" + if not self.repository: + logger.warning('GithubPRNotifier: no repository configured') + return False + + try: + import requests + headers = { + 'Authorization': f'token {self.token}', + 'Accept': 'application/vnd.github.v3+json' + } + + url = f"{self.api_base}/repos/{self.repository}/issues/{pr_number}/comments" + payload = {'body': comment_body} + + resp = requests.post(url, headers=headers, json=payload, timeout=10) + if resp.status_code == 201: + logger.debug('GithubPRNotifier: comment posted successfully') + return True else: - logger.info('Posting new PR comment for group %s', group_label) - self._post_comment(owner_repo, pr_number, body) + logger.warning('GithubPRNotifier: API error %s: %s', resp.status_code, resp.text[:200]) + return False + except 
Exception as e: + logger.error('GithubPRNotifier: exception posting comment: %s', e) + return False \ No newline at end of file diff --git a/socket_basics/core/notification/jira_notifier.py b/socket_basics/core/notification/jira_notifier.py index f533223..92d8f10 100644 --- a/socket_basics/core/notification/jira_notifier.py +++ b/socket_basics/core/notification/jira_notifier.py @@ -1,462 +1,505 @@ -from typing import Any, Dict, List, Optional -import os -from pathlib import Path +from typing import Any, Dict, Optional import logging -import json from socket_basics.core.notification.base import BaseNotifier +from socket_basics.core.config import ( + get_jira_url, get_jira_project, get_jira_email, + get_jira_api_token, get_github_repository +) logger = logging.getLogger(__name__) -def _adf_text_node(text: str) -> Dict[str, Any]: - return {"type": "text", "text": text} - - -def _adf_paragraph(text: str) -> Dict[str, Any]: - return {"type": "paragraph", "content": [{"type": "text", "text": text}]} - - -def _adf_table(headers: List[str], rows: List[List[str]]) -> Dict[str, Any]: - """Build a Jira ADF table node from headers and rows. - - headers: list of header strings - rows: list of row lists (strings) - Returns a node of type 'table'. - """ - def cell(text: str) -> Dict[str, Any]: - return {"type": "tableCell", "content": [{"type": "paragraph", "content": [{"type": "text", "text": text}]}]} - - # header row - header_cells = [cell(h) for h in headers] - header_row = {"type": "tableRow", "content": header_cells} - - body_rows = [] - for r in rows: - # ensure row has same number of columns - cells = [cell(str(r[i]) if i < len(r) else '') for i in range(len(headers))] - body_rows.append({"type": "tableRow", "content": cells}) - - table = {"type": "table", "content": [header_row] + body_rows} - return table - - class JiraNotifier(BaseNotifier): - """Notifier that posts results to Jira using ADF tables for structured data.""" + """JIRA notifier: creates JIRA issues for security findings. + + Creates a main issue with summary table and adds each scanner result as a comment. 
+ """ name = "jira" def __init__(self, params: Dict[str, Any] | None = None): super().__init__(params or {}) - self.server = self.config.get('server') or os.getenv('INPUT_JIRA_URL') - self.project = self.config.get('project') or os.getenv('INPUT_JIRA_PROJECT') - self.auth = self.config.get('auth') or { - 'email': os.getenv('INPUT_JIRA_EMAIL'), - 'api_token': os.getenv('INPUT_JIRA_API_TOKEN') - } - - def _requests(self): - try: - import requests + # JIRA configuration from params, env variables, or app config + self.server = ( + self.config.get('server') or + get_jira_url() + ) + self.project = ( + self.config.get('project') or + get_jira_project() + ) + self.email = ( + self.config.get('email') or + get_jira_email() + ) + self.api_token = ( + self.config.get('api_token') or + get_jira_api_token() + ) + + # Get repository from config or environment + self.repository = ( + self.config.get('repository') or + get_github_repository() or + 'Unknown' + ) - return requests - except Exception: - logger.error("requests library required for JiraNotifier") - return None + def notify(self, facts: Dict[str, Any]) -> None: + notifications = facts.get('notifications', []) or [] + + if not isinstance(notifications, list): + logger.error('JiraNotifier: only supports new format - list of dicts with title/content') + return + + if not notifications: + logger.info('JiraNotifier: no notifications present; skipping') + return - def _find_existing_issue(self, summary: str, auth: Optional[Any], headers: Dict[str, str], base: str) -> Optional[str]: - requests = self._requests() - if requests is None: - return None + # Validate format + valid_notifications = [] + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + valid_notifications.append(item) + else: + logger.warning('JiraNotifier: skipping invalid notification item: %s', type(item)) + + if not valid_notifications: + return - # Build JQL to search for issues with the given summary - jql_summary = summary.replace('"', '\\"') - jql = f'summary ~ "{jql_summary}"' - - # Use POST /rest/api/3/search/jql per Atlassian migration guidance - url = f"{base}/rest/api/3/search/jql" - - # Build payload exactly as in Atlassian docs example - body = { - "expand": "", - "fields": ["key", "summary", "status"], - "fieldsByKeys": True, - "jql": jql, - "maxResults": 1, - "nextPageToken": "", - "properties": [], - "reconcileIssues": [] - } - - # Debug: log the exact payload and headers (avoid printing secrets) + # Get repository info from facts (passed from main config) + repo = facts.get('repository', self.repository) + branch = facts.get('branch', 'Unknown') + commit_hash = facts.get('commit_hash', 'Unknown') + full_scan_url = facts.get('full_scan_html_url') + + logger.info('JiraNotifier: repo=%s, branch=%s, commit_hash=%s', repo, branch, commit_hash) + if full_scan_url: + logger.info('JiraNotifier: full_scan_url=%s', full_scan_url) + + # Create main issue title in format: "Socket Security Results - repo - branch - commit_hash" + title_parts = ["Socket Security Results"] + if repo and repo != 'Unknown': + title_parts.append(repo) + if branch and branch != 'Unknown': + title_parts.append(branch) + if commit_hash and commit_hash != 'Unknown': + title_parts.append(commit_hash[:8]) # Short hash for readability + + main_title = " - ".join(title_parts) + logger.info('JiraNotifier: main title: %s', main_title) + + # Create summary table showing scanner findings + summary_content = self._create_summary_table(valid_notifications) + + # Try to 
find existing issue with same title + existing_issue = self._find_existing_issue(main_title) + + if existing_issue: + issue_key = existing_issue['key'] + logger.info('JiraNotifier: found existing issue %s, will update', issue_key) + # Update description with new summary + self._update_issue_description(issue_key, summary_content) + else: + # Create main issue with summary + issue_key = self._create_main_issue(main_title, summary_content) + + if issue_key: + # Add each scanner result as a separate comment + for notification in valid_notifications: + self._add_comment_to_issue(issue_key, notification['title'], notification['content'], full_scan_url) + + + + def _create_summary_table(self, notifications: list) -> str: + """Create a summary table showing scanner findings count.""" + # Create summary table with scanner names and count of findings + summary_lines = ['|| Scanner || Findings ||'] + + for notification in notifications: + title = notification.get('title', 'Unknown Scanner') + content = notification.get('content', {}) + + # Count findings from ADF table rows + row_count = self._count_adf_table_rows(content) + + summary_lines.append(f'| {title} | {row_count} |') + + return '\n'.join(summary_lines) + + def _count_adf_table_rows(self, content: Dict[str, Any]) -> int: + """Count data rows in ADF table format.""" + if not isinstance(content, dict) or content.get('type') != 'doc': + return 0 + + # Look for table in ADF content + for item in content.get('content', []): + if item.get('type') == 'table': + table_rows = item.get('content', []) + # Count data rows (exclude header row) + data_rows = [row for row in table_rows if row.get('type') == 'tableRow'][1:] # Skip header + return len(data_rows) + elif item.get('type') == 'paragraph': + # Check if it's a "No ... 
found" message + text_content = self._extract_text_from_adf_paragraph(item) + if 'No ' in text_content and ' found' in text_content: + return 0 + + # If no table found but content exists, assume 1 finding + return 1 if content.get('content') else 0 + + def _extract_text_from_adf_paragraph(self, paragraph: Dict[str, Any]) -> str: + """Extract text content from ADF paragraph.""" + text_parts = [] + for content_item in paragraph.get('content', []): + if content_item.get('type') == 'text': + text_parts.append(content_item.get('text', '')) + return ''.join(text_parts) + + def _create_main_issue(self, title: str, summary_content: str) -> Optional[str]: + """Create main Jira issue with summary table and return issue key.""" try: - logger.debug("Jira search request: url=%s headers=%s auth=%s body=%s", url, {k: headers.get(k) for k in headers}, (type(auth).__name__ if auth is not None else None), json.dumps(body)) - except Exception: - logger.debug("Jira search request prepared (failed to serialize body for logging)") - - resp = requests.post(url, auth=auth, headers=headers, data=json.dumps(body)) - if resp.status_code >= 400: - # Log request/response details to help diagnose invalid payload errors - try: - req = getattr(resp, 'request', None) - req_headers = dict(req.headers) if req is not None and getattr(req, 'headers', None) else {} - # mask Authorization header if present - if 'Authorization' in req_headers: - req_headers['Authorization'] = 'REDACTED' - req_body = req.body if req is not None else None - logger.debug("Jira search returned %s: %s", resp.status_code, resp.text) - logger.debug("Jira request sent: method=%s url=%s headers=%s body=%s", getattr(req, 'method', None), getattr(req, 'url', None), req_headers, (req_body[:1000] if isinstance(req_body, (bytes, str)) else str(type(req_body)))) - except Exception: - logger.debug("Jira search returned %s and failed to log request details", resp.status_code) - return None - - data = resp.json() or {} - issues = data.get('issues') or [] - if not issues: - return None - return issues[0].get('key') + import requests + from requests.auth import HTTPBasicAuth + + # Create ADF format description with summary table + summary_table_adf = self._convert_jira_table_to_adf(summary_content.split('\n')) + + description_content = [ + { + "type": "paragraph", + "content": [ + {"type": "text", "text": "Socket Security scan results summary:"} + ] + } + ] + + # Add the summary table + description_content.extend(summary_table_adf) + + # Add footer paragraph + description_content.append({ + "type": "paragraph", + "content": [ + {"type": "text", "text": "Detailed findings are provided in the comments below."} + ] + }) + + description_adf = { + "type": "doc", + "version": 1, + "content": description_content + } - def _get_issue_status(self, issue_key: str, auth: Optional[Any], headers: Dict[str, str], base: str) -> Optional[tuple[str, Any]]: - requests = self._requests() - if requests is None: - return None - url = f"{base}/rest/api/3/issue/{issue_key}" - resp = requests.get(url, auth=auth, headers=headers, params={"fields": "status,description"}) - if resp.status_code >= 400: - logger.debug('Failed to fetch issue %s: %s', issue_key, resp.text) - return None - data = resp.json() or {} - fields = data.get('fields') or {} - status = (fields.get('status') or {}).get('name') - description = fields.get('description') - return status, description - - def _get_issue_comments(self, issue_key: str, auth: Optional[Any], headers: Dict[str, str], base: str) -> List[Dict[str, 
Any]]: - requests = self._requests() - if requests is None: - return [] - url = f"{base}/rest/api/3/issue/{issue_key}/comment" - resp = requests.get(url, auth=auth, headers=headers) - if resp.status_code >= 400: - logger.debug('Failed to fetch comments for %s: %s', issue_key, resp.text) - return [] - data = resp.json() or {} - return data.get('comments') or [] - - def _adf_extract_table_rows(self, adf: Dict[str, Any]) -> List[List[str]]: - """Try to extract simple string rows from an ADF doc produced by this notifier. - - This is heuristic: look for nodes of type 'table' and extract text from cells. - """ - rows: List[List[str]] = [] - if not adf or not isinstance(adf, dict): - return rows - for node in adf.get('content', []) if isinstance(adf.get('content', []), list) else []: - if node.get('type') == 'table': - for tr in node.get('content', []): - if tr.get('type') != 'tableRow': - continue - cells = [] - for tc in tr.get('content', []): - # find first text in cell - txt = '' - for p in tc.get('content', []): - if p.get('type') == 'paragraph': - for t in p.get('content', []): - if t.get('type') == 'text': - txt += t.get('text', '') - cells.append(txt) - rows.append(cells) - # skip header row when present - if len(rows) > 1: - return rows[1:] - return [] + payload = { + "fields": { + "project": {"key": self.project}, + "summary": title, + "description": description_adf, + "issuetype": {"name": "Task"} + } + } - def notify(self, facts: Dict[str, Any], should_print: bool = True) -> None: - """Build ADF description/comment and post to Jira. - - Note: the issue summary continues to include repo/branch, but the body/comment - will NOT contain a leading line like "Security issues detected in ". - Instead the body contains a proper ADF table representing the findings. 
- """ - app_cfg = getattr(self, 'app_config', {}) or {} - repo = ( - self.config.get('repository') - or app_cfg.get('repository') - or facts.get('repository') - or os.getenv('SOCKET_REPOSITORY_NAME') - or os.getenv('GITHUB_REPOSITORY') - or Path(os.getcwd()).name - ) - branch = ( - self.config.get('branch') - or app_cfg.get('branch') - or facts.get('branch') - or os.getenv('GITHUB_REF_NAME') - or 'unknown' - ) + auth = HTTPBasicAuth(str(self.email), str(self.api_token)) + headers = { + 'Accept': 'application/json', + 'Content-Type': 'application/json' + } - raw_notifications = facts.get('notifications') or {} - # Normalize notifications into mapping: title -> {'headers': Optional[List[str]], 'rows': List[List[str]]} - notifications: Dict[str, Dict[str, Any]] = {} - if isinstance(raw_notifications, list): - for t in raw_notifications: - try: - title = t.get('title') or 'results' - headers = t.get('headers') - rows = t.get('rows') or [] - existing = notifications.setdefault(title, {'headers': None, 'rows': []}) - # Prefer headers provided by the connector if not already set - if headers and not existing.get('headers'): - existing['headers'] = headers - existing['rows'].extend(rows) - except Exception: - continue - elif isinstance(raw_notifications, dict): - for title, payload in raw_notifications.items(): - try: - if isinstance(payload, dict): - headers = payload.get('headers') - rows = payload.get('rows') or [] - notifications[title] = {'headers': headers, 'rows': rows} - elif isinstance(payload, list): - notifications[title] = {'headers': None, 'rows': payload} - else: - notifications[title] = {'headers': None, 'rows': []} - except Exception: - notifications[title] = {'headers': None, 'rows': []} + url = f"{self.server}/rest/api/3/issue" + resp = requests.post(url, auth=auth, headers=headers, json=payload, timeout=10) - # Do NOT synthesize notifications from components. The ConnectorManager - # is responsible for validating and providing canonical notification - # tables. If none are present, log and continue with an empty doc. - if not notifications: - logger.warning('JiraNotifier: no canonical notifications provided; skipping creation of notification tables') - - # Build ADF content blocks: a short paragraph header (optional) and a table - adf_content: List[Dict[str, Any]] = [] - - # For each group, create a separate table with a heading paragraph - for group_label, payload in notifications.items(): - rows = payload.get('rows') or [] - provided_headers = payload.get('headers') - - # heading paragraph for group - adf_content.append(_adf_paragraph(f"{group_label}")) - - # Use only connector-provided headers. Manager enforces that headers - # are present and valid; if a group lacks headers here, skip it. - if not provided_headers or not isinstance(provided_headers, list): - logger.warning("JiraNotifier: skipping group '%s' because headers absent or invalid; notifiers should only receive validated tables", group_label) - continue - # Use connector-provided headers; limit to available columns if rows present - col_count = len(rows[0]) if rows else 0 - headers = provided_headers[:col_count] if col_count else provided_headers - - # truncate/normalize cells - norm_rows: List[List[str]] = [] - for r in rows[:50]: - norm = [] - for i in range(len(headers)): - cell = r[i] if i < len(r) else '' - s = str(cell) if cell is not None else '' - s = " ".join(s.split()) - if len(s) > 800: - s = s[:800] + '...' 
- norm.append(s) - norm_rows.append(norm) - - adf_content.append(_adf_table(headers, norm_rows)) - - # Fallback: if no notifications, include a short paragraph - if not adf_content: - adf_content.append(_adf_paragraph("No security issues detected in scanned components.")) - - # Build final ADF doc - adf_doc = {"type": "doc", "version": 1, "content": adf_content} - - should_print = bool(self.config.get('print_comment')) - - # Validate credentials - auth_ok = False - if isinstance(self.auth, dict): - auth_ok = bool(self.auth.get('email') and self.auth.get('api_token')) - elif isinstance(self.auth, str): - auth_ok = bool(self.auth) - - if not (self.server and self.project and auth_ok): - missing = [] - if not self.server: - missing.append('server') - if not self.project: - missing.append('project') - if not auth_ok: - missing.append('auth') - logger.info("Jira notifier not fully configured; skipping remote post (missing: %s)", ','.join(missing) if missing else 'none') - logger.debug("Jira ADF content: %s", adf_doc) - if should_print: - try: - print(adf_doc) - except Exception: - logger.debug('Failed to print Jira ADF doc to stdout') - return + if resp.status_code == 201: + issue = resp.json() + issue_key = issue.get('key') + logger.info('JiraNotifier: created main issue %s', issue_key) + return issue_key + else: + logger.warning('JiraNotifier: failed to create main issue: %s - %s', resp.status_code, resp.text) + return None - requests = self._requests() - if requests is None: - if should_print: - print(adf_doc) - return + except Exception as e: + logger.error('JiraNotifier: exception creating main issue: %s', e) + return None - auth = None - headers = {"Content-Type": "application/json", "Accept": "application/json"} - if isinstance(self.auth, dict) and self.auth.get('email') and self.auth.get('api_token'): - auth = (self.auth.get('email'), self.auth.get('api_token')) - elif isinstance(self.auth, str) and self.auth: - headers['Authorization'] = f"Bearer {self.auth}" + def _add_comment_to_issue(self, issue_key: str, title: str, content: Dict[str, Any], full_scan_url: str | None = None) -> None: + """Add a comment with scanner results to the main Jira issue.""" + try: + import requests + from requests.auth import HTTPBasicAuth + + # Content should be in ADF format + comment_adf = content.copy() + + # Add title as heading if provided + if title: + title_heading = { + "type": "heading", + "attrs": {"level": 3}, + "content": [{"type": "text", "text": title}] + } + # Insert title at the beginning of content + comment_adf["content"] = [title_heading] + comment_adf.get("content", []) + + # Add full scan URL at the end if available + if full_scan_url: + # Add a divider + divider = {"type": "rule"} + + # Add paragraph with link to full results + scan_url_paragraph = { + "type": "paragraph", + "content": [ + {"type": "text", "text": "🔗 "}, + { + "type": "text", + "text": "View complete scan results", + "marks": [ + { + "type": "link", + "attrs": {"href": full_scan_url} + } + ] + } + ] + } + + # Append to content + comment_adf["content"].append(divider) + comment_adf["content"].append(scan_url_paragraph) + + payload = { + "body": comment_adf + } - base = self.server.rstrip('/') - summary = self.config.get('summary') or f"Socket Security Issues detected in {repo} - {branch}" + auth = HTTPBasicAuth(str(self.email), str(self.api_token)) + headers = { + 'Accept': 'application/json', + 'Content-Type': 'application/json' + } - try: - found_key = self._find_existing_issue(summary, auth, headers, base) - except 
Exception: - logger.exception('Error while searching for existing Jira issue') - found_key = None - - if found_key: - # fetch issue status and description - try: - status_desc = self._get_issue_status(found_key, auth, headers, base) - except Exception: - status_desc = None - if status_desc: - status, existing_description = status_desc - else: - status, existing_description = None, None + url = f"{self.server}/rest/api/3/issue/{issue_key}/comment" + resp = requests.post(url, auth=auth, headers=headers, json=payload, timeout=10) - # if the existing issue is resolved/done/closed, create a new one instead - if status and status.lower() in ('done', 'closed', 'resolved', 'complete'): - logger.info('Found existing issue %s but status is %s -> creating a new issue', found_key, status) - found_key = None + if resp.status_code == 201: + logger.info('JiraNotifier: added comment to issue %s for %s', issue_key, title) else: - # gather existing rows from description and comments - existing_rows: List[List[str]] = [] - if existing_description and isinstance(existing_description, dict): - existing_rows.extend(self._adf_extract_table_rows(existing_description)) + # Include source (title) and response body in warning for debugging try: - comments = self._get_issue_comments(found_key, auth, headers, base) - for c in comments: - body = c.get('body') - if isinstance(body, dict): - existing_rows.extend(self._adf_extract_table_rows(body)) + error_detail = resp.json() except Exception: - logger.exception('Error fetching comments for %s', found_key) - - # build current rows for comparison (flatten to strings) - current_rows: List[List[str]] = [] - for node in adf_doc.get('content', []): - if node.get('type') == 'table': - # reuse our extract helper by wrapping - current_rows.extend(self._adf_extract_table_rows({'content': [node]})) - - # normalize comparison by joining cells - def join_row(r: List[str]) -> str: - return '|'.join([s.strip() for s in r]) - - existing_set = set(join_row(r) for r in existing_rows) - new_rows = [r for r in current_rows if join_row(r) not in existing_set] - - if not new_rows: - logger.info('No new alerts to post to %s; skipping comment', found_key) - if should_print: - try: - print({'posted': False, 'issue': found_key}) - except Exception: - logger.debug('Failed to print Jira post status to stdout') - return - - # build a small adf doc for only new rows and post as a comment - table_headers: List[str] = [] - # infer headers from the first table node in adf_doc - for node in adf_doc.get('content', []): - if node.get('type') == 'table': - # infer header text - first_row = node.get('content', [])[0] if node.get('content') else None - if first_row and first_row.get('type') == 'tableRow': - hdrs: List[str] = [] - for cell in first_row.get('content', []): - # extract text - t = '' - for p in cell.get('content', []): - if p.get('type') == 'paragraph': - for txt in p.get('content', []): - if txt.get('type') == 'text': - t += txt.get('text', '') - hdrs.append(t) - table_headers = hdrs - break - - comment_doc = {"type": "doc", "version": 1, "content": []} - comment_doc['content'].append(_adf_paragraph('New alerts')) - comment_doc['content'].append(_adf_table(table_headers or [f'Col{i+1}' for i in range(len(new_rows[0]))], new_rows)) - - url = f"{base}/rest/api/3/issue/{found_key}/comment" - payload = {"body": comment_doc} - # use a validated auth tuple if available - auth_to_use = auth if isinstance(auth, tuple) and len(auth) == 2 and all(isinstance(x, str) and x for x in auth) else None - resp = 
requests.post(url, auth=auth_to_use, headers=headers, json=payload)
-                if resp.status_code >= 400:
-                    logger.error('Failed to post Jira comment %s: %s', resp.status_code, resp.text)
-                else:
-                    issue_url = f"{base}/browse/{found_key}"
-                    logger.info('Posted Jira comment to %s (%s)', found_key, issue_url)
-                if should_print:
-                    try:
-                        print({'posted': True, 'issue': found_key, 'new_count': len(new_rows)})
-                    except Exception:
-                        logger.debug('Failed to print Jira post status to stdout')
-                return
-
-        # Create a new issue
-        new_issue_url = f"{base}/rest/api/3/issue"
-        payload = {
-            "fields": {
-                "project": {"key": self.project},
-                "summary": summary,
-                "description": adf_doc,
-                "issuetype": {"name": self.config.get('issuetype') or 'Task'}
-            }
-        }
+                error_detail = resp.text[:200]  # First 200 chars of response
+                logger.warning('JiraNotifier (%s): failed to add comment to issue %s: %s - %s',
+                               title, issue_key, resp.status_code, error_detail)
-        # Ensure we have a requests module to use
-        http = self._requests()
-        if http is None:
-            if should_print:
-                try:
-                    print(adf_doc)
-                except Exception:
-                    logger.debug('Failed to print Jira ADF doc to stdout')
-            return
+        except Exception as e:
+            logger.error('JiraNotifier (%s): exception adding comment to issue: %s', title, e)
-        # Validate auth tuple (user, token) before passing to requests
-        auth_to_use = auth if isinstance(auth, tuple) and len(auth) == 2 and all(isinstance(x, str) and x for x in auth) else None
+    def _convert_jira_table_to_adf(self, table_lines: list) -> list:
+        """Convert Jira table markup to ADF table format."""
+        if not table_lines:
+            return []
+
+        table_rows = []
+
+        for line in table_lines:
+            line = line.strip()
+            if line.startswith('||') and line.endswith('||'):
+                # Header row
+                headers = [cell.strip() for cell in line[2:-2].split('||')]
+                header_cells = []
+                for header in headers:
+                    header_cells.append({
+                        "type": "tableHeader",
+                        "content": [
+                            {
+                                "type": "paragraph",
+                                "content": [{"type": "text", "text": header}]
+                            }
+                        ]
+                    })
+                table_rows.append({
+                    "type": "tableRow",
+                    "content": header_cells
+                })
+            elif line.startswith('|') and line.endswith('|'):
+                # Data row
+                cells = [cell.strip() for cell in line[1:-1].split('|')]
+                data_cells = []
+                for cell in cells:
+                    # Check if this cell contains code (look for common code patterns)
+                    cell_content = []
+                    if self._is_code_content(cell):
+                        # Format as code block
+                        cell_content = [
+                            {
+                                "type": "codeBlock",
+                                "content": [{"type": "text", "text": cell}]
+                            }
+                        ]
+                    else:
+                        # Regular text
+                        cell_content = [
+                            {
+                                "type": "paragraph",
+                                "content": [{"type": "text", "text": cell}]
+                            }
+                        ]
+
+                    data_cells.append({
+                        "type": "tableCell",
+                        "content": cell_content
+                    })
+                table_rows.append({
+                    "type": "tableRow",
+                    "content": data_cells
+                })
+
+        if table_rows:
+            return [{
+                "type": "table",
+                "content": table_rows
+            }]
+
+        return []
+
+    def _is_code_content(self, text: str) -> bool:
+        """Determine if text content should be formatted as code."""
+        # Check for common code patterns
+        code_indicators = [
+            '{', '}', '(', ')', ';', '=',
+            'function', 'var ', 'const ', 'let ',
+            'import ', 'require(', 'module.exports',
+            'if (', 'for (', 'while (', 'try {',
+            '.', '->', '=>', '&&', '||',
+            'console.log', 'Math.', 'parseInt',
+            'eval(', 'userId', 'user.', 'req.',
+            'allocations', 'contributions', 'users'
+        ]
+
+        # If text is longer than 50 chars and contains code indicators, it's likely code
+        if len(text) > 50:
+            code_indicator_count = sum(1 for indicator in code_indicators if indicator in text)
+            if code_indicator_count >= 2:
+                return True
+
+        # Check for specific patterns that indicate code
+        if any(pattern in text for pattern in ['{}', '()', '=>', '&&', '||', 'console.log']):
+            return True
+
+        return False
+
+    def _find_existing_issue(self, title: str) -> Optional[Dict[str, Any]]:
+        """Find existing Jira issue with the same title."""
         try:
-            resp = http.post(new_issue_url, auth=auth_to_use, headers=headers, json=payload)
-            if resp.status_code >= 400:
-                logger.error('Failed to create Jira issue %s: %s', resp.status_code, resp.text)
+            import requests
+            from requests.auth import HTTPBasicAuth
+
+            # Search for issues with exact title match using the new API endpoint
+            auth = HTTPBasicAuth(str(self.email), str(self.api_token))
+            headers = {
+                'Accept': 'application/json',
+                'Content-Type': 'application/json'
+            }
+
+            # Escape quotes in title for JQL
+            escaped_title = title.replace('"', '\\"')
+            jql = f'project = "{self.project}" AND summary ~ "{escaped_title}"'
+
+            # Use the new search/jql endpoint (POST instead of GET)
+            url = f"{self.server}/rest/api/3/search/jql"
+            payload = {
+                "jql": jql,
+                "maxResults": 10,
+                "fields": ["key", "summary"]
+            }
+
+            logger.debug('JiraNotifier: searching with JQL: %s', jql)
+
+            resp = requests.post(url, auth=auth, headers=headers, json=payload, timeout=10)
+
+            if resp.status_code == 200:
+                search_results = resp.json()
+                issues = search_results.get('issues', [])
+
+                logger.debug('JiraNotifier: search returned %d issues', len(issues))
+
+                # Look for exact title match
+                for issue in issues:
+                    issue_summary = issue.get('fields', {}).get('summary', '')
+                    if issue_summary == title:
+                        logger.info('JiraNotifier: found existing issue with matching title: %s', issue.get('key'))
+                        return issue
+
+                return None
+            elif resp.status_code == 403:
+                logger.warning('JiraNotifier: search permission denied (403). Creating new issue instead of searching for existing ones.')
+                return None
+            elif resp.status_code == 400:
+                logger.warning('JiraNotifier: invalid JQL query (400): %s. Creating new issue.', jql)
+                return None
             else:
-                try:
-                    created = resp.json()
-                    k = created.get('key')
-                    issue_url = f"{base}/browse/{k}" if k else base
-                    logger.info('Created Jira issue %s (%s)', k, issue_url)
-                except Exception:
-                    logger.info('Created Jira issue (response did not contain key)')
-        except Exception:
-            logger.exception('Failed to create Jira issue')
-
-        if should_print:
-            try:
-                print(adf_doc)
-            except Exception:
-                logger.debug('Failed to print Jira ADF doc to stdout')
+                logger.warning('JiraNotifier: failed to search for existing issues: %s - %s', resp.status_code, resp.text)
+                return None
+
+        except Exception as e:
+            logger.error('JiraNotifier: exception searching for existing issue: %s', e)
+            return None
+
+    def _update_issue_description(self, issue_key: str, summary_content: str) -> None:
+        """Update the description of an existing issue."""
+        try:
+            import requests
+            from requests.auth import HTTPBasicAuth
+
+            # Create ADF format description with summary table
+            summary_table_adf = self._convert_jira_table_to_adf(summary_content.split('\n'))
+
+            description_content = [
+                {
+                    "type": "paragraph",
+                    "content": [
+                        {"type": "text", "text": "Socket Security scan results summary (updated):"}
+                    ]
+                }
+            ]
+
+            # Add the summary table
+            description_content.extend(summary_table_adf)
+
+            # Add footer paragraph
+            description_content.append({
+                "type": "paragraph",
+                "content": [
+                    {"type": "text", "text": "Detailed findings are provided in the comments below."}
+                ]
+            })
+
+            description_adf = {
+                "type": "doc",
+                "version": 1,
+                "content": description_content
+            }
+
+            payload = {
+                "fields": {
+                    "description": description_adf
+                }
+            }
+
+            auth = HTTPBasicAuth(str(self.email), str(self.api_token))
+            headers = {
+                'Accept': 'application/json',
+                'Content-Type': 'application/json'
+            }
+
+            url = f"{self.server}/rest/api/3/issue/{issue_key}"
+            resp = requests.put(url, auth=auth, headers=headers, json=payload, timeout=10)
+
+            if resp.status_code == 204:
+                logger.info('JiraNotifier: updated issue %s description', issue_key)
+            else:
+                logger.warning('JiraNotifier: failed to update issue %s: %s', issue_key, resp.status_code)
+
+        except Exception as e:
+            logger.error('JiraNotifier: exception updating issue description: %s', e)
\ No newline at end of file
diff --git a/socket_basics/core/notification/json_notifier.py b/socket_basics/core/notification/json_notifier.py
index b6ec178..1110d54 100644
--- a/socket_basics/core/notification/json_notifier.py
+++ b/socket_basics/core/notification/json_notifier.py
@@ -22,58 +22,38 @@ def __init__(self, params: Dict[str, Any] | None = None):
         self.output_path = self.config.get('output_path') or 'notifications.json'
 
     def notify(self, facts: Dict[str, Any]) -> None:
-        # Strict: Expect NotificationManager to attach grouped `notifications`
-        # to `facts` following the new contract. Accept either a list of table
-        # dicts or a mapping of title -> {headers, rows}. Do not synthesize from
-        # components or perform any legacy fallbacks.
-        notifications = facts.get('notifications')
+        # Check if we received pre-formatted data from connectors
+        notifications = facts.get('notifications', [])
         if not notifications:
             logger.info('JsonNotifier: no notifications present in facts; skipping')
             return
 
-        # Normalize into canonical JSON to print. Accept list or dict, but
-        # validate groups contain headers and rows per new contract.
-        def _normalize(n: Any) -> List[Dict[str, Any]]:
-            out: List[Dict[str, Any]] = []
-            if isinstance(n, list):
-                for item in n:
-                    if not isinstance(item, dict):
-                        continue
-                    title = item.get('title') or 'results'
-                    headers = item.get('headers')
-                    rows = item.get('rows') if item.get('rows') is not None else []
-                    out.append({'title': title, 'headers': headers, 'rows': rows})
-            elif isinstance(n, dict):
-                for title, payload in n.items():
-                    if isinstance(payload, dict):
-                        headers = payload.get('headers')
-                        rows = payload.get('rows') if payload.get('rows') is not None else []
-                        out.append({'title': title, 'headers': headers, 'rows': rows})
-                    elif isinstance(payload, list):
-                        out.append({'title': title, 'headers': None, 'rows': payload})
-            return out
-
-        try:
-            canonical = _normalize(notifications)
-            # Enforce presence of at least one group with headers and rows. This
-            # enforces the new contract; callers must produce validated tables.
-            valid = any(isinstance(g.get('headers'), list) and isinstance(g.get('rows'), list) for g in canonical)
-            if not valid:
-                logger.info('JsonNotifier: notifications present but none match required {headers:list, rows:list} shape; skipping')
+        # New simplified format: list of {title, content} dicts
+        if isinstance(notifications, list):
+            valid_notifications = []
+            for item in notifications:
+                if isinstance(item, dict) and 'title' in item and 'content' in item:
+                    valid_notifications.append(item)
+                else:
+                    logger.warning('JsonNotifier: skipping invalid notification item: %s', type(item))
+
+            if valid_notifications:
+                # Create structured JSON output with metadata
+                json_output = {
+                    'notifications': valid_notifications,
+                    'metadata': {
+                        'repository': facts.get('repository'),
+                        'branch': facts.get('branch'),
+                        'timestamp': facts.get('timestamp'),
+                        'total_notifications': len(valid_notifications)
+                    }
+                }
+                print(json.dumps(json_output, indent=2))
                 return
-
-            output = json.dumps({'notifications': canonical}, indent=2)
-            # If an output_path is provided in params, write to that file instead of stdout
-            if self.output_path and self.output_path != '-':
-                try:
-                    with open(self.output_path, 'w', encoding='utf-8') as f:
-                        f.write(output)
-                    logger.info('Wrote notifications JSON to %s', self.output_path)
-                except Exception:
-                    logger.exception('Failed to write notifications JSON to %s', self.output_path)
             else:
-                # Default: print to stdout
-                print(output)
-                logger.info('Printed notifications JSON to stdout')
-        except Exception as e:
-            logger.exception('Failed to print notifications JSON: %s', e)
+                logger.info('JsonNotifier: no valid notifications found')
+                print(json.dumps({'notifications': [], 'metadata': {'total_notifications': 0}}, indent=2))
+                return
+        else:
+            logger.error('JsonNotifier: only supports new format - list of dicts with title/content')
+            return
diff --git a/socket_basics/core/notification/manager.py b/socket_basics/core/notification/manager.py
index 0313620..eaf3df1 100644
--- a/socket_basics/core/notification/manager.py
+++ b/socket_basics/core/notification/manager.py
@@ -4,21 +4,24 @@
 from typing import Any, Dict, List, Optional
 
 from socket_basics.core.notification.base import BaseNotifier
-from socket_basics.core.config import load_connectors_config
+from socket_basics.core.config import (
+    load_connectors_config,
+    get_slack_webhook_url,
+    get_webhook_url,
+    get_ms_sentinel_workspace_id,
+    get_jira_url,
+    get_sumologic_endpoint,
+    get_github_token,
+    get_jira_email,
+    get_jira_api_token,
+    get_socket_basics_severities,
+    get_github_workspace
+)
 
 logger = logging.getLogger(__name__)
 
 
 class NotificationManager:
-    def notify_all(self, facts: Dict[str, Any]) -> None:
-        # Debug: log facts at debug level (don't print raw structures unconditionally)
-        try:
-            logger.debug('notify_all facts: %s', {k: v for k, v in facts.items() if k != 'socket_tier1'})
-            if 'socket_tier1' in facts:
-                logger.debug('Raw socket_tier1 processed results: %s', facts.get('socket_tier1'))
-        except Exception:
-            logger.exception('Failed to debug-log notify_all facts')
-        # ...existing code...
     """Loads notifier plugins based on a config dict.
 
     Config format:
@@ -60,91 +63,79 @@ def load_from_config(self) -> None:
                 try:
                     # Slack: webhook
                     if name.lower() == 'slack':
+                        slack_url = get_slack_webhook_url()
                         if (
-                            os.getenv('SLACK_WEBHOOK_URL')
-                            or os.getenv('INPUT_SLACK_WEBHOOK_URL')
+                            slack_url
                             or (self.app_config and self.app_config.get('slack_webhook_url'))
                         ):
                             enabled = True
-                            if os.getenv('SLACK_WEBHOOK_URL'):
-                                enable_cause = 'env:SLACK_WEBHOOK_URL'
-                            elif os.getenv('INPUT_SLACK_WEBHOOK_URL'):
-                                enable_cause = 'env:INPUT_SLACK_WEBHOOK_URL'
+                            if slack_url:
+                                enable_cause = 'env:SLACK_WEBHOOK_URL or INPUT_SLACK_WEBHOOK_URL'
                             else:
                                 enable_cause = 'app_config:slack_webhook_url'
 
                     # Webhook generic
                     if name.lower() == 'webhook':
+                        webhook = get_webhook_url()
                         if (
-                            os.getenv('WEBHOOK_URL')
-                            or os.getenv('INPUT_WEBHOOK_URL')
+                            webhook
                             or (self.app_config and self.app_config.get('webhook_url'))
                         ):
                             enabled = True
-                            if os.getenv('WEBHOOK_URL'):
-                                enable_cause = 'env:WEBHOOK_URL'
-                            elif os.getenv('INPUT_WEBHOOK_URL'):
-                                enable_cause = 'env:INPUT_WEBHOOK_URL'
+                            if webhook:
+                                enable_cause = 'env:WEBHOOK_URL or INPUT_WEBHOOK_URL'
                             else:
                                 enable_cause = 'app_config:webhook_url'
 
                    # MS Sentinel
                     if name.lower() == 'ms_sentinel':
+                        sentinel_id = get_ms_sentinel_workspace_id()
                         if (
-                            os.getenv('INPUT_MS_SENTINEL_WORKSPACE_ID')
-                            or os.getenv('MS_SENTINEL_WORKSPACE_ID')
+                            sentinel_id
                             or (self.app_config and self.app_config.get('ms_sentinel_workspace_id'))
                         ):
                             enabled = True
-                            if os.getenv('MS_SENTINEL_WORKSPACE_ID'):
-                                enable_cause = 'env:MS_SENTINEL_WORKSPACE_ID'
-                            elif os.getenv('INPUT_MS_SENTINEL_WORKSPACE_ID'):
-                                enable_cause = 'env:INPUT_MS_SENTINEL_WORKSPACE_ID'
+                            if sentinel_id:
+                                enable_cause = 'env:MS_SENTINEL_WORKSPACE_ID or INPUT_MS_SENTINEL_WORKSPACE_ID'
                             else:
                                 enable_cause = 'app_config:ms_sentinel_workspace_id'
 
                     # Jira
                     if name.lower() == 'jira':
+                        jira = get_jira_url()
                         if (
-                            os.getenv('INPUT_JIRA_URL')
-                            or os.getenv('JIRA_URL')
+                            jira
                             or (self.app_config and self.app_config.get('jira_url'))
                         ):
                             enabled = True
-                            if os.getenv('JIRA_URL'):
-                                enable_cause = 'env:JIRA_URL'
-                            elif os.getenv('INPUT_JIRA_URL'):
-                                enable_cause = 'env:INPUT_JIRA_URL'
+                            if jira:
+                                enable_cause = 'env:JIRA_URL or INPUT_JIRA_URL'
                             else:
                                 enable_cause = 'app_config:jira_url'
 
                     # SumoLogic
                     if name.lower() == 'sumologic':
+                        sumologic = get_sumologic_endpoint()
                         if (
-                            os.getenv('INPUT_SUMOLOGIC_ENDPOINT')
-                            or os.getenv('SUMOLOGIC_ENDPOINT')
+                            sumologic
                             or (self.app_config and self.app_config.get('sumologic_endpoint'))
                         ):
                             enabled = True
-                            if os.getenv('SUMOLOGIC_ENDPOINT'):
-                                enable_cause = 'env:SUMOLOGIC_ENDPOINT'
-                            elif os.getenv('INPUT_SUMOLOGIC_ENDPOINT'):
-                                enable_cause = 'env:INPUT_SUMOLOGIC_ENDPOINT'
+                            if sumologic:
+                                enable_cause = 'env:SUMOLOGIC_ENDPOINT or INPUT_SUMOLOGIC_ENDPOINT'
                             else:
                                 enable_cause = 'app_config:sumologic_endpoint'
 
                     # Github PR notifier: token presence
                     if name.lower() == 'github_pr':
+                        github_token = get_github_token()
                         if (
-                            os.getenv('INPUT_GITHUB_TOKEN')
-                            or os.getenv('GITHUB_TOKEN')
+                            github_token
                             or (self.app_config and self.app_config.get('github_token'))
                         ):
                             enabled = True
-                            if os.getenv('GITHUB_TOKEN'):
-                                enable_cause = 'env:GITHUB_TOKEN'
-                            elif os.getenv('INPUT_GITHUB_TOKEN'):
-                                enable_cause = 'env:INPUT_GITHUB_TOKEN'
+                            if github_token:
+                                enable_cause = 'env:GITHUB_TOKEN or INPUT_GITHUB_TOKEN'
                             else:
                                 enable_cause = 'app_config:github_token'
                 except Exception:
@@ -158,14 +149,15 @@ def load_from_config(self) -> None:
                         self.app_config.get('console_tabular_enabled') or self.app_config.get('output_console_enabled')
                     ):
                         enabled = True
+                        enable_cause = 'app_config:console_tabular_enabled or output_console_enabled'
                     if name.lower() == 'json' and (
                         self.app_config.get('console_json_enabled') or self.app_config.get('output_json_enabled')
                     ):
                         enabled = True
-                except Exception:
-                    pass
-            except Exception:
-                pass
+                except Exception as e:
+                    logger.debug(f"  Exception in console notifier check: {e}")
+            except Exception as e:
+                logger.debug(f"  Exception in notifier enablement check: {e}")
 
             # If any connector requested this notifier via its notification_method param, enable it
             for connector_name, connector_cfg in connectors_cfg.items():
@@ -217,18 +209,21 @@ def load_from_config(self) -> None:
                    val = None
                    if self.app_config and pname in self.app_config:
                        val = self.app_config.get(pname)
-                   elif env_var and os.getenv(env_var) is not None:
+                   if env_var and os.getenv(env_var) is not None:
                        ev = os.getenv(env_var)
-                       if p_type == 'bool':
-                           val = ev.lower() == 'true'
-                       elif p_type == 'int':
-                           try:
-                               val = int(ev)
-                           except Exception:
-                               logger.warning("Failed to convert notifier param %s=%s to int for notifier %s; using default %s", pname, ev, name, p_default)
-                               val = p_default
+                       if ev is not None:
+                           if p_type == 'bool':
+                               val = ev.lower() == 'true'
+                           elif p_type == 'int':
+                               try:
+                                   val = int(ev)
+                               except Exception:
+                                   logger.warning("Failed to convert notifier param %s=%s to int for notifier %s; using default %s", pname, ev, name, p_default)
+                                   val = p_default
+                           else:
+                               val = ev
                        else:
-                           val = ev
+                           val = p_default
-                   else:
-                       val = p_default
+                   elif val is None:
+                       val = p_default
@@ -246,8 +241,8 @@ def load_from_config(self) -> None:
                 # details into params if they exist in the environment or app_config
                 try:
                     if name.lower() == 'jira':
-                        jira_email = os.getenv('INPUT_JIRA_EMAIL') or os.getenv('JIRA_EMAIL') or (self.app_config and self.app_config.get('jira_email'))
-                        jira_token = os.getenv('INPUT_JIRA_API_TOKEN') or os.getenv('JIRA_API_TOKEN') or (self.app_config and self.app_config.get('jira_api_token'))
+                        jira_email = get_jira_email() or (self.app_config and self.app_config.get('jira_email'))
+                        jira_token = get_jira_api_token() or (self.app_config and self.app_config.get('jira_api_token'))
                         if jira_email or jira_token:
                             # ensure params contains auth dict expected by JiraNotifier
                             params['auth'] = {'email': jira_email, 'api_token': jira_token}
@@ -274,12 +269,23 @@ def load_from_config(self) -> None:
                 logger.exception("Failed to load notifier %s: %s", name, e)
 
     def notify_all(self, facts: Dict[str, Any]) -> None:
+        # Add repository, branch, and commit info from main config to facts
+        # so notifiers can access this information for title formatting
+        if self.app_config:
+            facts['repository'] = self.app_config.get('repo', 'Unknown')  # Note: uses 'repo' not 'repository'
+            facts['branch'] = self.app_config.get('branch', 'Unknown')
+            facts['commit_hash'] = self.app_config.get('commit_hash', 'Unknown')
+            # Add full scan URL if available
+            full_scan_url = self.app_config.get('full_scan_html_url')
+            if full_scan_url:
+                facts['full_scan_html_url'] = full_scan_url
+
         # Determine allowed severities for notifications. Honor SOCKET_BASICS_SEVERITIES
         # environment variable (comma-separated), fall back to INPUT_FINDING_SEVERITIES,
         # and default to critical,high when not provided.
         try:
-            sev_env = os.getenv('SOCKET_BASICS_SEVERITIES') or os.getenv('INPUT_FINDING_SEVERITIES')
-            if sev_env is None:
+            sev_env = get_socket_basics_severities()
+            if not sev_env:
                 allowed_severities = {'critical', 'high'}
             else:
                 allowed_severities = {s.strip().lower() for s in str(sev_env).split(',') if s.strip()}
@@ -312,7 +318,7 @@ def notify_all(self, facts: Dict[str, Any]) -> None:
         # can display something meaningful when not running in GH actions.
         try:
             if not facts.get('repository'):
-                workspace = (self.app_config or {}).get('workspace') or os.getenv('GITHUB_WORKSPACE')
+                workspace = (self.app_config or {}).get('workspace') or get_github_workspace()
                 if workspace:
                     try:
                         from pathlib import Path
@@ -328,7 +334,10 @@ def notify_all(self, facts: Dict[str, Any]) -> None:
         for c_name, c_cfg in connectors_cfg.items():
             for p in c_cfg.get('parameters', []) or []:
                 if isinstance(p, dict) and p.get('name') and p.get('group'):
-                    param_to_group[p.get('name')] = p.get('group')
+                    pname = p.get('name')
+                    pgroup = p.get('group')
+                    if pname and pgroup:
+                        param_to_group[pname] = pgroup
 
         # Helper: determine group for an alert using props/connector heuristics
         def _alert_group(alert: Dict[str, Any], comp: Dict[str, Any]) -> str:
@@ -379,253 +388,113 @@ def _alert_group(alert: Dict[str, Any], comp: Dict[str, Any]) -> str:
             return 'Ungrouped'
 
-        # If connectors already attached `facts['notifications']`, try to filter them
-        # by allowed severities where possible, but otherwise respect connector rows.
-        # Support two shapes for connector-provided notifications:
-        #  - {group_label: [row1, row2, ...]}
-        #  - {group_label: {'headers': [...], 'rows': [[...], ...]}}
+        # Handle simplified per-notifier format from connectors
+        # Connectors now provide notifications in simplified format:
+        # {'notifier_key': [{'title': '...', 'content': 'formatted_content'}, ...]}
+        # Severity filtering should be done in the connectors, not here
+        per_notifier_notifications = {}
         if facts.get('notifications'):
             try:
-                logger.debug('Facts already contains notifications; attempting to apply severity filtering')
-            except Exception:
-                pass
-
-            try:
+                logger.debug('Processing connector-provided per-notifier notifications')
                 raw_notifs = facts.get('notifications') or {}
-                filtered: Dict[str, Any] = {}
-
-                def _process_payload_and_filter(group_label: str, headers, rows):
-                    new_rows = []
-                    if not rows:
-                        return None
-                    for r in rows:
-                        sev_found = None
-                        try:
-                            # If headers are present, require a Severity header to perform filtering
-                            if headers:
-                                found_sev_index = None
-                                for i, h in enumerate(headers):
-                                    try:
-                                        if isinstance(h, str) and h.strip().lower() == 'severity':
-                                            found_sev_index = i
-                                            break
-                                    except Exception:
-                                        continue
-                                if found_sev_index is None:
-                                    # Connector provided headers but omitted Severity column.
- # Warn and conservatively include all rows for this group - logger.warning("Connector-provided notifications for '%s' missing 'Severity' header; skipping severity filtering for this group", group_label) - sev_found = None - else: - if isinstance(r, (list, tuple)) and found_sev_index < len(r): - sev_found = str(r[found_sev_index] or '').lower() - - # Fallback heuristic: most tables put severity in idx 1 - if not sev_found: - if isinstance(r, (list, tuple)): - if len(r) > 1: - sev_found = str(r[1] or '').lower() - if not sev_found or sev_found == '': - for cell in r: - try: - if isinstance(cell, str) and str(cell).strip().lower() in allowed_severities: - sev_found = str(cell).strip().lower() - break - except Exception: - continue - except Exception: - sev_found = None - - # If we couldn't determine a severity, conservatively include the row - if not sev_found or sev_found in allowed_severities: - new_rows.append(r) - - if new_rows: - return {'headers': headers, 'rows': new_rows} - return None - - # raw_notifs may be a mapping {group_label: payload} or a list of table-dicts - if isinstance(raw_notifs, dict): - for group_label, payload in raw_notifs.items(): - headers = None - rows = [] - if isinstance(payload, dict) and 'rows' in payload: - headers = payload.get('headers') or [] - rows = payload.get('rows') or [] - elif isinstance(payload, list): - rows = payload - else: - # unknown payload shape: skip it - continue - processed = _process_payload_and_filter(group_label, headers, rows) - if processed: - filtered[group_label] = processed - elif isinstance(raw_notifs, list): - for item in raw_notifs: - if not isinstance(item, dict): + # Check if this is the new per-notifier format + # (keys like 'github_pr', 'slack', 'console' vs old semantic groups) + known_notifier_keys = { + 'github_pr', 'slack', 'msteams', 'ms_sentinel', 'sumologic', + 'json', 'console', 'jira', 'webhook' + } + + has_notifier_keys = any(key in known_notifier_keys for key in raw_notifs.keys()) if isinstance(raw_notifs, dict) else False + + if has_notifier_keys: + # New simplified format: connectors provide pre-formatted content + # No filtering needed - connectors handle severity filtering + for notifier_key, payload in raw_notifs.items(): + if notifier_key not in known_notifier_keys: continue - title = item.get('title') or 'results' - headers = item.get('headers') - rows = item.get('rows') or [] - processed = _process_payload_and_filter(title, headers, rows) - if processed: - filtered[title] = processed - else: - # unrecognized notifications shape; skip filtering - filtered = {} - - # Prune any groups that ended up with zero rows (defensive) - for g in list(filtered.keys()): - payload = filtered.get(g) - try: - if not payload or not isinstance(payload, dict) or not (payload.get('rows') or []): - del filtered[g] - except Exception: - try: - del filtered[g] - except Exception: - pass - - # Attach filtered notifications back to facts for notifiers. 
- facts['notifications'] = filtered - if not filtered: - try: - logger.info('No notifications remain after severity filtering; skipping notifier delivery') - except Exception: - pass - return - except Exception: - logger.exception('Failed while attempting to filter connector-provided notifications by severity') - - # Attach to notifiers straight away - for n in self.notifiers: - try: - n.notify(facts) - except Exception: - logger.exception("Notifier %s failed", getattr(n, "name", n.__class__.__name__)) - return - - # Special handling: always use connector notification_rows for Socket Tier 1 - # Connector may supply a dict with headers and rows; prefer that shape - notifications: Dict[str, Any] = {} - if 'socket_tier1' in facts: - # If connectors attached a `notifications` mapping with headers+rows, - # respect it verbatim (this allows connectors to control headings and - # column counts). If not present, fall back to the scanner.notification_rows - # legacy method (list-of-rows). - provided = facts.get('notifications') or {} - # Accept either the canonical top-level mapping or connector-attached mapping - if isinstance(provided, dict) and provided.get('Socket Tier 1 Reachability'): - # If the connector provides headers/rows, use them directly - notifications = provided - else: - # Fall back to scanner.notification_rows when no connector-provided mapping exists - from socket_basics.core.connector.socket_tier1.scanner import SocketTier1Scanner - scanner = SocketTier1Scanner(config=None) - rows = scanner.notification_rows(facts) - try: - logger.debug('Rows returned by SocketTier1Scanner.notification_rows: %s', rows) - except Exception: - logger.exception('Failed to debug-log socket_tier1 rows') - - # If rows are present, attach them under the standard group label - if rows: - notifications['Socket Tier 1 Reachability'] = rows - - # Attach to facts and short-circuit the rest of the pipeline - if notifications: - try: - facts['notifications'] = notifications - logger.debug('Attached socket_tier1 notifications (authoritative): %s', notifications) - except Exception: - logger.exception('Failed to attach socket_tier1 notifications') - for n in self.notifiers: - try: - n.notify(facts) - except Exception: - logger.exception("Notifier %s failed", getattr(n, "name", n.__class__.__name__)) - return - else: - # Do not synthesize notification tables from component alerts here. - # Connectors are authoritative for producing `facts['notifications']` in the - # desired headers/rows shape. If no connector-provided notifications are - # present, leave `facts` as-is so notifiers can decide how to render - # `facts['components']` (this avoids manager guessing table shapes). - logger.debug('No connector-provided notifications present; leaving facts.components intact for notifiers to render') - - # Attach notifications to facts so notifiers can render grouped tables. - # If socket_tier1 attached authoritative rows above, skip canonicalization - # to avoid mutating the connector-provided rows. 
- try: - if not facts.get('_socket_tier1_rows_attached'): - # Canonicalize SAST rows so all notifiers receive a consistent - # 4-column shape: [rule, file_path, lines, snippet] - for g, rows in list(notifications.items()): - if g.lower().startswith('sast'): - new_rows = [] - for r in rows: - try: - # Map common legacy shapes into [rule, file_path, lines, snippet] - rule = '' - full_path = '' - lines = '' - snippet = '' - if len(r) >= 5: - first = (r[0] or '').lower() if isinstance(r[0], str) else '' - if first in ('python', 'javascript', 'js', 'typescript', 'java', 'ruby', 'go', 'php', 'csharp', 'c', 'cpp', 'rust', 'kotlin', 'scala', 'swift'): - # legacy: [language, rule, file, lines, snippet] - rule = r[1] - full_path = r[2] - lines = r[3] - snippet = r[4] - else: - # legacy: [rule, file_name, location, lines, snippet] - rule = r[0] - full_path = r[2] if len(r) > 2 else (r[1] if len(r) > 1 else '') - lines = r[3] - snippet = r[4] - elif len(r) == 4: - # expected canonical shape: [rule, file_path, lines, snippet] - rule, full_path, lines, snippet = r - elif len(r) == 3: - # [rule, file, lines] -> no snippet - rule, full_path, lines = r - snippet = '' + + # Validate payload format: should be list of {title, content} dicts + if isinstance(payload, list): + valid_items = [] + for item in payload: + if isinstance(item, dict) and 'title' in item and 'content' in item: + valid_items.append(item) else: - rule = r[0] if len(r) > 0 else '' - full_path = r[1] if len(r) > 1 else '' - lines = r[2] if len(r) > 2 else '' - snippet = r[3] if len(r) > 3 else '' - - new_rows.append([rule, full_path, lines, snippet]) - except Exception: - # If canonicalization fails, keep original row to avoid data loss - new_rows.append(r) - notifications[g] = new_rows - - if notifications: - facts['notifications'] = notifications + logger.warning('Invalid notification item for %s: expected {title, content}, got %s', + notifier_key, type(item)) + + if valid_items: + per_notifier_notifications[notifier_key] = valid_items + else: + logger.warning('Invalid payload format for %s: expected list, got %s', + notifier_key, type(payload)) + + logger.debug('Processed %d notifier-specific notification formats', len(per_notifier_notifications)) else: - facts.setdefault('notifications', {}) - # Debug: log the notifications dict that will be passed to notifiers - try: - logger.debug('Grouped notifications to be passed to notifiers: %s', facts.get('notifications')) + # No new format notifications found + logger.debug('No per-notifier notification formats found') + except Exception: - logger.exception('Failed to debug-log notifications') - except Exception: - logger.exception('Failed to attach grouped notifications to facts') + logger.exception('Failed to process connector-provided notifications') + + # Don't return early - continue to per-notifier filtering logic below - # Debug: dump notifications just before running notifiers to help - # track down any remaining legacy-shaped rows (e.g. a leading "Language" - # column). This writes a small JSON file to /tmp for inspection during - # local runs. 
-        # Call notifiers
-        # Debug: write a snapshot of grouped notifications to a temp file so
-        # we can inspect the exact rows passed to notifiers when debugging
+        # All connectors now use the new simplified per-notifier format, no legacy processing needed
+        # Call notifiers with their specific pre-formatted data
         for n in self.notifiers:
             try:
-                n.notify(facts)
+                # Create a copy of facts for this notifier
+                notifier_facts = facts.copy()
+
+                # Map notifier class names to their expected notification keys
+                notifier_name = getattr(n, "name", n.__class__.__name__.lower())
+                notifier_key_map = {
+                    'console': 'console',
+                    'consolenotifier': 'console',
+                    'slack': 'slack',
+                    'slacknotifier': 'slack',
+                    'github_pr': 'github_pr',
+                    'githubprnotifier': 'github_pr',
+                    'jira': 'jira',
+                    'jiranotifier': 'jira',
+                    'msteams': 'msteams',
+                    'msteamsnotifier': 'msteams',
+                    'ms_teams': 'msteams',
+                    'ms_sentinel': 'ms_sentinel',
+                    'mssentinelnotifier': 'ms_sentinel',
+                    'sumologic': 'sumologic',
+                    'sumologicnotifier': 'sumologic',
+                    'json': 'json',
+                    'jsonnotifier': 'json',
+                    'webhook': 'webhook',
+                    'webhooknotifier': 'webhook'
+                }
+
+                # Get the appropriate notification key for this notifier
+                notification_key = notifier_key_map.get(notifier_name.lower(), notifier_name.lower())
+
+                # Debug logging
+                if per_notifier_notifications:
+                    logger.debug('Notifier %s -> notification_key %s, per_notifier_notifications keys: %s',
+                                 notifier_name, notification_key, list(per_notifier_notifications.keys()))
+                else:
+                    logger.debug('Notifier %s -> notification_key %s, per_notifier_notifications is empty/None',
+                                 notifier_name, notification_key)
+
+                # If we have pre-formatted data for this notifier, use it
+                if per_notifier_notifications and notification_key in per_notifier_notifications:
+                    # Pass the per-notifier data in the simplified format: [{'title': '...', 'content': '...'}, ...]
+                    notifier_data = per_notifier_notifications[notification_key]
+                    notifier_facts['notifications'] = notifier_data
+                    logger.debug('Using pre-formatted data for notifier %s: %s items', notifier_name, len(notifier_data) if isinstance(notifier_data, list) else 1)
+                else:
+                    # No pre-formatted data available - skip this notifier to avoid sending wrong format
+                    logger.debug('No pre-formatted data found for notifier %s (key: %s), skipping to avoid format mismatch', notifier_name, notification_key)
+                    continue
+
+                n.notify(notifier_facts)
             except Exception:
                 logger.exception("Notifier %s failed", getattr(n, "name", n.__class__.__name__))
diff --git a/socket_basics/core/notification/ms_sentinel_notifier.py b/socket_basics/core/notification/ms_sentinel_notifier.py
index 0f5fbe5..9555daa 100644
--- a/socket_basics/core/notification/ms_sentinel_notifier.py
+++ b/socket_basics/core/notification/ms_sentinel_notifier.py
@@ -1,91 +1,110 @@
-import json
+from typing import Any, Dict
 import logging
-import os
-from typing import Any, Dict, List
-from .base import BaseNotifier
+
+from socket_basics.core.notification.base import BaseNotifier
+from socket_basics.core.config import (
+    get_ms_sentinel_workspace_id, get_ms_sentinel_shared_key,
+    get_ms_sentinel_collector_url
+)
 
 logger = logging.getLogger(__name__)
 
 
 class MSSentinelNotifier(BaseNotifier):
-    """Microsoft Sentinel notifier: builds compact JSON bodies suitable for ingestion
-    by Sentinel HTTP Data Collector API. This follows JsonNotifier mapping style.
+    """Microsoft Sentinel notifier: sends security findings to Sentinel HTTP Data Collector.
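+
+    Expects facts['notifications'] to be a list of {'title', 'content'} dicts
+    (pre-formatted by connectors); each item is sent as a separate Sentinel event.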
+
+    Simplified version that works with pre-formatted content from connectors.
     """
 
     name = "ms_sentinel"
 
     def __init__(self, params: Dict[str, Any] | None = None):
         super().__init__(params or {})
-        # read configuration from params or environment variables
-        self.workspace_id = self.config.get('workspace_id') or os.getenv('INPUT_MS_SENTINEL_WORKSPACE_ID')
-        self.shared_key = self.config.get('shared_key') or os.getenv('INPUT_MS_SENTINEL_SHARED_KEY')
-        # collector_url optional override
-        self.collector_url = self.config.get('collector_url') or os.getenv('INPUT_MS_SENTINEL_COLLECTOR_URL')
+        # MS Sentinel configuration from params, env variables, or app config
+        self.workspace_id = (
+            self.config.get('workspace_id') or
+            get_ms_sentinel_workspace_id()
+        )
+        self.shared_key = (
+            self.config.get('shared_key') or
+            get_ms_sentinel_shared_key()
+        )
+        self.collector_url = (
+            self.config.get('collector_url') or
+            get_ms_sentinel_collector_url()
+        )
 
     def notify(self, facts: Dict[str, Any]) -> None:
-        # Sentinel expects flattened event entries. Require new notification
-        # contract and do not synthesize events from components.
-        notifications = facts.get('notifications')
+        notifications = facts.get('notifications', []) or []
+
+        if not isinstance(notifications, list):
+            logger.error('MSSentinelNotifier: only supports new format - list of dicts with title/content')
+            return
+
         if not notifications:
-            logger.info('MSSentinelNotifier: no notifications present in facts; skipping')
+            logger.info('MSSentinelNotifier: no notifications present; skipping')
             return
 
-        # Flatten groups into events list
-        events: List[Dict[str, Any]] = []
-        if isinstance(notifications, list):
-            for grp in notifications:
-                if not isinstance(grp, dict):
-                    continue
-                rows = grp.get('rows') or []
-                headers = grp.get('headers') or []
-                for r in rows:
-                    # map row columns to known fields where possible
-                    ev = {'repository': facts.get('repository'), 'branch': facts.get('branch')}
-                    try:
-                        # Best-effort mapping by header names
-                        for i, h in enumerate(headers):
-                            key = str(h).strip().lower()
-                            ev[key] = r[i] if i < len(r) else None
-                    except Exception:
-                        pass
-                    events.append(ev)
-        elif isinstance(notifications, dict):
-            for title, payload in notifications.items():
-                if not isinstance(payload, dict):
-                    continue
-                headers = payload.get('headers') or []
-                for r in payload.get('rows') or []:
-                    ev = {'repository': facts.get('repository'), 'branch': facts.get('branch'), 'group': title}
-                    try:
-                        for i, h in enumerate(headers):
-                            ev[str(h).strip().lower()] = r[i] if i < len(r) else None
-                    except Exception:
-                        pass
-                    events.append(ev)
+        # Get full scan URL if available
+        full_scan_url = facts.get('full_scan_html_url')
+
+        # Validate format
+        valid_notifications = []
+        for item in notifications:
+            if isinstance(item, dict) and 'title' in item and 'content' in item:
+                # Append full scan URL to content if available
+                content = item['content']
+                if full_scan_url:
+                    content += f"\n\nFull scan results: {full_scan_url}"
+                item = {'title': item['title'], 'content': content}
+                valid_notifications.append(item)
+            else:
+                logger.warning('MSSentinelNotifier: skipping invalid notification item: %s', type(item))
+
+        if not valid_notifications:
             return
 
-        body = {'repository': facts.get('repository'), 'branch': facts.get('branch'), 'events': events}
+        # Send each notification as a separate Sentinel event
+        for item in valid_notifications:
+            title = item['title']
+            content = item['content']
+            self._send_sentinel_event(facts, title, content)
 
-        # If configuration not provided, print JSON to stdout for debugging
-        if not (self.workspace_id and self.shared_key):
-            logger.info('Sentinel credentials not configured; printing payload to stdout')
-            try:
-                print(json.dumps(body, indent=2))
-            except Exception:
-                logger.debug('Failed to print sentinel payload to stdout')
+    def _send_sentinel_event(self, facts: Dict[str, Any], title: str, content: str) -> None:
+        """Send a single event to Microsoft Sentinel with title and content."""
+        if not all([self.workspace_id, self.shared_key]):
+            logger.warning('MSSentinelNotifier: missing required configuration (workspace_id, shared_key)')
             return
 
+        # Get repository and branch info from config (discovered by main logic)
+        repo = self.config.get('repository', 'Unknown')
+        branch = self.config.get('branch', 'Unknown')
+
+        # Create Sentinel event payload with pre-formatted content
+        event = {
+            'TimeGenerated': facts.get('timestamp'),
+            'Source': 'SocketSecurity',
+            'Repository': repo,
+            'Branch': branch,
+            'Severity': 'High',
+            'Title': title,
+            'Content': content,
+            'EventType': 'SecurityFinding'
+        }
+
         try:
-            import requests
-            collector_url = self.config.get('collector_url') or self.collector_url
-            if collector_url:
-                resp = requests.post(collector_url, json=events)
+            if self.collector_url:
+                # Use custom collector URL if provided
+                import requests
+                resp = requests.post(self.collector_url, json=[event], timeout=10)
                 if resp.status_code >= 400:
-                    logger.error('Sentinel collector returned %s: %s', resp.status_code, resp.text)
+                    logger.warning('MSSentinelNotifier: collector error %s: %s', resp.status_code, resp.text[:200])
+                else:
+                    logger.info('MSSentinelNotifier: sent event for "%s"', title)
             else:
-                try:
-                    print(json.dumps(body, indent=2))
-                except Exception:
-                    logger.debug('Failed to print sentinel payload to stdout')
-        except Exception:
-            logger.exception('Failed to send to Sentinel')
+                # Would need to implement Sentinel HTTP Data Collector API authentication here
+                # For now, just log the event
+                logger.info('MSSentinelNotifier: would send event for "%s" (collector URL not configured)', title)
+
+        except Exception as e:
+            logger.error('MSSentinelNotifier: exception sending event: %s', e)
diff --git a/socket_basics/core/notification/ms_teams_notifier.py b/socket_basics/core/notification/ms_teams_notifier.py
index e3bfbb3..9896b30 100644
--- a/socket_basics/core/notification/ms_teams_notifier.py
+++ b/socket_basics/core/notification/ms_teams_notifier.py
@@ -1,152 +1,105 @@
-import json
+from typing import Any, Dict
 import logging
-import os
-import subprocess
-from typing import Any, Dict, List
-from .base import BaseNotifier
+
+from socket_basics.core.notification.base import BaseNotifier
+from socket_basics.core.config import get_msteams_webhook_url
 
 logger = logging.getLogger(__name__)
 
 
 class MSTeamsNotifier(BaseNotifier):
-    """Microsoft Teams notifier using incoming webhook connectors.
-
-    This follows the same pattern as SlackNotifier: it reads a webhook URL from
-    params or environment variables and posts grouped notifications (produced
-    by NotificationManager) as a simple card payload compatible with Teams
-    incoming webhooks.
+    """Microsoft Teams notifier: posts security findings to Teams webhook.
+
+    Simplified version that works with pre-formatted content from connectors.
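+
+    Expects facts['notifications'] to be a list of {'title', 'content'} dicts
+    (pre-formatted by connectors); each item is posted as a separate MessageCard.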
""" name = "msteams" def __init__(self, params: Dict[str, Any] | None = None): super().__init__(params or {}) + # Teams webhook URL from params, env variable, or app config self.webhook_url = ( - self.config.get('webhook_url') - or os.getenv('MSTEAMS_WEBHOOK_URL') - or os.getenv('INPUT_MSTEAMS_WEBHOOK_URL') + self.config.get('webhook_url') or + get_msteams_webhook_url() ) - self.enabled = True if self.webhook_url else False - self.title = self.config.get('title') or 'Socket Security' - - def _derive_repo_branch(self, facts: Dict[str, Any]) -> tuple[str | None, str | None]: - repo = facts.get('repository') or os.getenv('GITHUB_REPOSITORY') - branch = facts.get('branch') or os.getenv('GITHUB_REF') - if branch and branch.startswith('refs/heads/'): - branch = branch.split('refs/heads/')[-1] - - if not branch: - try: - branch = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], text=True).strip() - except Exception: - branch = None - - if not repo: - try: - url = subprocess.check_output(['git', 'config', '--get', 'remote.origin.url'], text=True).strip() - if url.endswith('.git'): - url = url[:-4] - if url.startswith('git@'): - repo = url.split(':', 1)[1] - else: - parts = url.rstrip('/').split('/') - if len(parts) >= 2: - repo = f"{parts[-2]}/{parts[-1]}" - else: - repo = url - except Exception: - repo = None - - return repo, branch + self.title = self.config.get('title', 'Socket Security') def notify(self, facts: Dict[str, Any]) -> None: - # Require canonical notifications provided by NotificationManager. - notifications = facts.get('notifications') + notifications = facts.get('notifications', []) or [] + + if not isinstance(notifications, list): + logger.error('MSTeamsNotifier: only supports new format - list of dicts with title/content') + return + if not notifications: - logger.info('MSTeamsNotifier: no notifications present in facts; skipping') + logger.info('MSTeamsNotifier: no notifications present; skipping') return - # Normalize groups into canonical list form and validate headers/rows - groups: List[Dict[str, Any]] = [] - if isinstance(notifications, list): - for item in notifications: - if not isinstance(item, dict): - continue - groups.append({'title': item.get('title') or 'results', 'headers': item.get('headers'), 'rows': item.get('rows') or []}) - elif isinstance(notifications, dict): - for title, payload in notifications.items(): - if isinstance(payload, dict): - groups.append({'title': title, 'headers': payload.get('headers'), 'rows': payload.get('rows') or []}) - elif isinstance(payload, list): - groups.append({'title': title, 'headers': None, 'rows': payload}) - - valid = any(isinstance(g.get('headers'), list) and isinstance(g.get('rows'), list) for g in groups) - if not valid: - logger.info('MSTeamsNotifier: notifications present but none match required {headers:list, rows:list} shape; skipping') + # Get full scan URL if available + full_scan_url = facts.get('full_scan_html_url') + + # Validate format + valid_notifications = [] + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + # Append full scan URL to content if available + content = item['content'] + if full_scan_url: + content += f"\n\n🔗 [View complete scan results]({full_scan_url})" + item = {'title': item['title'], 'content': content} + valid_notifications.append(item) + else: + logger.warning('MSTeamsNotifier: skipping invalid notification item: %s', type(item)) + + if not valid_notifications: return - repo, branch = self._derive_repo_branch(facts) - 
repo_display = repo or (facts.get('workspace') or os.getenv('GITHUB_WORKSPACE') or 'unknown') - branch_display = branch or 'unknown' - - total = sum(len(g.get('rows') or []) for g in groups) + # Send each notification as a separate Teams message + for item in valid_notifications: + title = item['title'] + content = item['content'] + self._send_teams_message(facts, title, content) - # Build visually-pleasing MessageCard sections - sections: List[Dict[str, Any]] = [] - # Add a facts-style summary section with repo/branch/total - facts_list = [ - {"name": "Repository", "value": str(repo_display)}, - {"name": "Branch", "value": str(branch_display)}, - {"name": "Total Alerts", "value": str(total)}, - ] - - # Compose the card title and summary derived from repo/branch/total - derived_title = f"Socket Security  {repo_display}  branch {branch_display}  {total} alert(s)" - - # Build group sections with short, readable lines - for g in groups: - group_label = g.get('title') - rows = g.get('rows') or [] - if not rows: - continue - text_lines: List[str] = [] - # Show up to 10 items per group for brevity - for r in rows[:10]: - if isinstance(r, (list, tuple)) and len(r) >= 4: - rule = r[0] - file = r[1] - loc = r[2] - lines = r[3] - text_lines.append(f"\u2022 {rule} \u2014 {file}:{loc} (lines: {lines})") - else: - # fallback to stringified row - text_lines.append(f"\u2022 {str(r)}") - - sections.append({ - "activityTitle": group_label, - "text": "\n".join(text_lines), - }) - - payload = { - "@type": "MessageCard", - "@context": "https://schema.org/extensions", - "summary": derived_title, - "themeColor": "0078D7", - "title": derived_title, - "sections": [{"facts": facts_list, "markdown": True}] + sections, - } - - url = self.webhook_url or getattr(self, 'app_config', {}).get('msteams_webhook_url') - if not url: - logger.info('MS Teams webhook URL not configured; skipping notification') + def _send_teams_message(self, facts: Dict[str, Any], title: str, content: str) -> None: + """Send a single Teams message with title and content.""" + if not self.webhook_url: + logger.warning('MSTeamsNotifier: no Teams webhook URL configured') return + # Get repository and branch info from config (discovered by main logic) + repo = self.config.get('repository', 'Unknown') + branch = self.config.get('branch', 'Unknown') + try: + # Create Teams MessageCard payload with pre-formatted content + payload = { + "@type": "MessageCard", + "@context": "https://schema.org/extensions", + "summary": f"Socket Security - {title}", + "themeColor": "FF6B35", + "title": f"🔍 Socket Security - {title}", + "sections": [ + { + "facts": [ + {"name": "Repository", "value": repo}, + {"name": "Branch", "value": branch} + ], + "markdown": True + }, + { + "activityTitle": title, + "text": content, + "markdown": True + } + ] + } + import requests - - resp = requests.post(url, json=payload) + resp = requests.post(self.webhook_url, json=payload, timeout=10) if resp.status_code >= 400: - logger.error('MS Teams webhook error %s: %s', resp.status_code, resp.text) - except Exception: - logger.exception('Failed to send MS Teams notification') + logger.warning('MSTeamsNotifier: webhook error %s: %s', resp.status_code, resp.text[:200]) + else: + logger.info('MSTeamsNotifier: posted message for "%s"', title) + + except Exception as e: + logger.error('MSTeamsNotifier: exception posting message: %s', e) diff --git a/socket_basics/core/notification/slack_notifier.py b/socket_basics/core/notification/slack_notifier.py index 1bae1b9..ded2a22 100644 --- 
a/socket_basics/core/notification/slack_notifier.py +++ b/socket_basics/core/notification/slack_notifier.py @@ -1,294 +1,115 @@ -import json +from typing import Any, Dict import logging -import os -import subprocess -from typing import Any, Dict, List -from .base import BaseNotifier +from socket_basics.core.notification.base import BaseNotifier +from socket_basics.core.config import get_slack_webhook_url, get_github_repository logger = logging.getLogger(__name__) class SlackNotifier(BaseNotifier): - """Slack notifier that prefers grouped notifications (from NotificationManager) - and renders them into Slack Block Kit payloads for incoming webhooks. + """Slack notifier: posts security findings to Slack webhook. + + Simplified version that works with pre-formatted content from connectors. """ name = "slack" def __init__(self, params: Dict[str, Any] | None = None): super().__init__(params or {}) - # read configuration from params or environment variables - # prefer uppercase SLACK_* env vars; fall back to INPUT_* names for compatibility + # Slack webhook URL from params, env variable, or app config self.webhook_url = ( - self.config.get('webhook_url') - or os.getenv('SLACK_WEBHOOK_URL') - or os.getenv('INPUT_SLACK_WEBHOOK_URL') + self.config.get('webhook_url') or + get_slack_webhook_url() ) - # previously supported an explicit enabled flag; posting now depends solely on presence of webhook URL - self.enabled = True if self.webhook_url else False - # fixed username label self.username = "Socket Security" + + # Get repository from config or environment + self.repository = ( + self.config.get('repository') or + get_github_repository() or + 'Unknown' + ) def notify(self, facts: Dict[str, Any]) -> None: - # Normalize canonical notifications shape: list of tables -> mapping title -> {'headers','rows'} - raw_notifications = facts.get('notifications') or {} - notifications: Dict[str, Dict[str, Any]] = {} - if isinstance(raw_notifications, list): - for t in raw_notifications: - try: - title = t.get('title') or 'results' - headers = t.get('headers') - rows = t.get('rows') or [] - notifications[title] = {'headers': headers, 'rows': rows} - except Exception: - continue - elif isinstance(raw_notifications, dict): - for title, payload in raw_notifications.items(): - try: - if isinstance(payload, dict): - headers = payload.get('headers') - rows = payload.get('rows') or [] - notifications[title] = {'headers': headers, 'rows': rows} - elif isinstance(payload, list): - # legacy mapping of title -> rows (no headers); skip per-manager contract - logger.warning('SlackNotifier: legacy notification mapping received for %s without headers; skipping', title) - else: - logger.warning('SlackNotifier: unexpected payload type for %s: %s; skipping', title, type(payload)) - except Exception: - continue - + notifications = facts.get('notifications', []) or [] + + if not isinstance(notifications, list): + logger.error('SlackNotifier: only supports new format - list of dicts with title/content') + return + if not notifications: - logger.debug('No grouped notifications available for Slack') + logger.info('SlackNotifier: no notifications present; skipping') return - # Build Slack blocks using native Block Kit fields for better readability - blocks: List[Dict[str, Any]] = [] - total = sum(len(v) for v in notifications.values()) - - # Attempt to derive repository and branch from facts, env, or git - repo = facts.get('repository') or os.getenv('GITHUB_REPOSITORY') - branch = facts.get('branch') or os.getenv('GITHUB_REF') - if 
branch and branch.startswith('refs/heads/'): - branch = branch.split('refs/heads/')[-1] - - if not branch: - try: - branch = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], text=True).strip() - except Exception: - branch = None - - if not repo: - try: - url = subprocess.check_output(['git', 'config', '--get', 'remote.origin.url'], text=True).strip() - if url.endswith('.git'): - url = url[:-4] - if url.startswith('git@'): - # git@github.com:owner/repo - repo = url.split(':', 1)[1] - else: - parts = url.rstrip('/').split('/') - if len(parts) >= 2: - repo = f"{parts[-2]}/{parts[-1]}" - else: - repo = url - except Exception: - repo = None - - # Prefer the workspace name when repo is not available - repo_display = repo - if not repo_display: - # facts may include workspace path; prefer a final path component as name - workspace = facts.get('workspace') or os.getenv('GITHUB_WORKSPACE') - if workspace: - try: - from pathlib import Path - - repo_display = Path(workspace).name - except Exception: - repo_display = str(workspace) + # Get full scan URL if available + full_scan_url = facts.get('full_scan_html_url') + + # Validate format + valid_notifications = [] + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + # Append full scan URL to content if available + content = item['content'] + if full_scan_url: + content += f"\n\n🔗 <{full_scan_url}|View complete scan results>" + item = {'title': item['title'], 'content': content} + valid_notifications.append(item) else: - repo_display = 'unknown' - branch_display = branch or 'unknown' - - header_section = { - "type": "section", - "text": { - "type": "mrkdwn", - "text": f"*Socket Security* — *{repo_display}* — branch `{branch_display}` — *{total} alert(s)*" - } - } - blocks.append(header_section) - blocks.append({"type": "divider"}) - - for group_label, payload in notifications.items(): - headers = payload.get('headers') - rows = payload.get('rows') or [] - # Strict: use only connector-provided headers. Skip groups without valid headers. 
- if not headers or not isinstance(headers, list): - logger.warning('SlackNotifier: skipping notification group %s because headers missing or invalid; Manager should filter these', group_label) - continue - if not rows: - continue - # Add a header for the group - blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": f"*{group_label}*"}}) - - # helper to find header index by name (case-insensitive) - def hidx(name: str): - try: - for i, h in enumerate(headers): - if isinstance(h, str) and h.strip().lower() == name.lower(): - return i - except Exception: - pass - return None - - # Render up to 10 alerts per group as individual section blocks with fields - for r in rows[:10]: - fields = [] - snippet = '' - # SAST rendering: prefer header indices if present - if group_label.lower().startswith('sast'): - rule_i = hidx('rule') - file_i = hidx('file') or hidx('path') - loc_i = hidx('path') or hidx('location') - lines_i = hidx('lines') - code_i = hidx('code') - - rule = r[rule_i] if rule_i is not None and rule_i < len(r) else (r[0] if len(r) > 0 else '') - file = r[file_i] if file_i is not None and file_i < len(r) else (r[2] if len(r) > 2 else '') - loc = r[loc_i] if loc_i is not None and loc_i < len(r) else (r[3] if len(r) > 3 else '') - lines = r[lines_i] if lines_i is not None and lines_i < len(r) else (r[4] if len(r) > 4 else '') - snippet = str(r[code_i]) if code_i is not None and code_i < len(r) else '' - - fields = [ - {"type": "mrkdwn", "text": f"*Rule:*\n{rule}"}, - {"type": "mrkdwn", "text": f"*File:*\n{file}"}, - {"type": "mrkdwn", "text": f"*Location:*\n{loc}"}, - {"type": "mrkdwn", "text": f"*Lines:*\n{lines}"} - ] - else: - # Generic rendering: prefer 'file' or 'title' and 'severity' headers - title_i = hidx('title') or hidx('rule') or 0 - sev_i = hidx('severity') - loc_i = hidx('locator') or hidx('path') or hidx('location') - extra_i = hidx('location') or hidx('details') or None - - title_val = r[title_i] if title_i is not None and title_i < len(r) else (r[0] if len(r) > 0 else '') - sev_val = r[sev_i] if sev_i is not None and sev_i < len(r) else (r[1] if len(r) > 1 else '') - loc_val = r[loc_i] if loc_i is not None and loc_i < len(r) else (r[2] if len(r) > 2 else '') - details_val = r[extra_i] if extra_i is not None and extra_i < len(r) else (r[3] if len(r) > 3 else '') - - fields = [ - {"type": "mrkdwn", "text": f"*Title:*\n{title_val}"}, - {"type": "mrkdwn", "text": f"*Severity:*\n{sev_val}"}, - {"type": "mrkdwn", "text": f"*Locator:*\n{loc_val}"}, - {"type": "mrkdwn", "text": f"*Details:*\n{details_val}"} - ] - snippet = '' - - blocks.append({"type": "section", "fields": fields}) - if snippet: - sn = snippet if len(snippet) <= 400 else snippet[:400] + '...' - blocks.append({"type": "context", "elements": [{"type": "mrkdwn", "text": f"```{sn}```"}]}) - - blocks.append({"type": "divider"}) - - payload = {"username": self.username, "blocks": blocks} - - # Post only if webhook_url provided in params or app_config - url = self.webhook_url or getattr(self, 'app_config', {}).get('slack_webhook_url') - if not url: - # If no webhook configured, skip posting and do not print payload to stdout. - logger.info("Slack webhook URL not configured; skipping Slack notification") + logger.warning('SlackNotifier: skipping invalid notification item: %s', type(item)) + + if not valid_notifications: return - # Sanitize payload to avoid invalid_blocks errors from Slack. - # Ensure all text fields are strings and truncate very long snippets. 
- def _ensure_str(s: Any, max_len: int = 1000) -> str: - try: - if s is None: - return '' - t = str(s) - if len(t) > max_len: - return t[:max_len] + '...' - return t - except Exception: - return '' - try: - # Slack Block Kit limits and safe truncation parameters - MAX_BLOCKS = 45 - MAX_FIELDS_PER_SECTION = 10 - MAX_TEXT_CHARS = 3000 - MAX_CONTEXT_ELEMS = 10 - - blocks = payload.get('blocks', []) or [] - sanitized_blocks: List[Dict[str, Any]] = [] - omitted = 0 - - for b in blocks: - if not isinstance(b, dict): - continue - bt = b.get('type') - nb: Dict[str, Any] = {'type': bt} - - # Section blocks: sanitize 'text' and 'fields' - if bt == 'section': - text_obj = b.get('text') - if isinstance(text_obj, dict): - nb['text'] = {'type': text_obj.get('type', 'mrkdwn'), 'text': _ensure_str(text_obj.get('text', ''), MAX_TEXT_CHARS)} - # fields: ensure list of mrkdwn text objects, cap at MAX_FIELDS_PER_SECTION - flds = b.get('fields') or [] - if isinstance(flds, list) and flds: - new_fields = [] - for f in flds[:MAX_FIELDS_PER_SECTION]: - if isinstance(f, dict): - txt = f.get('text') if 'text' in f else '' - new_fields.append({'type': 'mrkdwn', 'text': _ensure_str(txt, 1000)}) - else: - new_fields.append({'type': 'mrkdwn', 'text': _ensure_str(f, 1000)}) - nb['fields'] = new_fields + # Send each notification as a separate Slack message + for item in valid_notifications: + title = item['title'] + content = item['content'] + self._send_slack_message(facts, title, content) - # Context blocks: sanitize elements and cap - elif bt == 'context': - elems = b.get('elements') or [] - new_elems = [] - if isinstance(elems, list) and elems: - for e in elems[:MAX_CONTEXT_ELEMS]: - if isinstance(e, dict): - # only allow mrkdwn or text elements - if e.get('type') == 'mrkdwn' or e.get('type') == 'text': - new_elems.append({'type': 'mrkdwn', 'text': _ensure_str(e.get('text', ''), 500)}) - else: - new_elems.append({'type': 'mrkdwn', 'text': _ensure_str(e, 500)}) - if new_elems: - nb['elements'] = new_elems - - # Divider and other allowed block types: pass through minimal - elif bt == 'divider': - # divider has no additional keys - pass - else: - # Unknown block type: skip - continue - - sanitized_blocks.append(nb) - # enforce max blocks - if len(sanitized_blocks) >= MAX_BLOCKS: - omitted += max(0, len(blocks) - len(sanitized_blocks)) - break - - # If we omitted blocks, append a short summary block - if omitted: - sanitized_blocks.append({'type': 'section', 'text': {'type': 'mrkdwn', 'text': _ensure_str(f'...omitted {omitted} additional blocks for brevity', 200)}}) - - payload['blocks'] = sanitized_blocks + def _send_slack_message(self, facts: Dict[str, Any], title: str, content: str) -> None: + """Send a single Slack message with title and content.""" + if not self.webhook_url: + logger.warning('SlackNotifier: no Slack webhook URL configured') + return - logger.debug('Slack payload sanitized: blocks=%d; Connector payload sample type=%s', len(sanitized_blocks), type(raw_notifications)) + # Get repository and branch info from config (discovered by main logic) + repo = self.repository + branch = self.config.get('branch', 'Unknown') + try: + # Truncate content if too long for Slack (3000 char limit per text block) + max_content_length = 2500 # Leave room for title and formatting + if len(content) > max_content_length: + content = content[:max_content_length] + "...\n[Content truncated]" + + # Create Slack payload with pre-formatted content + payload = { + "username": self.username, + "blocks": [ + { + "type": "section", + 
"text": { + "type": "mrkdwn", + "text": f"🔍 *Security Findings* - {repo} ({branch})" + } + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*{title}*\n```\n{content}\n```" + } + } + ] + } + import requests - resp = requests.post(url, json=payload) + resp = requests.post(self.webhook_url, json=payload, timeout=10) if resp.status_code >= 400: - logger.error("Slack webhook error %s: %s", resp.status_code, resp.text) - except Exception: - logger.exception("Failed to send Slack notification") + logger.warning('SlackNotifier: webhook error %s: %s', resp.status_code, resp.text[:200]) + else: + logger.info('SlackNotifier: posted message for "%s"', title) + + except Exception as e: + logger.error('SlackNotifier: exception posting message: %s', e) diff --git a/socket_basics/core/notification/sumologic_notifier.py b/socket_basics/core/notification/sumologic_notifier.py index d62dbf6..4f09ff5 100644 --- a/socket_basics/core/notification/sumologic_notifier.py +++ b/socket_basics/core/notification/sumologic_notifier.py @@ -1,70 +1,89 @@ -import json +from typing import Any, Dict import logging -import os -from typing import Any, Dict, List -from .base import BaseNotifier +from socket_basics.core.notification.base import BaseNotifier +from socket_basics.core.config import get_sumologic_http_source_url logger = logging.getLogger(__name__) class SumoLogicNotifier(BaseNotifier): - """Sumo Logic notifier: emits compact JSON or will send to HTTP source if configured. - Follows JsonNotifier grouping shape for `notifications`. + """SumoLogic notifier: sends security findings to SumoLogic HTTP collector. + + Simplified version that works with pre-formatted content from connectors. """ name = "sumologic" def __init__(self, params: Dict[str, Any] | None = None): - """Initialize SumoLogicNotifier with optional config params. - - Params may include 'http_source_url'. Falls back to INPUT_SUMO_LOGIC_HTTP_SOURCE_URL. - """ super().__init__(params or {}) - # read configuration from params or environment variables - self.http_source_url = self.config.get('http_source_url') or os.getenv('INPUT_SUMO_LOGIC_HTTP_SOURCE_URL') + # SumoLogic HTTP source URL from params, env variable, or app config + self.http_source_url = ( + self.config.get('http_source_url') or + get_sumologic_http_source_url() + ) def notify(self, facts: Dict[str, Any]) -> None: - # Strict: use only canonical notifications attached to facts. Do not - # synthesize from components. If no notifications present, skip. 
- notifications = facts.get('notifications') + notifications = facts.get('notifications', []) or [] + + if not isinstance(notifications, list): + logger.error('SumoLogicNotifier: only supports new format - list of dicts with title/content') + return + if not notifications: - logger.info('SumoLogicNotifier: no notifications present in facts; skipping') + logger.info('SumoLogicNotifier: no notifications present; skipping') return - # Normalize and validate groups: ensure at least one group has headers and rows - groups: List[Dict[str, Any]] = [] - if isinstance(notifications, list): - for item in notifications: - if not isinstance(item, dict): - continue - groups.append({'title': item.get('title') or 'results', 'headers': item.get('headers'), 'rows': item.get('rows') or []}) - elif isinstance(notifications, dict): - for title, payload in notifications.items(): - if isinstance(payload, dict): - groups.append({'title': title, 'headers': payload.get('headers'), 'rows': payload.get('rows') or []}) - elif isinstance(payload, list): - groups.append({'title': title, 'headers': None, 'rows': payload}) - - valid = any(isinstance(g.get('headers'), list) and isinstance(g.get('rows'), list) for g in groups) - if not valid: - logger.info('SumoLogicNotifier: notifications present but none match required {headers:list, rows:list} shape; skipping') + # Validate format + valid_notifications = [] + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + valid_notifications.append(item) + else: + logger.warning('SumoLogicNotifier: skipping invalid notification item: %s', type(item)) + + if not valid_notifications: return - body = {'repository': facts.get('repository'), 'branch': facts.get('branch'), 'notifications': groups} + # Send each notification as a separate log entry + for item in valid_notifications: + title = item['title'] + content = item['content'] + self._send_sumologic_log(facts, title, content) + def _send_sumologic_log(self, facts: Dict[str, Any], title: str, content: str) -> None: + """Send a single log entry to SumoLogic with title and content.""" if not self.http_source_url: - logger.info('Sumo Logic HTTP source not configured; printing payload to stdout') - try: - print(json.dumps(body, indent=2)) - except Exception: - logger.debug('Failed to print Sumo Logic payload to stdout') + logger.warning('SumoLogicNotifier: no HTTP source URL configured') return + # Get repository and branch info from config (discovered by main logic) + repo = self.config.get('repository', 'Unknown') + branch = self.config.get('branch', 'Unknown') + + # Add full scan URL if available + full_scan_url = facts.get('full_scan_url') + if full_scan_url: + content += f"\n\nfull_scan_url={full_scan_url}" + + # Create SumoLogic log payload with pre-formatted content + log_entry = { + 'timestamp': facts.get('timestamp'), + 'source': 'socket-security', + 'repository': repo, + 'branch': branch, + 'severity': 'high', + 'title': title, + 'content': content + } + try: import requests - resp = requests.post(self.http_source_url, json=body) + resp = requests.post(self.http_source_url, json=log_entry, timeout=10) if resp.status_code >= 400: - logger.error('Sumo Logic returned %s: %s', resp.status_code, resp.text) - except Exception: - logger.exception('Failed to send to Sumo Logic') + logger.warning('SumoLogicNotifier: HTTP error %s: %s', resp.status_code, resp.text[:200]) + else: + logger.info('SumoLogicNotifier: sent log entry for "%s"', title) + + except Exception as e: + 
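
To make the SumoLogic change above concrete: each valid notification becomes one standalone JSON document posted to the HTTP source. A self-contained sketch of the wire format follows; the collector URL and field values are hypothetical placeholders, and in the notifier itself timestamp, repository, and branch come from facts and config.

import requests

# Hypothetical values; the notifier fills these from facts and its config.
log_entry = {
    'timestamp': '2025-10-14T15:49:00Z',
    'source': 'socket-security',
    'repository': 'example-org/example-repo',
    'branch': 'main',
    'severity': 'high',
    'title': 'SAST Findings',
    'content': 'src/app.py:42  sql-injection  HIGH',
}

# Hypothetical collector endpoint; real deployments supply their own HTTP source URL.
resp = requests.post(
    'https://endpoint.collection.sumologic.com/receiver/v1/http/EXAMPLE',
    json=log_entry,
    timeout=10,
)
resp.raise_for_status()
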
logger.error('SumoLogicNotifier: exception sending log: %s', e) diff --git a/socket_basics/core/notification/webhook_notifier.py b/socket_basics/core/notification/webhook_notifier.py index 8c66e0f..4df6664 100644 --- a/socket_basics/core/notification/webhook_notifier.py +++ b/socket_basics/core/notification/webhook_notifier.py @@ -1,64 +1,85 @@ -import json -import logging -import os from typing import Any, Dict +import logging -from .base import BaseNotifier +from socket_basics.core.notification.base import BaseNotifier +from socket_basics.core.config import get_webhook_url logger = logging.getLogger(__name__) class WebhookNotifier(BaseNotifier): - """Webhook notifier: emits a structured JSON body similar to JsonNotifier - for posting to arbitrary HTTP webhook endpoints. + """Webhook notifier: sends security findings to HTTP webhook endpoints. + + Simplified version that works with pre-formatted content from connectors. """ name = "webhook" def __init__(self, params: Dict[str, Any] | None = None): super().__init__(params or {}) - # read configuration from params or environment variables - self.url = self.config.get('url') or os.getenv('INPUT_WEBHOOK_URL') + # Webhook URL from params, env variable, or app config + self.url = ( + self.config.get('url') or + get_webhook_url() + ) def notify(self, facts: Dict[str, Any]) -> None: - notifications = facts.get('notifications') + notifications = facts.get('notifications', []) or [] + + if not isinstance(notifications, list): + logger.error('WebhookNotifier: only supports new format - list of dicts with title/content') + return + if not notifications: - logger.info('WebhookNotifier: no notifications present in facts; skipping') + logger.info('WebhookNotifier: no notifications present; skipping') return - # Normalize groups into canonical list for downstream consumers - groups = [] - if isinstance(notifications, list): - for g in notifications: - if not isinstance(g, dict): - continue - groups.append({'title': g.get('title') or 'results', 'headers': g.get('headers'), 'rows': g.get('rows') or []}) - elif isinstance(notifications, dict): - for title, payload in notifications.items(): - if isinstance(payload, dict): - groups.append({'title': title, 'headers': payload.get('headers'), 'rows': payload.get('rows') or []}) - elif isinstance(payload, list): - groups.append({'title': title, 'headers': None, 'rows': payload}) - - valid = any(isinstance(g.get('headers'), list) and isinstance(g.get('rows'), list) for g in groups) - if not valid: - logger.info('WebhookNotifier: notifications present but none match required {headers:list, rows:list} shape; skipping') + # Validate format + valid_notifications = [] + for item in notifications: + if isinstance(item, dict) and 'title' in item and 'content' in item: + valid_notifications.append(item) + else: + logger.warning('WebhookNotifier: skipping invalid notification item: %s', type(item)) + + if not valid_notifications: return - body = {'repository': facts.get('repository'), 'branch': facts.get('branch'), 'notifications': groups} + # Send each notification as a separate webhook + for item in valid_notifications: + title = item['title'] + content = item['content'] + self._send_webhook(facts, title, content) + def _send_webhook(self, facts: Dict[str, Any], title: str, content: str) -> None: + """Send a single webhook with title and content.""" if not self.url: - logger.info('Webhook notifier target URL not configured; printing body to stdout') - try: - print(json.dumps(body, indent=2)) - except Exception: - 
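
The webhook notifier resolves its target URL through the same layered lookup the other notifiers use: an explicit 'url' param wins, then the environment, then app-level config. A rough sketch of that precedence, assuming get_webhook_url() wraps the INPUT_WEBHOOK_URL environment lookup plus application config (its internals are not shown in this diff):

import os
from typing import Any, Dict, Optional


def resolve_webhook_url(config: Dict[str, Any]) -> Optional[str]:
    """Sketch of the precedence: explicit param, then environment, then app config."""
    url = config.get('url')
    if url:
        return url
    # get_webhook_url() is assumed to perform lookups like this one plus app config.
    return os.getenv('INPUT_WEBHOOK_URL') or None


assert resolve_webhook_url({'url': 'https://hooks.example.com/x'}) == 'https://hooks.example.com/x'
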
logger.debug('Failed to print webhook body to stdout') + logger.warning('WebhookNotifier: no webhook URL configured') return + # Get repository and branch info from config (discovered by main logic) + repo = self.config.get('repository', 'Unknown') + branch = self.config.get('branch', 'Unknown') + + # Create webhook payload with pre-formatted content + payload = { + 'repository': repo, + 'branch': branch, + 'scanner': 'socket-security', + 'timestamp': facts.get('timestamp'), + 'notification': { + 'title': title, + 'content': content + } + } + try: import requests - resp = requests.post(self.url, json=body) + resp = requests.post(self.url, json=payload, timeout=10) if resp.status_code >= 400: - logger.error('Webhook target returned %s: %s', resp.status_code, resp.text) - except Exception: - logger.exception('Failed to post to webhook target') + logger.warning('WebhookNotifier: HTTP error %s: %s', resp.status_code, resp.text[:200]) + else: + logger.info('WebhookNotifier: sent webhook for "%s"', title) + + except Exception as e: + logger.error('WebhookNotifier: exception sending webhook: %s', e) diff --git a/socket_basics/core/utils/__init__.py b/socket_basics/core/utils/__init__.py new file mode 100644 index 0000000..0dca924 --- /dev/null +++ b/socket_basics/core/utils/__init__.py @@ -0,0 +1,32 @@ +import json + +__all__ = [ + "make_json_safe", + "is_trivy_dockerfile", +] + +def make_json_safe(value: str) -> str: + """ + Make a string JSON-safe by escaping special characters. + + Args: + value: The string to make JSON-safe + + Returns: + A JSON-safe string with proper escaping + """ + if not isinstance(value, str): + return value + + # Use json.dumps to properly escape the string, then remove the surrounding quotes + return json.dumps(value)[1:-1] + +def is_trivy_dockerfile(component: dict) -> bool: + """Check if a Trivy component represents a Dockerfile vulnerability. + + Dockerfile components have direct=True and no ecosystem qualifier. + Image package components have direct=False and an ecosystem qualifier. 
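
The make_json_safe helper added above leans on json.dumps for escaping and then strips the surrounding quotes, so the result can be embedded inside a larger JSON string without double-encoding. A quick standalone check, reproducing the helper from the diff:

import json


def make_json_safe(value):
    if not isinstance(value, str):
        return value
    # json.dumps escapes quotes, backslashes, and control characters; the slice
    # drops the surrounding double quotes that dumps adds.
    return json.dumps(value)[1:-1]


assert make_json_safe('line1\nline2 "quoted"') == 'line1\\nline2 \\"quoted\\"'
# Non-strings pass through untouched:
assert make_json_safe(42) == 42
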
+ """ + comp_direct = component.get('direct', False) + has_ecosystem = bool(component.get('qualifiers', {}).get('ecosystem')) + return comp_direct and not has_ecosystem \ No newline at end of file diff --git a/socket_basics/notifications.yaml b/socket_basics/notifications.yaml index 48fa147..066873e 100644 --- a/socket_basics/notifications.yaml +++ b/socket_basics/notifications.yaml @@ -102,4 +102,19 @@ notifiers: description: "MS Teams incoming webhook URL (also reads MSTEAMS_WEBHOOK_URL or INPUT_MSTEAMS_WEBHOOK_URL)" # Title is derived automatically from repository, branch, and total alerts +# Settings for notifier-specific limits +settings: + # Maximum number of results per notification for each notifier type + # These limits help ensure notifications stay within API/content limits + result_limits: + jira: 30 # Jira has strict content size limits for comments and descriptions + slack: 50 # Slack messages have size limits + msteams: 50 # MS Teams adaptive cards have size limits + github_pr: 100 # GitHub PR comments can handle more content + webhook: 100 # Generic webhooks, reasonable default + console: 1000 # Console output can handle many results + json: 10000 # JSON output has no practical limit + sumologic: 500 # Log aggregation can handle many events + ms_sentinel: 500 # Security info and event management can handle many events + default: 50 # Default limit for notifiers not specified above diff --git a/socket_basics/socket_basics.py b/socket_basics/socket_basics.py index b2c4e53..17ee51d 100644 --- a/socket_basics/socket_basics.py +++ b/socket_basics/socket_basics.py @@ -197,6 +197,171 @@ def load_notification_manager(self, notifications_cfg: Dict[str, Any] | None = N nm.load_from_config() self.notification_manager = nm + def submit_socket_facts(self, socket_facts_path: Path, results: Dict[str, Any]) -> Dict[str, Any]: + """Submit the socket facts file to Socket API and return full scan results. 
+ + Args: + socket_facts_path: Path to the .socket.facts.json file + results: Current scan results dict to update with full scan info + + Returns: + Updated results dict with full scan information (id, html_url) + """ + try: + # Check if socket facts file is empty or has no components + if not socket_facts_path.exists(): + logger.debug("Socket facts file does not exist, skipping submission") + return results + + with open(socket_facts_path, 'r') as f: + facts_data = json.load(f) + + components = facts_data.get('components', []) + if not components: + logger.debug("Socket facts file is empty (no components), skipping submission") + return results + + # Check if we have the required Socket API configuration + socket_api_key = self.config.get('socket_api_key') + if not socket_api_key: + logger.debug("No Socket API key configured, skipping full scan submission") + return results + + socket_org = self.config.get('socket_org') + if not socket_org: + logger.debug("No Socket organization configured, skipping full scan submission") + return results + + # Import socketdev SDK + try: + logger.debug("Importing socketdev SDK") + from socketdev import socketdev + from socketdev.fullscans import FullScanParams + logger.debug("✓ socketdev SDK imported successfully") + except ImportError as e: + logger.warning(f"Failed to import socketdev SDK: {e}") + return results + + # Initialize SDK + logger.debug("Initializing socketdev SDK") + sdk = socketdev(token=socket_api_key, timeout=100) + logger.debug("✓ socketdev SDK initialized") + + # Prepare full scan parameters with proper defaults + repo_name = self.config.get('repo') or 'socket-basics-scan' + # Extract just the repo name if it contains org/repo format + if '/' in repo_name: + repo_name = repo_name.split('/')[-1] + + branch = self.config.get('branch') or 'main' + commit_hash = self.config.get('commit_hash') or '' + commit_message = self.config.get('commit_message') or 'Socket Basics security scan' + pull_request = self.config.get('pull_request') + committers = self.config.get('committers') or [] + + logger.debug(f"Creating FullScanParams with: org_slug={socket_org}, repo={repo_name}, branch={branch}") + + # Determine if this is the default branch + is_default_branch = self.config.is_default_branch + logger.debug(f"Is default branch: {is_default_branch}") + + try: + # Only include pull_request and committers if they have values + params_dict = { + 'org_slug': socket_org, + 'repo': repo_name, + 'branch': branch, + 'commit_message': commit_message, + 'commit_hash': commit_hash, + 'make_default_branch': is_default_branch, + 'set_as_pending_head': is_default_branch, + 'integration_type': "api" + } + + # Always include pull_request (0 if not a PR) + pull_request_num = pull_request if pull_request is not None else 0 + params_dict['pull_request'] = pull_request_num + + # Only add committers if there are any + if committers: + params_dict['committers'] = committers + + params = FullScanParams(**params_dict) + logger.debug(f"✓ FullScanParams created successfully") + except Exception as e: + logger.error(f"Failed to create FullScanParams: {type(e).__name__}: {str(e)}") + raise + + # Submit the socket facts file + logger.info(f"Submitting socket facts file to Socket API for organization: {socket_org}") + logger.debug(f"Full scan parameters: repo={repo_name}, branch={branch}, commit_hash={commit_hash}") + logger.debug(f"Socket facts file path: {socket_facts_path}") + + # Convert to absolute path to avoid SDK path parsing issues + absolute_socket_facts_path = 
socket_facts_path.absolute()
+            logger.debug(f"Absolute socket facts file path: {absolute_socket_facts_path}")
+
+            try:
+                res = sdk.fullscans.post(
+                    # Submit the computed absolute path rather than a hardcoded
+                    # relative name, so a custom --output location still works
+                    [str(absolute_socket_facts_path)],
+                    base_path="./",
+                    params=params,
+                    use_types=True,
+                    use_lazy_loading=True,
+                    max_open_files=50,
+                    base_paths=[str(self.config.workspace)]
+                )
+                logger.debug("✓ SDK call completed")
+                logger.debug(f"SDK response type: {type(res)}")
+                logger.debug(f"SDK response: {res}")
+            except Exception as e:
+                logger.error(f"Failed during SDK fullscans.post call: {type(e).__name__}: {str(e)}")
+                raise
+
+            # SDK typically returns a dict-like response
+            if isinstance(res, dict):
+                logger.debug("Processing dict-type response")
+                # Check for errors in dict response
+                if 'error' in res or res.get('success') is False:
+                    error_msg = res.get('error', res.get('message', 'Unknown error'))
+                    logger.error(f"Error creating full scan: {error_msg}")
+                    raise Exception(f"Error creating full scan: {error_msg}")
+
+                # Extract the scan ID and HTML URL from the response
+                scan_id = res.get('id')
+                html_url = res.get('html_url')
+                logger.debug(f"Extracted from dict: scan_id={scan_id}, html_url={html_url}")
+            else:
+                logger.debug("Processing object-type response")
+                # Handle object-based response (if the SDK uses objects)
+                if hasattr(res, 'error') or (hasattr(res, 'success') and not res.success):
+                    error_msg = getattr(res, 'error', getattr(res, 'message', 'Unknown error'))
+                    logger.error(f"Error creating full scan: {error_msg}")
+                    raise Exception(f"Error creating full scan: {error_msg}")
+
+                # Extract the scan ID and HTML URL from the response
+                scan_id = getattr(res, 'id', None)
+                html_url = getattr(res, 'html_url', None)
+                logger.debug(f"Extracted from object: scan_id={scan_id}, html_url={html_url}")
+
+            if scan_id:
+                logger.info(f"Full scan created successfully with ID: {scan_id}")
+                results['full_scan_id'] = scan_id
+
+            if html_url:
+                logger.info(f"Full scan available at: {html_url}")
+                results['full_scan_html_url'] = html_url
+
+            return results
+
+        except Exception as e:
+            logger.error(f"Failed to submit socket facts file: {type(e).__name__}: {str(e)}")
+            logger.debug("Full traceback:", exc_info=True)
+            # Return original results if submission fails
+            return results
+
 
 def main():
     """Main entry point"""
@@ -237,6 +402,12 @@ def main():
     # Save results
     output_path = scanner.save_results(results, args.output)
 
+    # Submit socket facts file to Socket API if not empty
+    try:
+        results = scanner.submit_socket_facts(output_path, results)
+    except Exception:
+        logger.exception("Failed to submit socket facts file")
+
     # Optionally upload to S3 if requested
     try:
         enable_s3 = getattr(args, 'enable_s3_upload', False) or config.get('enable_s3_upload', False)
@@ -280,6 +451,9 @@ def main():
     try:
         scanner.load_notification_manager()
         if scanner.notification_manager:
+            # Add full scan URL to results if available for notifiers
+            if 'full_scan_html_url' in results:
+                scanner.config.set('full_scan_html_url', results['full_scan_html_url'])
             scanner.notification_manager.notify_all(results)
     except Exception:
         logger.exception("Failed to run notifiers")
diff --git a/uv.lock b/uv.lock
index f36dc7c..976f3af 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,9 +1,6 @@
 version = 1
 revision = 3
 requires-python = ">=3.10"
-resolution-markers = [
-    "python_version < '0'",
-]
 
 [[package]]
 name = "black"
@@ -453,13 +450,14 @@ wheels = [
 ]
 
 [[package]]
-name = "security-wrapper"
-version = "1.0.0"
+name = "socket-basics"
+version = "1.0.2"
 source = { editable = "." 
} dependencies = [ { name = "light-s3-client" }, { name = "pyyaml" }, { name = "requests" }, + { name = "socketdev" }, { name = "tabulate" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] @@ -481,6 +479,7 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" }, { name = "pyyaml", specifier = ">=6.0.0" }, { name = "requests", specifier = ">=2.31.0" }, + { name = "socketdev", specifier = ">=0.1.0" }, { name = "tabulate", specifier = "~=0.9.0" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] @@ -489,6 +488,19 @@ provides-extras = ["dev"] [package.metadata.requires-dev] dev = [] +[[package]] +name = "socketdev" +version = "3.0.13" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/1c/e8dda1051e5f10e9d36aa37081b02d451f3491edccc44d41ee77c7b1865a/socketdev-3.0.13.tar.gz", hash = "sha256:5b7810cb7bd05a123d454f0e20735e451a943c405649dbfe891d1750c8688c57", size = 131853, upload-time = "2025-10-14T15:49:47.598Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/61/c1d07e21b38dfda809539aab30c56c1ed47ca3b5ee58a117880e03d60ab7/socketdev-3.0.13-py3-none-any.whl", hash = "sha256:2acc38d532c8764c86fcca5007f518588a5f22ff56e480a59cdcbcb67e96defe", size = 57289, upload-time = "2025-10-14T15:49:45.917Z" }, +] + [[package]] name = "tabulate" version = "0.9.0"
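
To round out the socket_basics.py changes above: the submission flow boils down to building FullScanParams and posting the facts file through the socketdev SDK pinned in this lockfile. A trimmed sketch follows; token, org, and repo values are placeholders, and the keyword arguments mirror the ones used in the diff rather than the SDK's full surface.

from socketdev import socketdev
from socketdev.fullscans import FullScanParams

# Placeholder credentials and metadata; real values come from config.
sdk = socketdev(token='sktsec_example_token', timeout=100)
params = FullScanParams(
    org_slug='example-org',
    repo='example-repo',
    branch='main',
    commit_message='Socket Basics security scan',
    commit_hash='',
    pull_request=0,
    make_default_branch=False,
    set_as_pending_head=False,
    integration_type='api',
)

res = sdk.fullscans.post(
    ['/workspace/.socket.facts.json'],  # absolute path to the facts file
    params=params,
    use_types=True,
)
# The diff handles both dict-like and object-like responses; do the same here.
scan_id = res.get('id') if isinstance(res, dict) else getattr(res, 'id', None)
print(f'full scan id: {scan_id}')
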