RandomCodeSpace · aksOps · May 23, 2026 · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/.github/workflows/parity.yml b/.github/workflows/parity.yml
@@ -0,0 +1,207 @@
+name: Scan parity (self-scan ↔ SonarQube Cloud)
+
+# Runs BOTH scans on the same commit and diffs their issue lists. Every PR
+# answers: "does our daemon find what SonarSource's own pipeline finds?".
+#
+# This workflow supersedes .github/workflows/sonarqube-cloud.yml — that file
+# was removed in the same commit; the Cloud scan now happens here, alongside
+# the self-scan and the parity comparison. The standalone self-scan
+# (sonar.yml) is kept because it works on fork PRs (no SONAR_TOKEN needed),
+# whereas this one requires the token and so skips for forks.
+#
+# Setup required (one-time, by the repo admin):
+#   1. Sign in to https://sonarcloud.io with the repo's GitHub org.
+#   2. Import this repo as a SonarQube Cloud project.
+#      Project key: RandomCodeSpace_sonar-predict, organisation: randomcodespace
+#      (adjust the env values below if SonarCloud assigns different ones).
+#   3. Configure "Analysis Method" → "With GitHub Actions"; copy SONAR_TOKEN.
+#   4. Add SONAR_TOKEN as a repo secret.
+
+# Least-privilege token: read-only access to repository contents and pull
+# requests is all this workflow declares.
+permissions:
+  contents: read
+  pull-requests: read
+
+# Triggered for PRs targeting main, pushes to main, and manual dispatch.
+on:
+  pull_request:
+    branches: [main]
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+# SonarQube Cloud coordinates shared by every step that talks to the service
+# (scan, polling, parity diff, dashboard link). Keep them in sync with the
+# project key / organisation named in the setup notes at the top of this file.
+env:
+  SONAR_PROJECT_KEY: RandomCodeSpace_sonar-predict
+  SONAR_ORGANIZATION: randomcodespace
+  SONAR_HOST_URL: https://sonarcloud.io
+
+jobs:
+  parity:
+    name: Scan parity
+    runs-on: ubuntu-latest
+    # Fork PRs cannot reach SONAR_TOKEN, so the job only runs for manual
+    # dispatches, pushes, and PRs whose head branch lives in this repository.
+    # Fork PRs stay gated by the standalone self-scan workflow.
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'push' ||
+      github.event.pull_request.head.repo.full_name == github.repository
+    env:
+      # maven-assembly-plugin needs a 2 GB heap to zip the 150 MB skill
+      # bundle without running out of memory (same setting as sonar.yml).
+      MAVEN_OPTS: '-Xmx2g'
+    steps:
+      # Full history (fetch-depth: 0) rather than a shallow clone, same as the
+      # standalone self-scan workflow.
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # JDK 17 (Temurin), the same toolchain sonar.yml builds with.
+      - name: Set up JDK 17
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin
+          java-version: '17'
+
+      # Node.js 20 on PATH; sonar.yml notes that the JS/TS analyzer plugin
+      # spawns Node while the scan is running.
+      - name: Set up Node.js 20
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      # Local Maven repository cache. The key hashes every pom.xml in the tree;
+      # the restore-keys prefix lets an older Maven cache seed ~/.m2 when the
+      # poms have changed.
+      - name: Cache Maven repository
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2/repository
+          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-maven-
+
+      # Scanner cache directory (~/.sonar/cache). The key is constant per
+      # runner OS and the restore-keys entry is the same string.
+      # NOTE(review): with a constant key the entry is presumably saved once
+      # and not refreshed on later exact hits — confirm that is intended.
+      - name: Cache SonarQube Cloud packages
+        uses: actions/cache@v4
+        with:
+          path: ~/.sonar/cache
+          key: ${{ runner.os }}-sonar
+          restore-keys: |
+            ${{ runner.os }}-sonar
+
+      # Stop early, with an explicit annotation, when the SONAR_TOKEN secret is
+      # empty or missing. Without this guard the Cloud scan step further down
+      # would be the first to fail, and far less readably.
+      - name: Verify SONAR_TOKEN
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+        run: |
+          # Happy path first: a non-empty token means there is nothing to do.
+          if [[ -n "${SONAR_TOKEN:-}" ]]; then
+            echo "SONAR_TOKEN configured."
+            exit 0
+          fi
+          echo "::error::SONAR_TOKEN not set — parity workflow needs SonarQube Cloud access. See the header of this workflow for setup."
+          exit 1
+
+      # Single build that both scans then consume. `verify` also produces
+      # the per-module JaCoCo XMLs that both scans use as coverage evidence.
+      # The `dist` module is excluded here (-pl '!dist') and packaged on its
+      # own in the next step.
+      - name: Build and test (generates JaCoCo XML)
+        run: mvn -B -ntp -pl '!dist' verify -Dsurefire.failIfNoSpecifiedTests=false
+
+      # Package only `dist` plus the modules it depends on (-am). Tests are
+      # skipped because the previous step already ran them.
+      - name: Build dist (skill bundle for self-scan)
+        run: mvn -B -ntp -pl dist -am package -DskipTests
+
+      # --- (A) Self-scan ------------------------------------------------------
+
+      - name: Run self-scan
+        run: |
+          # Point the wrapper script at the bundle built from this checkout.
+          export SONAR_PREDICTOR_HOME="${PWD}/dist/target/skill/sonar-predictor"
+          # Capture the scanner's exit status without letting `bash -e` abort
+          # the step: the `||` branch records any non-zero code in rc.
+          rc=0
+          ./plugin/skills/sonar-predictor/bin/sonar agent-scan analyze . \
+            --coverage protocol/target/site/jacoco/jacoco.xml \
+            --coverage daemon/target/site/jacoco/jacoco.xml \
+            --coverage cli/target/site/jacoco/jacoco.xml \
+            || rc=$?
+          echo "Self-scan exit code: $rc (0=clean, 1=issues found, 2+=tool error)"
+          # 0 and 1 are both healthy outcomes; only 2 and above fail the step.
+          if [ "$rc" -lt 2 ]; then
+            exit 0
+          fi
+          echo "::error::Self-scan tool error (exit $rc)"
+          exit "$rc"
+
+      # --- (B) SonarQube Cloud scan ------------------------------------------
+
+      - name: Run SonarQube Cloud scan
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Remember when the scanner was launched and hand that to later
+          # steps through GITHUB_ENV. The polling step needs a reference
+          # point taken BEFORE the analysis is submitted.
+          echo "SCAN_START_TS=$(date +%s)" >> "$GITHUB_ENV"
+          mvn -B -ntp -pl '!dist' \
+              org.sonarsource.scanner.maven:sonar-maven-plugin:sonar \
+              -Dsonar.projectKey="${SONAR_PROJECT_KEY}" \
+              -Dsonar.organization="${SONAR_ORGANIZATION}" \
+              -Dsonar.host.url="${SONAR_HOST_URL}" \
+              -Dsonar.qualitygate.wait=false \
+              -Dsonar.coverage.jacoco.xmlReportPaths=target/site/jacoco/jacoco.xml,../protocol/target/site/jacoco/jacoco.xml,../daemon/target/site/jacoco/jacoco.xml,../cli/target/site/jacoco/jacoco.xml
+
+      # SonarQube Cloud processes the scan asynchronously after upload. Poll
+      # the newest analysis for the branch / pull request being scanned until
+      # its timestamp is at or after the moment the scanner was launched, or
+      # give up after ~3 minutes (18 attempts x 10 s; most analyses complete
+      # in 30-60s; longer polls aren't worth holding the runner for).
+      #
+      # Two details matter for the loop to be able to succeed:
+      #   * The reference time is SCAN_START_TS, exported by the scan step.
+      #     Sampling the clock in this step instead would pick a moment after
+      #     the upload had already finished, which this run's analysis
+      #     timestamp may never reach.
+      #   * The query is scoped with pullRequest= / branch=, mirroring the
+      #     scope the parity diff step requests. An unscoped query does not
+      #     target the pull request under review.
+      - name: Wait for SonarQube Cloud processing
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # Fall back to "now" only if the scan step did not export a value.
+          START_TS="${SCAN_START_TS:-$(date +%s)}"
+          # GITHUB_EVENT_NAME / GITHUB_REF_NAME are default runner variables.
+          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
+            SCOPE_PARAM="pullRequest=${PR_NUMBER}"
+          else
+            SCOPE_PARAM="branch=${GITHUB_REF_NAME}"
+          fi
+          for attempt in $(seq 1 18); do
+            # Constrain both the initial request (--proto =https) AND any redirect
+            # (--proto-redir =https) to HTTPS. Without --proto, githubactions:S6506
+            # still fires even when SONAR_HOST_URL is already https://; the rule
+            # wants both belts explicit at the curl-call site.
+            # --get turns the --data-urlencode pairs into a query string, so
+            # branch names containing '/' or other reserved characters are
+            # encoded correctly. A failed request degrades to '{}' and is
+            # treated as "no analyses yet".
+            RESP=$(curl -fsSL --proto =https --proto-redir =https -u "$SONAR_TOKEN:" \
+              --get \
+              --data-urlencode "project=${SONAR_PROJECT_KEY}" \
+              --data-urlencode "ps=1" \
+              --data-urlencode "${SCOPE_PARAM}" \
+              "${SONAR_HOST_URL}/api/project_analyses/search" || echo '{}')
+            ANALYSIS_TS=$(echo "$RESP" | jq -r '.analyses[0].date // empty' || true)
+            if [ -n "$ANALYSIS_TS" ]; then
+              # An unparsable date becomes epoch 0, i.e. "older than this run".
+              ANALYSIS_EPOCH=$(date -d "$ANALYSIS_TS" +%s 2>/dev/null || echo 0)
+              if [ "$ANALYSIS_EPOCH" -ge "$START_TS" ]; then
+                echo "Latest analysis ($ANALYSIS_TS) is from this run."
+                exit 0
+              fi
+              echo "Attempt $attempt: latest analysis is $ANALYSIS_TS (before run start), waiting…"
+            else
+              echo "Attempt $attempt: no analyses yet on SonarQube Cloud, waiting…"
+            fi
+            sleep 10
+          done
+          # Deliberately a warning, not a failure: the diff step still runs
+          # against whatever SonarQube Cloud has published so far.
+          echo "::warning::Timed out waiting for SonarQube Cloud to publish this run's analysis. Parity diff will use the most-recent published state."
+
+      # --- (C) Parity diff ----------------------------------------------------
+
+      # Compare the self-scan JSON with the issues SonarQube Cloud reports for
+      # the same pull request (PR events) or branch (every other event).
+      #
+      # Event data reaches the script through environment variables instead of
+      # `${{ }}` interpolation: a branch name pasted into the script text would
+      # be parsed by the shell, whereas an environment variable is only ever
+      # expanded as data. GITHUB_EVENT_NAME and GITHUB_REF_NAME are default
+      # runner variables; the PR number is passed in explicitly.
+      - name: Diff self-scan vs SonarQube Cloud
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # A bash array keeps each option and its value as separate, intact
+          # words without relying on unquoted word splitting.
+          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
+            SCOPE_ARGS=(--pull-request "$PR_NUMBER")
+          else
+            SCOPE_ARGS=(--branch "$GITHUB_REF_NAME")
+          fi
+          python3 scripts/scan_parity.py \
+            --self-scan .sonar-predictor/scan.json \
+            --project-key "${SONAR_PROJECT_KEY}" \
+            --organization "${SONAR_ORGANIZATION}" \
+            --host "${SONAR_HOST_URL}" \
+            "${SCOPE_ARGS[@]}" \
+            --out .sonar-predictor/parity.json
+
+      # Runs even when an earlier step failed (if: always()) so whatever scan
+      # and parity output exists is still attached to the run. The run id in
+      # the artifact name keeps uploads from different runs apart; artifacts
+      # are kept for 14 days.
+      - name: Upload scan + parity artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: scan-parity-${{ github.run_id }}
+          path: |
+            .sonar-predictor/scan.json
+            .sonar-predictor/parity.json
+          retention-days: 14
+
+      - name: Link to SonarQube Cloud dashboard
+        if: always()
+        run: |
+          # Everything printed from here on is appended to the job summary.
+          exec >> "$GITHUB_STEP_SUMMARY"
+          echo ""
+          echo "---"
+          echo ""
+          echo "**SonarQube Cloud dashboard:** ${SONAR_HOST_URL}/project/overview?id=${SONAR_PROJECT_KEY}"
+          # The PR-scoped link only makes sense for pull_request events.
+          if [ "${{ github.event_name }}" != "pull_request" ]; then
+            exit 0
+          fi
+          echo ""
+          echo "**PR-scoped view:** ${SONAR_HOST_URL}/project/issues?id=${SONAR_PROJECT_KEY}&pullRequest=${{ github.event.pull_request.number }}"
diff --git a/.github/workflows/sonar.yml b/.github/workflows/sonar.yml
@@ -0,0 +1,169 @@
+name: Self-scan (sonar-predictor against itself)
+
+# Read-only repository access is the only token scope this workflow declares.
+permissions:
+  contents: read
+
+# Runs the project's OWN scanner — the in-repo daemon, freshly built from this
+# branch — against the repository on every PR and on pushes to main. The point
+# is CI parity with the local self-scan we run during development: every change
+# passes through the same gate, so the bar we apply to others applies to us.
+#
+# We deliberately do NOT use the previously released bundle from Maven Central.
+# SONAR_PREDICTOR_HOME is repointed at dist/target/skill/sonar-predictor so the
+# scan exercises the branch's analyzer code, not yesterday's release.
+
+# Triggered for PRs targeting main, pushes to main, and manual dispatch.
+on:
+  pull_request:
+    branches: [main]
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+jobs:
+  # No job-level `if` and no secrets are referenced anywhere in this job, so it
+  # runs for every trigger listed above.
+  self-scan:
+    name: Self-scan
+    runs-on: ubuntu-latest
+    env:
+      # The dist-assembly step packages the ~150 MB skill bundle (CLI + daemon
+      # fat jars + 10 analyzer plugins). Maven's default heap is too small for
+      # that on ubuntu-latest — we'd get 'Execution exception: Java heap space'
+      # from maven-assembly-plugin:single. 2 GB is plenty and well under the
+      # runner's ~7 GB available memory.
+      MAVEN_OPTS: -Xmx2g
+    steps:
+      # fetch-depth: 0 keeps the full history available so we can switch to
+      # `--diff`-style semantics later without re-checking out the repo.
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # JDK 17 is the project's build/runtime target. Temurin is the safe default.
+      - name: Set up JDK 17
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin
+          java-version: '17'
+
+      # The JS/TS analyzer plugin spawns Node at runtime to lint JS/TS sources,
+      # so Node must be on PATH when the scan runs (not just at build time).
+      - name: Set up Node.js 20
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
+      # Cache the local Maven repo across runs. Keyed on every pom.xml in the
+      # tree so a dependency change invalidates cleanly; restore-keys lets a
+      # partial cache hit still seed most of ~/.m2. The path and key are the
+      # same ones parity.yml uses for its Maven cache step.
+      - name: Cache Maven repository
+        uses: actions/cache@v4
+        with:
+          path: ~/.m2/repository
+          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-maven-
+
+      # Build + test the whole reactor except `dist` (the bundle module is
+      # packaged separately below). `verify` runs the integration tests AND
+      # produces the JaCoCo XML reports we feed back into the scan as coverage
+      # evidence. failIfNoSpecifiedTests=false keeps modules with no tests
+      # from breaking the reactor. -B (batch mode) and -ntp (no transfer
+      # progress) keep the CI log free of interactive/progress output.
+      - name: Build and test (generates JaCoCo XML reports)
+        run: mvn -B -ntp -pl '!dist' verify -Dsurefire.failIfNoSpecifiedTests=false
+
+      # Now build the skill bundle. -am pulls in upstream modules if they
+      # weren't already installed by the previous step. -DskipTests because
+      # the tests already ran — re-running them here just wastes minutes.
+      - name: Build dist (skill bundle this scan will use)
+        run: mvn -B -ntp -pl dist -am package -DskipTests
+
+      # The actual self-scan. We override SONAR_PREDICTOR_HOME so the wrapper
+      # script picks up THIS branch's freshly-built daemon jar and analyzer
+      # plugins, not whatever happens to be installed globally. Three JaCoCo
+      # XMLs are passed in as coverage evidence — one per Java module that
+      # produces coverage. agent-scan writes JSON to .sonar-predictor/scan.json
+      # and prints a human summary on stdout; we want both.
+      #
+      # IMPORTANT: the CLI uses three-state exit codes
+      #   0  = clean (no findings at the floor)
+      #   1  = issues found (a normal scan outcome, not a failure)
+      #   2+ = tool error (broken input, daemon unreachable, no Java)
+      # Step success means "the scanner ran". Whether the *result* should
+      # fail the build is left to the Quality gate step below, which is
+      # informational only for now. We must not let `bash -e` propagate
+      # exit-1 from a healthy scan as a job failure; we propagate the exit
+      # code only when it's >= 2.
+      - name: Run self-scan
+        run: |
+          # Point the wrapper script at the bundle built from this checkout.
+          export SONAR_PREDICTOR_HOME="${PWD}/dist/target/skill/sonar-predictor"
+          # Capture the scanner's exit status without letting `bash -e` abort
+          # the step: the `||` branch records any non-zero code in rc.
+          rc=0
+          ./plugin/skills/sonar-predictor/bin/sonar agent-scan analyze . \
+            --coverage protocol/target/site/jacoco/jacoco.xml \
+            --coverage daemon/target/site/jacoco/jacoco.xml \
+            --coverage cli/target/site/jacoco/jacoco.xml \
+            || rc=$?
+          echo "Self-scan exit code: $rc (0=clean, 1=issues found, 2+=tool error)"
+          # 0 and 1 are both healthy outcomes; only 2 and above fail the step.
+          if [ "$rc" -lt 2 ]; then
+            exit 0
+          fi
+          echo "::error::Self-scan tool error (exit $rc) — see step log."
+          exit "$rc"
+
+      # Render headline counts into the GitHub job summary so reviewers see
+      # the scan result inline on the run page without downloading artifacts.
+      # `// ([.files[]?.issues[]?]|length)` is the fallback path when an older
+      # JSON shape doesn't carry a top-level issueCount.
+      #
+      # The coverage cell appends the percent sign inside jq, and only when a
+      # value is present, so a missing figure renders as "n/a" rather than
+      # "n/a%".
+      - name: Render scan summary
+        if: always()
+        run: |
+          J=.sonar-predictor/scan.json
+          # No JSON means the scan step never got as far as writing output;
+          # say so in the summary instead of failing this reporting step.
+          if [ ! -f "$J" ]; then
+            echo "## Sonar self-scan" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "Scan JSON not produced — see the **Run self-scan** step log." >> "$GITHUB_STEP_SUMMARY"
+            exit 0
+          fi
+          {
+            echo "## Sonar self-scan"
+            echo
+            echo "| Metric | Value |"
+            echo "| --- | --- |"
+            echo "| Total issues | $(jq -r '.issueCount // ([.files[]?.issues[]?]|length)' "$J") |"
+            echo "| Coverage (line, overall) | $(jq -r '(.coverage.overallPercent // null) | if . == null then "n/a" else "\(.)%" end' "$J") |"
+            echo "| Severity | $(jq -rc '[.files[]?.issues[]?.severity]|group_by(.)|map("\(.[0])=\(length)")|join(" ")' "$J") |"
+            echo "| Type | $(jq -rc '[.files[]?.issues[]?.type]|group_by(.)|map("\(.[0])=\(length)")|join(" ")' "$J") |"
+            echo "| Warnings | $(jq -r '.warnings // [] | length' "$J") |"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      # Always upload the scan JSON, even on failure, so a broken scan is
+      # still debuggable from the run page. 14-day retention is enough to
+      # cover a typical PR review cycle without pinning storage forever.
+      # The run id in the artifact name keeps uploads from different runs apart.
+      - name: Upload scan JSON
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: sonar-scan-${{ github.run_id }}
+          path: .sonar-predictor/scan.json
+          retention-days: 14
+
+      # Informational gate. We compute the count of CRITICAL bugs and security
+      # hotspots and emit a `::warning::` so they show on the PR's Checks tab,
+      # but we deliberately do NOT `exit 1` yet — first runs are baseline data
+      # while we work the existing findings down to zero.
+      #
+      # TODO: once the backlog is clear, flip this to `exit 1` on any
+      # CRITICAL bug or security hotspot and rename this step to "Quality gate".
+      - name: Quality gate (informational only)
+        if: always()
+        run: |
+          scan_json=.sonar-predictor/scan.json
+          # Nothing to evaluate when the scan never wrote its JSON.
+          if ! [ -f "$scan_json" ]; then
+            echo "No scan JSON found — skipping gate."
+            exit 0
+          fi
+          # Count CRITICAL bugs and security hotspots across every file entry.
+          crit_bugs=$(jq -r '[.files[]?.issues[]? | select(.severity=="CRITICAL" and .type=="BUG")] | length' "$scan_json")
+          hotspots=$(jq -r '[.files[]?.issues[]? | select(.type=="SECURITY_HOTSPOT")] | length' "$scan_json")
+          echo "Critical bugs: $crit_bugs"
+          echo "Security hotspots: $hotspots"
+          # Both counters at zero: finish quietly without an annotation.
+          if [ "$crit_bugs" -le 0 ] && [ "$hotspots" -le 0 ]; then
+            exit 0
+          fi
+          # Annotation only — this step never fails the job on findings.
+          echo "::warning::Self-scan found $crit_bugs critical bug(s) and $hotspots security hotspot(s). Gate is informational for now; will enforce later."
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,8 @@ daemon/plugins/*.jar
 
 # sonar-predictor scan output, written by `bin/sonar agent-scan`. Never commit.
 .sonar-predictor/
+
+# Python bytecode (scan_parity.py compiles to scripts/__pycache__ when run locally).
+__pycache__/
+*.pyc
+