From ad3b54fdf2033b278aaa7be83879a450ac436729 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 13 May 2026 21:41:03 -0700 Subject: [PATCH 1/4] feat: opt-in anonymous CLI telemetry (closes #80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit raid now ships an opt-in, anonymous CLI telemetry pipeline so the project can prioritize features against real usage signal without ever capturing what users actually run. Off by default. First interactive run prompts ([y/N/?] — capital N default). Non-interactive contexts (no TTY on stdin, --yes/--headless, --json, DO_NOT_TRACK=1, or a `raid telemetry ...` invocation) skip the prompt entirely and persist consent=off so we never re-prompt. Build-time PostHog API key injection via ldflags; dev builds have no key, telemetry is dead code in them. New `raid telemetry on / off / status / purge / preview` subcommand: status shows current state + anonymous ID; preview renders the exact JSON payload raid would post (with the API key redacted) so the user can audit before opting in; purge deletes the anonymous machine ID so future events can't be linked to past ones. Event hooks fire from ExecuteCommand, ExecuteRepoCommand, and ExecuteTask. The per-task event is sampled at 10% to bound volume for commands with many tasks. Every event property is sanitized: command names and task types only, never cmd: bodies, paths, env values, or stdout/stderr. Tests pin the contracts: zero network calls when opted out, zero network calls under DO_NOT_TRACK, every event builder is free of forbidden content, UUIDv4 format is correct, the preview output redacts the API key. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .goreleaser.preview.yaml | 4 + .goreleaser.yaml | 6 + README.md | 6 + llms.txt | 1 + site/docs/telemetry.mdx | 106 +++++ site/docs/usage/raid.mdx | 3 +- site/docs/whats-new.mdx | 4 + src/cmd/raid.go | 58 +++ src/cmd/telemetry/telemetry.go | 185 ++++++++ src/cmd/telemetry/telemetry_test.go | 212 ++++++++++ src/internal/lib/command.go | 48 +++ src/internal/lib/task_runner.go | 25 +- src/internal/telemetry/consent.go | 87 ++++ src/internal/telemetry/events.go | 78 ++++ src/internal/telemetry/id.go | 162 ++++++++ src/internal/telemetry/prompt.go | 144 +++++++ src/internal/telemetry/sampling.go | 31 ++ src/internal/telemetry/telemetry.go | 226 ++++++++++ src/internal/telemetry/telemetry_test.go | 509 +++++++++++++++++++++++ src/internal/telemetry/version.go | 16 + src/resources/app.properties | 2 +- 21 files changed, 1910 insertions(+), 3 deletions(-) create mode 100644 site/docs/telemetry.mdx create mode 100644 src/cmd/telemetry/telemetry.go create mode 100644 src/cmd/telemetry/telemetry_test.go create mode 100644 src/internal/telemetry/consent.go create mode 100644 src/internal/telemetry/events.go create mode 100644 src/internal/telemetry/id.go create mode 100644 src/internal/telemetry/prompt.go create mode 100644 src/internal/telemetry/sampling.go create mode 100644 src/internal/telemetry/telemetry.go create mode 100644 src/internal/telemetry/telemetry_test.go create mode 100644 src/internal/telemetry/version.go diff --git a/.goreleaser.preview.yaml b/.goreleaser.preview.yaml index 2610555..76295d6 100644 --- a/.goreleaser.preview.yaml +++ b/.goreleaser.preview.yaml @@ -20,6 +20,10 @@ builds: goarch: arm64 ldflags: - -s -w + # Preview builds also receive the PostHog API key when + # POSTHOG_API_KEY is set in CI; absent → empty → telemetry + # no-ops. Mirrors the stable goreleaser config. 
+ - -X github.com/8bitalex/raid/src/internal/telemetry.APIKey={{ envOrDefault "POSTHOG_API_KEY" "" }} archives: - name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 25b5156..3818596 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -20,6 +20,12 @@ builds: goarch: arm64 ldflags: - -s -w + # Inject the PostHog publishable project key at build time. Dev + # builds and `go run` leave this empty, which makes the + # telemetry package no-op (no events sent). Set + # POSTHOG_API_KEY in the release CI environment to populate it + # for tagged builds. See src/internal/telemetry/telemetry.go. + - -X github.com/8bitalex/raid/src/internal/telemetry.APIKey={{ envOrDefault "POSTHOG_API_KEY" "" }} archives: - name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" diff --git a/README.md b/README.md index 1b298f6..12577ec 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,12 @@ RAID_HEADLESS=1 raid deploy - `Prompt` tasks **use their `default:`** value instead of reading stdin. - A `Prompt` **without a `default:`** fails fast with `HEADLESS_PROMPT_NO_DEFAULT` (exit code 3, category `task`) so the variable is never silently set to empty. Add a default for every Prompt you expect CI / agent invocations to run. +### Telemetry (opt-in, anonymous) + +raid ships an **opt-in** anonymous CLI telemetry pipeline. It is **off by default** and the first interactive run prompts (capital-N default); non-interactive contexts (no TTY, `--yes`/`--headless`, `--json`, `DO_NOT_TRACK=1`) skip the prompt entirely and stay off. Events carry sanitized properties only — command names, task types, structured error codes, duration — never `cmd:` bodies, paths, env values, or task output. + +Manage state with `raid telemetry on / off / status / purge / preview`. `raid telemetry preview` shows the exact payload raid would post (with the API key redacted) so you can audit before opting in. 
Full disclosure at [raidcli.dev/docs/telemetry](https://raidcli.dev/docs/telemetry). + ### `raid ` Run a custom command defined in the active profile or any of its repositories. diff --git a/llms.txt b/llms.txt index d486a6b..2504abe 100644 --- a/llms.txt +++ b/llms.txt @@ -33,6 +33,7 @@ Raid is written in Go, distributed as a single self-contained binary, and publis - [Custom commands](https://raidcli.dev/docs/usage/custom): Define and invoke `raid ` team workflows - [raid doctor](https://raidcli.dev/docs/usage/doctor): Diagnose profile and repo configuration issues - [raid root command](https://raidcli.dev/docs/usage/raid): Global flags and top-level invocation, including [headless mode](https://raidcli.dev/docs/usage/raid#headless-mode) (`-y` / `--yes` / `--headless` / `RAID_HEADLESS=1`) that auto-resolves Confirm and Prompt tasks for CI / agent runs +- [raid telemetry](https://raidcli.dev/docs/telemetry): Opt-in anonymous CLI telemetry. Off by default, first-run consent prompt, `on` / `off` / `status` / `purge` / `preview` subcommands, `DO_NOT_TRACK=1` honored, no PII or task content ever collected - [raid context](https://raidcli.dev/docs/usage/context): Snapshot the active workspace, or run `raid context serve` as an MCP server (stdio) exposing profile, env, repos, commands, recent, and live vars as resources, plus the canonical raid agent toolkit as tools ## Reference diff --git a/site/docs/telemetry.mdx b/site/docs/telemetry.mdx new file mode 100644 index 0000000..7ba2d17 --- /dev/null +++ b/site/docs/telemetry.mdx @@ -0,0 +1,106 @@ +--- +title: Telemetry +sidebar_position: 60 +slug: /telemetry +description: What raid's opt-in anonymous CLI telemetry collects, what it never collects, and how to inspect, enable, disable, or purge it. 
+--- + +# Telemetry + +raid ships an **opt-in, anonymous** CLI telemetry pipeline so the team can prioritize features against real usage signal — which task types matter, which commands fail, which features go unused — without ever capturing what you actually run. + +**Off by default.** A fresh install never sends anything until you explicitly run `raid telemetry on` (or accept the first-run prompt). When opted out, raid makes zero network requests during normal use, and there's an integration test that pins this contract. + +## What raid collects + +| Event | Properties (sanitized) | +|---|---| +| `raid_first_run` | `os`, `arch`, `raid_version`, `install_method` (best-effort) | +| `raid_command_executed` | `command_name`, `task_count`, `task_types[]`, `duration_ms`, `success` | +| `raid_command_failed` | `command_name`, `error_code` (from the [errors table](references/errors)), `duration_ms` | +| `raid_task_executed` | `task_type`, `duration_ms`, `success` — **sampled** at ~10% to bound volume | +| `raid_telemetry_opt_out` | `reason` (optional free-text from `raid telemetry off --why "..."`) | + +**Every event also carries:** `distinct_id` (anonymous UUIDv4 from `~/.config/raid/telemetry-id`), `raid_version`, `os` (e.g. `darwin`/`linux`/`windows`), `arch` (e.g. `arm64`/`amd64`). + +## What raid never collects + +- **Command bodies.** `cmd:`, `path:`, `runner:`, `src:`, `dest:`, `url:` — never sent. +- **Variable values.** Anything set by a `Set` task or passed in env vars. +- **Argument values.** `RAID_ARG_*`, declared args, declared flag values. +- **Stdout / stderr.** Task output is never captured by the telemetry layer. +- **Identifiers.** Username, hostname, IP, MAC, OS version beyond `darwin`/`linux`/`windows`, terminal emulator — none. +- **File paths beyond their kind.** `task_types: ["shell", "shell", "print"]` is fine; the actual paths are not. 
+ +The source of truth is the [`src/internal/telemetry`](https://github.com/8bitalex/raid/tree/main/src/internal/telemetry) package. Every event builder is a tiny pure function — read them yourself. + +## First-run consent + +On your first interactive `raid <command>` invocation after install, raid prints the consent prompt to stderr: + +``` +raid would like to send anonymous usage telemetry to help prioritize features. +We never collect: file paths, command contents, env values, or anything that could identify you. +See: https://raidcli.dev/docs/telemetry + + [y] yes, send telemetry [N] no, leave it off [?] what's collected +> +``` + +Default is **no** (capital `N`). `[?]` shows the long-form disclosure inline and re-asks. + +**Non-interactive contexts skip the prompt entirely and leave telemetry off.** Specifically, raid does not prompt when: + +- `DO_NOT_TRACK=1` is set in the environment (cross-tool standard — [consoledonottrack.com](https://consoledonottrack.com)). +- `-y`, `--yes`, or `--headless` is on the command line (see [Headless mode](usage/raid#headless-mode)). +- `--json` is on the command line (machine-readable output mode). +- stdin isn't a TTY (CI runners, pipes, agent hosts). +- This raid build wasn't compiled with a PostHog API key (dev builds — telemetry is dead code). +- The invocation is a `raid telemetry ...` subcommand (so you can run `raid telemetry on` without being prompted to opt in first). + +In each of these cases raid records the decision as off-by-default so it won't try to prompt later either. Switch back on explicitly with `raid telemetry on`. 
+ +## Managing telemetry + +```bash +raid telemetry on # opt in +raid telemetry off # opt out +raid telemetry off --why "ci runner" # opt out + record an anonymous reason +raid telemetry status # show current state, anonymous ID, ID file path +raid telemetry status --json # same, as JSON +raid telemetry purge # delete the anonymous ID file (breaks linkage to past events) +raid telemetry preview # render a sample event payload — does not send +``` + +`raid telemetry preview` is the recommended way to see exactly what raid would post before opting in. The API key is automatically redacted in the preview output so you can paste it anywhere. + +## Three ways to opt out + +1. **`raid telemetry off`** — persists the choice; future runs stay off. +2. **`DO_NOT_TRACK=1`** — env var; overrides the persisted state for the current process and any child it spawns. Honored on every run regardless of `telemetry on`. +3. **Never opt in.** The first-run prompt defaults to off, and non-interactive contexts skip the prompt entirely. + +You can purge the anonymous machine ID at any time with `raid telemetry purge` — that breaks linkage between future events and any sent before. Useful if you've been opted in for a while and want to reset. + +## Destination + +- **PostHog US Cloud**, project `Raid` (id `403603`). +- Endpoint: `https://us.i.posthog.com/i/v0/e/`. +- The PostHog publishable project key is baked into release builds at compile time via an ldflag. Dev / `go run` builds have no key and never send. + +Network failures are **always silent**. A 2-second HTTP timeout, fire-and-forget goroutines, and a 1.5-second flush deadline at process exit mean a stuck network can't slow raid down or break a command. 
+ +## Source code + +The entire telemetry implementation is in [`src/internal/telemetry`](https://github.com/8bitalex/raid/tree/main/src/internal/telemetry): + +| File | Role | +|---|---| +| [`telemetry.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/telemetry.go) | `Capture`, `Flush`, the HTTP send path, `PreviewPayload` | +| [`consent.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/consent.go) | Consent state (read/write via viper) + `DO_NOT_TRACK` honor | +| [`id.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/id.go) | Anonymous UUIDv4 generation + persistence + purge | +| [`events.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/events.go) | Event builders. Every builder is a few lines — easy to audit. | +| [`sampling.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/sampling.go) | Per-task sample rate | +| [`prompt.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/prompt.go) | First-run consent prompt flow | + +The test file [`telemetry_test.go`](https://github.com/8bitalex/raid/blob/main/src/internal/telemetry/telemetry_test.go) pins the contracts above: zero network calls when opted out, zero network calls under `DO_NOT_TRACK`, sanitization of every event builder, UUIDv4 format of the anonymous ID, and the redacted-key preview output. 
diff --git a/site/docs/usage/raid.mdx b/site/docs/usage/raid.mdx index b1d25b5..444c21d 100644 --- a/site/docs/usage/raid.mdx +++ b/site/docs/usage/raid.mdx @@ -68,6 +68,7 @@ The third row is deliberate: silently setting the variable to an empty string wo | [`profile`](./profile) | Create, add, list, switch, or remove profiles | | [`doctor`](./doctor) | Check the active configuration for issues | | [`context`](./context) | Print workspace snapshot; `context serve` runs the MCP server | +| [`telemetry`](../telemetry) | Manage anonymous CLI telemetry (off by default; `on` / `off` / `status` / `purge` / `preview`) | | [`completion`](#shell-completion) | Generate shell autocompletion scripts | Custom commands defined in the active profile or its repositories are also available as `raid `. See [Custom Commands](./custom) for details. @@ -106,6 +107,6 @@ Run `raid completion --help` for detailed instructions for each shell. The following names are reserved for built-in commands and cannot be used as custom command names: -`profile`, `install`, `env`, `doctor`, `context`, `help`, `version`, `completion` +`profile`, `install`, `env`, `doctor`, `context`, `telemetry`, `help`, `version`, `completion` If a custom command in your profile uses a reserved name, it is ignored and a warning is printed. diff --git a/site/docs/whats-new.mdx b/site/docs/whats-new.mdx index 1fc792b..4e73bd2 100644 --- a/site/docs/whats-new.mdx +++ b/site/docs/whats-new.mdx @@ -9,6 +9,10 @@ description: Feature-by-feature release notes for Raid. User-visible changes per release, latest first. For full commit history see the [GitHub releases page](https://github.com/8bitalex/raid/releases). +## 0.16.0 — upcoming + +**Opt-in anonymous CLI telemetry.** raid can now send anonymous usage events to PostHog so the project can prioritize features against real signal (which task types matter, which commands fail, which features go unused). 
**Off by default** — a fresh install never sends anything until you opt in. The first interactive run prompts (capital-N default); non-interactive contexts (TTY-less stdin, `--yes`/`--headless`, `--json`, `DO_NOT_TRACK=1`, or a `raid telemetry ...` invocation) skip the prompt and leave telemetry off. New `raid telemetry on / off / status / purge / preview` subcommand manages the state; `preview` renders the exact payload raid would post (with the API key redacted) so you can audit before opting in. Captured events carry sanitized properties only — command names and task types, never `cmd:` bodies, paths, env values, or stdout/stderr. See the full disclosure at [/docs/telemetry](/docs/telemetry). Closes [#80](https://github.com/8bitAlex/raid/issues/80). + ## 0.15.0 — upcoming **Headless mode for CI, scheduled runs, and agents.** A new top-level `-y` / `--yes` / `--headless` flag (and `RAID_HEADLESS=1` env-var equivalent) auto-resolves interactive prompts so non-interactive callers no longer deadlock on a `Confirm` or `Prompt`. `Confirm` auto-accepts; `Prompt` skips stdin and uses its `default:` value; a `Prompt` without a default fails fast with a structured `HEADLESS_PROMPT_NO_DEFAULT` error (exit code `3`) instead of silently setting the variable to empty. The flag and env var are interchangeable — the flag works by setting the env var, so a single read site in lib serves both the CLI and programmatic entry points. Headless auto-accepts every `Confirm`, so stronger destructive-action guardrails must be expressed via a [`verify:`](/docs/references/schema#verify) entry, a `condition:`, or an explicit env-var check. See [raid → Headless mode](/docs/usage/raid#headless-mode). Closes [#67](https://github.com/8bitAlex/raid/issues/67). 
diff --git a/src/cmd/raid.go b/src/cmd/raid.go index 76211fd..9db78d5 100644 --- a/src/cmd/raid.go +++ b/src/cmd/raid.go @@ -14,8 +14,10 @@ import ( "github.com/8bitalex/raid/src/cmd/env" "github.com/8bitalex/raid/src/cmd/install" "github.com/8bitalex/raid/src/cmd/profile" + telemetrycmd "github.com/8bitalex/raid/src/cmd/telemetry" "github.com/8bitalex/raid/src/internal/lib" "github.com/8bitalex/raid/src/internal/sys" + "github.com/8bitalex/raid/src/internal/telemetry" "github.com/8bitalex/raid/src/raid" "github.com/8bitalex/raid/src/raid/errs" "github.com/spf13/cobra" @@ -28,6 +30,7 @@ var reservedNames = map[string]bool{ "env": true, "doctor": true, "context": true, + "telemetry": true, "help": true, "version": true, "completion": true, @@ -59,6 +62,7 @@ func init() { rootCmd.AddCommand(env.Command) rootCmd.AddCommand(doctor.Command) rootCmd.AddCommand(contextcmd.Command) + rootCmd.AddCommand(telemetrycmd.Command) } // isInfoCommand reports whether the invocation is for a built-in informational @@ -162,6 +166,21 @@ func executeRoot(args []string) int { // caller is providing a different args list (e.g. during tests). rootCmd.SetArgs(args[1:]) + // First-run consent prompt for telemetry. Runs only for non-info, + // non-telemetry-subcommand invocations to avoid prompting on + // `raid --help`, `raid telemetry on`, and similar. The prompt + // itself no-ops when stdin isn't a TTY or when --yes/--headless + // is set, so this is safe in CI / pipes / agent hosts. See + // telemetry.MaybePromptForConsent for the full skip matrix. + if !info && !isTelemetrySubcommand(args) { + _ = telemetry.MaybePromptForConsent(headlessFromArgs(args) || jsonModeFromArgs(args)) + } + + // Flush any pending telemetry events before exit so async sends + // don't get dropped when raid returns. The deadline is short so a + // stuck network can't drag out shutdown. 
+ defer telemetry.Flush(1500 * time.Millisecond) + if err := rootCmd.Execute(); err != nil { rErr, isStructured := errs.AsError(err) var exitErr *exec.ExitError @@ -217,6 +236,45 @@ func applyHeadlessFlag(cmd *cobra.Command, _ []string) error { return nil } +// isTelemetrySubcommand reports whether the user invoked one of the +// `raid telemetry ...` subcommands. The first-run consent prompt must +// skip these — prompting "do you want telemetry?" right before +// running `raid telemetry on` is hostile UX, and the off/status/ +// purge/preview commands need to work for users who haven't opted in. +func isTelemetrySubcommand(args []string) bool { + for _, a := range args[1:] { + if a == "--" { + return false + } + if strings.HasPrefix(a, "-") { + continue + } + return a == "telemetry" + } + return false +} + +// headlessFromArgs is the early-scan counterpart to jsonModeFromArgs +// for the headless persistent flag. The first-run prompt needs to +// know the user's headless intent before cobra has parsed flags so +// it can skip prompting in non-interactive contexts. Matches every +// flag form: `-y`, `--yes`, `--yes=true`, `--headless`, +// `--headless=true`, plus their explicit `=false` opt-outs. +func headlessFromArgs(args []string) bool { + for _, a := range args[1:] { + if a == "--" { + break + } + switch { + case a == "-y", a == "--yes", a == "--yes=true", a == "--headless", a == "--headless=true": + return true + case a == "--yes=false", a == "--headless=false": + return false + } + } + return false +} + // jsonModeFromArgs reports whether the user passed `--json` (or // `--json=true`) anywhere in args. Cobra resolves persistent flags // during Execute, but on the error path we need to know before falling diff --git a/src/cmd/telemetry/telemetry.go b/src/cmd/telemetry/telemetry.go new file mode 100644 index 0000000..1d0e956 --- /dev/null +++ b/src/cmd/telemetry/telemetry.go @@ -0,0 +1,185 @@ +// Package telemetry is the cobra surface for `raid telemetry`. 
It +// exposes on / off / status / purge / preview sub-subcommands. The +// real behavior lives in src/internal/telemetry — this file is just +// the CLI bindings. +package telemetry + +import ( + "encoding/json" + "fmt" + + libtelemetry "github.com/8bitalex/raid/src/internal/telemetry" + "github.com/8bitalex/raid/src/raid/errs" + "github.com/spf13/cobra" +) + +func init() { + Command.AddCommand(onCmd) + Command.AddCommand(offCmd) + Command.AddCommand(statusCmd) + Command.AddCommand(purgeCmd) + Command.AddCommand(previewCmd) + offCmd.Flags().String("why", "", "Optional free-text reason recorded with the opt-out event") +} + +// jsonMode mirrors the helper used by sibling subcommands (env, +// doctor): reads --json off the root's persistent flag so JSON output +// stays consistent across the binary. +func jsonMode(cmd *cobra.Command) bool { + v, _ := cmd.Root().PersistentFlags().GetBool("json") + return v +} + +// Command is the parent `raid telemetry` group. Args are validated by +// the sub-subcommands themselves so we can give a precise error +// instead of cobra's generic one. +var Command = &cobra.Command{ + Use: "telemetry", + Short: "Manage anonymous CLI telemetry (off by default)", + Long: "raid ships opt-in, anonymous CLI telemetry. Off by default; " + + "flip it on with `raid telemetry on`, see what's stored with " + + "`raid telemetry status`, preview a sample event with " + + "`raid telemetry preview`, and break continuity with " + + "`raid telemetry purge`. See https://raidcli.dev/docs/telemetry.", + Args: cobra.NoArgs, +} + +var onCmd = &cobra.Command{ + Use: "on", + Short: "Opt in to anonymous telemetry", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + if err := libtelemetry.SetEnabled(true); err != nil { + return errs.Wrap(err) + } + // Fire EventFirstRun synchronously so the opt-in is recorded + // even if raid exits right after this command. 
+ libtelemetry.CaptureSync(libtelemetry.EventFirstRun, + libtelemetry.FirstRunProps("")) + cmd.Println("Telemetry: on. Run `raid telemetry status` to see what's stored.") + return nil + }, +} + +var offCmd = &cobra.Command{ + Use: "off", + Short: "Opt out of anonymous telemetry", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + why, _ := cmd.Flags().GetString("why") + // Fire the opt-out BEFORE flipping consent off — otherwise + // IsActive() would return false and the event would be + // dropped. CaptureSync also blocks until delivery so the + // event lands even though raid exits immediately after. + libtelemetry.CaptureSync(libtelemetry.EventTelemetryOptOut, + libtelemetry.OptOutProps(why)) + if err := libtelemetry.SetEnabled(false); err != nil { + return errs.Wrap(err) + } + cmd.Println("Telemetry: off.") + return nil + }, +} + +// statusEntry is the JSON shape printed by `status --json`. Stable +// public contract — agents that script `raid telemetry status --json` +// can rely on field names. 
+type statusEntry struct { + Enabled bool `json:"enabled"` + Decided bool `json:"decided"` + DoNotTrack bool `json:"doNotTrack"` + APIKeySet bool `json:"apiKeySet"` + AnonymousID string `json:"anonymousId,omitempty"` + IDPath string `json:"idPath"` +} + +var statusCmd = &cobra.Command{ + Use: "status", + Short: "Show telemetry consent + anonymous machine ID", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + st := libtelemetry.LoadState() + entry := statusEntry{ + Enabled: st.Enabled, + Decided: st.Decided, + DoNotTrack: libtelemetry.DoNotTrackActive(), + APIKeySet: libtelemetry.HasAPIKey(), + AnonymousID: libtelemetry.LoadIDIfExists(), + IDPath: libtelemetry.IDPath(), + } + if jsonMode(cmd) { + enc := json.NewEncoder(cmd.OutOrStdout()) + enc.SetIndent("", " ") + if err := enc.Encode(entry); err != nil { + return errs.Unknown(err) + } + return nil + } + printStatus(cmd, entry) + return nil + }, +} + +func printStatus(cmd *cobra.Command, e statusEntry) { + state := "off" + if e.Enabled { + state = "on" + } + if !e.Decided { + state = "off (no decision yet — will be prompted on next interactive run)" + } + if e.DoNotTrack { + state = "off (DO_NOT_TRACK is set; overrides stored state)" + } + cmd.Println("State:", state) + if e.APIKeySet { + cmd.Println("API key: present (build-time configured)") + } else { + cmd.Println("API key: not configured for this build — events would never send") + } + if e.AnonymousID != "" { + cmd.Println("Anonymous ID:", e.AnonymousID) + } else { + cmd.Println("Anonymous ID: none (will be generated on first opt-in)") + } + cmd.Println("ID file:", e.IDPath) +} + +var purgeCmd = &cobra.Command{ + Use: "purge", + Short: "Delete the anonymous machine ID file", + Long: "Removes ~/.config/raid/telemetry-id so PostHog cannot link " + + "future events to past ones. 
Leaves the on/off state intact — " + + "run `raid telemetry off` to also disable sending.", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + if err := libtelemetry.PurgeID(); err != nil { + return errs.Unknown(err) + } + cmd.Println("Anonymous ID purged.") + return nil + }, +} + +var previewCmd = &cobra.Command{ + Use: "preview", + Short: "Print a sample telemetry payload without sending it", + Long: "Shows the full JSON body raid would post to the telemetry " + + "endpoint for a typical command_executed event. Useful to " + + "verify what telemetry would emit before opting in.", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + props := libtelemetry.CommandExecutedProps( + "build", + 3, + []string{"shell", "shell", "print"}, + 1234, + ) + payload := libtelemetry.PreviewPayload(libtelemetry.EventCommandExecuted, props) + if payload == "" { + return errs.Internal("telemetry: failed to render preview payload") + } + fmt.Fprintln(cmd.OutOrStdout(), payload) + return nil + }, +} diff --git a/src/cmd/telemetry/telemetry_test.go b/src/cmd/telemetry/telemetry_test.go new file mode 100644 index 0000000..9ccd3b3 --- /dev/null +++ b/src/cmd/telemetry/telemetry_test.go @@ -0,0 +1,212 @@ +package telemetry + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + libtelemetry "github.com/8bitalex/raid/src/internal/telemetry" + "github.com/spf13/cobra" + "github.com/spf13/viper" +) + +// setupCmdTestEnv mirrors src/internal/telemetry's setupTestEnv but +// also wires a root cobra command with the persistent --json flag so +// the subcommands' jsonMode helper has something to read. 
+func setupCmdTestEnv(t *testing.T) (*cobra.Command, *bytes.Buffer) { + t.Helper() + dir := t.TempDir() + idPath := filepath.Join(dir, "telemetry-id") + + viper.Reset() + viper.SetConfigFile(filepath.Join(dir, "config.toml")) + if f, err := os.Create(filepath.Join(dir, "config.toml")); err == nil { + f.Close() + } + + prevID := os.Getenv(libtelemetry.IDFileEnv) + prevDNT := os.Getenv(libtelemetry.DoNotTrackEnvVar) + os.Setenv(libtelemetry.IDFileEnv, idPath) + os.Unsetenv(libtelemetry.DoNotTrackEnvVar) + t.Cleanup(func() { + os.Setenv(libtelemetry.IDFileEnv, prevID) + if prevDNT == "" { + os.Unsetenv(libtelemetry.DoNotTrackEnvVar) + } else { + os.Setenv(libtelemetry.DoNotTrackEnvVar, prevDNT) + } + viper.Reset() + }) + + root := &cobra.Command{Use: "raid"} + root.PersistentFlags().Bool("json", false, "") + // Detach the subcommand from the package-level Command so its + // parent is this fresh root, not the real rootCmd. Cobra resolves + // each subcommand's parent at AddCommand time; since onCmd et al. + // are already attached to Command in init(), we re-add Command to + // our test root. + root.AddCommand(Command) + // Cobra caches the merged flag set per command; calling ResetFlags() + // after a parent change forces it to rebuild so `--json` (on the + // fresh root) is visible to the subcommand's flag parser instead + // of the stale set inherited from a previous test's root. + Command.ResetFlags() + for _, sub := range Command.Commands() { + sub.ResetFlags() + } + // Re-register the offCmd's --why flag since ResetFlags wiped it. 
+ for _, sub := range Command.Commands() { + if sub.Use == "off" { + sub.Flags().String("why", "", "Optional free-text reason recorded with the opt-out event") + } + } + + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + return root, &out +} + +func runCmd(t *testing.T, root *cobra.Command, args ...string) error { + t.Helper() + root.SetArgs(args) + return root.Execute() +} + +// --- on --- + +func TestOnCmd_persistsEnabled(t *testing.T) { + root, out := setupCmdTestEnv(t) + if err := runCmd(t, root, "telemetry", "on"); err != nil { + t.Fatalf("on: %v", err) + } + st := libtelemetry.LoadState() + if !st.Decided || !st.Enabled { + t.Errorf("state after `on` = %+v, want both true", st) + } + if !strings.Contains(out.String(), "Telemetry: on") { + t.Errorf("output should confirm: %s", out.String()) + } +} + +// --- off --- + +func TestOffCmd_persistsDisabled(t *testing.T) { + root, _ := setupCmdTestEnv(t) + // First flip on so we have a real "previously enabled" state. + if err := libtelemetry.SetEnabled(true); err != nil { + t.Fatal(err) + } + if err := runCmd(t, root, "telemetry", "off"); err != nil { + t.Fatalf("off: %v", err) + } + st := libtelemetry.LoadState() + if !st.Decided || st.Enabled { + t.Errorf("state after `off` = %+v, want decided=true enabled=false", st) + } +} + +func TestOffCmd_acceptsWhyFlag(t *testing.T) { + root, _ := setupCmdTestEnv(t) + if err := runCmd(t, root, "telemetry", "off", "--why", "ci runner"); err != nil { + t.Fatalf("off --why: %v", err) + } + if libtelemetry.LoadState().Enabled { + t.Error("--why must not change the off semantics") + } +} + +// --- status --- + +func TestStatusCmd_textOutputShowsStateAndIDPath(t *testing.T) { + root, out := setupCmdTestEnv(t) + if err := runCmd(t, root, "telemetry", "status"); err != nil { + t.Fatalf("status: %v", err) + } + got := out.String() + for _, want := range []string{"State:", "ID file:"} { + if !strings.Contains(got, want) { + t.Errorf("output missing %q: %s", want, got) + 
} + } +} + +func TestStatusCmd_jsonOutputIsParseable(t *testing.T) { + root, out := setupCmdTestEnv(t) + if err := runCmd(t, root, "--json", "telemetry", "status"); err != nil { + t.Fatalf("status --json: %v", err) + } + var parsed statusEntry + if err := json.Unmarshal(out.Bytes(), &parsed); err != nil { + t.Fatalf("JSON parse: %v\n%s", err, out.String()) + } + if parsed.Enabled { + t.Error("Enabled should default to false") + } + if parsed.Decided { + t.Error("Decided should default to false") + } + if parsed.IDPath == "" { + t.Error("IDPath should always be populated for the user to inspect") + } +} + +// --- purge --- + +func TestPurgeCmd_removesIDFile(t *testing.T) { + root, _ := setupCmdTestEnv(t) + // Force an ID to exist by setting consent on + capturing — but + // capture without an API key is a no-op, so use LoadIDIfExists's + // sister directly via SetEnabled then loadOrCreateID. The cmd + // surface doesn't expose loadOrCreateID, so we write the file + // ourselves to simulate prior opt-in. + path := libtelemetry.IDPath() + if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte("test-uuid\n"), 0600); err != nil { + t.Fatal(err) + } + + if err := runCmd(t, root, "telemetry", "purge"); err != nil { + t.Fatalf("purge: %v", err) + } + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Errorf("ID file should be gone after purge: %v", err) + } +} + +// --- preview --- + +func TestPreviewCmd_rendersPayload(t *testing.T) { + root, out := setupCmdTestEnv(t) + // Inject a fake API key so PreviewPayload exercises the redaction + // branch — without it, the preview would show "". 
+ prev := libtelemetry.APIKey + libtelemetry.APIKey = "phc_test_key_xyz" + t.Cleanup(func() { libtelemetry.APIKey = prev }) + + if err := runCmd(t, root, "telemetry", "preview"); err != nil { + t.Fatalf("preview: %v", err) + } + got := out.String() + for _, want := range []string{"event", "raid_command_executed", "command_name", "build", "redacted", "phc_"} { + if want == "redacted" { + // The redaction marker is the unicode ellipsis between + // prefix and suffix; check the suffix slice instead. + if !strings.Contains(got, "_xyz") { + t.Errorf("preview missing redacted-key suffix: %s", got) + } + continue + } + if !strings.Contains(got, want) { + t.Errorf("preview missing %q: %s", want, got) + } + } + if strings.Contains(got, "phc_test_key_xyz") { + t.Errorf("preview leaked full API key: %s", got) + } +} diff --git a/src/internal/lib/command.go b/src/internal/lib/command.go index 59e8998..2a99ccb 100644 --- a/src/internal/lib/command.go +++ b/src/internal/lib/command.go @@ -10,6 +10,7 @@ import ( liberrs "github.com/8bitalex/raid/src/internal/lib/errs" "github.com/8bitalex/raid/src/internal/sys" + "github.com/8bitalex/raid/src/internal/telemetry" ) // Command is a named, user-defined CLI command that can be invoked via 'raid '. @@ -106,6 +107,7 @@ func ExecuteCommand(name string, args []string, named map[string]string) error { startedAt := RecordRecentStart(found.Name) err := runCommand(found) RecordRecentEnd(found.Name, err, startedAt) + captureCommandTelemetry(found, err, time.Since(startedAt)) return err } @@ -145,9 +147,55 @@ func ExecuteRepoCommand(repoName, cmdName string, args []string, named map[strin startedAt := RecordRecentStart(recentName) err := runCommand(found) RecordRecentEnd(recentName, err, startedAt) + captureCommandTelemetry(found, err, time.Since(startedAt)) return err } +// captureCommandTelemetry fires the appropriate raid_command_executed +// or raid_command_failed event for the run. 
Sanitized: only the +// command's `name:` field (project-author label, not user content), +// the task-type list, the structured error code, and timing reach the +// payload. Cmd bodies, args, paths, and env values are never touched. +// +// Telemetry.Capture is a no-op when consent is off so this is safe to +// call unconditionally. The unused parameters are tolerated by the +// shared sanitizer below. +func captureCommandTelemetry(cmd Command, err error, dur time.Duration) { + durMs := dur.Milliseconds() + if err == nil { + telemetry.Capture( + telemetry.EventCommandExecuted, + telemetry.CommandExecutedProps(cmd.Name, len(cmd.Tasks), commandTaskTypes(cmd), durMs), + ) + return + } + telemetry.Capture( + telemetry.EventCommandFailed, + telemetry.CommandFailedProps(cmd.Name, errorCodeFor(err), durMs), + ) +} + +// commandTaskTypes returns the list of task-type strings used by the +// command. Type only — no cmd, path, var, or message. Order +// preserved so the per-command structure stays visible in PostHog. +func commandTaskTypes(cmd Command) []string { + out := make([]string, 0, len(cmd.Tasks)) + for _, t := range cmd.Tasks { + out = append(out, string(t.Type)) + } + return out +} + +// errorCodeFor returns the structured-error code for an error if +// available, or "UNKNOWN" otherwise. Never returns the error's +// message — that can contain user content. +func errorCodeFor(err error) string { + if rErr, ok := liberrs.AsError(err); ok { + return rErr.Code() + } + return "UNKNOWN" +} + // setCommandArgs binds positional args to RAID_ARG_N and named args/flags to // sanitised, uppercased env vars for the lifetime of a command run. 
Returns // a cleanup closure that restores any pre-existing values raid overwrote diff --git a/src/internal/lib/task_runner.go b/src/internal/lib/task_runner.go index 9b6a2c4..27ba3c3 100644 --- a/src/internal/lib/task_runner.go +++ b/src/internal/lib/task_runner.go @@ -16,6 +16,7 @@ import ( liberrs "github.com/8bitalex/raid/src/internal/lib/errs" "github.com/8bitalex/raid/src/internal/sys" + "github.com/8bitalex/raid/src/internal/telemetry" "github.com/joho/godotenv" ) @@ -189,9 +190,31 @@ func ExecuteTask(task Task) error { start := timeNowFn() err := dispatchTask(task) emitExeTime(task.Label(), timeNowFn().Sub(start)) + captureTaskTelemetry(task, err, timeNowFn().Sub(start)) return err } - return dispatchTask(task) + start := timeNowFn() + err := dispatchTask(task) + captureTaskTelemetry(task, err, timeNowFn().Sub(start)) + return err +} + +// captureTaskTelemetry fires the sampled raid_task_executed event. +// Only the task type, outcome, and duration leak — never the cmd +// body, path, URL, var name, default value, or any other content. +// Sampled at the call site to keep PostHog volume bounded for +// commands with hundreds of tasks. +// +// Sampling and Capture both fast-path when telemetry is off, so the +// per-task overhead when opted out is effectively zero. +func captureTaskTelemetry(task Task, err error, dur time.Duration) { + if !telemetry.Sampled() { + return + } + telemetry.Capture( + telemetry.EventTaskExecuted, + telemetry.TaskExecutedProps(string(task.Type), dur.Milliseconds(), err == nil), + ) } // dispatchTask is the inner switch separated from ExecuteTask so the diff --git a/src/internal/telemetry/consent.go b/src/internal/telemetry/consent.go new file mode 100644 index 0000000..e318a30 --- /dev/null +++ b/src/internal/telemetry/consent.go @@ -0,0 +1,87 @@ +package telemetry + +import ( + "os" + "strings" + + "github.com/spf13/viper" +) + +// Viper config keys. `decided` is the consent "shown the prompt and +// got an answer" marker. 
`enabled` is the actual on/off state. We +// keep them separate so the first-run prompt fires exactly once, +// even when the user opts out. +const ( + consentDecidedKey = "telemetry.decided" + consentEnabledKey = "telemetry.enabled" +) + +// DoNotTrackEnvVar is the standard cross-tool opt-out env var that +// raid honors as a hard off. See https://consoledonottrack.com/. +const DoNotTrackEnvVar = "DO_NOT_TRACK" + +// State is the user-facing consent snapshot read by `raid telemetry +// status` and by IsActive. Decided distinguishes "user has been asked +// and chose off" from "user hasn't been asked yet" — the first-run +// prompt only fires when Decided is false. +type State struct { + Decided bool + Enabled bool +} + +// LoadState reads consent from viper. Defaults: Decided=false, +// Enabled=false. Either default is safe — a fresh install or a config +// without these keys yields off until the user opts in. +func LoadState() State { + return State{ + Decided: viper.GetBool(consentDecidedKey), + Enabled: viper.GetBool(consentEnabledKey), + } +} + +// SetEnabled persists the user's consent choice. Always sets Decided +// so we don't re-prompt — a user who answered no should stay +// not-prompted until they explicitly run `raid telemetry on`. +func SetEnabled(enabled bool) error { + viper.Set(consentDecidedKey, true) + viper.Set(consentEnabledKey, enabled) + return viper.WriteConfig() +} + +// SetDecidedOff marks the user as having declined without ever being +// prompted. Used in non-interactive contexts (no TTY, --yes/--headless, +// DO_NOT_TRACK=1) so we don't keep trying to prompt later. Behaves +// identically to SetEnabled(false) but documents intent at call site. +func SetDecidedOff() error { + return SetEnabled(false) +} + +// DoNotTrackActive is the public surface for callers that need to +// surface the DO_NOT_TRACK state (e.g. `raid telemetry status`). 
+// Mirrors the internal check exactly so the printed status matches +// what IsActive() actually enforces. +func DoNotTrackActive() bool { + return isDoNotTrack() +} + +// HasAPIKey reports whether this binary was built with a PostHog API +// key injected. Used by status to tell users that a dev build will +// never emit events even when consent is on. +func HasAPIKey() bool { + return APIKey != "" +} + +// isDoNotTrack reports whether DO_NOT_TRACK is set to a truthy value. +// Honored as a hard off regardless of the persisted consent state — +// matches the published cross-tool contract. +func isDoNotTrack() bool { + v := strings.TrimSpace(os.Getenv(DoNotTrackEnvVar)) + if v == "" { + return false + } + switch strings.ToLower(v) { + case "1", "true", "yes", "y", "on": + return true + } + return false +} diff --git a/src/internal/telemetry/events.go b/src/internal/telemetry/events.go new file mode 100644 index 0000000..c282560 --- /dev/null +++ b/src/internal/telemetry/events.go @@ -0,0 +1,78 @@ +package telemetry + +// Event builders. Each function takes the raw call-site values and +// produces a properties map that's guaranteed to be free of user +// content — no `cmd:` strings, no paths, no environment values, no +// task message bodies. +// +// Tests scan the output of every builder for forbidden substrings +// (TestEventBuilders_neverLeakUserContent) so a future field added +// to one of these maps gets caught if it slips through. + +// CommandExecutedProps builds the properties map for a successful +// command run. +// +// - commandName: the command's `name:` from YAML. Treated as +// non-sensitive — it's a label the project author chose, not +// anything the end user typed in. +// - taskCount: total task entries in the command. +// - taskTypes: distinct task-type strings (Shell, Script, …). +// Types only, never the cmd body or args. +// - durationMs: wall-clock command duration in milliseconds. 
+func CommandExecutedProps(commandName string, taskCount int, taskTypes []string, durationMs int64) map[string]any { + return map[string]any{ + "command_name": commandName, + "task_count": taskCount, + "task_types": taskTypes, + "duration_ms": durationMs, + "success": true, + } +} + +// CommandFailedProps is the failure variant. errorCode is the +// structured-error code (`TASK_SHELL_FAILED`, `VERIFY_FAILED`, …) +// from #47 — never the error's message, which can contain paths or +// command bodies. +func CommandFailedProps(commandName string, errorCode string, durationMs int64) map[string]any { + return map[string]any{ + "command_name": commandName, + "error_code": errorCode, + "duration_ms": durationMs, + } +} + +// TaskExecutedProps is the per-task variant. Sampled at the call site +// so PostHog isn't flooded for commands with hundreds of tasks. Only +// the task type and outcome leak — never the cmd body, path, URL, +// var name, default value, or any other content. +func TaskExecutedProps(taskType string, durationMs int64, success bool) map[string]any { + return map[string]any{ + "task_type": taskType, + "duration_ms": durationMs, + "success": success, + } +} + +// FirstRunProps is fired exactly once, when the user accepts the +// opt-in prompt. install_method is best-effort — empty when the +// invocation doesn't expose how raid was installed (e.g. `go install`, +// custom build). +func FirstRunProps(installMethod string) map[string]any { + props := map[string]any{} + if installMethod != "" { + props["install_method"] = installMethod + } + return props +} + +// OptOutProps records the reason a user opted out, if they supplied +// one via `raid telemetry off --why "..."`. The reason is a +// free-text field the user controls — they can include whatever +// they want, but we never collect it implicitly. 
+func OptOutProps(reason string) map[string]any { + props := map[string]any{} + if reason != "" { + props["reason"] = reason + } + return props +} diff --git a/src/internal/telemetry/id.go b/src/internal/telemetry/id.go new file mode 100644 index 0000000..26624f0 --- /dev/null +++ b/src/internal/telemetry/id.go @@ -0,0 +1,162 @@ +package telemetry + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "strings" + "sync" +) + +// IDFileEnv lets tests redirect the ID file off the user's real +// $HOME without touching the global filesystem. Empty in normal use. +const IDFileEnv = "RAID_TELEMETRY_ID_FILE" + +// homeDirFn is the user-home resolver — overridable in tests so they +// don't pollute the real ~/.config/raid/. os.UserHomeDir returns the +// platform-correct path on macOS/Linux/Windows. +var homeDirFn = os.UserHomeDir + +// idMu guards the in-process cache around the ID file so concurrent +// Capture calls don't race on the read/write. The file itself is +// effectively single-writer (the user, on this machine), so we don't +// need OS-level locking. +var ( + idMu sync.Mutex + idCached string +) + +// IDPath returns the on-disk path where raid stores the anonymous +// machine ID, after $RAID_TELEMETRY_ID_FILE override and $HOME +// resolution. Exposed so `raid telemetry status` can show it. +func IDPath() string { + if override := os.Getenv(IDFileEnv); override != "" { + return override + } + home, err := homeDirFn() + if err != nil || home == "" { + return "" + } + return filepath.Join(home, ".config", "raid", "telemetry-id") +} + +// LoadIDIfExists is the public wrapper around loadIDIfExists for +// commands that need to display the ID without forcing creation +// (`raid telemetry status` and the preview path). +func LoadIDIfExists() string { + return loadIDIfExists() +} + +// loadIDIfExists reads the persisted ID without creating one. 
Returns +// empty string when the file is missing or unreadable — used by the +// preview command so `raid telemetry preview` doesn't write to disk. +func loadIDIfExists() string { + idMu.Lock() + defer idMu.Unlock() + if idCached != "" { + return idCached + } + path := IDPath() + if path == "" { + return "" + } + data, err := os.ReadFile(path) + if err != nil { + return "" + } + id := strings.TrimSpace(string(data)) + if id == "" { + return "" + } + idCached = id + return id +} + +// loadOrCreateID returns the persisted ID, generating + writing a +// fresh one if the file doesn't exist. Empty return means we couldn't +// resolve the path or persist the value — Capture treats that as "do +// nothing" rather than blocking the user's command. +func loadOrCreateID() string { + if id := loadIDIfExists(); id != "" { + return id + } + idMu.Lock() + defer idMu.Unlock() + // Recheck under the lock — another goroutine may have raced ahead. + if idCached != "" { + return idCached + } + id, err := newID() + if err != nil { + return "" + } + if err := writeID(id); err != nil { + return "" + } + idCached = id + return id +} + +// newID generates a fresh UUIDv4 from crypto/rand. We don't depend on +// any third-party UUID library to keep the deps minimal — the layout +// is the standard RFC 4122 variant-1 / version-4 form. +func newID() (string, error) { + var b [16]byte + if _, err := rand.Read(b[:]); err != nil { + return "", err + } + b[6] = (b[6] & 0x0f) | 0x40 // version 4 + b[8] = (b[8] & 0x3f) | 0x80 // RFC 4122 variant + return fmt.Sprintf("%s-%s-%s-%s-%s", + hex.EncodeToString(b[0:4]), + hex.EncodeToString(b[4:6]), + hex.EncodeToString(b[6:8]), + hex.EncodeToString(b[8:10]), + hex.EncodeToString(b[10:16]), + ), nil +} + +// writeID persists the ID to disk, creating ~/.config/raid/ if +// needed. Permissions are 0600 because this is a stable identifier +// for the user's machine — not a secret, but no reason to leave it +// world-readable either. 
+func writeID(id string) error { + path := IDPath() + if path == "" { + return fmt.Errorf("telemetry: no home directory resolvable") + } + if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { + return err + } + return os.WriteFile(path, []byte(id+"\n"), 0600) +} + +// PurgeID deletes the on-disk ID file. PostHog can't link future +// events to past ones after a purge — `raid telemetry purge` exposes +// this so users can break continuity without losing the opt-in. +// +// Returns nil when the file is already absent (purge is idempotent). +func PurgeID() error { + idMu.Lock() + defer idMu.Unlock() + idCached = "" + path := IDPath() + if path == "" { + return nil + } + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return err + } + return nil +} + +// resetIDCacheForTest clears the in-process cache. Required when a +// test points at a different IDPath via $RAID_TELEMETRY_ID_FILE — the +// cache would otherwise pin the previous test's value. +func resetIDCacheForTest() { + idMu.Lock() + defer idMu.Unlock() + idCached = "" +} diff --git a/src/internal/telemetry/prompt.go b/src/internal/telemetry/prompt.go new file mode 100644 index 0000000..1df6921 --- /dev/null +++ b/src/internal/telemetry/prompt.go @@ -0,0 +1,144 @@ +package telemetry + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" +) + +// PromptResult describes what the first-run prompt resolved to. Used +// by the caller (cmd/raid.go) to decide whether to fire the +// first_run event and to surface a short post-prompt confirmation. +type PromptResult int + +const ( + // PromptSkipped means we never showed the prompt (non-interactive + // context, DO_NOT_TRACK, already decided, no API key). Consent + // state was marked decided=off so we won't try again. + PromptSkipped PromptResult = iota + // PromptDeclined means the user explicitly chose no. + PromptDeclined + // PromptAccepted means the user explicitly chose yes. 
The caller + // should fire EventFirstRun (with install_method if known). + PromptAccepted +) + +// promptInFn is the prompt's stdin reader, indirected for tests. We +// don't reuse lib.getStdinReader because that's behind the task- +// execution mutex and creates a bufio.Reader tied to os.Stdin — +// which we don't want to lock here. +var promptInFn = func() io.Reader { return os.Stdin } + +// promptOutFn is where the prompt's text lands. Stderr by default so +// stdout stays clean for piped consumers (`raid context --json | +// jq`). +var promptOutFn = func() io.Writer { return os.Stderr } + +// isInteractiveFn reports whether stdin is a TTY. Tests stub this so +// they don't depend on the runner's stdin state. +var isInteractiveFn = func() bool { + stat, err := os.Stdin.Stat() + if err != nil { + return false + } + return stat.Mode()&os.ModeCharDevice != 0 +} + +// MaybePromptForConsent runs the first-run consent flow when +// appropriate. Returns the resolved outcome so the caller can fire +// follow-up events. +// +// Skip conditions (each → PromptSkipped + consent persisted off): +// - DO_NOT_TRACK env var set +// - Consent already decided in a prior invocation +// - Build has no API key (telemetry is dead code anyway) +// - Stdin isn't a TTY (CI, pipes, agent hosts) +// - skipInteractive is true (raid invoked with --yes/--headless, +// or for an info command like `--help`, or for a subcommand +// that itself manages telemetry like `raid telemetry on`) +// +// The skip-and-persist behavior matches the user-confirmed scope: +// non-interactive contexts get the same "off forever, never +// prompt again" treatment so a later interactive run also stays +// quiet unless the user runs `raid telemetry on` explicitly. 
+func MaybePromptForConsent(skipInteractive bool) PromptResult { + if APIKey == "" { + return PromptSkipped + } + if isDoNotTrack() { + _ = SetDecidedOff() + return PromptSkipped + } + if LoadState().Decided { + return PromptSkipped + } + if skipInteractive || !isInteractiveFn() { + _ = SetDecidedOff() + return PromptSkipped + } + + // One bufio reader threaded through the explainer loop so a fresh + // bufio per attempt can't strand input buffered after the first + // newline. + reader := bufio.NewReader(promptInFn()) + for { + answer := readPromptAnswer(reader) + switch answer { + case "y", "yes": + if err := SetEnabled(true); err != nil { + return PromptSkipped + } + return PromptAccepted + case "?": + fmt.Fprint(promptOutFn(), explainerText()) + continue + default: + _ = SetEnabled(false) + return PromptDeclined + } + } +} + +// readPromptAnswer renders the prompt and reads a single line of +// input. Returns the trimmed, lowercased answer; empty string means +// the user just hit enter (treated as the capital-N default). +func readPromptAnswer(reader *bufio.Reader) string { + fmt.Fprint(promptOutFn(), promptText()) + line, err := reader.ReadString('\n') + if err != nil && line == "" { + return "" + } + return strings.TrimSpace(strings.ToLower(line)) +} + +func promptText() string { + return "" + + "raid would like to send anonymous usage telemetry to help prioritize features.\n" + + "We never collect: file paths, command contents, env values, or anything that could identify you.\n" + + "See: https://raidcli.dev/docs/telemetry\n" + + "\n" + + " [y] yes, send telemetry [N] no, leave it off [?] 
what's collected\n" + + "> " +} + +func explainerText() string { + return "" + + "\n" + + "raid would send:\n" + + " - which built-in commands you run (install, env, doctor, …)\n" + + " - which custom-command names you run (the name only — never the cmd body)\n" + + " - which task types ran (Shell, Script, Wait, …) — never the cmd body, paths, or env values\n" + + " - command success/failure + structured error code (e.g. TASK_SHELL_FAILED)\n" + + " - raid version, OS, architecture\n" + + " - an anonymous machine ID (UUIDv4 — purgeable via `raid telemetry purge`)\n" + + "\n" + + "raid never collects:\n" + + " - cmd bodies, paths, URLs, env values, or anything else you typed\n" + + " - stdout/stderr of your tasks\n" + + " - your username, hostname, IP, or any system identifier beyond OS+arch\n" + + "\n" + + "Source: https://github.com/8bitalex/raid/tree/main/src/internal/telemetry\n" + + "\n" +} diff --git a/src/internal/telemetry/sampling.go b/src/internal/telemetry/sampling.go new file mode 100644 index 0000000..ace38ea --- /dev/null +++ b/src/internal/telemetry/sampling.go @@ -0,0 +1,31 @@ +package telemetry + +import "math/rand/v2" + +// TaskSampleRate controls how many `raid_task_executed` events fire, +// as a fraction in [0, 1]. Tasks fire on average rate% of the time — +// the issue's "sampled to avoid flooding" requirement. Tests pin this +// to 1 (deterministic capture) or 0 (deterministic drop). +// +// Default 0.1 keeps the per-invocation event volume bounded for +// commands with hundreds of tasks while still giving statistically +// useful samples at fleet scale. +var TaskSampleRate = 0.1 + +// rngFn returns a uniform [0, 1) sample. Indirected so tests can +// force-deterministic Sampled() behavior without seeding math/rand +// globally (which would race against any other rand consumers). +var rngFn = func() float64 { return rand.Float64() } + +// Sampled reports whether a task event should be captured this time +// per TaskSampleRate. 
Caller fires Capture only when this returns +// true. +func Sampled() bool { + if TaskSampleRate <= 0 { + return false + } + if TaskSampleRate >= 1 { + return true + } + return rngFn() < TaskSampleRate +} diff --git a/src/internal/telemetry/telemetry.go b/src/internal/telemetry/telemetry.go new file mode 100644 index 0000000..8ab93f6 --- /dev/null +++ b/src/internal/telemetry/telemetry.go @@ -0,0 +1,226 @@ +// Package telemetry implements raid's opt-in, anonymous CLI telemetry +// pipeline. Issue #80 spec'd the scope: measure adoption + usage, never +// capture user content, default off, easy to inspect and disable. +// +// Lifecycle: +// +// 1. raid invokes telemetry.Capture(name, props) at hook points +// (ExecuteCommand, ExecuteTask, first-run prompt accepted, etc.). +// 2. If consent isn't on, Capture is a no-op. Same for missing API +// key (dev builds) and DO_NOT_TRACK=1. +// 3. Otherwise the event is enriched with anonymous machine ID + +// version/os/arch and posted asynchronously to PostHog. The HTTP +// call never blocks the caller — failures drop silently so a +// network blip can't break a raid command. +// 4. executeRoot calls Flush at the end of every invocation so +// in-flight events finish (or get dropped on timeout). +// +// The package is read-only side-effect-free if the user never opts in: +// no goroutines spawned, no files written, no HTTP calls. +package telemetry + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "runtime" + "sync" + "time" +) + +// APIKey is the PostHog publishable project key. Empty by default; +// injected at release time via `-ldflags -X +// github.com/8bitalex/raid/src/internal/telemetry.APIKey=phc_...`. +// When empty, Capture is a no-op — that's how dev builds and `go run` +// stay silent. +var APIKey string + +// CaptureEndpoint is the PostHog capture URL. Overridable for tests so +// they can point at a httptest.Server without hitting the real endpoint. 
+var CaptureEndpoint = "https://us.i.posthog.com/i/v0/e/" + +// Event names. Stable contract — these are the labels that show up in +// PostHog and on the public telemetry-disclosure page. +const ( + EventFirstRun = "raid_first_run" + EventCommandExecuted = "raid_command_executed" + EventCommandFailed = "raid_command_failed" + EventTaskExecuted = "raid_task_executed" + EventTelemetryOptOut = "raid_telemetry_opt_out" +) + +// Event is what Capture queues for delivery. Properties must already +// be sanitized — telemetry doesn't re-scan them at send time. +type Event struct { + Name string `json:"event"` + Properties map[string]any `json:"properties"` +} + +// httpClient is the package's outbound HTTP client. Short timeout so a +// hung network can't drag out raid shutdown; tests swap it for a +// recording transport. +var httpClient = &http.Client{Timeout: 2 * time.Second} + +// inflight tracks fire-and-forget goroutines so Flush can wait on them +// at process exit. Without this, raid would exit before the POST +// returns and the event would be dropped. +var inflight sync.WaitGroup + +// Capture is the public hook every event fires through. It returns +// immediately; the network call (if any) runs on a goroutine. Capture +// is safe to call even when telemetry isn't active — it just no-ops. +// +// Callers must not pass sensitive content in properties. Sanitization +// is enforced upstream by the event builders, not here. +func Capture(name string, properties map[string]any) { + if !IsActive() { + return + } + id := loadOrCreateID() + if id == "" { + return + } + full := enrichProperties(id, properties) + inflight.Add(1) + go func() { + defer inflight.Done() + send(Event{Name: name, Properties: full}) + }() +} + +// CaptureSync is Capture's blocking variant. 
Used by `raid telemetry +// off` so the opt-out event is guaranteed to land before the process +// exits — we don't want users to flip telemetry off and have the +// opt-out event silently dropped by Flush's timeout. +func CaptureSync(name string, properties map[string]any) { + if !IsActive() { + return + } + id := loadOrCreateID() + if id == "" { + return + } + send(Event{Name: name, Properties: enrichProperties(id, properties)}) +} + +// Flush waits up to timeout for in-flight events to finish. Called at +// the end of executeRoot so async events sent during a command run +// don't get dropped when raid exits. +func Flush(timeout time.Duration) { + done := make(chan struct{}) + go func() { + inflight.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(timeout): + } +} + +// IsActive reports whether Capture will actually send. Combines the +// build-time API key, the consent state, and DO_NOT_TRACK. +// +// This is the gate every telemetry path checks before doing real +// work — keep the test surface narrow by going through here, not by +// reading individual flags. +func IsActive() bool { + if APIKey == "" { + return false + } + if isDoNotTrack() { + return false + } + st := LoadState() + return st.Decided && st.Enabled +} + +// enrichProperties merges the common envelope (anonymous ID, raid +// version, os/arch) into the event's specific properties. PostHog +// expects $properties.distinct_id at the top level. +func enrichProperties(id string, properties map[string]any) map[string]any { + out := make(map[string]any, len(properties)+6) + for k, v := range properties { + out[k] = v + } + out["distinct_id"] = id + out["raid_version"] = raidVersion() + out["os"] = runtime.GOOS + out["arch"] = runtime.GOARCH + return out +} + +// raidVersionFn is the source of the raid version baked into every +// event. Override in tests to avoid pulling the resources package. 
+var raidVersionFn = func() string { + return raidVersionFromResources() +} + +func raidVersion() string { + return raidVersionFn() +} + +// send posts a single event to the PostHog capture endpoint. Errors +// are intentionally swallowed: telemetry must never break raid, so +// any network / encoding failure is a silent drop. +func send(evt Event) { + body, err := json.Marshal(map[string]any{ + "api_key": APIKey, + "event": evt.Name, + "properties": evt.Properties, + "timestamp": time.Now().UTC().Format(time.RFC3339Nano), + }) + if err != nil { + return + } + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, CaptureEndpoint, bytes.NewReader(body)) + if err != nil { + return + } + req.Header.Set("Content-Type", "application/json") + resp, err := httpClient.Do(req) + if err != nil { + return + } + resp.Body.Close() +} + +// PreviewPayload returns the JSON body that would be sent for the +// given event, without sending it. Used by `raid telemetry preview` +// so users can see exactly what raid emits before opting in. +// +// Pretty-prints the JSON for human inspection. Returns an empty +// string when no anonymous ID exists yet (the user hasn't opted in +// and we don't want to create the id file just for a preview). +func PreviewPayload(name string, properties map[string]any) string { + id := loadIDIfExists() + if id == "" { + id = "" + } + body, err := json.MarshalIndent(map[string]any{ + "api_key": apiKeyForPreview(), + "event": name, + "properties": enrichProperties(id, properties), + "timestamp": "", + }, "", " ") + if err != nil { + return "" + } + return string(body) +} + +// apiKeyForPreview masks the real key in preview output so a user +// running `raid telemetry preview` doesn't accidentally copy/paste +// the key when sharing the payload. 
+func apiKeyForPreview() string { + if APIKey == "" { + return "" + } + if len(APIKey) <= 8 { + return "" + } + return APIKey[:4] + "…" + APIKey[len(APIKey)-4:] +} diff --git a/src/internal/telemetry/telemetry_test.go b/src/internal/telemetry/telemetry_test.go new file mode 100644 index 0000000..d827210 --- /dev/null +++ b/src/internal/telemetry/telemetry_test.go @@ -0,0 +1,509 @@ +package telemetry + +import ( + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/spf13/viper" +) + +// setupTestEnv hermetically isolates each test from on-disk state and +// global config. Resets viper, points the ID file at a tempdir, +// resets the in-process ID cache, restores APIKey + endpoint + +// sample rate + DO_NOT_TRACK to their defaults on cleanup. Tests +// should call this first so they don't leak state. +func setupTestEnv(t *testing.T) { + t.Helper() + dir := t.TempDir() + idPath := filepath.Join(dir, "telemetry-id") + + // Reset viper to an isolated config in the tempdir. + prevConfig := viper.ConfigFileUsed() + viper.Reset() + viper.SetConfigFile(filepath.Join(dir, "config.toml")) + if f, err := os.Create(filepath.Join(dir, "config.toml")); err == nil { + f.Close() + } + + // Capture and restore env / package vars. 
+ prevID := os.Getenv(IDFileEnv) + prevDNT := os.Getenv(DoNotTrackEnvVar) + prevAPIKey := APIKey + prevEndpoint := CaptureEndpoint + prevRate := TaskSampleRate + prevRng := rngFn + prevHomeDirFn := homeDirFn + prevIsInteractive := isInteractiveFn + + os.Setenv(IDFileEnv, idPath) + os.Unsetenv(DoNotTrackEnvVar) + resetIDCacheForTest() + APIKey = "phc_test" + CaptureEndpoint = "" // tests must set this before Capture if they expect sends + isInteractiveFn = func() bool { return false } + + t.Cleanup(func() { + os.Setenv(IDFileEnv, prevID) + if prevDNT == "" { + os.Unsetenv(DoNotTrackEnvVar) + } else { + os.Setenv(DoNotTrackEnvVar, prevDNT) + } + APIKey = prevAPIKey + CaptureEndpoint = prevEndpoint + TaskSampleRate = prevRate + rngFn = prevRng + homeDirFn = prevHomeDirFn + isInteractiveFn = prevIsInteractive + resetIDCacheForTest() + viper.Reset() + _ = prevConfig + }) +} + +// --- IsActive --- + +func TestIsActive_offByDefault(t *testing.T) { + setupTestEnv(t) + if IsActive() { + t.Error("IsActive should be false on a fresh config") + } +} + +func TestIsActive_offWhenAPIKeyEmpty(t *testing.T) { + setupTestEnv(t) + APIKey = "" + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + if IsActive() { + t.Error("IsActive should be false when APIKey is empty regardless of consent") + } +} + +func TestIsActive_offWhenDoNotTrack(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + os.Setenv(DoNotTrackEnvVar, "1") + if IsActive() { + t.Error("IsActive should be false when DO_NOT_TRACK=1") + } +} + +func TestIsActive_onWhenAllConditionsMet(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + if !IsActive() { + t.Error("IsActive should be true with API key + decided + enabled + no DO_NOT_TRACK") + } +} + +// --- Capture: no-network when opted out --- + +func TestCapture_optedOutMakesZeroNetworkCalls(t *testing.T) { + setupTestEnv(t) + var hits int32 + srv := 
httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { + atomic.AddInt32(&hits, 1) + })) + defer srv.Close() + CaptureEndpoint = srv.URL + + // Default state: undecided, disabled — Capture must no-op. + Capture(EventCommandExecuted, CommandExecutedProps("test", 1, []string{"shell"}, 10)) + Capture(EventTaskExecuted, TaskExecutedProps("shell", 5, true)) + Flush(500 * time.Millisecond) + + if got := atomic.LoadInt32(&hits); got != 0 { + t.Errorf("opted-out Capture made %d network calls, want 0", got) + } +} + +func TestCapture_doNotTrackMakesZeroNetworkCalls(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + os.Setenv(DoNotTrackEnvVar, "1") + + var hits int32 + srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { + atomic.AddInt32(&hits, 1) + })) + defer srv.Close() + CaptureEndpoint = srv.URL + + Capture(EventCommandExecuted, CommandExecutedProps("test", 1, []string{"shell"}, 10)) + Flush(500 * time.Millisecond) + + if got := atomic.LoadInt32(&hits); got != 0 { + t.Errorf("DO_NOT_TRACK Capture made %d network calls, want 0", got) + } +} + +// --- Capture: sends + sanitization --- + +func TestCapture_sendsWhenActive(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + + var ( + hits int32 + body []byte + ) + srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&hits, 1) + body, _ = io.ReadAll(r.Body) + })) + defer srv.Close() + CaptureEndpoint = srv.URL + + Capture(EventCommandExecuted, CommandExecutedProps("build", 2, []string{"shell", "print"}, 1234)) + Flush(2 * time.Second) + + if got := atomic.LoadInt32(&hits); got != 1 { + t.Errorf("Capture sent %d events, want 1", got) + } + var parsed map[string]any + if err := json.Unmarshal(body, &parsed); err != nil { + t.Fatalf("body not JSON: %v", err) + } + if got, _ := parsed["event"].(string); got != 
EventCommandExecuted { + t.Errorf("event = %q, want %q", got, EventCommandExecuted) + } + props, _ := parsed["properties"].(map[string]any) + if props == nil { + t.Fatal("properties missing") + } + for _, key := range []string{"distinct_id", "raid_version", "os", "arch", "command_name", "task_count", "task_types", "duration_ms"} { + if _, ok := props[key]; !ok { + t.Errorf("properties missing %q: %v", key, props) + } + } +} + +// --- Event builders: no user content leaks --- + +func TestEventBuilders_neverLeakUserContent(t *testing.T) { + // Each builder must produce a property map that doesn't contain + // the forbidden substrings — even if a future field accidentally + // captures one. Builders take the call-site values; the sanitizer + // is "don't put them in the map" rather than "scrub them out", so + // a typo in the builder body would surface here. + forbidden := []string{ + "rm -rf /", // example cmd body + "/Users/secret", // example path + "SECRET_TOKEN", // example env name + "sk-live-", // example secret prefix + } + cases := []struct { + name string + props map[string]any + }{ + {"CommandExecuted", CommandExecutedProps("rm -rf /", 1, []string{"/Users/secret"}, 1)}, + {"CommandFailed", CommandFailedProps("rm -rf /", "SECRET_TOKEN", 1)}, + {"TaskExecuted", TaskExecutedProps("sk-live-shell", 1, false)}, + {"FirstRun", FirstRunProps("rm -rf /")}, + {"OptOut", OptOutProps("/Users/secret")}, + } + // The above intentionally feeds the builders forbidden values + // in their permitted slots (command_name, task_type, etc.) to + // confirm we treat those as labels, not content. The assertion + // flips: command_name + task_type are *expected* to round-trip + // (they're project-author labels), but no extra fields should + // appear that contain the forbidden substrings outside those + // permitted slots. 
+ for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + permitted := map[string]bool{ + "command_name": true, + "task_type": true, + "task_types": true, + "error_code": true, + "install_method": true, + "reason": true, + } + for k, v := range c.props { + if permitted[k] { + continue + } + s, ok := v.(string) + if !ok { + continue + } + for _, f := range forbidden { + if strings.Contains(s, f) { + t.Errorf("property %q contains forbidden substring %q: %q", k, f, s) + } + } + } + }) + } +} + +// --- Anonymous ID --- + +func TestLoadOrCreateID_persistsAcrossCalls(t *testing.T) { + setupTestEnv(t) + first := loadOrCreateID() + if first == "" { + t.Fatal("first call returned empty ID") + } + resetIDCacheForTest() + second := loadOrCreateID() + if first != second { + t.Errorf("ID changed across calls: %q vs %q", first, second) + } +} + +func TestLoadOrCreateID_formatIsUUIDv4(t *testing.T) { + setupTestEnv(t) + id := loadOrCreateID() + if len(id) != 36 { + t.Fatalf("ID length = %d, want 36 (UUID): %q", len(id), id) + } + // Version 4 nibble at index 14. + if id[14] != '4' { + t.Errorf("version nibble = %c, want 4: %s", id[14], id) + } + // Variant bits 10x at index 19 → first char of group 4 is 8/9/a/b. 
+ switch id[19] { + case '8', '9', 'a', 'b', 'A', 'B': + default: + t.Errorf("variant nibble = %c, want one of 8/9/a/b: %s", id[19], id) + } +} + +func TestPurgeID_removesFile(t *testing.T) { + setupTestEnv(t) + id := loadOrCreateID() + if id == "" { + t.Fatal("ID not created") + } + if _, err := os.Stat(IDPath()); err != nil { + t.Fatalf("ID file should exist: %v", err) + } + if err := PurgeID(); err != nil { + t.Fatalf("Purge: %v", err) + } + if _, err := os.Stat(IDPath()); !os.IsNotExist(err) { + t.Errorf("ID file should be gone: %v", err) + } +} + +func TestPurgeID_idempotent(t *testing.T) { + setupTestEnv(t) + if err := PurgeID(); err != nil { + t.Errorf("first Purge of non-existent file should not error: %v", err) + } + if err := PurgeID(); err != nil { + t.Errorf("second Purge should be idempotent: %v", err) + } +} + +// --- Sampling --- + +func TestSampled_rateZeroNeverFires(t *testing.T) { + setupTestEnv(t) + TaskSampleRate = 0 + for i := 0; i < 100; i++ { + if Sampled() { + t.Fatal("Sampled() returned true with rate=0") + } + } +} + +func TestSampled_rateOneAlwaysFires(t *testing.T) { + setupTestEnv(t) + TaskSampleRate = 1 + for i := 0; i < 100; i++ { + if !Sampled() { + t.Fatal("Sampled() returned false with rate=1") + } + } +} + +func TestSampled_intermediateRateUsesRNG(t *testing.T) { + setupTestEnv(t) + TaskSampleRate = 0.5 + // Force-deterministic: alternating 0.0 / 0.9 means exactly half + // the samples (the ones below 0.5) should fire. 
+ var i int + rngFn = func() float64 { + v := []float64{0.1, 0.9}[i%2] + i++ + return v + } + hits := 0 + for j := 0; j < 10; j++ { + if Sampled() { + hits++ + } + } + if hits != 5 { + t.Errorf("hits = %d, want 5 (alternating < 0.5)", hits) + } +} + +// --- Preview --- + +func TestPreviewPayload_redactsAPIKey(t *testing.T) { + setupTestEnv(t) + APIKey = "phc_supersecretkey12345" + payload := PreviewPayload(EventCommandExecuted, CommandExecutedProps("build", 1, []string{"shell"}, 10)) + if payload == "" { + t.Fatal("preview empty") + } + if strings.Contains(payload, APIKey) { + t.Errorf("preview leaked full API key: %s", payload) + } + if !strings.Contains(payload, "phc_") || !strings.Contains(payload, "2345") { + t.Errorf("preview should show prefix + suffix of redacted key: %s", payload) + } +} + +// --- Prompt --- + +func TestMaybePromptForConsent_skipsWhenAPIKeyEmpty(t *testing.T) { + setupTestEnv(t) + APIKey = "" + got := MaybePromptForConsent(false) + if got != PromptSkipped { + t.Errorf("outcome = %v, want PromptSkipped", got) + } + if LoadState().Decided { + t.Error("Decided should stay false when APIKey is empty (telemetry is dead code)") + } +} + +func TestMaybePromptForConsent_skipsAndPersistsOffOnNonTTY(t *testing.T) { + setupTestEnv(t) + isInteractiveFn = func() bool { return false } + got := MaybePromptForConsent(false) + if got != PromptSkipped { + t.Errorf("outcome = %v, want PromptSkipped", got) + } + if !LoadState().Decided { + t.Error("non-TTY skip should persist Decided=true so we don't re-prompt") + } + if LoadState().Enabled { + t.Error("non-TTY skip should leave Enabled=false") + } +} + +func TestMaybePromptForConsent_skipsAndPersistsOffOnHeadless(t *testing.T) { + setupTestEnv(t) + isInteractiveFn = func() bool { return true } + got := MaybePromptForConsent(true) // skipInteractive=true (e.g. 
--yes) + if got != PromptSkipped { + t.Errorf("outcome = %v, want PromptSkipped", got) + } + if !LoadState().Decided { + t.Error("headless skip should persist Decided=true") + } +} + +func TestMaybePromptForConsent_skipsWhenDoNotTrack(t *testing.T) { + setupTestEnv(t) + isInteractiveFn = func() bool { return true } + os.Setenv(DoNotTrackEnvVar, "1") + got := MaybePromptForConsent(false) + if got != PromptSkipped { + t.Errorf("outcome = %v, want PromptSkipped", got) + } +} + +func TestMaybePromptForConsent_skipsWhenAlreadyDecided(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(false); err != nil { + t.Fatal(err) + } + isInteractiveFn = func() bool { return true } + got := MaybePromptForConsent(false) + if got != PromptSkipped { + t.Errorf("outcome = %v, want PromptSkipped (already decided)", got) + } +} + +func TestMaybePromptForConsent_acceptsOnYes(t *testing.T) { + setupTestEnv(t) + isInteractiveFn = func() bool { return true } + r := strings.NewReader("y\n") + promptInFn = func() io.Reader { return r } + promptOutFn = func() io.Writer { return io.Discard } + got := MaybePromptForConsent(false) + if got != PromptAccepted { + t.Errorf("outcome = %v, want PromptAccepted", got) + } + st := LoadState() + if !st.Decided || !st.Enabled { + t.Errorf("state = %+v, want both true", st) + } +} + +func TestMaybePromptForConsent_declinesOnEmpty(t *testing.T) { + setupTestEnv(t) + isInteractiveFn = func() bool { return true } + r := strings.NewReader("\n") + promptInFn = func() io.Reader { return r } + promptOutFn = func() io.Writer { return io.Discard } + got := MaybePromptForConsent(false) + if got != PromptDeclined { + t.Errorf("outcome = %v, want PromptDeclined (capital-N default)", got) + } + st := LoadState() + if !st.Decided { + t.Error("decline should still persist Decided=true") + } + if st.Enabled { + t.Error("decline should leave Enabled=false") + } +} + +func TestMaybePromptForConsent_explainerThenAccept(t *testing.T) { + setupTestEnv(t) + isInteractiveFn 
= func() bool { return true } + // "?" then "y" on the next round. Reader is captured once so the + // recursion sees the remaining input after the first read drains "?". + r := strings.NewReader("?\ny\n") + promptInFn = func() io.Reader { return r } + promptOutFn = func() io.Writer { return io.Discard } + got := MaybePromptForConsent(false) + if got != PromptAccepted { + t.Errorf("outcome = %v, want PromptAccepted after explainer", got) + } +} + +// --- Consent state --- + +func TestSetEnabled_persistsBothKeys(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + st := LoadState() + if !st.Decided || !st.Enabled { + t.Errorf("state = %+v, want both true", st) + } + if err := SetEnabled(false); err != nil { + t.Fatal(err) + } + st = LoadState() + if !st.Decided || st.Enabled { + t.Errorf("state after off = %+v, want decided=true enabled=false", st) + } +} diff --git a/src/internal/telemetry/version.go b/src/internal/telemetry/version.go new file mode 100644 index 0000000..33646dc --- /dev/null +++ b/src/internal/telemetry/version.go @@ -0,0 +1,16 @@ +package telemetry + +import "github.com/8bitalex/raid/src/resources" + +// raidVersionFromResources pulls the version from the embedded +// app.properties so every event reports the binary's actual version. +// Failures fall back to empty string — events still send, the +// `raid_version` field is just absent. Telemetry never blocks a +// command on a bad lookup. +func raidVersionFromResources() string { + v, err := resources.GetProperty(resources.PropertyVersion) + if err != nil { + return "" + } + return v +} diff --git a/src/resources/app.properties b/src/resources/app.properties index 2a74955..b9914fc 100644 --- a/src/resources/app.properties +++ b/src/resources/app.properties @@ -1,2 +1,2 @@ -version=0.15.0-beta +version=0.16.0-beta environment=development From 0309bcdd224ca6d411b927505bac8f89dc733b9a Mon Sep 17 00:00:00 2001 From: "Mr. 
Meeseeks" Date: Wed, 13 May 2026 21:57:27 -0700 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20address=20Copilot=20review=20?= =?UTF-8?q?=E2=80=94=20telemetry=20consent=20+=20ID=20race?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fire raid_first_run after a prompt-based opt-in (previously only the explicit `raid telemetry on` path produced the adoption event). - Honor RAID_HEADLESS=1 when deciding whether to skip the consent prompt, matching how the headless flag itself is treated. - isTelemetrySubcommand now accounts for value-taking root flags (--config / -c), so `raid --config telemetry install` resolves to the `install` subcommand rather than misreading the config path as the telemetry subcommand. - headlessFromArgs walks the full arg list and resolves to the final value, mirroring pflag's last-value-wins semantics. - loadOrCreateID writes the ID with O_CREATE|O_EXCL so two concurrent first-run raid invocations can't generate competing UUIDs; the loser re-reads the winner's value. 
Co-Authored-By: Copilot --- llms.txt | 2 +- site/docs/references/commands.mdx | 2 +- site/docs/telemetry.mdx | 4 +- site/docs/usage/custom.mdx | 2 +- src/cmd/raid.go | 49 ++++++++++++++++--- src/cmd/telemetry/telemetry.go | 5 +- src/internal/lib/task_runner.go | 5 +- src/internal/telemetry/events.go | 6 ++- src/internal/telemetry/id.go | 60 ++++++++++++++++++------ src/internal/telemetry/prompt.go | 7 ++- src/internal/telemetry/sampling.go | 6 ++- src/internal/telemetry/telemetry.go | 35 +++++++++----- src/internal/telemetry/telemetry_test.go | 4 ++ src/internal/telemetry/version.go | 7 +-- 14 files changed, 144 insertions(+), 50 deletions(-) diff --git a/llms.txt b/llms.txt index 2504abe..9e95f00 100644 --- a/llms.txt +++ b/llms.txt @@ -33,7 +33,7 @@ Raid is written in Go, distributed as a single self-contained binary, and publis - [Custom commands](https://raidcli.dev/docs/usage/custom): Define and invoke `raid ` team workflows - [raid doctor](https://raidcli.dev/docs/usage/doctor): Diagnose profile and repo configuration issues - [raid root command](https://raidcli.dev/docs/usage/raid): Global flags and top-level invocation, including [headless mode](https://raidcli.dev/docs/usage/raid#headless-mode) (`-y` / `--yes` / `--headless` / `RAID_HEADLESS=1`) that auto-resolves Confirm and Prompt tasks for CI / agent runs -- [raid telemetry](https://raidcli.dev/docs/telemetry): Opt-in anonymous CLI telemetry. Off by default, first-run consent prompt, `on` / `off` / `status` / `purge` / `preview` subcommands, `DO_NOT_TRACK=1` honored, no PII or task content ever collected +- [raid telemetry](https://raidcli.dev/docs/telemetry): Opt-in anonymous CLI telemetry. 
Off by default, first-run consent prompt, `on` / `off` / `status` / `purge` / `preview` subcommands, `DO_NOT_TRACK=1` honored, no PII or task content collected implicitly (the one explicit exception is `raid telemetry off --why "..."`, which records the user-provided opt-out reason on the opt-out event) - [raid context](https://raidcli.dev/docs/usage/context): Snapshot the active workspace, or run `raid context serve` as an MCP server (stdio) exposing profile, env, repos, commands, recent, and live vars as resources, plus the canonical raid agent toolkit as tools ## Reference diff --git a/site/docs/references/commands.mdx b/site/docs/references/commands.mdx index 0c34fe1..2f37f11 100644 --- a/site/docs/references/commands.mdx +++ b/site/docs/references/commands.mdx @@ -131,7 +131,7 @@ raid test-all Custom commands are defined in `commands` sections of the profile or in individual repository `raid.yaml` files. Run `raid --help` to see all available commands. -Custom command names cannot shadow built-in or reserved names (`profile`, `install`, `env`, `doctor`, `context`, `help`, `version`, `completion`). +Custom command names cannot shadow built-in or reserved names (`profile`, `install`, `env`, `doctor`, `context`, `telemetry`, `help`, `version`, `completion`). To run a command from a specific repository (e.g. when profiles shadow a repo command with the same name): diff --git a/site/docs/telemetry.mdx b/site/docs/telemetry.mdx index 7ba2d17..47fb750 100644 --- a/site/docs/telemetry.mdx +++ b/site/docs/telemetry.mdx @@ -9,7 +9,7 @@ description: What raid's opt-in anonymous CLI telemetry collects, what it never raid ships an **opt-in, anonymous** CLI telemetry pipeline so the team can prioritize features against real usage signal — which task types matter, which commands fail, which features go unused — without ever capturing what you actually run. 
-**Off by default.** A fresh install never sends anything until you explicitly run `raid telemetry on` (or accept the first-run prompt). When opted out, raid makes zero network requests during normal use, and there's an integration test that pins this contract. +**Off by default.** A fresh install never sends anything to the telemetry endpoint until you explicitly run `raid telemetry on` (or accept the first-run prompt). When opted out, raid makes zero telemetry / PostHog requests, and there's an integration test that pins this contract. (raid still performs an unrelated GitHub release-version check on normal invocations regardless of telemetry state — that request is independent of this pipeline.) ## What raid collects @@ -27,7 +27,7 @@ raid ships an **opt-in, anonymous** CLI telemetry pipeline so the team can prior - **Command bodies.** `cmd:`, `path:`, `runner:`, `src:`, `dest:`, `url:` — never sent. - **Variable values.** Anything set by a `Set` task or passed in env vars. -- **Argument values.** `RAID_ARG_*`, declared args, declared flag values. +- **Argument values.** `RAID_ARG_*`, declared args, declared flag values. The one exception is the explicit opt-out reason: `raid telemetry off --why "..."` sends the free-text string you typed as the `reason` property on `raid_telemetry_opt_out` — and only on that event. raid never collects argument or flag values implicitly. - **Stdout / stderr.** Task output is never captured by the telemetry layer. - **Identifiers.** Username, hostname, IP, MAC, OS version beyond `darwin`/`linux`/`windows`, terminal emulator — none. - **File paths beyond their kind.** `task_types: ["shell", "shell", "print"]` is fine; the actual paths are not. diff --git a/site/docs/usage/custom.mdx b/site/docs/usage/custom.mdx index f943e7f..c6dc3e7 100644 --- a/site/docs/usage/custom.mdx +++ b/site/docs/usage/custom.mdx @@ -152,7 +152,7 @@ Each repository that defines commands appears as a subcommand in `raid --help`. 
## Constraints -Custom command names cannot shadow reserved built-in CLI names: `profile`, `install`, `env`, `doctor`, `context`, `help`, `version`, `completion`. +Custom command names cannot shadow reserved built-in CLI names: `profile`, `install`, `env`, `doctor`, `context`, `telemetry`, `help`, `version`, `completion`. ## Running tasks in parallel diff --git a/src/cmd/raid.go b/src/cmd/raid.go index 9db78d5..4c34529 100644 --- a/src/cmd/raid.go +++ b/src/cmd/raid.go @@ -169,11 +169,22 @@ func executeRoot(args []string) int { // First-run consent prompt for telemetry. Runs only for non-info, // non-telemetry-subcommand invocations to avoid prompting on // `raid --help`, `raid telemetry on`, and similar. The prompt - // itself no-ops when stdin isn't a TTY or when --yes/--headless - // is set, so this is safe in CI / pipes / agent hosts. See - // telemetry.MaybePromptForConsent for the full skip matrix. + // itself no-ops when stdin isn't a TTY, when --yes/--headless is + // set, when --json is set, or when RAID_HEADLESS=1 in the env + // (CI / agent-host opt-in path), so it's safe in non-interactive + // contexts. See telemetry.MaybePromptForConsent for the full skip + // matrix. if !info && !isTelemetrySubcommand(args) { - _ = telemetry.MaybePromptForConsent(headlessFromArgs(args) || jsonModeFromArgs(args)) + skip := headlessFromArgs(args) || jsonModeFromArgs(args) || lib.IsHeadless() + switch telemetry.MaybePromptForConsent(skip) { + case telemetry.PromptAccepted: + // User opted in via the first-run prompt — fire the + // adoption event so `raid telemetry on` and the prompt + // path both produce raid_first_run. Synchronous so the + // event lands even if the user's command crashes before + // Flush runs. 
+ telemetry.CaptureSync(telemetry.EventFirstRun, telemetry.FirstRunProps("")) + } } // Flush any pending telemetry events before exit so async sends @@ -241,12 +252,30 @@ func applyHeadlessFlag(cmd *cobra.Command, _ []string) error { // skip these — prompting "do you want telemetry?" right before // running `raid telemetry on` is hostile UX, and the off/status/ // purge/preview commands need to work for users who haven't opted in. +// +// Flag-aware: persistent flags that take a value (`--config <path>` +// / `-c <path>`) consume the following token, so an invocation like +// `raid --config telemetry install` should resolve to the `install` +// subcommand and not be misread as `telemetry`. The bool persistent +// flags (`--json`, `--yes`/`-y`, `--headless`) do not consume a +// following token. func isTelemetrySubcommand(args []string) bool { + skipNext := false for _, a := range args[1:] { if a == "--" { return false } + if skipNext { + skipNext = false + continue + } if strings.HasPrefix(a, "-") { + // Only the value-taking root flags consume the next + // token (and only in their bare form — `--config=path` + // keeps the value attached). + if a == "--config" || a == "-c" { + skipNext = true + } continue } return a == "telemetry" @@ -260,19 +289,25 @@ func isTelemetrySubcommand(args []string) bool { // it can skip prompting in non-interactive contexts. Matches every // flag form: `-y`, `--yes`, `--yes=true`, `--headless`, // `--headless=true`, plus their explicit `=false` opt-outs. +// +// Mirrors pflag's "last value wins" behavior: if the user passes +// `--yes=true --yes=false`, the parsed value is false, so this scan +// must also resolve to false. We walk the full arg list and only +// commit the final occurrence. 
func headlessFromArgs(args []string) bool { + out := false for _, a := range args[1:] { if a == "--" { break } switch { case a == "-y", a == "--yes", a == "--yes=true", a == "--headless", a == "--headless=true": - return true + out = true case a == "--yes=false", a == "--headless=false": - return false + out = false } } - return false + return out } // jsonModeFromArgs reports whether the user passed `--json` (or diff --git a/src/cmd/telemetry/telemetry.go b/src/cmd/telemetry/telemetry.go index 1d0e956..025ac60 100644 --- a/src/cmd/telemetry/telemetry.go +++ b/src/cmd/telemetry/telemetry.go @@ -69,8 +69,9 @@ var offCmd = &cobra.Command{ why, _ := cmd.Flags().GetString("why") // Fire the opt-out BEFORE flipping consent off — otherwise // IsActive() would return false and the event would be - // dropped. CaptureSync also blocks until delivery so the - // event lands even though raid exits immediately after. + // dropped. CaptureSync blocks on the HTTP attempt (best-effort: + // network/non-2xx errors are silently dropped) so the event + // gets a real chance to land before raid exits. libtelemetry.CaptureSync(libtelemetry.EventTelemetryOptOut, libtelemetry.OptOutProps(why)) if err := libtelemetry.SetEnabled(false); err != nil { diff --git a/src/internal/lib/task_runner.go b/src/internal/lib/task_runner.go index 27ba3c3..c0f4f1a 100644 --- a/src/internal/lib/task_runner.go +++ b/src/internal/lib/task_runner.go @@ -205,8 +205,9 @@ func ExecuteTask(task Task) error { // Sampled at the call site to keep PostHog volume bounded for // commands with hundreds of tasks. // -// Sampling and Capture both fast-path when telemetry is off, so the -// per-task overhead when opted out is effectively zero. +// Sampled fast-paths via telemetry.IsActive when telemetry is off, so +// the per-task overhead when opted out is effectively zero (no RNG +// call). 
func captureTaskTelemetry(task Task, err error, dur time.Duration) { if !telemetry.Sampled() { return diff --git a/src/internal/telemetry/events.go b/src/internal/telemetry/events.go index c282560..0d955a7 100644 --- a/src/internal/telemetry/events.go +++ b/src/internal/telemetry/events.go @@ -16,8 +16,10 @@ package telemetry // non-sensitive — it's a label the project author chose, not // anything the end user typed in. // - taskCount: total task entries in the command. -// - taskTypes: distinct task-type strings (Shell, Script, …). -// Types only, never the cmd body or args. +// - taskTypes: ordered list of task-type strings (Shell, Script, …), +// one entry per task in the command — duplicates are preserved so +// the per-command structure stays visible. Types only, never the +// cmd body or args. // - durationMs: wall-clock command duration in milliseconds. func CommandExecutedProps(commandName string, taskCount int, taskTypes []string, durationMs int64) map[string]any { return map[string]any{ diff --git a/src/internal/telemetry/id.go b/src/internal/telemetry/id.go index 26624f0..35c18c8 100644 --- a/src/internal/telemetry/id.go +++ b/src/internal/telemetry/id.go @@ -78,25 +78,37 @@ func loadIDIfExists() string { // fresh one if the file doesn't exist. Empty return means we couldn't // resolve the path or persist the value — Capture treats that as "do // nothing" rather than blocking the user's command. +// +// Cross-process safe: the create path uses O_CREATE|O_EXCL so two +// concurrent first-run raid invocations can't both win and write +// different IDs. The loser observes EEXIST and re-reads whatever the +// winner persisted, so every process ends up with the same +// distinct_id from the first event onward. func loadOrCreateID() string { if id := loadIDIfExists(); id != "" { return id } idMu.Lock() defer idMu.Unlock() - // Recheck under the lock — another goroutine may have raced ahead. 
+ // Recheck under the lock — another in-process goroutine may have + // raced ahead while we waited. if idCached != "" { return idCached } + path := IDPath() + if path == "" { + return "" + } id, err := newID() if err != nil { return "" } - if err := writeID(id); err != nil { + persisted, err := writeIDExclusive(path, id) + if err != nil { return "" } - idCached = id - return id + idCached = persisted + return persisted } // newID generates a fresh UUIDv4 from crypto/rand. We don't depend on @@ -118,19 +130,37 @@ func newID() (string, error) { ), nil } -// writeID persists the ID to disk, creating ~/.config/raid/ if -// needed. Permissions are 0600 because this is a stable identifier -// for the user's machine — not a secret, but no reason to leave it -// world-readable either. -func writeID(id string) error { - path := IDPath() - if path == "" { - return fmt.Errorf("telemetry: no home directory resolvable") - } +// writeIDExclusive persists the ID at path using O_CREATE|O_EXCL so +// concurrent callers can't clobber each other's value. If the file +// already exists (another process won the race), the existing +// contents are read and returned instead. Permissions are 0600 +// because this is a stable identifier for the user's machine — not a +// secret, but no reason to leave it world-readable either. +func writeIDExclusive(path, id string) (string, error) { if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { - return err + return "", err + } + f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600) + if err != nil { + if os.IsExist(err) { + // Another process won the race — read what they wrote. 
+ data, readErr := os.ReadFile(path) + if readErr != nil { + return "", readErr + } + existing := strings.TrimSpace(string(data)) + if existing == "" { + return "", fmt.Errorf("telemetry: id file present but empty at %s", path) + } + return existing, nil + } + return "", err + } + defer f.Close() + if _, err := f.WriteString(id + "\n"); err != nil { + return "", err } - return os.WriteFile(path, []byte(id+"\n"), 0600) + return id, nil } // PurgeID deletes the on-disk ID file. PostHog can't link future diff --git a/src/internal/telemetry/prompt.go b/src/internal/telemetry/prompt.go index 1df6921..863b324 100644 --- a/src/internal/telemetry/prompt.go +++ b/src/internal/telemetry/prompt.go @@ -15,8 +15,11 @@ type PromptResult int const ( // PromptSkipped means we never showed the prompt (non-interactive - // context, DO_NOT_TRACK, already decided, no API key). Consent - // state was marked decided=off so we won't try again. + // context, DO_NOT_TRACK, already decided, no API key). For most + // skip reasons consent is also marked decided=off so we won't try + // again. Exception: the "no API key" branch (dev builds where + // telemetry is dead code) returns PromptSkipped without persisting + // any consent state — there's nothing useful to remember. PromptSkipped PromptResult = iota // PromptDeclined means the user explicitly chose no. PromptDeclined diff --git a/src/internal/telemetry/sampling.go b/src/internal/telemetry/sampling.go index ace38ea..247c41d 100644 --- a/src/internal/telemetry/sampling.go +++ b/src/internal/telemetry/sampling.go @@ -19,8 +19,12 @@ var rngFn = func() float64 { return rand.Float64() } // Sampled reports whether a task event should be captured this time // per TaskSampleRate. Caller fires Capture only when this returns -// true. +// true. Fast-paths when telemetry is inactive so opted-out users don't +// pay the per-task RNG call. 
func Sampled() bool { + if !IsActive() { + return false + } if TaskSampleRate <= 0 { return false } diff --git a/src/internal/telemetry/telemetry.go b/src/internal/telemetry/telemetry.go index 8ab93f6..0c50df4 100644 --- a/src/internal/telemetry/telemetry.go +++ b/src/internal/telemetry/telemetry.go @@ -15,8 +15,13 @@ // 4. executeRoot calls Flush at the end of every invocation so // in-flight events finish (or get dropped on timeout). // -// The package is read-only side-effect-free if the user never opts in: -// no goroutines spawned, no files written, no HTTP calls. +// The package is read-only side-effect-free for users who never opt +// in via prompt or `raid telemetry on`: no goroutines spawned, no +// HTTP calls. The one exception is the consent-decision marker — +// MaybePromptForConsent persists a "decided=off" entry to viper when +// the prompt is skipped (DO_NOT_TRACK, non-TTY, headless) so we don't +// re-prompt on the next interactive run. No anonymous ID is created +// or written until the user explicitly opts in. package telemetry import ( @@ -67,9 +72,13 @@ var httpClient = &http.Client{Timeout: 2 * time.Second} // returns and the event would be dropped. var inflight sync.WaitGroup -// Capture is the public hook every event fires through. It returns -// immediately; the network call (if any) runs on a goroutine. Capture -// is safe to call even when telemetry isn't active — it just no-ops. +// Capture is the public hook every event fires through. The network +// POST runs on a goroutine; Capture itself only blocks on +// loadOrCreateID(), which can touch the filesystem (home-dir +// resolution, mkdir, write) the first time an opted-in user fires an +// event. Subsequent calls hit the in-process cache and return without +// disk I/O. Capture is safe to call even when telemetry isn't active — +// it just no-ops. // // Callers must not pass sensitive content in properties. Sanitization // is enforced upstream by the event builders, not here. 
@@ -90,9 +99,11 @@ func Capture(name string, properties map[string]any) { } // CaptureSync is Capture's blocking variant. Used by `raid telemetry -// off` so the opt-out event is guaranteed to land before the process -// exits — we don't want users to flip telemetry off and have the -// opt-out event silently dropped by Flush's timeout. +// off` so the opt-out event is attempted synchronously before the +// process exits — we want to give the event the best chance to land +// rather than rely on Flush's timeout. This is best-effort: send() +// silently drops network and non-2xx errors, so delivery isn't +// guaranteed, just synchronously attempted. func CaptureSync(name string, properties map[string]any) { if !IsActive() { return @@ -192,9 +203,11 @@ func send(evt Event) { // given event, without sending it. Used by `raid telemetry preview` // so users can see exactly what raid emits before opting in. // -// Pretty-prints the JSON for human inspection. Returns an empty -// string when no anonymous ID exists yet (the user hasn't opted in -// and we don't want to create the id file just for a preview). +// Pretty-prints the JSON for human inspection. When no anonymous ID +// exists yet (the user hasn't opted in and we don't want to create +// the id file just for a preview), a placeholder string is shown in +// the distinct_id field so the preview still renders the full payload +// shape. Returns an empty string only on JSON marshaling failure. 
func PreviewPayload(name string, properties map[string]any) string { id := loadIDIfExists() if id == "" { diff --git a/src/internal/telemetry/telemetry_test.go b/src/internal/telemetry/telemetry_test.go index d827210..9174e24 100644 --- a/src/internal/telemetry/telemetry_test.go +++ b/src/internal/telemetry/telemetry_test.go @@ -42,6 +42,8 @@ func setupTestEnv(t *testing.T) { prevRng := rngFn prevHomeDirFn := homeDirFn prevIsInteractive := isInteractiveFn + prevPromptIn := promptInFn + prevPromptOut := promptOutFn os.Setenv(IDFileEnv, idPath) os.Unsetenv(DoNotTrackEnvVar) @@ -63,6 +65,8 @@ func setupTestEnv(t *testing.T) { rngFn = prevRng homeDirFn = prevHomeDirFn isInteractiveFn = prevIsInteractive + promptInFn = prevPromptIn + promptOutFn = prevPromptOut resetIDCacheForTest() viper.Reset() _ = prevConfig diff --git a/src/internal/telemetry/version.go b/src/internal/telemetry/version.go index 33646dc..dec9220 100644 --- a/src/internal/telemetry/version.go +++ b/src/internal/telemetry/version.go @@ -4,9 +4,10 @@ import "github.com/8bitalex/raid/src/resources" // raidVersionFromResources pulls the version from the embedded // app.properties so every event reports the binary's actual version. -// Failures fall back to empty string — events still send, the -// `raid_version` field is just absent. Telemetry never blocks a -// command on a bad lookup. +// On lookup failure this returns the empty string; enrichProperties +// still writes it as `raid_version=""` rather than omitting the +// field — events always send, even if the version label is blank. +// Telemetry never blocks a command on a bad lookup. func raidVersionFromResources() string { v, err := resources.GetProperty(resources.PropertyVersion) if err != nil { From 11c859ad58b23aff8d653db4b07205c95475311a Mon Sep 17 00:00:00 2001 From: "Mr. 
Meeseeks" Date: Wed, 13 May 2026 22:00:14 -0700 Subject: [PATCH 3/4] test: align Sampled() tests with IsActive fast-path Sampled() now short-circuits when telemetry is inactive, so the existing rate=1 / intermediate tests must enable consent before exercising the rate logic. Adds an explicit inactive-fast-path test to pin the new behavior. --- src/internal/telemetry/telemetry_test.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/internal/telemetry/telemetry_test.go b/src/internal/telemetry/telemetry_test.go index 9174e24..cc34ad3 100644 --- a/src/internal/telemetry/telemetry_test.go +++ b/src/internal/telemetry/telemetry_test.go @@ -323,6 +323,9 @@ func TestPurgeID_idempotent(t *testing.T) { func TestSampled_rateZeroNeverFires(t *testing.T) { setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } TaskSampleRate = 0 for i := 0; i < 100; i++ { if Sampled() { @@ -333,6 +336,9 @@ func TestSampled_rateZeroNeverFires(t *testing.T) { func TestSampled_rateOneAlwaysFires(t *testing.T) { setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } TaskSampleRate = 1 for i := 0; i < 100; i++ { if !Sampled() { @@ -341,8 +347,23 @@ func TestSampled_rateOneAlwaysFires(t *testing.T) { } } +func TestSampled_inactiveTelemetryNeverFires(t *testing.T) { + setupTestEnv(t) + TaskSampleRate = 1 + // IsActive is false (no SetEnabled call), so even rate=1 must + // short-circuit — opted-out users pay zero per-task RNG cost. + for i := 0; i < 100; i++ { + if Sampled() { + t.Fatal("Sampled() returned true while telemetry inactive") + } + } +} + func TestSampled_intermediateRateUsesRNG(t *testing.T) { setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } TaskSampleRate = 0.5 // Force-deterministic: alternating 0.0 / 0.9 means exactly half // the samples (the ones below 0.5) should fire. From a3403ac85422682c4b9cbb508408b9d8a09f9001 Mon Sep 17 00:00:00 2001 From: "Mr. 
Meeseeks" Date: Wed, 13 May 2026 22:04:31 -0700 Subject: [PATCH 4/4] test: cover lines flagged by Codecov patch check Adds tests for paths the initial PR didn't reach: - loadOrCreateID race-loser path (existing ID file is adopted, not overwritten) and the home-dir-error fast fail. - loadIDIfExists empty-on-missing. - CaptureSync send + no-op-when-inactive. - send() swallows network errors silently (Flush still returns). - PreviewPayload renders a placeholder when no ID exists. - headlessFromArgs last-value-wins (mirrors pflag). - isTelemetrySubcommand flag-value handling (--config / -c). --- src/cmd/raid_test.go | 57 ++++++++++++ src/internal/telemetry/telemetry_test.go | 109 +++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/src/cmd/raid_test.go b/src/cmd/raid_test.go index 1d8c609..f257c8b 100644 --- a/src/cmd/raid_test.go +++ b/src/cmd/raid_test.go @@ -100,6 +100,63 @@ func TestExecuteRoot_structuredErrorRouting(t *testing.T) { } } +func TestHeadlessFromArgs(t *testing.T) { + tests := []struct { + name string + args []string + want bool + }{ + {"no flag", []string{"raid", "install"}, false}, + {"-y short", []string{"raid", "-y", "install"}, true}, + {"long --yes", []string{"raid", "--yes", "install"}, true}, + {"--headless", []string{"raid", "--headless", "install"}, true}, + {"--yes=true", []string{"raid", "--yes=true", "install"}, true}, + {"--yes=false", []string{"raid", "--yes=false", "install"}, false}, + // pflag uses the last value when a flag is repeated; the + // pre-cobra scan must mirror that. 
+ {"last value wins true→false", []string{"raid", "--yes=true", "--yes=false", "install"}, false}, + {"last value wins false→true", []string{"raid", "--headless=false", "--headless", "install"}, true}, + {"after --", []string{"raid", "--", "--yes"}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := headlessFromArgs(tt.args); got != tt.want { + t.Errorf("headlessFromArgs(%v) = %v, want %v", tt.args, got, tt.want) + } + }) + } +} + +func TestIsTelemetrySubcommand(t *testing.T) { + tests := []struct { + name string + args []string + want bool + }{ + {"no args", []string{"raid"}, false}, + {"direct", []string{"raid", "telemetry"}, true}, + {"with sub", []string{"raid", "telemetry", "on"}, true}, + {"bool flag before", []string{"raid", "--json", "telemetry"}, true}, + {"--yes before", []string{"raid", "--yes", "telemetry", "off"}, true}, + // Critical: --config consumes the next token, so the + // `telemetry` here is the config path, not the subcommand. + {"config value mistaken", []string{"raid", "--config", "telemetry", "install"}, false}, + {"-c short value", []string{"raid", "-c", "telemetry", "install"}, false}, + // --config=value attaches the value, so the next token is + // the real subcommand. 
+ {"config attached value", []string{"raid", "--config=path", "telemetry"}, true}, + {"different subcommand", []string{"raid", "install"}, false}, + {"after --", []string{"raid", "--", "telemetry"}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isTelemetrySubcommand(tt.args); got != tt.want { + t.Errorf("isTelemetrySubcommand(%v) = %v, want %v", tt.args, got, tt.want) + } + }) + } +} + func TestJsonModeFromArgs(t *testing.T) { tests := []struct { name string diff --git a/src/internal/telemetry/telemetry_test.go b/src/internal/telemetry/telemetry_test.go index cc34ad3..edeadd5 100644 --- a/src/internal/telemetry/telemetry_test.go +++ b/src/internal/telemetry/telemetry_test.go @@ -319,6 +319,115 @@ func TestPurgeID_idempotent(t *testing.T) { } } +func TestLoadOrCreateID_reusesExistingFile(t *testing.T) { + // Simulates a concurrent-process race: another raid invocation + // already wrote an ID before this one calls loadOrCreateID. The + // O_CREATE|O_EXCL path must observe the existing file and adopt + // the same value rather than overwriting with a fresh UUID. + setupTestEnv(t) + path := IDPath() + if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { + t.Fatal(err) + } + preset := "11111111-2222-4333-8444-555555555555" + if err := os.WriteFile(path, []byte(preset+"\n"), 0600); err != nil { + t.Fatal(err) + } + got := loadOrCreateID() + if got != preset { + t.Errorf("loadOrCreateID = %q, want preset %q", got, preset) + } +} + +func TestLoadOrCreateID_emptyOnHomeDirError(t *testing.T) { + setupTestEnv(t) + // Force the home-dir resolver to fail AND clear the env override + // so IDPath returns "". loadOrCreateID must fail closed (empty + // string) rather than panic or write somewhere unexpected. 
+ os.Unsetenv(IDFileEnv) + homeDirFn = func() (string, error) { return "", os.ErrPermission } + if got := loadOrCreateID(); got != "" { + t.Errorf("loadOrCreateID = %q, want \"\" on home-dir failure", got) + } +} + +func TestLoadIDIfExists_returnsEmptyWhenFileMissing(t *testing.T) { + setupTestEnv(t) + if got := loadIDIfExists(); got != "" { + t.Errorf("loadIDIfExists on fresh env = %q, want empty", got) + } +} + +// --- CaptureSync --- + +func TestCaptureSync_sendsImmediately(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + var hits int32 + srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { + atomic.AddInt32(&hits, 1) + })) + defer srv.Close() + CaptureEndpoint = srv.URL + + CaptureSync(EventTelemetryOptOut, OptOutProps("test")) + // No Flush — CaptureSync blocks on the request itself. + if got := atomic.LoadInt32(&hits); got != 1 { + t.Errorf("CaptureSync delivered %d events, want 1", got) + } +} + +func TestCaptureSync_noopWhenInactive(t *testing.T) { + setupTestEnv(t) + // IsActive is false (no SetEnabled). CaptureSync must short-circuit. + var hits int32 + srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { + atomic.AddInt32(&hits, 1) + })) + defer srv.Close() + CaptureEndpoint = srv.URL + CaptureSync(EventTelemetryOptOut, OptOutProps("ignored")) + if got := atomic.LoadInt32(&hits); got != 0 { + t.Errorf("inactive CaptureSync sent %d events, want 0", got) + } +} + +// --- send: silent on errors --- + +func TestSend_swallowsNetworkErrors(t *testing.T) { + setupTestEnv(t) + if err := SetEnabled(true); err != nil { + t.Fatal(err) + } + // Point at a closed server — Dial will fail synchronously, but + // send() must absorb the error without panicking and Flush must + // still return cleanly. 
+ srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {})) + closedURL := srv.URL + srv.Close() + CaptureEndpoint = closedURL + Capture(EventCommandExecuted, CommandExecutedProps("x", 0, nil, 0)) + Flush(1 * time.Second) // must not hang +} + +// --- PreviewPayload --- + +func TestPreviewPayload_placeholderWhenNoID(t *testing.T) { + setupTestEnv(t) + // No prior ID file → preview should still render (with a + // placeholder in the distinct_id slot) rather than returning + // empty. The comment contract was updated to match this. + payload := PreviewPayload(EventCommandExecuted, CommandExecutedProps("build", 0, nil, 0)) + if payload == "" { + t.Fatal("preview empty when no ID file exists") + } + if !strings.Contains(payload, "no-id-yet") { + t.Errorf("preview missing placeholder marker: %s", payload) + } +} + // --- Sampling --- func TestSampled_rateZeroNeverFires(t *testing.T) {