diff --git a/CHANGELOG.md b/CHANGELOG.md index d810fca7..f5cbc7ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.18.9] - 2026-04-24 + +### Fixed + +- **`wfctl ci run --phase deploy` now uses env-resolved module name** — `pluginDeployProvider.resourceName` was populated from `m.Name` (base config name) instead of `ResolvedModule.Name` (env-override lifted from `Config["name"]`). When infra apply had env-renamed a module (e.g. BMW's `bmw-app` → `bmw-staging` for staging), the deploy phase used the base name for `driver.Read` lookup, didn't find the resource, and went down the Create path — producing duplicate DO resources. Same class as v0.18.7 Task #32 fix, but in the ci-run code path. Root cause from BMW deploy run 24888583717. +- **`infra_output` source resolution now applies env override to module name** — `secrets.generate[].source: "bmw-database.uri"` now resolves to the env-resolved state key (e.g. `bmw-staging-db`) when `--env staging` is set, matching how infra apply persists state. Closes task #56. + +### Tests + +- `cmd/wfctl/deploy_providers_env_test.go` — `TestPluginDeployProvider_UsesEnvResolvedName`, `TestPluginDeployProvider_FallsBackToModuleNameWhenNoEnv` +- `cmd/wfctl/infra_secrets_env_test.go` — `TestInfraOutput_EnvResolvesModuleSource`, `TestInfraOutput_NoEnvUsesBaseName` + ## [0.18.8] - 2026-04-24 ### Fixed diff --git a/cmd/wfctl/deploy_providers.go b/cmd/wfctl/deploy_providers.go index f7484416..7fea7c9d 100644 --- a/cmd/wfctl/deploy_providers.go +++ b/cmd/wfctl/deploy_providers.go @@ -703,18 +703,19 @@ func newPluginDeployProvider(providerName string, wfCfg *config.WorkflowConfig, return nil, fmt.Errorf("unsupported deploy provider %q (built-ins: kubernetes, docker, aws-ecs; to use a plugin provider, declare an iac.provider module in your workflow config)%s", providerName, fmt.Sprintf(hint, providerName)) } - // resolveModCfg returns the effective config map for m after applying the + // resolveModule returns the effective ResolvedModule for m after applying the // per-env overlay (when envName is set). ok=false means the module is // explicitly deleted for this env and should be skipped. - resolveModCfg := func(m *config.ModuleConfig) (map[string]any, bool) { + // Callers must read resolved.Name (not m.Name) to get the env-overridden identity. + resolveModule := func(m *config.ModuleConfig) (*config.ResolvedModule, bool) { if envName == "" { - return m.Config, true + return &config.ResolvedModule{ + Name: m.Name, + Type: m.Type, + Config: m.Config, + }, true } - resolved, ok := m.ResolveForEnv(envName) - if !ok { - return nil, false - } - return resolved.Config, true + return m.ResolveForEnv(envName) } // Find the iac.provider module matching the requested provider name. @@ -725,14 +726,14 @@ func newPluginDeployProvider(providerName string, wfCfg *config.WorkflowConfig, if m.Type != "iac.provider" { continue } - cfg, ok := resolveModCfg(m) + resolved, ok := resolveModule(m) if !ok { continue } - cfgProvider, _ := cfg["provider"].(string) - if cfgProvider == providerName || m.Name == providerName { - providerModName = m.Name - providerModCfg = cfg + cfgProvider, _ := resolved.Config["provider"].(string) + if cfgProvider == providerName || resolved.Name == providerName { + providerModName = resolved.Name + providerModCfg = resolved.Config break } } @@ -761,14 +762,14 @@ func newPluginDeployProvider(providerName string, wfCfg *config.WorkflowConfig, if m.Type != target { continue } - cfg, ok := resolveModCfg(m) + resolved, ok := resolveModule(m) if !ok { continue } - if p, _ := cfg["provider"].(string); p == providerModName { - resourceName = m.Name - resourceType = m.Type - resourceCfg = cfg + if p, _ := resolved.Config["provider"].(string); p == providerModName { + resourceName = resolved.Name + resourceType = resolved.Type + resourceCfg = resolved.Config return true } } @@ -786,16 +787,16 @@ func newPluginDeployProvider(providerName string, wfCfg *config.WorkflowConfig, if m.Type == "iac.provider" || m.Type == "" { continue } - cfg, ok := resolveModCfg(m) + resolved, ok := resolveModule(m) if !ok { continue } - if p, _ := cfg["provider"].(string); p == providerModName { + if p, _ := resolved.Config["provider"].(string); p == providerModName { fmt.Fprintf(os.Stderr, "warning: no deploy-target module (%v) found for provider %q; falling back to first infra module %q (type %q)\n", - deployTargetTypes, providerModName, m.Name, m.Type) - resourceName = m.Name - resourceType = m.Type - resourceCfg = cfg + deployTargetTypes, providerModName, resolved.Name, resolved.Type) + resourceName = resolved.Name + resourceType = resolved.Type + resourceCfg = resolved.Config break } } diff --git a/cmd/wfctl/deploy_providers_env_test.go b/cmd/wfctl/deploy_providers_env_test.go new file mode 100644 index 00000000..edd467dd --- /dev/null +++ b/cmd/wfctl/deploy_providers_env_test.go @@ -0,0 +1,73 @@ +package main + +import ( + "testing" + + "github.com/GoCodeAlone/workflow/config" +) + +func TestPluginDeployProvider_UsesEnvResolvedName(t *testing.T) { + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "do-provider", + Type: "iac.provider", + Config: map[string]any{"provider": "digitalocean"}, + }, + { + Name: "bmw-app", + Type: "infra.container_service", + Config: map[string]any{"provider": "do-provider"}, + Environments: map[string]*config.InfraEnvironmentResolution{ + "staging": { + Config: map[string]any{"name": "bmw-staging"}, + }, + }, + }, + }, + } + + dp, err := newPluginDeployProvider("digitalocean", wfCfg, "staging") + if err != nil { + t.Fatalf("newPluginDeployProvider: %v", err) + } + + pdp, ok := dp.(*pluginDeployProvider) + if !ok { + t.Fatalf("expected *pluginDeployProvider, got %T", dp) + } + if pdp.resourceName != "bmw-staging" { + t.Errorf("resourceName = %q, want %q (env-resolved name)", pdp.resourceName, "bmw-staging") + } +} + +func TestPluginDeployProvider_FallsBackToModuleNameWhenNoEnv(t *testing.T) { + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "do-provider", + Type: "iac.provider", + Config: map[string]any{"provider": "digitalocean"}, + }, + { + Name: "bmw-app", + Type: "infra.container_service", + Config: map[string]any{"provider": "do-provider"}, + // NOTE: no Environments block — base name should be used + }, + }, + } + + dp, err := newPluginDeployProvider("digitalocean", wfCfg, "") + if err != nil { + t.Fatalf("newPluginDeployProvider: %v", err) + } + + pdp, ok := dp.(*pluginDeployProvider) + if !ok { + t.Fatalf("expected *pluginDeployProvider, got %T", dp) + } + if pdp.resourceName != "bmw-app" { + t.Errorf("resourceName = %q, want %q (base module name when no env)", pdp.resourceName, "bmw-app") + } +} diff --git a/cmd/wfctl/infra.go b/cmd/wfctl/infra.go index 7308ca60..c43592a1 100644 --- a/cmd/wfctl/infra.go +++ b/cmd/wfctl/infra.go @@ -825,7 +825,24 @@ func runInfraApply(args []string) error { return fmt.Errorf("resolve secrets provider for infra_output sync: %w", err) } states := loadCurrentState(cfgFile, envName) - return syncInfraOutputSecrets(ctx, secretsCfg, secretsProvider, states) + // Only reload the workflow config when env resolution is actually needed: + // it is needed only when --env is set AND at least one infra_output secret + // generator is configured (otherwise syncInfraOutputSecrets is a no-op for + // env resolution regardless). + var wfCfg *config.WorkflowConfig + if envName != "" { + for _, g := range secretsCfg.Generate { + if g.Type == "infra_output" { + var loadErr error + wfCfg, loadErr = config.LoadFromFile(cfgFile) + if loadErr != nil { + return fmt.Errorf("load config for infra_output env resolution: %w", loadErr) + } + break + } + } + } + return syncInfraOutputSecrets(ctx, secretsCfg, secretsProvider, states, wfCfg, envName) } func runInfraStatus(args []string) error { diff --git a/cmd/wfctl/infra_output_secrets.go b/cmd/wfctl/infra_output_secrets.go index 1265a617..80697c74 100644 --- a/cmd/wfctl/infra_output_secrets.go +++ b/cmd/wfctl/infra_output_secrets.go @@ -4,7 +4,10 @@ import ( "context" "errors" "fmt" + "sort" + "strings" + "github.com/GoCodeAlone/workflow/config" "github.com/GoCodeAlone/workflow/interfaces" "github.com/GoCodeAlone/workflow/secrets" ) @@ -23,10 +26,78 @@ func buildStateOutputsMap(states []interfaces.ResourceState) map[string]map[stri return m } +// resolveInfraOutput resolves a single "module.field" source string against the +// pre-loaded state outputs map, applying per-env module name resolution so that +// a source like "bmw-database.uri" finds the state keyed by the env-resolved +// name (e.g. "bmw-staging-db") when --env staging renames the module. +// +// wfCfg may be nil (e.g. tests that only care about base-name resolution). +// When envName is empty no resolution is performed and the source module name +// is used verbatim. +func resolveInfraOutput(wfCfg *config.WorkflowConfig, source, envName string, stateOutputs map[string]map[string]any) (string, error) { + if source == "" { + return "", fmt.Errorf("infra_output: source is required (format: \"module.field\")") + } + dot := strings.Index(source, ".") + if dot < 1 || dot >= len(source)-1 { + return "", fmt.Errorf("infra_output: invalid source %q: expected \"module.field\" format", source) + } + moduleName := source[:dot] + field := source[dot+1:] + + // Apply env resolution: the state was persisted under the env-resolved name. + if envName != "" && wfCfg != nil { + for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Name != moduleName { + continue + } + resolved, ok := m.ResolveForEnv(envName) + if !ok { + return "", fmt.Errorf("infra_output: module %q is explicitly disabled for environment %q — cannot read infra_output from a disabled module", moduleName, envName) + } + if resolved.Name != "" { + moduleName = resolved.Name + } + break + } + } + + if stateOutputs == nil { + return "", fmt.Errorf("infra_output: state outputs not available for source %q — did infra apply succeed?", source) + } + outputs, ok := stateOutputs[moduleName] + if !ok { + return "", fmt.Errorf("infra_output: module %q not found in state (available: %s)", moduleName, strings.Join(stateKeys(stateOutputs), ", ")) + } + val, ok := outputs[field] + if !ok { + return "", fmt.Errorf("infra_output: field %q not found in outputs of module %q", field, moduleName) + } + s, ok := val.(string) + if !ok { + return "", fmt.Errorf("infra_output: output field %q of module %q is %T, expected string", field, moduleName, val) + } + return s, nil +} + +// stateKeys returns the sorted keys of a state outputs map for error messages. +func stateKeys(m map[string]map[string]any) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + // syncInfraOutputSecrets writes infra_output-typed secrets after a successful // apply. It skips secrets that already exist in the provider so idempotent // re-runs never overwrite live values. -func syncInfraOutputSecrets(ctx context.Context, secretsCfg *SecretsConfig, provider secrets.Provider, states []interfaces.ResourceState) error { +// wfCfg and envName are used to resolve source module names through per-env +// overrides so that "bmw-database.uri" finds "bmw-staging-db" in state when +// --env staging renames the module. +func syncInfraOutputSecrets(ctx context.Context, secretsCfg *SecretsConfig, provider secrets.Provider, states []interfaces.ResourceState, wfCfg *config.WorkflowConfig, envName string) error { if secretsCfg == nil { return nil } @@ -89,11 +160,7 @@ func syncInfraOutputSecrets(ctx context.Context, secretsCfg *SecretsConfig, prov continue } - genConfig := map[string]any{ - "source": gen.Source, - "_state_outputs": stateOutputs, - } - value, err := generateSecret(ctx, "infra_output", genConfig) + value, err := resolveInfraOutput(wfCfg, gen.Source, envName, stateOutputs) if err != nil { return fmt.Errorf("generate infra_output secret %q: %w", gen.Key, err) } diff --git a/cmd/wfctl/infra_output_secrets_test.go b/cmd/wfctl/infra_output_secrets_test.go index c7da7b6f..ae05c7c8 100644 --- a/cmd/wfctl/infra_output_secrets_test.go +++ b/cmd/wfctl/infra_output_secrets_test.go @@ -109,7 +109,7 @@ func TestBuildStateOutputsMap_Empty(t *testing.T) { func TestSyncInfraOutputSecrets_NilConfig(t *testing.T) { p := newSimpleProvider() - err := syncInfraOutputSecrets(context.Background(), nil, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), nil, p, sampleStates(), nil, "") if err != nil { t.Fatalf("nil config should be no-op: %v", err) } @@ -125,7 +125,7 @@ func TestSyncInfraOutputSecrets_NoInfraOutputGens(t *testing.T) { {Key: "JWT_SECRET", Type: "random_hex", Length: 32}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err != nil { t.Fatalf("no infra_output generators should be no-op: %v", err) } @@ -141,7 +141,7 @@ func TestSyncInfraOutputSecrets_WritesSecret(t *testing.T) { {Key: "STAGING_DATABASE_URL", Type: "infra_output", Source: "bmw-database.uri"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err != nil { t.Fatalf("syncInfraOutputSecrets: %v", err) } @@ -158,7 +158,7 @@ func TestSyncInfraOutputSecrets_WritesMultiple(t *testing.T) { {Key: "REDIS_URL", Type: "infra_output", Source: "bmw-cache.url"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err != nil { t.Fatalf("syncInfraOutputSecrets: %v", err) } @@ -178,7 +178,7 @@ func TestSyncInfraOutputSecrets_SkipsExisting(t *testing.T) { {Key: "DATABASE_URL", Type: "infra_output", Source: "bmw-database.uri"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err != nil { t.Fatalf("syncInfraOutputSecrets: %v", err) } @@ -198,7 +198,7 @@ func TestSyncInfraOutputSecrets_WriteOnlyProviderSkips(t *testing.T) { {Key: "DATABASE_URL", Type: "infra_output", Source: "bmw-database.uri"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err != nil { t.Fatalf("syncInfraOutputSecrets: %v", err) } @@ -217,7 +217,7 @@ func TestSyncInfraOutputSecrets_WriteOnlyProviderWrites(t *testing.T) { {Key: "DATABASE_URL", Type: "infra_output", Source: "bmw-database.uri"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err != nil { t.Fatalf("syncInfraOutputSecrets: %v", err) } @@ -233,7 +233,7 @@ func TestSyncInfraOutputSecrets_MissingModule(t *testing.T) { {Key: "X", Type: "infra_output", Source: "nonexistent.uri"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates()) + err := syncInfraOutputSecrets(context.Background(), cfg, p, sampleStates(), nil, "") if err == nil { t.Fatal("expected error for missing module in state") } @@ -246,7 +246,7 @@ func TestSyncInfraOutputSecrets_EmptyStates(t *testing.T) { {Key: "X", Type: "infra_output", Source: "bmw-database.uri"}, }, } - err := syncInfraOutputSecrets(context.Background(), cfg, p, nil) + err := syncInfraOutputSecrets(context.Background(), cfg, p, nil, nil, "") if err == nil { t.Fatal("expected error when state has no matching module") } diff --git a/cmd/wfctl/infra_secrets_env_test.go b/cmd/wfctl/infra_secrets_env_test.go new file mode 100644 index 00000000..5daf3193 --- /dev/null +++ b/cmd/wfctl/infra_secrets_env_test.go @@ -0,0 +1,96 @@ +package main + +import ( + "strings" + "testing" + + "github.com/GoCodeAlone/workflow/config" +) + +func TestInfraOutput_EnvResolvesModuleSource(t *testing.T) { + // Staging env renames bmw-database → bmw-staging-db. State is keyed by + // env-resolved name. Secret generation reads source "bmw-database.uri" + // which must resolve to "bmw-staging-db" for the state lookup to succeed. + + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "bmw-database", + Type: "infra.database", + Config: map[string]any{"provider": "do-provider"}, + Environments: map[string]*config.InfraEnvironmentResolution{ + "staging": {Config: map[string]any{"name": "bmw-staging-db"}}, + }, + }, + }, + } + + // Simulate pre-populated state with env-resolved name. + fakeState := map[string]map[string]any{ + "bmw-staging-db": {"uri": "postgresql://test"}, + } + + // Function under test: resolve infra_output source with envName. + // Must transform "bmw-database.uri" → lookup "bmw-staging-db" → "uri" field. + val, err := resolveInfraOutput(wfCfg, "bmw-database.uri", "staging", fakeState) + if err != nil { + t.Fatalf("resolveInfraOutput: %v", err) + } + if val != "postgresql://test" { + t.Errorf("got %q, want %q (state lookup via env-resolved name)", val, "postgresql://test") + } +} + +func TestInfraOutput_NoEnvUsesBaseName(t *testing.T) { + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "bmw-database", + Type: "infra.database", + Config: map[string]any{"provider": "do-provider"}, + }, + }, + } + + fakeState := map[string]map[string]any{ + "bmw-database": {"uri": "postgresql://base"}, + } + + val, err := resolveInfraOutput(wfCfg, "bmw-database.uri", "", fakeState) + if err != nil { + t.Fatalf("resolveInfraOutput: %v", err) + } + if val != "postgresql://base" { + t.Errorf("got %q, want %q (base name when no env)", val, "postgresql://base") + } +} + +func TestInfraOutput_ExplicitlyDisabledModuleErrors(t *testing.T) { + // A nil environments entry means the module is explicitly removed for this env. + // resolveInfraOutput must return a clear error rather than silently falling + // back to the base name (which would read stale/incorrect state). + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "bmw-database", + Type: "infra.database", + Config: map[string]any{"provider": "do-provider"}, + Environments: map[string]*config.InfraEnvironmentResolution{ + "staging": nil, // explicitly disabled + }, + }, + }, + } + + fakeState := map[string]map[string]any{ + "bmw-database": {"uri": "postgresql://base"}, + } + + _, err := resolveInfraOutput(wfCfg, "bmw-database.uri", "staging", fakeState) + if err == nil { + t.Fatal("expected error when module is explicitly disabled for env") + } + if !strings.Contains(err.Error(), "explicitly disabled") { + t.Errorf("error should mention 'explicitly disabled', got: %v", err) + } +} diff --git a/docs/plans/2026-04-24-v0.19.0-architectural-cleanup-design.md b/docs/plans/2026-04-24-v0.19.0-architectural-cleanup-design.md new file mode 100644 index 00000000..e363e23d --- /dev/null +++ b/docs/plans/2026-04-24-v0.19.0-architectural-cleanup-design.md @@ -0,0 +1,733 @@ +# Workflow v0.19.0 — Architectural Cleanup — Design + +**Status:** Approved (autonomous pipeline, 2026-04-24) + +**Goal:** Land eight architectural changes together (Features A-H) that share config-file shape (`wfctl.yaml` + `.wfctl-lock.yaml`), release boundary (workflow v0.19.0), and consumer impact (BMW CI deploy.yml collapses to pure declarative). Each is independently valuable; bundling avoids breaking downstream config format twice. + +**Why:** Tonight's BMW deploy surfaced multiple pieces of architectural debt through the blocker chain. Four are enumerated below; Features E-H were added during design iteration based on additional findings (state-independent teardown, output reading, healthcheck gating, declarative secret sinks): + +1. `app.yaml` conflates application config with plugin version pins — `.wfctl-lock.yaml` exists but is uncommitted, stale, and only covers one plugin. Conventional dep-manifest + lockfile pattern (npm/cargo/go) is not followed. +2. BMW's `deploy.yml` has `doctl registry login` / `doctl registry create` shell steps because wfctl doesn't know how to auth Docker to DOCR. Provider-specific shell in consumer CI is the anti-pattern PR #171 rejected for plugin installs; we missed registries. +3. `IaCProvider` gRPC boundary uses `map[string]any` for method args. The v0.7.5 BootstrapStateBackend regression (deploy run 24878055496) was a silent schema mismatch across that boundary — client sent cfg directly, server expected `args["cfg"]`, no compile-time or runtime check caught it. +4. BMW's `deploy.yml` clones `GoCodeAlone/workflow` at a pinned SHA and `docker build`s `workflow-migrate` inline. Duplicated build logic, no supply-chain signing, coupled to a manual SHA bump, burns a PAT on the checkout. + +User's principle: "if BMW CI has provider-specific shell, we should fix it in workflow/wfctl so the CI stays declarative." These four changes together deliver that. + +## Scope + +"Source task" column references IDs from the platform-maturity-stage2 team task list (not workflow-repo PR numbers). + +| Feature | Source task | Touches | +|---|---|---| +| A. Plugin manifest + lockfile split | #42, #43 | workflow core | +| B. Multi-registry + IaCProvider.EnsureRegistryAuth | #48 | workflow core + all 4 IaC plugins | +| C. Typed-args refactor for IaCProvider gRPC | #41 | workflow core + all 4 IaC plugins | +| D. Official workflow-migrate Docker image | #49 | workflow core (CI only) | +| E. `wfctl infra teardown` — state-independent recovery-mode destroy | new | workflow core | +| F. `wfctl infra outputs` — masked-by-default output reader | new (elevates #14) | workflow core | +| G. `wfctl deploy verify` — configurable healthcheck gate | new | workflow core | +| H. Declarative secret sinks (plaintext-never-leaves-process) | new | workflow core + future plugin fan-out | + +## Non-goals + +- Schema evolution of `ResourceSpec` or `ResourceState` — stable. +- Registry types beyond the six enumerated (digitalocean/aws-ecr/gcp-ar/azure-acr/github/docker). Future types file a follow-up; the dispatch is extensible. +- Constraint-based plugin version resolution (e.g. `^0.7.0`). v0.19.0 ships exact pins only; semver ranges are a v0.20.0 follow-up once the lockfile schema is battle-tested. +- Replacing `app.yaml` wholesale. Plugin section moves out; everything else stays. +- Registry auth for anything other than Docker image pushes. OCI chart registries, artifact registries, etc. — out of scope. + +## Architecture + +One design doc because the four features share the config file shape. Landing them individually would force downstream consumers (BMW, ratchet, workflow-cloud) to migrate config three times. + +### A. Plugin manifest + lockfile split + +**New file: `wfctl.yaml`** at repo root (or `.wfctl/config.yaml` for nested projects). Declares plugins with exact version pins: + +```yaml +version: 1 +plugins: + - name: workflow-plugin-digitalocean + version: v0.7.6 + source: github.com/GoCodeAlone/workflow-plugin-digitalocean + auth: + env: GH_TOKEN # optional — env var used for auth'd downloads + - name: workflow-plugin-supply-chain + version: v0.3.0 + source: github.com/GoCodeAlone/workflow-plugin-supply-chain + verify: + identity: "https://github.com/GoCodeAlone/workflow-plugin-supply-chain/.github/workflows/release.yml@refs/tags/v0.3.0" +``` + +**Lockfile: `.wfctl-lock.yaml`** — regenerated by wfctl subcommands; committed. Resolved to exact versions with sha256 for supply-chain verify: + +```yaml +version: 1 +generated_at: "2026-04-24T10:00:00Z" +plugins: + workflow-plugin-digitalocean: + version: v0.7.6 + source: github.com/GoCodeAlone/workflow-plugin-digitalocean + sha256: "6f510f50...beb538" + platforms: + linux-amd64: + url: "https://github.com/GoCodeAlone/workflow-plugin-digitalocean/releases/download/v0.7.6/workflow-plugin-digitalocean-linux-amd64.tar.gz" + sha256: "ab12cd34..." +``` + +**New wfctl subcommands:** +- `wfctl plugin add [@version] [--source URL]` — appends to manifest, resolves to lock +- `wfctl plugin remove ` — removes from both +- `wfctl plugin update []` — re-resolves version (exact pin → latest matching release); updates lockfile +- `wfctl plugin lock` — regenerates lockfile from manifest without version changes (fresh sha256s) +- `wfctl plugin install` (existing) — reads **lockfile** as authoritative; downloads exact versions + verifies sha256 +- `wfctl plugin list` — prints manifest + lock-resolved versions side-by-side +- `wfctl migrate plugins` — one-time migration: extracts inline `version:`/`source:`/`auth:` from `app.yaml`'s `requires.plugins[]`; writes `wfctl.yaml` + regenerates `.wfctl-lock.yaml`; prompts to delete inline pins from `app.yaml` (or adds deprecation warning on next load) + +**Backward compatibility:** +- `app.yaml requires.plugins[]` inline fields still parsed for one release (v0.19.x). Warning emitted on load: `DEPRECATED: plugin version in app.yaml; run 'wfctl migrate plugins' to move to wfctl.yaml + .wfctl-lock.yaml`. +- Inline fields removed entirely in v0.20.0. + +### B. Multi-registry support + IaCProvider.EnsureRegistryAuth + +**New top-level section in `wfctl.yaml`:** + +```yaml +registries: + - name: docr-bmw + type: digitalocean + host: registry.digitalocean.com + # credentials resolved via do-provider (iac.provider module auto-linked) + provider: do-provider + - name: ghcr + type: github + host: ghcr.io + namespace: gocodealone + auth: + username_source: "github.actor" # or literal value + password_env: GITHUB_TOKEN + - name: docker-hub + type: docker + host: registry-1.docker.io + namespace: gocodealone + auth: + username_env: DOCKER_USERNAME + password_env: DOCKER_PASSWORD +``` + +**Registry resolver dispatch** (new package: `registry/`): +- `digitalocean` / `aws-ecr` / `gcp-ar` / `azure-acr` → delegates to the referenced IaCProvider plugin's `EnsureRegistryAuth(ctx, spec)` method (plugin uses its provider credentials) +- `github` → built-in handler writes Docker credential store entry using the resolved username + password env var (works in GH Actions with `GITHUB_TOKEN` or App installation tokens) +- `docker` → built-in handler for generic registries (Docker Hub, self-hosted, Harbor, etc.) + +**New IaCProvider method:** + +```go +// EnsureRegistryAuth writes Docker registry credentials to ~/.docker/config.json +// for the given registry spec. Implementations use their provider credentials +// (already in the plugin's module config) to derive the registry-specific +// auth payload. Returns nil on success; non-nil error aborts the build. +type IaCProvider interface { + // ... existing methods ... + EnsureRegistryAuth(ctx context.Context, spec RegistrySpec) error + EnsureRegistry(ctx context.Context, spec RegistrySpec) error +} + +type RegistrySpec struct { + Name string + Host string + Namespace string + Tier string // optional, provider-specific (e.g. DOCR "basic") + Region string // optional (e.g. DOCR region) + Extra map[string]any // provider-specific fields passed through +} +``` + +**Pipeline integration** — `container_build` step gains `registries` field: + +```yaml +pipelines: + build-and-push: + - type: container_build + image: bmw-app + registries: [docr-bmw, ghcr] # push to both + tags: ["${IMAGE_SHA}", "latest"] +``` + +wfctl's build orchestrator: +1. Reads the step's `registries` list → looks up each in the config's `registries:` block. +2. For each, calls `resolver.Ensure(ctx, spec)` which dispatches to provider-plugin `EnsureRegistry` + `EnsureRegistryAuth` (or built-in handler for github/docker). +3. Runs `docker buildx build --push` once per tag, pushing to each `namespace/image:tag` — buildx supports multi-destination push in one invocation (`--output type=registry,dest=...` or multiple `--tag` entries with fully-qualified hosts). + +**BMW post-migration:** +```yaml +# deploy.yml drops these shell steps: +# - Ensure DOCR registry exists (doctl registry create) +# - Log in to DOCR (doctl registry login) +# wfctl.yaml declares the registry; wfctl build --push handles everything. +``` + +### C. Typed-args refactor for IaCProvider gRPC + +**Per-method request/response structs** in new file `interfaces/iac_provider_args.go`: + +```go +// BootstrapStateBackend +type BootstrapStateBackendReq struct { + Config map[string]any `json:"config"` +} +type BootstrapStateBackendResp struct { + Bucket string `json:"bucket"` + Region string `json:"region"` + Endpoint string `json:"endpoint"` + EnvVars map[string]string `json:"env_vars"` +} + +// Plan +type PlanReq struct { + Desired []ResourceSpec `json:"desired"` + Current []ResourceState `json:"current"` +} +type PlanResp struct { + Plan IaCPlan `json:"plan"` +} + +// Apply, Destroy, Status, DetectDrift, Import, ResolveSizing, Initialize, +// Capabilities, BootstrapStateBackend, EnsureRegistry, EnsureRegistryAuth, +// ResourceDriver, Update, HealthCheck — all get typed req/resp pairs. +// ~20 structs total. +``` + +**Shared marshal helpers** in `interfaces/grpc_args.go`: + +```go +// MarshalArgs JSON-round-trips a typed struct into a map[string]any +// for the gRPC wire format. Preserves field ordering via deterministic +// JSON encoding. +func MarshalArgs(v any) (map[string]any, error) + +// UnmarshalArgs is the inverse: map[string]any → typed struct. +// Returns a clear error if required fields are missing (unlike the +// current silent-fallthrough map lookup pattern). +func UnmarshalArgs(m map[string]any, v any) error +``` + +**Client side** (`cmd/wfctl/deploy_providers.go`): + +```go +// Before: +res, err := r.invoker.InvokeService("IaCProvider.BootstrapStateBackend", cfg) + +// After: +args, err := interfaces.MarshalArgs(interfaces.BootstrapStateBackendReq{Config: cfg}) +if err != nil { + return nil, fmt.Errorf("marshal: %w", err) +} +res, err := r.invoker.InvokeService("IaCProvider.BootstrapStateBackend", args) +``` + +**Server side** (each plugin's `module_instance.go`): + +```go +// Before: +cfg, _ := args["cfg"].(map[string]any) // silent nil-fallthrough + +// After: +var req interfaces.BootstrapStateBackendReq +if err := interfaces.UnmarshalArgs(args, &req); err != nil { + return nil, fmt.Errorf("IaCProvider.BootstrapStateBackend: %w", err) +} +result, err := m.provider.BootstrapStateBackend(ctx, req.Config) +``` + +**Migration path per plugin** (workflow-plugin-digitalocean first, as reference): +1. Import `workflow/interfaces` typed structs. +2. Replace each `invoke*` handler's manual map unpacking with `UnmarshalArgs`. +3. Add integration test per method covering (nil args, empty struct, populated, partial) inputs — catches server-side decode bugs at CI time. + +After the DO plugin lands in v0.8.0, replicate the pattern in aws/gcp/azure/tofu plugins (each v0.2.0 bump). Each plugin pin in `wfctl.yaml` bumps in a consumer. + +### E. `wfctl infra teardown` — state-independent recovery-mode destroy + +**Problem:** BMW's tonight deploy exposed a gap: the old `wfctl infra destroy` operates on **state records**. If state was never persisted (v0.18.8 and earlier, before the fix landed) or corrupted, orphan cloud resources pile up. Recovery requires hand-crafted doctl scripts (`.github/workflows/teardown-staging.yml` with awk-matched resource IDs) — provider-specific shell, exactly the anti-pattern we're removing. + +**New command:** `wfctl infra teardown --env -c [--dry-run] [--approve | --yes]` + +**Safety model (mandatory, non-negotiable):** + +Teardown is the most destructive operation in wfctl — it deletes cloud resources AND wipes state. The command refuses to proceed unless one of these conditions holds: + +| Flag | Behavior | +|---|---| +| `--dry-run` (default on first run without approval) | Prints a detailed preview of what would be destroyed + what state keys would be wiped. No API calls that modify anything. Exits 0 after preview. | +| `--approve` (explicit) | Skips interactive prompt. Required in non-interactive environments (CI, scripts). Cannot be aliased via env var. | +| Interactive TTY | Prints the same preview as `--dry-run`, then prompts `Type 'yes' to destroy all N resources and wipe state: ` on stderr. Only proceeds on exact `yes` literal match. | +| No flag, non-interactive | Errors out with "refusing to destroy without --approve or interactive confirmation. Run with --dry-run first." | + +Rationale: this mirrors `terraform destroy --auto-approve` and `kubectl delete --force` safety patterns. BMW's existing `.github/workflows/teardown-staging.yml` already uses a `workflow_dispatch` input `confirm: 'yes'` — same UX pattern at the GH Actions layer; `--approve` is the wfctl-layer equivalent. + +**Behavior when allowed to proceed:** + +1. Env-resolves the config to get expected module names (uses same resolution as plan/apply). +2. For each `infra.*` module, calls `provider.ResourceDriver(type).Read(ctx, spec.Name)` to discover actual cloud state, **bypassing state records entirely**. +3. Prints preview in both dry-run and execute modes BEFORE any destructive action: + ``` + Teardown plan for env=staging: + infra.vpc bmw-staging-vpc (provider_id: d5a4f3b2-...) → DESTROY + infra.firewall bmw-staging-firewall (provider_id: fw-7e3a...) → DESTROY + infra.database bmw-staging-db (provider_id: db-1234...) → DESTROY + infra.container_service bmw-staging (provider_id: app-abcd...) → DESTROY + State prefix s3://bmw-iac-state/staging/ → WIPE (7 objects) + + Total: 4 cloud resources, 7 state objects. + ``` +4. If `--dry-run`: exit 0 after preview. +5. If `--approve` or interactive `yes`: for each discovered resource, call `provider.Destroy(ctx, []ResourceRef{...})` with the resolved provider IDs. +6. Wipes the state backend prefix for this env (e.g., `s3://bmw-iac-state/staging/`). New method `IaCStateStore.WipeEnv(ctx, envName) error` deletes all objects under the env's prefix. +7. Idempotent: resources not found → skip and continue, not an error (explicit message: `bmw-staging-vpc: not found — skipping`). +8. Reports summary: `Discovered N resources across M modules; destroyed K; skipped S; errors E. State objects wiped: W.` + +**Discovery fidelity caveat:** `Read(ctx, name)` only finds resources whose name matches the env-resolved config. Orphan resources from pre-env-override releases (e.g., a stale `bmw-vpc` alongside the current `bmw-staging-vpc`) will not be discovered by teardown — they must be cleaned up manually or via `wfctl infra teardown --include-orphans` (new flag, v0.20.0 follow-up). For v0.19.0, teardown targets what the current config describes, not archaeological residue. + +### F. `wfctl infra outputs` — masked-by-default output reader + +**Problem:** Resource outputs (DB connection strings, IPs, DNS names, app URLs) are stored in `ResourceState.Outputs` after apply. Currently there's no CLI to extract them — BMW's `Capture staging DB URL` step uses `doctl databases connection $DB_ID --format URI` + awk parsing, completely bypassing wfctl's state. + +**New command:** `wfctl infra outputs [--module ] [--env ] [--output ] [--format value|env|json] [--show-sensitive] -c ` + +**Module schema extension** — outputs are declared in the module config with sensitivity hints: + +```yaml +- name: bmw-staging-db + type: infra.database + config: + # ... + outputs: + connection_url: + sensitive: true + description: "Postgres connection URI with embedded credentials" + host: + sensitive: false + port: + sensitive: false + database: + sensitive: false +``` + +The provider-plugin's driver populates `ResourceState.Outputs[]` during Apply; wfctl doesn't need to know per-type what fields exist — the config declares their sensitivity. + +**Masking rules:** + +| Scenario | Behavior | +|---|---| +| Output declared `sensitive: true`, `--show-sensitive` NOT passed | Value printed as `***MASKED*** (sensitive — use --show-sensitive to reveal)` | +| Output declared `sensitive: true`, `--show-sensitive` passed | Plaintext printed + warning to stderr: `WARNING: printing sensitive value '' to stdout — ensure terminal is not recorded` | +| Output declared `sensitive: true`, `GITHUB_ACTIONS=true`, `--show-sensitive` passed | `::add-mask::` emitted to stdout BEFORE the value; GHA log redacts automatically | +| Output declared `sensitive: false` | Plaintext always | +| Output not declared in schema | Treated as `sensitive: true` (safe default) | + +No environment variable can override `--show-sensitive` — must be explicit on each invocation. Rationale: scripts that pipe output into other tools must be deliberately marked as handling secrets. + +**Output formats:** +- `--format value` (default) — `: ` lines +- `--format env` — `KEY=value` shell-exportable (applies masking to stdout but NOT to env export; use `--show-sensitive` to populate env) +- `--format json` — `{"key": "value", ...}`; sensitive values become `"***MASKED***"` unless `--show-sensitive` + +**Filters:** +- `--module ` limits to one module +- `--output ` limits to one field (useful for shell: `DB_URL=$(wfctl infra outputs --module bmw-staging-db --output connection_url --format value --show-sensitive)`) + +**CLI examples:** +```bash +# List all outputs for all modules, safe default +$ wfctl infra outputs --env staging +bmw-staging-db.connection_url: ***MASKED*** (sensitive) +bmw-staging-db.host: db-abc.nyc3.doctl.com +bmw-staging-db.port: 25060 +bmw-staging.url: https://staging.buymywishlist.com + +# JSON for downstream tooling +$ wfctl infra outputs --env staging --format json +{ + "bmw-staging-db": { + "connection_url": "***MASKED***", + "host": "db-abc...", + "port": 25060 + }, + ... +} + +# Explicit reveal (rare — usually Feature H handles this) +$ wfctl infra outputs --module bmw-staging-db --output connection_url \ + --format value --show-sensitive +WARNING: printing sensitive value 'connection_url' to stdout — ensure terminal is not recorded +postgresql://user:pw@host:25060/bmw +``` + +### G. `wfctl deploy verify` — configurable healthcheck gate + +**Problem:** Post-deploy health verification is critical for staging→prod promotion gates. BMW implements this as ad-hoc curl loops (if it has any; more commonly this is absent, allowing bad deploys to promote). No standardized retry / timeout / multi-target pattern. + +**CLI form:** +```bash +wfctl deploy verify \ + --url https://staging.buymywishlist.com/healthz \ + --expect-status 200 \ + --expect-body-contains "ok" \ # optional + --timeout 5m \ + --retry-interval 10s \ + --max-retries 30 +``` + +Exit 0 only when all conditions met within timeout; non-zero with descriptive stderr otherwise. + +**Config-driven form** (pipeline step): +```yaml +pipelines: + staging-verify: + - type: deploy_verify + targets: + - url: "https://${STAGING_DOMAIN}/healthz" + expect_status: 200 + expect_body_contains: "ok" + - url: "https://${STAGING_DOMAIN}/readyz" + expect_status: 200 + timeout: 5m + retry_interval: 10s + max_retries: 30 + # all targets must pass within global timeout +``` + +**Behavior:** + +1. For each target, poll at `retry_interval` until: + - Received response status matches `expect_status` + - AND (if set) response body contains `expect_body_contains` +2. If all targets pass within global `timeout`: exit 0, log per-target outcome. +3. If any target times out: exit 1, log per-target outcome with last failure reason. +4. Request options per target: method (default GET), headers (map), body (string), TLS verify (default true). + +**Integration with Feature F:** responses are logged at `--verbose`; if a response body contains values matching a sensitive output's content, wfctl emits `::add-mask::` to redact. (Implementation: scan response body against current module outputs' sensitive values.) + +**Use cases:** +- Staging deploy gate: `wfctl deploy verify --url https://staging.../healthz` blocks prod promotion until staging is healthy +- Pre-rollback check: verify current prod is indeed broken before initiating rollback +- Smoke-test placeholder (not a full smoke runner — that's a separate future feature) + +**Out of scope:** complex assertion languages (JSONPath queries, schema validation), custom authentication, cookie handling. Basic HTTP status + body-substring matching only; file follow-up for richer assertions. + +### H. Declarative secret sinks — plaintext never leaves wfctl process + +**Problem:** Extracting a sensitive output (e.g., DB URL) and writing it to a secret store (e.g., GH Actions secrets) traditionally requires shell piping: +```bash +DB_URL=$(wfctl infra outputs ... --show-sensitive) # plaintext on stdout +gh secret set DATABASE_URL --body "$DB_URL" # plaintext in env/argv +``` + +Each arrow touches a surface (stdout, env var, argv) where the plaintext can leak — process listings, terminal recordings, CI logs, shell history. + +**Solution:** declare the sink in the module schema; wfctl reads output → writes sink in-process: + +```yaml +- name: bmw-staging-db + type: infra.database + outputs: + connection_url: + sensitive: true + sinks: + - type: github_secret + repo: "${{ github.repository }}" # or literal GoCodeAlone/buymywishlist + name: DATABASE_URL + # (optional) environment: staging # for GH environment-scoped secret + - type: github_env # for in-job downstream steps + name: DATABASE_URL +``` + +At `wfctl infra apply --env staging`: +1. Resources provision (existing flow). +2. After successful state save, for each module with sink-declared outputs: + - Read the output value from the resource (provider-reported) into a local string — in-process, no stdout. + - For each declared sink, resolve the handler and call `sink.Write(ctx, name, value, opts)`. + - Plaintext lifetime: from `provider.Apply` return value → HTTP POST body to the secret store API. Never crosses stdout, env, or argv. +3. Errors at sink write are non-fatal by default but logged loudly and surfaced in the apply summary — the resource exists, the secret just didn't land. User can re-run with `wfctl infra secrets sync --env staging` to retry (alternate CLI command, future convenience). + +**`SecretSink` interface:** +```go +type SecretSink interface { + // Write stores value under the given logical name. The sink is + // responsible for authentication (uses credentials embedded in the + // config at build-from-config time) and idempotent upsert semantics. + // opts carries sink-type-specific fields (e.g. github environment). + Write(ctx context.Context, name, value string, opts map[string]any) error +} +``` + +**v0.19.0 sink types (built into workflow core):** + +1. **`github_secret`** — writes to GitHub repo/env/org secret via REST API + - Auth: reads `GH_TOKEN` from env (populated by `actions/create-github-app-token@v1` in CI, or user's PAT locally) + - API: `PUT /repos/{owner}/{repo}/actions/secrets/{secret_name}` with libsodium-encrypted payload (uses repo's public key via `GET /repos/{owner}/{repo}/actions/secrets/public-key`) + - Supports `environment:` field for GH Environment secrets (different API path) + +2. **`github_env`** — appends `KEY=value` to `$GITHUB_ENV` file + - Auth: none (file write) + - For in-job downstream steps without leaking via CLI output + +**Fast-follow sink types (v0.19.x plugin-contributed):** + +3. **`aws_secrets_manager`** — via workflow-plugin-aws; uses AWS SDK with plugin's existing IAM credentials +4. **`gcp_secret_manager`** — via workflow-plugin-gcp +5. **`azure_key_vault`** — via workflow-plugin-azure +6. **`vault_kv`** — via new workflow-plugin-vault (future); uses configured vault address + token + +Each plugin declares its sink type in its manifest; wfctl resolves sink type → plugin via the plugin registry. Same dispatch pattern as IaCProvider plugin types. + +**Sink config schema:** +```yaml +# under module.outputs..sinks[]: +- type: # required; string matching a registered SecretSink + name: # required; the key under which the value is stored + # ...other fields are sink-type-specific, validated at config load: + # github_secret: repo, environment (optional) + # aws_secrets_manager: region, path + # etc. +``` + +**Safety invariants:** +- A module with `outputs..sensitive: true` CANNOT have `sinks: [, ...]` without `github_env` being the default behavior for same-job consumption. Rationale: github_env is a file write that's redacted by GHA if masks are registered; safe. Exposing to downstream jobs across step boundaries is safe IF the job-level secret masking is respected. +- A sink write failure DOES NOT fail the apply (resource exists). BUT a subsequent `wfctl infra outputs --show-sensitive` or `wfctl infra secrets sync` can manually re-attempt. +- No plaintext value is ever logged at any log level unless `--verbose` + `--show-sensitive` — both opt-in, both loud. + +**Implementation:** +- New file: `cmd/wfctl/infra_teardown.go` with `runInfraTeardown`. +- Dispatch case in `cmd/wfctl/infra.go` top-level switch. +- Reuses existing `provider.ResourceDriver(type).Read` (name-based, now reliable per DO plugin v0.7.3+). +- Reuses existing `provider.Destroy`. +- New state-store method `WipeEnv(ctx, envName)` — filesystem: rm -rf the prefix dir; spaces/s3: batch-delete prefix objects; postgres: DELETE WHERE env=... + +**BMW migration:** +Replace `.github/workflows/teardown-staging.yml`'s 80-line doctl+awk scavenger hunt with: + +```yaml +jobs: + teardown: + if: github.event.inputs.confirm == 'yes' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: GoCodeAlone/setup-wfctl@v1 + with: { version: v0.19.0 } + - uses: actions/create-github-app-token@v1 + id: app-token + with: + app-id: ${{ vars.PLUGIN_INSTALLER_APP_ID }} + private-key: ${{ secrets.PLUGIN_INSTALLER_APP_PRIVATE_KEY }} + - name: wfctl infra teardown + env: + DIGITALOCEAN_TOKEN: ${{ secrets.DIGITALOCEAN_TOKEN }} + SPACES_access_key: ${{ secrets.SPACES_access_key }} + SPACES_secret_key: ${{ secrets.SPACES_secret_key }} + GH_TOKEN: ${{ steps.app-token.outputs.token }} + run: wfctl infra teardown --env staging -c infra.yaml --approve +``` + +Result: teardown is provider-agnostic, works across DO/AWS/GCP/Azure plugins uniformly, composable with other wfctl commands. + +### D. Official workflow-migrate Docker image + +**Current:** BMW's `deploy.yml:118-124` clones `GoCodeAlone/workflow` at `0c117140dec5dca350440259dc0b21c05df78d3e` (v0.2.0) and runs `docker build` on it. + +**New:** `workflow` repo's release workflow publishes `ghcr.io/gocodealone/workflow-migrate:vX.Y.Z` on every tag. + +**Additions in workflow repo:** +1. **`Dockerfile.migrate`** at repo root — distroless-based, COPY the `workflow-migrate` binary from the build stage, no shell. Signed with cosign via `sigstore/cosign-action`. +2. **`.github/workflows/release.yml` extension** — after goreleaser publishes binaries, add a job: + ```yaml + publish-migrate-image: + runs-on: ubuntu-latest + permissions: + packages: write + id-token: write + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - uses: docker/build-push-action@v5 + with: + file: Dockerfile.migrate + tags: | + ghcr.io/gocodealone/workflow-migrate:${{ github.ref_name }} + ghcr.io/gocodealone/workflow-migrate:latest + push: true + provenance: true + sbom: true + - uses: sigstore/cosign-installer@v3 + - run: cosign sign --yes ghcr.io/gocodealone/workflow-migrate:${{ github.ref_name }} + ``` + +**BMW migration (post-v0.19.0 tag):** +- Delete the workflow checkout + docker build steps from `deploy.yml`. +- `infra.yaml` pre-deploy migration job references `ghcr.io/gocodealone/workflow-migrate:${{ vars.WFCTL_VERSION }}` (a new Actions variable tracking the wfctl version, bumped by the same setup-wfctl bump PRs). + +## Config precedence + loading order + +`wfctl.yaml` is read if present; falls back to `app.yaml`'s legacy inline sections with deprecation warnings. For registries specifically, if both `wfctl.yaml:registries` AND `app.yaml:registries` are present, wfctl.yaml wins and emits a deprecation warning on the app.yaml section. + +## Data flow + +### Plugin install +``` +wfctl plugin install + → loads .wfctl-lock.yaml (authoritative) + → for each entry: fetch tarball → verify sha256 → extract → write plugin.json + → if sha256 mismatches: hard fail (supply-chain integrity) + +wfctl plugin add foo@v1.2.3 + → appends to wfctl.yaml + → resolves version (GET release, download, sha256) + → rewrites .wfctl-lock.yaml (re-marshal to preserve comment-free YAML) + → git-friendly diff +``` + +### Multi-registry build +``` +wfctl build --push --image bmw-app + → reads container_build step config + → step.registries = [docr-bmw, ghcr] + → for each: resolver.Ensure(ctx, spec) + → docr-bmw is type:digitalocean → provider.EnsureRegistryAuth via DO plugin gRPC + → ghcr is type:github → built-in: writes ~/.docker/config.json entry using GITHUB_TOKEN + → docker buildx build --push with multiple fully-qualified tags + → push goes to all configured registries atomically +``` + +### Typed gRPC call +``` +wfctl invokes IaCProvider.BootstrapStateBackend(ctx, cfg) + → remoteIaCProvider marshals BootstrapStateBackendReq{Config: cfg} + → gRPC transport: map[string]any → plugin subprocess + → plugin invokeProviderBootstrapStateBackend unmarshals into + BootstrapStateBackendReq struct — any missing required field raises + a typed error rather than silently nil-valuing + → typed response marshaled back; client unmarshals + → compile-time contract: struct changes in interfaces/ break both + sides at build time, not runtime +``` + +## Testing + +### A. Plugin manifest + lockfile +- `config/wfctl_manifest_test.go` — YAML parse round-trip, version field validation, multiple plugins with and without auth/verify +- `config/lockfile_test.go` — lockfile (de)serialization preserves entry order, sha256 fields validated as hex +- `cmd/wfctl/plugin_add_test.go` — round-trip `wfctl plugin add` writes manifest + lockfile; mock HTTP fetch for registry calls +- `cmd/wfctl/plugin_install_lockfile_test.go` — `wfctl plugin install` reads lockfile; sha256 mismatch → hard fail; version-not-in-lockfile → error +- `cmd/wfctl/migrate_plugins_test.go` — migrates a real BMW-shaped app.yaml; asserts wfctl.yaml + .wfctl-lock.yaml produced; asserts the deprecation warning emitted on subsequent loads of the original app.yaml + +### B. Multi-registry +- `registry/resolver_test.go` — given spec, returns right handler type (table-driven: digitalocean/aws/gcp/azure/github/docker) +- `registry/handler_github_test.go` — writes credential store entry for GHCR using test env vars +- `registry/handler_docker_test.go` — same for generic docker registry +- Each IaC plugin gets a `TestEnsureRegistryAuth_` integration test that launches the plugin subprocess and verifies the Docker credential store is written correctly (use a fake $HOME) +- `cmd/wfctl/build_push_multi_registry_test.go` — simulated buildx invocation captures the tag list; asserts all configured registries are targeted + +### C. Typed-args +- `interfaces/grpc_args_test.go` — MarshalArgs/UnmarshalArgs round-trip for every Req/Resp type; asserts missing-required-field raises typed error; nil/empty/populated/partially-populated variants for each +- For each of the 4 IaC plugins, `internal/grpc_dispatch_integration_test.go` — spins up plugin binary via `go-plugin`, exercises every IaCProvider method over gRPC with populated typed args; asserts the plugin's method implementation receives exactly what the client sent. Would catch any future schema drift at CI. + +### D. workflow-migrate image +- `Dockerfile.migrate` syntax-checks in CI via `docker buildx build --load` (no push on PR) +- Release workflow extension: tagged manually with `v0.19.0-rc1` to test the full push + sign + SBOM path in a staging context before v0.19.0 final + +### E. wfctl infra teardown +- `cmd/wfctl/infra_teardown_test.go` — table-driven: (a) all resources present → all destroyed; (b) some missing → skipped silently; (c) none present → exits clean with summary; (d) provider errors mid-run → continues to next resource, reports errors at end +- `cmd/wfctl/infra_teardown_env_test.go` — env resolution used correctly; env-resolved names drive the discovery +- `module/state_wipe_env_test.go` — filesystem: wipes prefix dir; spaces: batch-deletes prefix objects; postgres: DELETE WHERE env=... +- End-to-end BMW-shaped test — writes mock infra.yaml with staging env overrides, fake provider returns specific resources for Read, asserts Destroy called with right IDs, asserts WipeEnv called with "staging" +- CLI integration: `TestTeardownCommand_DryRun` — `--dry-run` flag prints plan without executing +- `TestTeardownCommand_RequiresApprove` — without `--approve` in non-interactive mode, errors out with clear message +- `TestTeardownCommand_InteractiveConfirm` — with TTY, prompts for 'yes' string; rejects anything else + +### F. wfctl infra outputs +- `cmd/wfctl/infra_outputs_test.go` — masking by default; `--show-sensitive` requires explicit flag +- `TestOutputs_GithubActionsMaskEmitted` — when `GITHUB_ACTIONS=true` + `--show-sensitive`, emits `::add-mask::` before the value +- `TestOutputs_FormatEnv` / `TestOutputs_FormatJson` / `TestOutputs_FormatValue` — format variants produce correct output +- `TestOutputs_UnknownModule_Errors` / `TestOutputs_UnknownField_Errors` — filter errors are clear +- `TestOutputs_UndeclaredOutput_TreatedAsSensitive` — safe default +- Config validation: `TestModuleSchema_OutputsSensitiveRequired` — if `sinks:` is present, `sensitive: true` is required (sinks imply sensitivity) + +### G. wfctl deploy verify +- `cmd/wfctl/deploy_verify_test.go` — mock HTTP server returns 200 → pass; returns 503 repeatedly → fail on timeout; returns 200 after N retries → pass +- `TestDeployVerify_MultiTarget` — all-targets-pass gate; one failing target fails the gate +- `TestDeployVerify_BodyContains` — status matches but body mismatch → retry; both match → pass +- `TestDeployVerify_TimeoutShape` — timeout enforcement with retry_interval arithmetic +- `TestDeployVerify_PipelineStepIntegration` — config-driven form runs through pipeline executor identically + +### H. Declarative secret sinks +- `module/secret_sink_test.go` — interface mock: `Write` called with right (name, value, opts) tuple +- `secret_sink/github_secret_test.go` — libsodium encryption round-trip with test public key; writes via mock HTTP API endpoint for repo / env secret +- `secret_sink/github_env_test.go` — appends correct format to a tempfile acting as $GITHUB_ENV +- `cmd/wfctl/infra_apply_sink_integration_test.go` — end-to-end: fake provider Apply returns outputs; `sinks:` declared; asserts sink handler receives plaintext value; asserts no plaintext in stdout/stderr +- `TestInfraApply_SinkWriteFailure_NonFatal` — sink.Write errors → apply completes, warning logged, resource state saved +- `TestSinkConfig_RejectsSinksOnNonSensitiveOutputs` — config validation: `sinks:` without `sensitive: true` fails load + +### Regression tests (things we want to not break) +- All existing `cmd/wfctl/infra_*` tests continue to pass unchanged; typed-args refactor is invisible at the command layer +- BMW's current `app.yaml` loads cleanly with deprecation warning; migrate-plugins command produces an equivalent-behavior config split +- DO plugin v0.7.6 → v0.8.0 upgrade path: typed-args refactor does not change external gRPC method names; only arg encoding changes + +## Rollout phases + +Phases are sequenced by dependency, each phase produces a releasable artifact: + +**Phase 1 (workflow core — v0.19.0-alpha.1):** Plugin manifest + lockfile split (A) + backward-compat shim on app.yaml. Ships independently; BMW on v0.18.8 continues to work via the shim. + +**Phase 2 (workflow core — v0.19.0-alpha.2):** Typed-args refactor (C) — interfaces/grpc_args.go + all wfctl-side call sites updated. Plugins still use map[string]any; typed helpers are backward-compatible (client marshals → wire format unchanged → server may use old or new unpacking). + +**Phase 3 (workflow-plugin-digitalocean v0.8.0):** DO plugin adopts typed-args unpacking on every gRPC handler. Adds gRPC integration test suite. First plugin to land the new pattern. + +**Phase 4 (workflow core — v0.19.0-alpha.3):** Multi-registry support (B) — `registries:` config block + resolver + IaCProvider.EnsureRegistryAuth method on the interface. Built-in github + docker handlers. + +**Phase 5 (workflow-plugin-digitalocean v0.8.1):** DO plugin implements EnsureRegistryAuth + EnsureRegistry (delegates to doctl SDK or writes Docker config directly). + +**Phase 6 (workflow core — v0.19.0-rc1):** workflow-migrate Docker image (D) — Dockerfile.migrate + release workflow extension. Release candidate tagged to validate the push + sign + SBOM path. + +**Phase 6b (workflow core — v0.19.0-rc2):** `wfctl infra teardown` (E) — state-independent recovery command + `IaCStateStore.WipeEnv`. No plugin changes required — uses existing `ResourceDriver.Read` + `IaCProvider.Destroy`. + +**Phase 7 (v0.19.0 final):** All five features merged, changelog finalized, tag released. + +**Phase 8 (plugin updates):** aws, gcp, azure, tofu plugins adopt typed-args + EnsureRegistryAuth in their v0.2.0 bumps. Can happen in parallel with each other after v0.19.0 tag. + +**Phase 9 (BMW migration, after v0.19.0 stabilizes):** +- Run `wfctl migrate plugins` against BMW +- Add `registries:` block to wfctl.yaml referencing do-provider + ghcr +- Delete doctl registry shell steps from deploy.yml +- Drop workflow checkout + docker build; reference ghcr.io/gocodealone/workflow-migrate:v0.19.0 by tag +- **Replace `.github/workflows/teardown-staging.yml`'s 80-line doctl+awk script with a 15-line `wfctl infra teardown --env staging --confirm` invocation** +- Single PR captures the full declarative migration; size ~150 lines + +## Deferred (explicit non-goals) + +- **Constraint-based resolution** (semver ^ / ~): v0.20.0. +- **Transitive plugin deps** (plugins depending on other plugins): v0.20.0+. +- **Registry types beyond the listed 6:** file follow-up when a real need appears. +- **Cross-registry image mirroring** as a wfctl-managed step (copy from A → B): file follow-up. +- **`wfctl registry login/logout` CLI** as a separate command: not needed; build step auto-invokes Ensure handlers. If interactive login becomes useful, file follow-up. +- **Auto-upgrade of lockfile on `wfctl plugin install`:** no — `install` is strict; `update`/`lock` are the only commands that modify the lockfile. Mirrors `go mod download` vs `go get`. +- **Schema versioning of wfctl.yaml itself:** the `version: 1` field is present but no alternative is defined yet. v0.20.0 may bump to `version: 2` with constraint syntax. + +## Success criteria + +- `workflow` repo passes full CI on v0.19.0 tag; goreleaser publishes binaries + workflow-migrate image + SBOM + cosign signature. +- `workflow-plugin-digitalocean` v0.8.x passes the new gRPC-dispatch integration test matrix (every IaCProvider method × nil/empty/populated args). +- BMW migration PR (post-v0.19.0): `deploy.yml` has **zero** `doctl`, **zero** `docker build`, **zero** workflow clone, **zero** `RELEASES_TOKEN` references. wfctl.yaml declares plugins + registries; lockfile committed; `wfctl build --push` covers DOCR + GHCR; `infra.yaml` references the official migrate image by tag. +- The v0.7.5-class bug (gRPC schema mismatch between client and plugin) cannot land silently — compile breakage in any plugin that doesn't adopt typed args catches it; integration tests cover nil/empty/populated across every method. + +## Open questions (self-resolved during design) + +- *Q: Why not a single fat `wfctl.yaml` that absorbs everything from `app.yaml`?* A: `app.yaml` is the pipeline + module config; mixing plugin dep management + registry config + pipeline definition makes it unusably large. Split by concern. +- *Q: Why not use `go.mod` style (`wfctl.yaml` + `wfctl.sum`)?* A: Convention is weaker in Go community for non-module dep trees; we pick `wfctl.yaml` + `.wfctl-lock.yaml` to match npm/cargo operator expectations more closely. The semantics are identical. +- *Q: Do we need to version the manifest format?* A: `version: 1` field included for future-proofing; v0.20.0 may bump. diff --git a/docs/plans/2026-04-24-v0.19.0-architectural-cleanup.md b/docs/plans/2026-04-24-v0.19.0-architectural-cleanup.md new file mode 100644 index 00000000..de6b2e53 --- /dev/null +++ b/docs/plans/2026-04-24-v0.19.0-architectural-cleanup.md @@ -0,0 +1,725 @@ +# Workflow v0.19.0 Architectural Cleanup Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Ship workflow v0.19.0 with 7 architectural cleanup features bundled into a single config-schema release boundary (wfctl.yaml + .wfctl-lock.yaml): plugin manifest split, multi-registry, typed gRPC args, official migrate image, wfctl infra teardown, wfctl infra outputs (masked), wfctl deploy verify, declarative secret sinks. + +**Architecture:** Each feature is independently testable but they share config-file shape + release tag. Rollout is phased (alpha.1 → alpha.2 → alpha.3 → rc1 → rc2 → final) so consumers can track early changes and each alpha is independently releasable. Plugin fan-out (aws/gcp/azure/tofu adopting typed-args + EnsureRegistryAuth) happens in parallel after v0.19.0 tags. + +**Tech Stack:** Go 1.26, Hashicorp go-plugin (gRPC over stdio), Hashicorp modular framework, goreleaser, Docker buildx + cosign, libsodium (for github_secret sink encryption). + +**Reference design:** `docs/plans/2026-04-24-v0.19.0-architectural-cleanup-design.md`. + +**Timing:** v0.19.0 tag does NOT ship until BMW's tonight deploy reaches prod /healthz green (task #26). Individual phase PRs can merge; final tag held. + +--- + +## Phase 1 — workflow v0.19.0-alpha.1: Plugin manifest + lockfile (Feature A) + +### Task A1: Define `wfctl.yaml` manifest schema + +**Files:** +- Create: `config/wfctl_manifest.go` (new file) +- Test: `config/wfctl_manifest_test.go` + +**Step 1: Write the failing test** + +```go +// config/wfctl_manifest_test.go +package config + +import ( + "testing" + "gopkg.in/yaml.v3" +) + +func TestWfctlManifest_LoadsPluginEntries(t *testing.T) { + raw := []byte(` +version: 1 +plugins: + - name: workflow-plugin-digitalocean + version: v0.7.6 + source: github.com/GoCodeAlone/workflow-plugin-digitalocean + auth: + env: GH_TOKEN + - name: workflow-plugin-supply-chain + version: v0.3.0 + source: github.com/GoCodeAlone/workflow-plugin-supply-chain + verify: + identity: "https://github.com/GoCodeAlone/workflow-plugin-supply-chain/.github/workflows/release.yml@refs/tags/v0.3.0" +`) + var m WfctlManifest + if err := yaml.Unmarshal(raw, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if m.Version != 1 { + t.Errorf("version = %d, want 1", m.Version) + } + if len(m.Plugins) != 2 { + t.Fatalf("plugins count = %d, want 2", len(m.Plugins)) + } + p := m.Plugins[0] + if p.Name != "workflow-plugin-digitalocean" || p.Version != "v0.7.6" { + t.Errorf("plugin[0] = %+v", p) + } + if p.Auth == nil || p.Auth.Env != "GH_TOKEN" { + t.Errorf("plugin[0].auth = %+v", p.Auth) + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./config/ -run TestWfctlManifest_LoadsPluginEntries -v` +Expected: compile error (WfctlManifest undefined) + +**Step 3: Define the struct** + +```go +// config/wfctl_manifest.go +package config + +type WfctlManifest struct { + Version int `yaml:"version"` + Plugins []WfctlPluginEntry `yaml:"plugins"` +} + +type WfctlPluginEntry struct { + Name string `yaml:"name"` + Version string `yaml:"version"` + Source string `yaml:"source"` + Auth *WfctlPluginAuth `yaml:"auth,omitempty"` + Verify *WfctlPluginVerify `yaml:"verify,omitempty"` +} + +type WfctlPluginAuth struct { + Env string `yaml:"env"` +} + +type WfctlPluginVerify struct { + Identity string `yaml:"identity"` +} +``` + +**Step 4: Run test — passes. Add edge cases:** +- `TestWfctlManifest_EmptyPluginsList` +- `TestWfctlManifest_InvalidVersionField` (version: "1" string vs 1 int — should fail parse) +- `TestWfctlManifest_LoadFromFile` (reads from disk) + +**Step 5: Commit** + +```bash +git add config/wfctl_manifest.go config/wfctl_manifest_test.go +git commit -m "feat(config): WfctlManifest struct for wfctl.yaml + lockfile parsing" +``` + +### Task A2: Define `.wfctl-lock.yaml` lockfile schema + +**Files:** +- Create: `config/wfctl_lockfile.go` +- Test: `config/wfctl_lockfile_test.go` + +Similar TDD pattern — struct: +```go +type WfctlLockfile struct { + Version int `yaml:"version"` + GeneratedAt time.Time `yaml:"generated_at"` + Plugins map[string]WfctlLockPluginEntry `yaml:"plugins"` // keyed by name +} + +type WfctlLockPluginEntry struct { + Version string `yaml:"version"` + Source string `yaml:"source"` + SHA256 string `yaml:"sha256"` + Platforms map[string]WfctlLockPlatform `yaml:"platforms"` // keyed by "linux-amd64", etc. +} + +type WfctlLockPlatform struct { + URL string `yaml:"url"` + SHA256 string `yaml:"sha256"` +} +``` + +Tests: load/save round-trip, deterministic YAML output (maps sorted), missing-field errors. + +Commit: `feat(config): WfctlLockfile struct + deterministic (de)serialization` + +### Task A3: `wfctl plugin lock` subcommand — regenerate lockfile + +**Files:** +- Create: `cmd/wfctl/plugin_lock.go` +- Test: `cmd/wfctl/plugin_lock_test.go` + +Behavior: read `wfctl.yaml`, for each plugin fetch release metadata from GitHub (or configured registry), download tarball → compute sha256, assemble `.wfctl-lock.yaml` content, write. + +Tests: happy path with mock HTTP fetcher; bad version (not found) → clear error; existing lockfile preserved on failure (atomic write). + +Commit: `feat(wfctl): wfctl plugin lock — regenerates .wfctl-lock.yaml from manifest` + +### Task A4: `wfctl plugin add` subcommand + +Appends to manifest + calls the lock logic. Tests round-trip (read → add → lock → read). + +Commit: `feat(wfctl): wfctl plugin add [@version]` + +### Task A5: `wfctl plugin remove` subcommand + +Deletes entry from both files. + +Commit: `feat(wfctl): wfctl plugin remove ` + +### Task A6: `wfctl plugin update` subcommand + +Re-resolves to latest matching (exact pin for v0.19.0 — just re-fetches the current pin to get fresh sha256; constraint semver deferred to v0.20.0). + +Commit: `feat(wfctl): wfctl plugin update []` + +### Task A7: `wfctl plugin install` reads lockfile (not manifest) + +**Files:** +- Modify: `cmd/wfctl/plugin_install.go` (add `loadLockfile` path; prefer lockfile over manifest/app.yaml) + +Behavior change: if `.wfctl-lock.yaml` exists, treat it as authoritative. Download exact versions from locked URLs, verify sha256, abort on mismatch. Falls back to `app.yaml` legacy inline pins with deprecation warning if no lockfile. + +Tests: lockfile present → installs from locked URLs; sha256 mismatch → hard fail; lockfile absent + app.yaml inline pins → warning + legacy flow. + +Commit: `feat(wfctl): wfctl plugin install reads .wfctl-lock.yaml as authoritative` + +### Task A8: `wfctl migrate plugins` one-time migration + +**Files:** +- Create: `cmd/wfctl/migrate_plugins.go` +- Test: `cmd/wfctl/migrate_plugins_test.go` + +Behavior: read `app.yaml`, extract each inline plugin entry under `requires.plugins[]`, write to `wfctl.yaml`, regenerate `.wfctl-lock.yaml`. Prompt (or `--auto`) to strip inline fields from `app.yaml`. + +Tests: BMW-shaped app.yaml → migrates cleanly; no plugins → exits 0 noop; mixed (some have inline, some already in wfctl.yaml) → merges sensibly. + +Commit: `feat(wfctl): wfctl migrate plugins — app.yaml inline → wfctl.yaml + lockfile` + +### Task A9: Deprecation warning on app.yaml inline plugin fields + +**Files:** +- Modify: `config/module.go` (or wherever `requires.plugins[].version` is parsed) + +Add a warning emit when the field is non-empty at config-load time. Use a `deprecatedOnce` sync.Once pattern so it fires once per run. + +Tests: load BMW-shaped app.yaml → assert warning on stderr; load clean config → no warning. + +Commit: `feat(config): deprecation warning when app.yaml has inline plugin.version` + +### Task A10: Open PR for Phase 1 + +- Branch: `feat/v0.19.0-plugin-manifest-split` off main +- Target: workflow repo +- Body: reference design doc §A; lists new commands + migration path +- Standard flow: LOCAL review → push → Copilot → DM team-lead for merge +- On merge: tag `v0.19.0-alpha.1` + +--- + +## Phase 2 — workflow v0.19.0-alpha.2: Typed-args refactor wfctl-side (Feature C) + +### Task C1: Define per-method typed req/resp structs + +**Files:** +- Create: `interfaces/iac_provider_args.go` +- Test: `interfaces/iac_provider_args_test.go` + +Define structs for every IaCProvider method + ResourceDriver method: +- `BootstrapStateBackendReq/Resp` +- `InitializeReq/Resp`, `CapabilitiesReq/Resp` +- `PlanReq/Resp`, `ApplyReq/Resp`, `DestroyReq/Resp`, `StatusReq/Resp`, `DetectDriftReq/Resp`, `ImportReq/Resp` +- `ResolveSizingReq/Resp` +- `ResourceDriverReq/Resp` (returning a handle; actual method calls follow) +- `EnsureRegistryReq/Resp`, `EnsureRegistryAuthReq/Resp` (for Feature B, land together) +- `UpdateReq/Resp`, `HealthCheckReq/Resp` (ResourceDriver) + +Tests: one test per struct asserting Go struct-field JSON tags match expected wire format; ensures field additions don't accidentally break backward compat. + +Commit: `feat(interfaces): typed request/response structs for IaCProvider gRPC` + +### Task C2: `MarshalArgs` / `UnmarshalArgs` helpers + +**Files:** +- Create: `interfaces/grpc_args.go` +- Test: `interfaces/grpc_args_test.go` + +```go +// MarshalArgs JSON-round-trips a typed struct into a map[string]any. +func MarshalArgs(v any) (map[string]any, error) { + b, err := json.Marshal(v) + if err != nil { return nil, err } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { return nil, err } + return m, nil +} + +// UnmarshalArgs unpacks a map[string]any back into a typed struct. +// Returns a clear error if required fields are missing or have wrong type. +func UnmarshalArgs(m map[string]any, v any) error { + b, err := json.Marshal(m) + if err != nil { return err } + dec := json.NewDecoder(bytes.NewReader(b)) + dec.DisallowUnknownFields() // catches schema drift + return dec.Decode(v) +} +``` + +Tests: +- Round-trip: marshal → unmarshal → struct matches +- Nil input → empty struct +- Empty map → struct with zero-value fields (no error unless required tags enforced — v0.20.0 enhancement) +- Missing required field → decode error +- Unknown field → decode error (catches typos on plugin side) + +Commit: `feat(interfaces): MarshalArgs/UnmarshalArgs helpers for gRPC boundary` + +### Task C3: Migrate wfctl client call sites to typed args + +**Files:** +- Modify: `cmd/wfctl/deploy_providers.go` (every `InvokeService` call site) + +For each `r.invoker.InvokeService("IaCProvider.", )`: +1. Construct typed Req struct +2. `MarshalArgs(req)` → map +3. Pass to InvokeService +4. `UnmarshalArgs(res, &resp)` → typed response + +Tests: existing tests in `deploy_providers_plugin_test.go` should still pass (wire format unchanged). Add new test asserting that a struct field change shows up as a compile error, not a silent mismatch. + +Commit: `refactor(wfctl): client-side typed-args for IaCProvider gRPC` + +### Task C4: Open PR for Phase 2 + +- Branch: `refactor/v0.19.0-typed-args-client` +- Body: explains typed-args goal; notes server side unchanged (backward compat) +- On merge: tag `v0.19.0-alpha.2` + +--- + +## Phase 3 — workflow-plugin-digitalocean v0.8.0: Typed-args server-side + integration tests + +### Task C5 (DO plugin repo): Import workflow v0.19.0-alpha.2 interfaces + +**Files:** +- Modify: `go.mod` (bump workflow dep) + +```bash +cd /Users/jon/workspace/workflow-plugin-digitalocean +go get github.com/GoCodeAlone/workflow@v0.19.0-alpha.2 +go mod tidy +``` + +Commit: `chore(deps): bump workflow to v0.19.0-alpha.2 (typed-args interfaces)` + +### Task C6: Migrate DO plugin gRPC handlers to typed args + +**Files:** +- Modify: `internal/module_instance.go` (every `invoke*` handler) + +Pattern: +```go +// Before: +func (m *doModuleInstance) invokeProviderBootstrapStateBackend(args map[string]any) (map[string]any, error) { + cfg := args // was: args["cfg"] + // ... +} + +// After: +func (m *doModuleInstance) invokeProviderBootstrapStateBackend(args map[string]any) (map[string]any, error) { + var req interfaces.BootstrapStateBackendReq + if err := interfaces.UnmarshalArgs(args, &req); err != nil { + return nil, fmt.Errorf("IaCProvider.BootstrapStateBackend: %w", err) + } + result, err := m.provider.BootstrapStateBackend(context.Background(), req.Config) + if err != nil { return nil, err } + if result == nil { return map[string]any{}, nil } + resp := interfaces.BootstrapStateBackendResp{ + Bucket: result.Bucket, Region: result.Region, + Endpoint: result.Endpoint, EnvVars: result.EnvVars, + } + return interfaces.MarshalArgs(resp) +} +``` + +Repeat for every `invoke*` handler. + +Commit per handler for clean revert points: `refactor(dispatch): typed-args for ` (× 14 handlers). + +### Task C7: Add gRPC-dispatch integration test suite + +**Files:** +- Create: `internal/grpc_dispatch_integration_test.go` + +For each IaCProvider method, launch the plugin subprocess via go-plugin's test harness, call the method via gRPC with test args, assert the plugin's method impl received exactly what the client sent. Uses fake S3/DO clients to avoid real API calls. + +Test matrix: 14 methods × 4 input variants (nil, empty, populated, partial) = 56 test cases. Organize as table-driven. + +Commit: `test(dispatch): gRPC integration tests — 14 methods × nil/empty/populated/partial` + +### Task C8: Open PR for DO plugin v0.8.0 + +- Branch: `feat/v0.8.0-typed-args` +- Body: explains breaking change (requires wfctl ≥ v0.19.0-alpha.2) + integration test coverage +- On merge: tag `v0.8.0` + +--- + +## Phase 4 — workflow v0.19.0-alpha.3: Multi-registry support (Feature B) + +### Task B1: Define `registries:` schema + RegistrySpec type + +**Files:** +- Create: `config/registry.go` (with RegistryConfig, RegistrySpec types) +- Modify: `config/workflow_config.go` (add `Registries []RegistryConfig`) +- Test: `config/registry_test.go` + +Schema per the design: +```go +type RegistryConfig struct { + Name string `yaml:"name"` + Type string `yaml:"type"` // digitalocean|aws-ecr|gcp-ar|azure-acr|github|docker + Host string `yaml:"host"` + Namespace string `yaml:"namespace,omitempty"` + Provider string `yaml:"provider,omitempty"` // iac.provider ref for provider-integrated types + Auth *RegistryAuth `yaml:"auth,omitempty"` + Extra map[string]any `yaml:",inline"` // provider-specific passthrough +} +``` + +Tests: parse each type + validate required fields per type. + +Commit: `feat(config): RegistryConfig schema for wfctl.yaml registries block` + +### Task B2: `SecretSink` interface + base types (needed by Feature H + Feature B shares same dispatch pattern) + +Do this early so Feature H can layer on it without a second interface design pass. + +**Files:** +- Create: `module/secret_sink.go` (interface + built-in types registration) + +### Task B3: `IaCProvider.EnsureRegistry` + `EnsureRegistryAuth` methods + +**Files:** +- Modify: `interfaces/iac_provider.go` (add methods to interface) +- Modify: all implementations (test stubs, remote wrapper) + +Tests: interface assertions, remote wrapper test using typed args (from Phase 2). + +Commit: `feat(interfaces): IaCProvider.EnsureRegistry + EnsureRegistryAuth` + +### Task B4: Registry resolver dispatch + +**Files:** +- Create: `registry/resolver.go` + per-type handlers + +Dispatch table: +- `digitalocean`, `aws-ecr`, `gcp-ar`, `azure-acr` → resolve via `IaCProvider.EnsureRegistryAuth` +- `github` → built-in handler in `registry/github_handler.go` +- `docker` → built-in handler in `registry/docker_handler.go` + +Tests: resolver picks right handler per type; handler contract test per type using mock Docker credential store. + +Commit: `feat(registry): multi-type resolver + github/docker built-in handlers` + +### Task B5: Integrate `registries: []` into container_build step + +**Files:** +- Modify: `module/pipeline_step_container_build.go` (or equivalent) + +Before buildx invocation: for each registry in step's list, resolve + call Ensure. Then build with fully-qualified tags per registry. + +Tests: pipeline step integration with mock resolver. + +Commit: `feat(pipeline): container_build accepts registries: [...] for multi-destination push` + +### Task B6: Open PR for Phase 4 + +- Branch: `feat/v0.19.0-multi-registry` +- On merge: tag `v0.19.0-alpha.3` + +--- + +## Phase 5 — workflow-plugin-digitalocean v0.8.1: EnsureRegistryAuth server-side + +### Task B7 (DO plugin): Implement EnsureRegistry + EnsureRegistryAuth + +**Files:** +- Create: `internal/registry.go` (new) +- Modify: `internal/provider.go` (add methods) +- Modify: `internal/module_instance.go` (gRPC dispatch handlers) + +Implementation: use doctl SDK or direct DO API (`POST /v2/registry` for create, write Docker credential store entry with DO API token as password). + +Tests: fake DOCR API + fake $HOME/.docker dir; assert credential store entry written correctly. + +Commit: `feat(do): EnsureRegistry + EnsureRegistryAuth for DOCR` + +### Task B8: Open PR for DO plugin v0.8.1 + +Tag v0.8.1 on merge. + +--- + +## Phase 6a — workflow v0.19.0-rc1: workflow-migrate image (Feature D) + +### Task D1: Add Dockerfile.migrate + +**Files:** +- Create: `Dockerfile.migrate` (distroless-based) + +Build stage compiles `cmd/workflow-migrate` (if exists — if not, create a thin wrapper that runs migrations via the existing config path). + +Commit: `feat(docker): Dockerfile.migrate for pre-deploy migration image` + +### Task D2: Release workflow publishes ghcr.io/gocodealone/workflow-migrate + +**Files:** +- Modify: `.github/workflows/release.yml` (add publish-migrate-image job) + +See design §D for YAML. Includes buildx, cosign sign, SBOM. + +Commit: `ci(release): publish + sign workflow-migrate image on tag` + +### Task D3: Open PR for Phase 6a + +Tag v0.19.0-rc1 on merge. Manual release run tests the full publish path before v0.19.0 final. + +--- + +## Phase 6b — workflow v0.19.0-rc2: wfctl infra teardown (Feature E) + +### Task E1: `IaCStateStore.WipeEnv` method on interface + +**Files:** +- Modify: `interfaces/iac_state.go` (add `WipeEnv(ctx, envName) error`) +- Modify: each state store impl (filesystem, spaces, postgres) + +Tests per impl: wipes correct prefix, idempotent on missing. + +Commit: `feat(state): IaCStateStore.WipeEnv for per-env state cleanup` + +### Task E2: `runInfraTeardown` function + +**Files:** +- Create: `cmd/wfctl/infra_teardown.go` +- Test: `cmd/wfctl/infra_teardown_test.go` + +Discovery + destroy loop per design §E. Safety model: `--approve` required in non-interactive, interactive TTY prompts for 'yes'. + +Commit: `feat(wfctl): wfctl infra teardown — recovery-mode state-independent destroy` + +### Task E3: Dispatch case in infra top-level switch + +**Files:** +- Modify: `cmd/wfctl/infra.go` (add `case "teardown"`) + +Commit: `feat(wfctl): teardown dispatch in infra command` + +### Task E4: Open PR for Phase 6b + +Tag v0.19.0-rc2 on merge. + +--- + +## Phase 6c — workflow v0.19.0-rc3: Outputs + Verify + Secret Sinks (Features F, G, H) + +Bundle F/G/H in a single alpha since they share module-schema extension. + +### Task F1: Module schema extension — outputs with sensitive flag + +**Files:** +- Modify: `config/module.go` — add `Outputs map[string]OutputSpec` to ModuleConfig +- Type: `OutputSpec{Sensitive bool, Description string, Sinks []SinkSpec}` + +Tests: parse round-trip; validation (sinks require sensitive:true). + +Commit: `feat(config): module.outputs schema with sensitive + sinks` + +### Task F2: `wfctl infra outputs` CLI + +**Files:** +- Create: `cmd/wfctl/infra_outputs.go` +- Test: `cmd/wfctl/infra_outputs_test.go` + +Masking by default; `--show-sensitive` opt-in; GHA `::add-mask::` integration when `GITHUB_ACTIONS=true`. + +Commit: `feat(wfctl): wfctl infra outputs with masking + GHA mask integration` + +### Task G1: `wfctl deploy verify` CLI + +**Files:** +- Create: `cmd/wfctl/deploy_verify.go` +- Test: `cmd/wfctl/deploy_verify_test.go` + +Poll-with-retry/backoff loop; multi-target gate; exit non-zero on any timeout. + +Commit: `feat(wfctl): wfctl deploy verify — healthcheck gate` + +### Task G2: `deploy_verify` pipeline step + +**Files:** +- Create: `module/pipeline_step_deploy_verify.go` +- Test: same package + +Thin wrapper around the CLI logic so verify can run as a pipeline step too. + +Commit: `feat(pipeline): deploy_verify step type` + +### Task H1: SecretSink interface + +**Files:** +- Modify: `module/secret_sink.go` (extend stub from Task B2) + +Define full interface; registration function (`RegisterSecretSink(type string, factory func(...) SecretSink)`). + +Commit: `feat(module): SecretSink interface + registration` + +### Task H2: github_secret built-in sink + +**Files:** +- Create: `module/secret_sink_github.go` +- Test: `module/secret_sink_github_test.go` + +Uses `crypto/nacl/box` for libsodium-compatible encryption. Public key fetched via `GET /repos/{owner}/{repo}/actions/secrets/public-key`. PUT with encrypted payload. + +Commit: `feat(sink): github_secret — GH Actions secret write via App token + libsodium` + +### Task H3: github_env built-in sink + +**Files:** +- Create: `module/secret_sink_github_env.go` + +Appends `KEY=value` to `$GITHUB_ENV` file (from env var). No API. + +Commit: `feat(sink): github_env — append to $GITHUB_ENV for in-job downstream` + +### Task H4: Apply-time sink write + +**Files:** +- Modify: `cmd/wfctl/infra_apply.go` (in `applyWithProviderAndStore` after state save) + +After each `SaveResource`, iterate sink-declared outputs, resolve handler, call Write. Errors log warning (non-fatal). + +Tests: fake provider returns outputs; mock sink; asserts Write called with correct (name, value, opts); sink failure → apply still succeeds with warning. + +Commit: `feat(wfctl): apply-time sink write for module outputs` + +### Task F/G/H/5: Open PR for Phase 6c + +Tag v0.19.0-rc3 on merge. + +--- + +## Phase 7 — v0.19.0 final + +### Task 7.1: Consolidated CHANGELOG entry + +**Files:** +- Modify: `CHANGELOG.md` — top entry for v0.19.0 summarizing all 7 features + migration notes + +Commit: `docs(changelog): v0.19.0 final — architectural cleanup release` + +### Task 7.2: Update DOCUMENTATION.md + +Add sections for wfctl.yaml manifest, registries, outputs, teardown, verify, sinks. + +Commit: `docs: v0.19.0 features in DOCUMENTATION.md` + +### Task 7.3: docs/migration-v0.19.0.md + +Operator migration guide for consumers (BMW, ratchet, workflow-cloud). Covers: `wfctl migrate plugins`, adding registries: block, replacing doctl shell, using official migrate image, declarative sinks for secret capture. + +Commit: `docs: v0.19.0 migration guide` + +### Task 7.4: Final release PR + tag + +- Merge all phase PRs +- Tag `v0.19.0` on main +- Release workflow produces: wfctl binaries, workflow-server binaries, workflow-migrate Docker image (signed), SBOM + +--- + +## Phase 8 — Plugin fan-out (parallel, post-v0.19.0 tag) + +Each of aws, gcp, azure, tofu plugins gets a v0.2.0 bump adopting: +- Typed-args gRPC handlers (Feature C server-side) +- EnsureRegistry + EnsureRegistryAuth (Feature B server-side) +- (Optional) secret_manager sink for their cloud + +One PR per plugin. Can happen in any order, in parallel. + +--- + +## Phase 9 — BMW migration (after v0.19.0 + plugin fan-out settles) + +### Task 9.1: Run `wfctl migrate plugins` on BMW + +Produces `wfctl.yaml` + `.wfctl-lock.yaml`, strips inline plugin pins from `app.yaml`. + +### Task 9.2: Add `registries:` block to wfctl.yaml + +```yaml +registries: + - name: docr-bmw + type: digitalocean + host: registry.digitalocean.com + provider: do-provider +``` + +### Task 9.3: Add output sinks to infra.yaml + +```yaml +- name: bmw-staging-db + type: infra.database + outputs: + connection_url: + sensitive: true + sinks: + - type: github_secret + repo: ${{ github.repository }} + name: DATABASE_URL +``` + +### Task 9.4: Delete doctl shell from deploy.yml + +Removes: `Ensure DOCR registry exists`, `Log in to DOCR`, `Capture staging DB URL`. + +### Task 9.5: Reference official workflow-migrate image + +Update infra.yaml to reference `ghcr.io/gocodealone/workflow-migrate:v0.19.0` by tag; drop the workflow-src checkout + build steps from deploy.yml. + +### Task 9.6: Replace teardown-staging.yml with wfctl invocation + +15-line workflow dispatching `wfctl infra teardown --env staging --confirm`. + +### Task 9.7: Add `wfctl deploy verify` step in prod promotion + +Before auto-promote, gate on staging /healthz via config-driven deploy_verify. + +### Task 9.8: BMW consolidated migration PR + +Single BMW PR capturing 9.1-9.7. Should shrink deploy.yml by ~80-120 lines and eliminate all provider-specific shell. Run through standard CI + review flow. + +--- + +## Team assignment + +- **impl-migrations** — leading workflow core changes (Phases 1, 2, 4, 6a, 6b, 6c, 7) +- **impl-digitalocean-2** — leading DO plugin changes (Phases 3, 5, 8a) +- **impl-bmw-2** — currently blocked on tonight's BMW deploy chain; picks up Phase 9 BMW migration after v0.19.0 stabilizes +- **code-reviewer** — every PR LOCAL review before push +- **team-lead** — PR merge decisions after Copilot window + admin-bypass where authorized + +## Execution constraint + +v0.19.0 work runs in parallel with the ongoing BMW tonight deploy blocker chain (PR #177 + teardown fix). The team has split attention until BMW hits prod /healthz green (task #26). Phase 1-5 PRs can start, merge, and tag alphas independently. Final v0.19.0 tag and Phase 9 BMW migration HOLD until BMW stabilizes tonight. + +## Success criteria + +- All 7 features merged to workflow main with passing CI +- v0.19.0 tag cuts with full release artifacts (binaries, Docker image, SBOM, cosign signature) +- workflow-plugin-digitalocean v0.8.x passes the 14×4 gRPC integration test matrix +- BMW migration PR (post-v0.19.0 + plugin fan-out) reduces deploy.yml by ≥80 lines, zero provider-specific shell, zero RELEASES_TOKEN references +- The v0.7.5-class gRPC schema mismatch cannot land silently again — typed-args + integration test matrix catches it at CI diff --git a/docs/plans/2026-04-24-wfctl-phase-continuation-design.md b/docs/plans/2026-04-24-wfctl-phase-continuation-design.md new file mode 100644 index 00000000..7bd66042 --- /dev/null +++ b/docs/plans/2026-04-24-wfctl-phase-continuation-design.md @@ -0,0 +1,142 @@ +# wfctl Phase Continuation — Module Env-Resolution Consistency — Design + +**Status:** Approved (autonomous pipeline, 2026-04-24) + +**Goal:** Close the class of bugs where one wfctl code path applies per-environment config overrides (`ModuleConfig.ResolveForEnv`) and another doesn't, producing **different identities** for the same logical resource and creating duplicate cloud resources or failed lookups. v0.18.9 patches the remaining known gaps (ci run deploy, infra_output source) and introduces a `config.ResolveModuleForEnv` helper to make future call sites default-correct. + +**Why:** BMW deploy run 24888583717 (`wfctl ci run --phase deploy`) created a duplicate DO App Platform app with name "bmw-app" (the module's base name) while `wfctl infra apply` had minutes earlier created "bmw-staging" (the env-resolved name). Both attempts found no existing resource by their respective name variants, each proceeded to CREATE, result: two apps in DO. The "no deployment found" health-check failure was a downstream symptom of DO refusing to spawn the second app's deployment (name collision). + +## Root cause + +`cmd/wfctl/deploy_providers.go:769` inside `newPluginDeployProvider`: +```go +findByType := func(target string) bool { + for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + ... + cfg, ok := resolveModCfg(m) // returns env-merged config map + ... + resourceName = m.Name // ← BUG: uses base name, ignores env override + resourceType = m.Type + resourceCfg = cfg + return true + } +} +``` + +`resolveModCfg` DOES apply `ResolveForEnv` and returns the env-merged config. But the surrounding code then reads `m.Name` (base) instead of `resolved.Name` (env-overridden). The fix for `ResolvedModule.Name` (lifted from `Config["name"]` in v0.18.7) is ignored here. + +**Fix pattern** already used successfully in `config/module_resolve_env.go:64-69`: +```go +if strings.HasPrefix(resolved.Type, "infra.") { + if n, ok := resolved.Config["name"].(string); ok && n != "" { + resolved.Name = n + delete(resolved.Config, "name") + } +} +``` + +`pluginDeployProvider` needs to adopt the same resolved identity. + +## Scope — related gaps + +Searching for `m.Name` direct usage after `resolveModCfg` / ResolveForEnv in wfctl reveals multiple call sites: + +| Location | What it reads | Should read | +|---|---|---| +| `cmd/wfctl/deploy_providers.go:769` (ci run deploy) | `m.Name` | `resolved.Name` | +| `cmd/wfctl/deploy_providers.go:796` (fallback path) | `m.Name` | `resolved.Name` | +| `cmd/wfctl/infra_secrets.go` (`infra_output.source` parser) | module name in dot-path | env-resolved module name (task #56) | +| Any other phase reading modules by name | — | audit pass | + +All suffer from the same class: **env-resolution helper returns the merged config but caller reads `m.Name` instead of the resolved name**. + +## Architecture + +### The helper pattern + +Introduce `config.ResolveModuleForEnv(m *ModuleConfig, envName string) (*ResolvedModule, bool)` as the blessed API. It already exists — `ModuleConfig.ResolveForEnv` returns a `*ResolvedModule` with `Name` and `Config` correctly populated. The API is fine; **callers just need to use both fields of the return value**. + +Rather than introduce a new helper, enforce usage: + +1. **Fix the deploy_providers.go call sites** (lines 769, 796) to replace `resourceName = m.Name` with `resourceName = resolved.Name` where `resolved` is the `*ResolvedModule` from ResolveForEnv. +2. **Refactor `resolveModCfg` closure** to return `*ResolvedModule` instead of just the Config map — so the resolved.Name is available at the call site. Call signature becomes: + ```go + resolveModule := func(m *config.ModuleConfig) (*config.ResolvedModule, bool) { + if envName == "" { + return &config.ResolvedModule{Name: m.Name, Type: m.Type, Config: m.Config}, true + } + return m.ResolveForEnv(envName) + } + ``` +3. **Update all consumers** (three call sites in deploy_providers.go + `infra_secrets.go` for infra_output) to read `resolved.Name` and `resolved.Config`. + +### Testing strategy + +Regression tests — would have caught tonight's bug: + +- `TestPluginDeployProvider_UsesEnvResolvedName` — construct a wfConfig with an `infra.container_service` module named `bmw-app` that has `environments.staging.config.name: bmw-staging`; call `newPluginDeployProvider(..., "staging")`; assert the returned provider's `resourceName == "bmw-staging"`, not `"bmw-app"`. +- `TestPluginDeployProvider_FallsBackToModuleNameWhenNoEnv` — envName="" case keeps base module name. +- `TestInfraOutput_EnvResolvesModuleSource` — `wfctl infra apply` with secrets.generate `infra_output` source `bmw-database.uri`; env override renames module to `bmw-staging-db`; infra_output finds state for `bmw-staging-db`, not `bmw-database`. (Addresses task #56.) +- `TestCIRunDeploy_PlansUpdateNotCreate_AfterInfraApply` — end-to-end: infra apply creates resource → ci run deploy should plan UPDATE (found by env-resolved name) not CREATE. Uses fake driver capturing Read/Create calls. + +### Out of scope (defer to separate work) + +- **Unifying deploy-phase state with IaC state** — the bug isn't really a state-sharing issue. Both subsystems use `driver.Read(ctx, ResourceRef{Name})` to find resources, which queries the cloud directly. As long as both use the SAME NAME, they find the same resource. No state coordination layer needed. +- **DAG-style pipeline orchestration** (`wfctl deploy --env X` as a meta-command) — larger scope, not a fix for this specific bug. +- **Deploy-phase typed-args refactor** — covered by v0.19.0 Feature C. + +## Rollout phases + +**Phase 1 (workflow v0.18.9):** +- Fix `deploy_providers.go:769` and `:796` to use `resolved.Name`. +- Refactor `resolveModCfg` closure to return `*ResolvedModule`. +- Add `TestPluginDeployProvider_UsesEnvResolvedName` regression test. +- Merge + tag v0.18.9. + +**Phase 2 (workflow v0.18.9 or v0.19.0):** +- Fix `infra_secrets.go` `infra_output` source resolution (task #56). +- Add `TestInfraOutput_EnvResolvesModuleSource` regression test. +- Bundle into v0.18.9 if small enough; else defer to v0.19.0. + +**Phase 3 (workflow v0.19.0):** +- Audit remaining call sites that read `m.Name` after env resolution. Patch any found. +- Documentation: DOCUMENTATION.md section on env-resolution consistency + the `resolved.Name` vs `m.Name` distinction. +- Add a `TestAllEnvResolutionCallSites` lint-style test or CI check (grep for `m.Name` in paths that use env resolution; flag patterns that don't use the ResolvedModule). + +**Phase 4 (BMW unblock):** +- User triggers teardown-staging (wipes duplicate apps + state). +- BMW bumps setup-wfctl pin to v0.18.9. +- Deploy auto-fires, env-resolved names flow consistently, health check passes. + +## Data flow (BMW staging, post-fix) + +``` +wfctl infra apply --env staging + → planResourcesForEnv: ResolveForEnv(staging) → resolved.Name="bmw-staging" + → platform.ComputePlan → Plan action: create bmw-staging + → provider.Apply creates DO App Platform with name "bmw-staging" + → state save: ResourceState{Name:"bmw-staging", ProviderID:} + +wfctl ci run --phase deploy --env staging + → newPluginDeployProvider(...staging): resolveModule(m) → resolved.Name="bmw-staging" + → pluginDeployProvider.resourceName = "bmw-staging" (was "bmw-app" before fix) + → Deploy: driver.Read(ref{Name:"bmw-staging"}) → finds existing DO app + → driver.Update(ref, spec) → pushes new image to existing app (id=) + → health-check polls the same UUID → 200 → deploy succeeds +``` + +## Success criteria + +- Unit tests added, all passing. +- `wfctl ci run --phase deploy --env staging` on BMW's config returns `pluginDeployProvider.resourceName == "bmw-staging"` (not `"bmw-app"`). +- End-to-end BMW retry post-v0.18.9 bump + teardown: staging /healthz 200, prod /healthz 200 auto-promote. +- No recurrence of duplicate DO resources from env-resolution mismatch (regression test gate at CI). +- DOCUMENTATION.md has a clear section warning future implementers: "always use `resolved.Name` from `ResolveForEnv`, never `m.Name`, in paths that consume modules with per-env overrides." + +## Non-goals (explicit) + +- State-sharing architecture between IaC and CI phases — the bug was about names, not state stores. +- Pipeline DAG / unified `wfctl deploy` meta-command — out of scope; later release. +- Changing `ResolveForEnv` signature / semantics — the function is correct, callers aren't using it correctly. +- Moving ci run deploy to read from IaC state — not needed; `driver.Read(name)` finds the same cloud resource if both paths use the same name. diff --git a/docs/plans/2026-04-24-wfctl-phase-continuation.md b/docs/plans/2026-04-24-wfctl-phase-continuation.md new file mode 100644 index 00000000..6c926812 --- /dev/null +++ b/docs/plans/2026-04-24-wfctl-phase-continuation.md @@ -0,0 +1,647 @@ +# wfctl v0.18.9 Phase-Continuation Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Ship workflow v0.18.9 fixing the env-resolution consistency gap in `wfctl ci run --phase deploy` and `infra_output` source resolution, unblocking BMW staging deploy which currently creates duplicate resources due to the base-name vs env-resolved-name mismatch. + +**Architecture:** Refactor `resolveModCfg` closure in `cmd/wfctl/deploy_providers.go` to return the full `*config.ResolvedModule` so callers see both `resolved.Name` (env-resolved identity) and `resolved.Config` (env-merged map). Replace `m.Name` with `resolved.Name` at every downstream consumer in the deploy-phase path. Apply the same fix to `infra_output` source-module parsing in `cmd/wfctl/infra_secrets.go`. Add regression tests gating both code paths. + +**Tech Stack:** Go 1.26, existing wfctl codebase. No new dependencies. + +**Reference design:** `docs/plans/2026-04-24-wfctl-phase-continuation-design.md`. + +**Timing:** v0.18.9 HOTFIX — sole workstream until merged. Pauses v0.19.0 Phase 2+ work. + +--- + +## Task 1: Refactor `resolveModCfg` closure signature + +**Files:** +- Modify: `cmd/wfctl/deploy_providers.go:709-718` (the closure definition) + +**Step 1: Write the failing test** + +```go +// cmd/wfctl/deploy_providers_env_test.go (new file) +package main + +import ( + "testing" + "github.com/GoCodeAlone/workflow/config" +) + +func TestPluginDeployProvider_UsesEnvResolvedName(t *testing.T) { + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "do-provider", + Type: "iac.provider", + Config: map[string]any{"provider": "digitalocean"}, + }, + { + Name: "bmw-app", + Type: "infra.container_service", + Config: map[string]any{"provider": "do-provider"}, + Environments: map[string]config.EnvOverride{ + "staging": { + Config: map[string]any{"name": "bmw-staging"}, + }, + }, + }, + }, + } + + dp, err := newPluginDeployProvider("digitalocean", wfCfg, "staging") + if err != nil { + t.Fatalf("newPluginDeployProvider: %v", err) + } + + pdp, ok := dp.(*pluginDeployProvider) + if !ok { + t.Fatalf("expected *pluginDeployProvider, got %T", dp) + } + if pdp.resourceName != "bmw-staging" { + t.Errorf("resourceName = %q, want %q (env-resolved name)", pdp.resourceName, "bmw-staging") + } +} +``` + +**Step 2: Run test to verify it fails** + +```bash +cd /Users/jon/workspace/workflow +GOWORK=off go test ./cmd/wfctl/ -run TestPluginDeployProvider_UsesEnvResolvedName -v +``` +Expected: FAIL with `resourceName = "bmw-app", want "bmw-staging"` (the bug). + +**Step 3: Refactor the closure** + +Change `cmd/wfctl/deploy_providers.go:709-718` from: +```go +resolveModCfg := func(m *config.ModuleConfig) (map[string]any, bool) { + if envName == "" { + return m.Config, true + } + resolved, ok := m.ResolveForEnv(envName) + if !ok { + return nil, false + } + return resolved.Config, true +} +``` +To: +```go +resolveModule := func(m *config.ModuleConfig) (*config.ResolvedModule, bool) { + if envName == "" { + return &config.ResolvedModule{ + Name: m.Name, + Type: m.Type, + Config: m.Config, + }, true + } + return m.ResolveForEnv(envName) +} +``` + +**Step 4: Commit (do NOT run tests yet — will fix call sites in Task 2)** + +```bash +git add cmd/wfctl/deploy_providers.go cmd/wfctl/deploy_providers_env_test.go +git commit -m "refactor(wfctl): resolveModule returns *ResolvedModule for env-resolved Name access" +``` + +Note: after this commit the code won't compile until Task 2 updates the call sites. Commit the refactor + call-site fixes together in Task 2's final commit. + +--- + +## Task 2: Update three call sites to use resolved.Name + resolved.Config + +**Files:** +- Modify: `cmd/wfctl/deploy_providers.go` lines 723-802 (iac.provider lookup + findByType closure + fallback loop) + +**Step 1: Update the iac.provider lookup (was lines 723-737)** + +```go +// Find the iac.provider module matching the requested provider name. +var providerModName string +var providerModCfg map[string]any +for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Type != "iac.provider" { + continue + } + resolved, ok := resolveModule(m) + if !ok { + continue + } + cfgProvider, _ := resolved.Config["provider"].(string) + if cfgProvider == providerName || resolved.Name == providerName { + providerModName = resolved.Name + providerModCfg = resolved.Config + break + } +} +``` + +**Step 2: Update findByType closure (was lines 758-776)** + +Change: +```go +findByType := func(target string) bool { + for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Type != target { + continue + } + cfg, ok := resolveModCfg(m) // OLD + if !ok { + continue + } + if p, _ := cfg["provider"].(string); p == providerModName { + resourceName = m.Name // ← BUG + resourceType = m.Type + resourceCfg = cfg + return true + } + } + return false +} +``` +To: +```go +findByType := func(target string) bool { + for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Type != target { + continue + } + resolved, ok := resolveModule(m) + if !ok { + continue + } + if p, _ := resolved.Config["provider"].(string); p == providerModName { + resourceName = resolved.Name // ← fix: env-resolved + resourceType = resolved.Type + resourceCfg = resolved.Config + return true + } + } + return false +} +``` + +**Step 3: Update fallback loop (was lines 782-802)** + +Change: +```go +for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Type == "iac.provider" || m.Type == "" { + continue + } + cfg, ok := resolveModCfg(m) // OLD + if !ok { + continue + } + if p, _ := cfg["provider"].(string); p == providerModName { + fmt.Fprintf(os.Stderr, "warning: no deploy-target module ...; falling back to first infra module %q (type %q)\n", + deployTargetTypes, providerModName, m.Name, m.Type) + resourceName = m.Name // ← BUG + resourceType = m.Type + resourceCfg = cfg + break + } +} +``` +To: +```go +for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Type == "iac.provider" || m.Type == "" { + continue + } + resolved, ok := resolveModule(m) + if !ok { + continue + } + if p, _ := resolved.Config["provider"].(string); p == providerModName { + fmt.Fprintf(os.Stderr, "warning: no deploy-target module (%v) found for provider %q; falling back to first infra module %q (type %q)\n", + deployTargetTypes, providerModName, resolved.Name, resolved.Type) + resourceName = resolved.Name // ← fix + resourceType = resolved.Type + resourceCfg = resolved.Config + break + } +} +``` + +**Step 4: Run the regression test — should PASS now** + +```bash +cd /Users/jon/workspace/workflow +GOWORK=off go test ./cmd/wfctl/ -run TestPluginDeployProvider_UsesEnvResolvedName -v +``` +Expected: PASS. + +**Step 5: Run full wfctl test suite** + +```bash +GOWORK=off go test ./cmd/wfctl/... -v +``` +Expected: all tests pass. Any failures indicate an existing test was implicitly depending on `m.Name` (base) behavior — fix by updating those tests to reflect the correct env-resolved semantic. + +**Step 6: Commit** + +```bash +git add cmd/wfctl/deploy_providers.go cmd/wfctl/deploy_providers_env_test.go +git commit -m "fix(wfctl): ci run deploy uses env-resolved module name (not base) + +Regression tested via TestPluginDeployProvider_UsesEnvResolvedName. Same +class of bug as v0.18.7's Task #32 fix for ResourceSpec.Name — env override +of Config[\"name\"] was lifted into ResolvedModule.Name but deploy_providers.go +read m.Name directly, ignoring the override. Caused BMW deploy run +24888583717 to create duplicate DO apps (bmw-app vs bmw-staging)." +``` + +--- + +## Task 3: Add fallback test for no-env case + +**Files:** +- Modify: `cmd/wfctl/deploy_providers_env_test.go` + +**Step 1: Add the test** + +```go +func TestPluginDeployProvider_FallsBackToModuleNameWhenNoEnv(t *testing.T) { + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "do-provider", + Type: "iac.provider", + Config: map[string]any{"provider": "digitalocean"}, + }, + { + Name: "bmw-app", + Type: "infra.container_service", + Config: map[string]any{"provider": "do-provider"}, + // NOTE: no Environments block — base name should be used + }, + }, + } + + dp, err := newPluginDeployProvider("digitalocean", wfCfg, "") + if err != nil { + t.Fatalf("newPluginDeployProvider: %v", err) + } + + pdp, ok := dp.(*pluginDeployProvider) + if !ok { + t.Fatalf("expected *pluginDeployProvider, got %T", dp) + } + if pdp.resourceName != "bmw-app" { + t.Errorf("resourceName = %q, want %q (base module name when no env)", pdp.resourceName, "bmw-app") + } +} +``` + +**Step 2: Run** + +```bash +GOWORK=off go test ./cmd/wfctl/ -run TestPluginDeployProvider_FallsBackToModuleNameWhenNoEnv -v +``` +Expected: PASS. + +**Step 3: Commit** + +```bash +git add cmd/wfctl/deploy_providers_env_test.go +git commit -m "test(wfctl): verify pluginDeployProvider falls back to module name with no env" +``` + +--- + +## Task 4: Audit infra_output source module-name resolution (task #56) + +**Files:** +- Modify: `cmd/wfctl/infra_secrets.go` (find and fix the source path parser) +- Test: `cmd/wfctl/infra_secrets_env_test.go` (new) + +**Step 1: Discover the current parser** + +```bash +grep -n 'infra_output\|infra output\|\.uri\b\|source.*\.' cmd/wfctl/infra_secrets.go | head -20 +``` + +Identify the function that parses `bmw-database.uri` (the format is `.`). Likely named `parseInfraOutputSource` or similar, or inlined in the secret-generation loop. + +**Step 2: Write the failing test** + +```go +// cmd/wfctl/infra_secrets_env_test.go (new) +package main + +import ( + "testing" + "github.com/GoCodeAlone/workflow/config" +) + +func TestInfraOutput_EnvResolvesModuleSource(t *testing.T) { + // Staging env renames bmw-database → bmw-staging-db. State is keyed by + // env-resolved name. Secret generation reads source "bmw-database.uri" + // which must resolve to "bmw-staging-db" for the state lookup to succeed. + + wfCfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{ + { + Name: "bmw-database", + Type: "infra.database", + Config: map[string]any{"provider": "do-provider"}, + Environments: map[string]config.EnvOverride{ + "staging": {Config: map[string]any{"name": "bmw-staging-db"}}, + }, + }, + }, + Secrets: &config.SecretsConfig{ + Generate: []config.SecretGenerateEntry{ + { + Key: "STAGING_DATABASE_URL", + Type: "infra_output", + Source: "bmw-database.uri", + }, + }, + }, + } + + // Simulate pre-populated state with env-resolved name. + fakeState := map[string]interfaces.ResourceState{ + "bmw-staging-db": { + ID: "bmw-staging-db", + Name: "bmw-staging-db", + Outputs: map[string]any{"uri": "postgresql://test"}, + }, + } + + // Function under test: resolve infra_output source with envName. + // The function must transform "bmw-database.uri" → lookup "bmw-staging-db" → "uri" field. + val, err := resolveInfraOutput(wfCfg, "bmw-database.uri", "staging", fakeState) + if err != nil { + t.Fatalf("resolveInfraOutput: %v", err) + } + if val != "postgresql://test" { + t.Errorf("got %q, want %q (state lookup via env-resolved name)", val, "postgresql://test") + } +} +``` + +Adjust function name to match the actual existing function. If the function is tightly coupled to a larger codepath, extract a small helper and test that. + +**Step 3: Run to verify failure** + +```bash +GOWORK=off go test ./cmd/wfctl/ -run TestInfraOutput_EnvResolvesModuleSource -v +``` +Expected: FAIL (function returns "module bmw-database not found in state"). + +**Step 4: Fix the source parser** + +In `infra_secrets.go`, where the `.` string is parsed, after extracting the module name apply env resolution: + +```go +// Before (buggy): +moduleName := parts[0] // e.g., "bmw-database" +outputField := parts[1] +state, ok := stateMap[moduleName] // FAILS — state has "bmw-staging-db" + +// After (fixed): +moduleName := parts[0] +outputField := parts[1] + +// Apply env resolution to the module name so state lookup matches +// how infra apply persisted the resource. +if envName != "" { + for i := range wfCfg.Modules { + m := &wfCfg.Modules[i] + if m.Name != moduleName { + continue + } + if resolved, ok := m.ResolveForEnv(envName); ok { + moduleName = resolved.Name + break + } + } +} +state, ok := stateMap[moduleName] +``` + +Or, if a helper exists (`config.ResolveModuleName(wfCfg, name, envName) string`), use it. If not, consider introducing it in a later refactor — for v0.18.9 keep the inline version. + +**Step 5: Run test — should PASS** + +```bash +GOWORK=off go test ./cmd/wfctl/ -run TestInfraOutput_EnvResolvesModuleSource -v +``` + +**Step 6: Run full wfctl test suite** + +```bash +GOWORK=off go test ./cmd/wfctl/... -v +``` +Expected: all tests pass. + +**Step 7: Commit** + +```bash +git add cmd/wfctl/infra_secrets.go cmd/wfctl/infra_secrets_env_test.go +git commit -m "fix(wfctl): infra_output source module name flows through env resolution + +Closes task #56. Matches the v0.18.7 fix pattern: state lookup uses +env-resolved module name (bmw-staging-db) not base name (bmw-database) +when --env is set." +``` + +--- + +## Task 5: Update CHANGELOG + +**Files:** +- Modify: `CHANGELOG.md` — add v0.18.9 entry at the top + +**Step 1: Add CHANGELOG entry** + +At the top of `CHANGELOG.md`, between the existing title/intro and the v0.18.8 section, insert: + +```markdown +## [0.18.9] - 2026-04-24 + +### Fixed + +- **`wfctl ci run --phase deploy` now uses env-resolved module name** — `pluginDeployProvider.resourceName` was populated from `m.Name` (base config name) instead of `ResolvedModule.Name` (env-override lifted from `Config["name"]`). When infra apply had env-renamed a module (e.g. BMW's `bmw-app` → `bmw-staging` for staging), the deploy phase used the base name for `driver.Read` lookup, didn't find the resource, and went down the Create path — producing duplicate DO resources with conflicting names. Same class as v0.18.7 Task #32 fix, but in the ci-run code path. BMW deploy run 24888583717 is the regression case. +- **`infra_output` source resolution now applies env override to module name** — `secrets.generate[].source: "bmw-database.uri"` now resolves to the env-resolved state key (e.g. `bmw-staging-db`) when `--env staging` is set, matching how infra apply persists state. Closes task #56. + +### Tests + +- `cmd/wfctl/deploy_providers_env_test.go` — `TestPluginDeployProvider_UsesEnvResolvedName`, `TestPluginDeployProvider_FallsBackToModuleNameWhenNoEnv` +- `cmd/wfctl/infra_secrets_env_test.go` — `TestInfraOutput_EnvResolvesModuleSource` +``` + +**Step 2: Commit** + +```bash +git add CHANGELOG.md +git commit -m "docs: CHANGELOG v0.18.9 entry" +``` + +--- + +## Task 6: Open PR + +**Step 1: Push branch** + +```bash +git push -u origin fix/v0.18.9-phase-continuation +``` + +**Step 2: Open PR on workflow repo** + +```bash +gh pr create --repo GoCodeAlone/workflow --title "fix(wfctl): v0.18.9 phase-continuation — env-resolution consistency" --body "$(cat <<'EOF' +## Summary + +Closes the class of bugs where wfctl's env-resolution is applied in some +code paths but not others, producing different identities for the same +logical resource. + +**Surfaced by:** BMW deploy run 24888583717 — `wfctl infra apply` created +DO app `bmw-staging` (env-resolved), then `wfctl ci run --phase deploy` +immediately created a SECOND DO app `bmw-app` (base name), because the +deploy path at `cmd/wfctl/deploy_providers.go:769` read `m.Name` directly +instead of `ResolvedModule.Name`. + +**Fixes in this PR:** + +- Refactored `resolveModCfg` closure in deploy_providers.go to return + `*config.ResolvedModule`; call sites now read `resolved.Name` everywhere. +- Same pattern applied to `infra_secrets.go` `infra_output` source parser + (task #56). +- 3 regression tests gating both fixes. + +## Test plan + +- [x] `GOWORK=off go test ./cmd/wfctl/...` passes +- [x] New regression tests fail on main, pass on this branch +- [ ] BMW setup-wfctl bumps to v0.18.9; teardown + redeploy +- [ ] BMW staging /healthz 200 confirmed post-deploy + +## Reference + +Design: docs/plans/2026-04-24-wfctl-phase-continuation-design.md +Plan: docs/plans/2026-04-24-wfctl-phase-continuation.md + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +**Step 3: Request Copilot review** + +```bash +gh pr edit --repo GoCodeAlone/workflow --add-reviewer copilot-pull-request-reviewer +``` + +**Step 4: DM team-lead when CI green + Copilot window elapsed for merge decision.** + +Standard flow: LOCAL review with code-reviewer → push → Copilot → merge. If reviewer finds issues, iterate. + +--- + +## Task 7 (team-lead): Admin-merge + tag v0.18.9 + +After PR is green + Copilot cleared: + +**Step 1: Admin-merge** + +```bash +gh pr merge --repo GoCodeAlone/workflow --admin --squash \ + --subject "fix(wfctl): v0.18.9 phase-continuation — env-resolution consistency (#)" +``` + +**Step 2: Get the merge commit SHA** + +```bash +gh pr view --repo GoCodeAlone/workflow --json mergeCommit --jq '.mergeCommit.oid' +``` + +**Step 3: Tag v0.18.9** + +```bash +gh api repos/GoCodeAlone/workflow/git/refs --method POST \ + -f ref='refs/tags/v0.18.9' \ + -f sha='' +``` + +Release workflow fires on tag push, produces setup-wfctl v0.18.9 artifacts. + +--- + +## Task 8 (impl-bmw-2-2): BMW bump to v0.18.9 + +After v0.18.9 release artifacts publish: + +**Files:** +- Modify: `.github/workflows/deploy.yml` — all 3 job sites with `setup-wfctl@v1 with: version: v0.18.8` → `v0.18.9` + +**Step 1: Create branch + edit** + +```bash +cd /Users/jon/workspace/buymywishlist +git checkout -b chore/bump-setup-wfctl-v0.18.9 +# edit deploy.yml — 3 occurrences of v0.18.8 → v0.18.9 +``` + +**Step 2: LOCAL review with code-reviewer, push, open PR** + +```bash +git push -u origin chore/bump-setup-wfctl-v0.18.9 +gh pr create --repo GoCodeAlone/buymywishlist --title "chore: bump setup-wfctl v0.18.8 → v0.18.9 (phase-continuation fix)" +``` + +**Step 3: DM team-lead for fast-track merge on CI green.** + +--- + +## Task 9 (team-lead): Teardown + redeploy + +After BMW bump PR merges: + +**Step 1: Trigger teardown-staging (corrected patterns + state wipe from PRs #178/#179)** + +```bash +gh workflow run teardown-staging.yml --repo GoCodeAlone/buymywishlist -f confirm=yes +``` + +**Step 2: Wait for teardown to complete.** + +**Step 3: Deploy auto-fires from CI on main.** + +Expected end-to-end: +- infra apply: 4 CREATE actions, all succeed +- secrets.generate: STAGING_DATABASE_URL captured via env-resolved `bmw-staging-db` ✓ +- ci run deploy: `driver.Read(bmw-staging)` finds existing app from infra apply → UPDATE path (not CREATE) +- staging /healthz → 200 (Phase F closes; task #25) +- auto-promote to prod → prod /healthz → 200 (Phase G closes; task #26) + +**DM team-lead when staging /healthz is 200 confirmed.** + +--- + +## Success criteria + +- All regression tests pass +- PR merges, v0.18.9 tag published +- BMW bump PR merges +- Teardown clears duplicate apps + state +- Post-deploy: staging /healthz 200, prod /healthz 200 +- No new duplicate resources created by env-resolution mismatch +- Task #60 closes; task #56 closes +- Task #25 (Phase F) closes; task #26 (Phase G) closes