diff --git a/.gitignore b/.gitignore index 46d3bbb8..5f25198b 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,5 @@ benchstat-output.txt .worktrees cmd/wfctl/.wfctl.yaml cmd/wfctl/.wfctl-lock.yaml + +.claude/ diff --git a/cmd/wfctl/infra_apply_in_process_test.go b/cmd/wfctl/infra_apply_in_process_test.go index d45e95fd..f029eb79 100644 --- a/cmd/wfctl/infra_apply_in_process_test.go +++ b/cmd/wfctl/infra_apply_in_process_test.go @@ -74,7 +74,7 @@ func TestApply_InProcess_PlanStaleDiagnostic_NamesChangedKeys(t *testing.T) { plan := &interfaces.IaCPlan{ InputSnapshot: map[string]string{varName: planFP}, } - result, err := wfctlhelpers.ApplyPlan(context.Background(), inProcessFakeProvider{}, plan) + result, err := wfctlhelpers.ApplyPlanWithHooks(context.Background(), inProcessFakeProvider{}, plan, wfctlhelpers.ApplyPlanHooks{}) if err != nil { t.Fatalf("ApplyPlan returned top-level error: %v", err) } diff --git a/decisions/0040-v2-action-lifecycle-provider-compatibility.md b/decisions/0040-v2-action-lifecycle-provider-compatibility.md new file mode 100644 index 00000000..e70b05bd --- /dev/null +++ b/decisions/0040-v2-action-lifecycle-provider-compatibility.md @@ -0,0 +1,56 @@ +# 0040. V2 action lifecycle — provider compatibility expectations + Phase 2 hard-cutover + +**Status:** Accepted +**Date:** 2026-05-16 +**Decision-makers:** autonomous pipeline (cloud-sdk-bcd shutdown; new fresh team for Phase 2/3 execution per user mandate 2026-05-16), Jon (operator — direction 2026-05-16: "#640 is worth tracking as well") +**Related:** GoCodeAlone/workflow#640, PR #639 (v2 hooks engine landing), `docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md`, `decisions/0024-iac-typed-force-cutover.md` (no-compat-shim mandate), `feedback_force_strict_contracts_no_compat` + +## Context + +PR #639 landed `wfctlhelpers.ApplyPlanWithHooks` — the v2 action lifecycle hook path. wfctl persists state at each successful cloud-mutation boundary instead of waiting for whole-plan completion. The pre-existing `wfctlhelpers.ApplyPlan` is preserved as `ApplyPlanWithHooks(ctx, p, plan, ApplyPlanHooks{})` (empty-hooks path) for backwards compatibility. + +#640 tracks the migration of all v1 callers to v2 + eventual removal of v1. Phase 1 (this ADR + inventory doc) defines the provider-side compatibility expectations + records the cutover constraint for Phase 2. + +**Critical Phase 1 finding (verified inline 2026-05-16):** 3 of 4 IaC plugins (workflow-plugin-aws / -gcp / -azure) do NOT use the iac-codemod canonical pattern (`return wfctlhelpers.ApplyPlan(ctx, p, plan)`). Only workflow-plugin-digitalocean does. The 3 non-canonical plugins implement their own `IaCProvider.Apply` loop, dispatching per-action via `provider.ResourceDriver(...)` directly. Phase 2 v2 contract design must accommodate both implementation paths. + +**Anti-pattern guard (per ADR 0024):** the strict-contracts cutover (`decisions/0024-iac-typed-force-cutover.md`) explicitly mandates "no compat shim, no build-tag dual-path" — graceful-fallback proto evolution would re-introduce the bug-class surface ADR 0024 forbids. + +## Decision + +**Adopt 5 normative provider-side compatibility expectations.** Plugins shipping Phase 2-conformant `IaCProviderRequiredServer.Apply` RPC MUST satisfy: + +1. **Per-action success evidence.** `ApplyResponse` MUST include per-action outcome (success / error per `PlanAction`), not aggregated whole-plan result. Wire format: extend `ApplyResponse` proto with `repeated ActionResult actions`, where `ActionResult { uint32 action_index; ActionStatus status; map output_keys; string error; }`. Required to fire wfctl-side `OnResourceApplied` hooks at correct boundary. + +2. **Failed-delete preservation.** Apply MUST flag failed-delete actions distinctly (e.g., `ActionStatus = DELETE_FAILED`) so wfctl-side `OnResourceDeleted` hook does NOT fire and state is preserved. Today this is detected via `ApplyResult.Errors` array with no per-action granularity — invariant requires action-level tagging. + +3. **Compensation evidence.** Apply MUST include the compensation outcome when create/replace persistence/routing fails — wfctl needs to know whether the cloud-side resource was successfully torn down (state DOESN'T leak) or whether compensation itself failed (state SHOULD be preserved with operator alert). Today this is opaque. + +4. **Update-failure non-deletion.** Plugins MUST NOT pre-emptively delete existing managed resources on update failure. Engine-side already enforces this (per #639); the contract requires plugins to confirm they don't override with custom cleanup. + +5. **ResourceReplacer advertisement.** Plugin manifest MUST advertise `ResourceReplacer` interface usage at the resource-type level so wfctl pre-mutation can decide whether to abort (delete-hook-active case) or allow (no-hook case). Today wfctl finds out at dispatch time, which is too late for safe rejection. + +**Phase 2 ships as a coordinated PR cascade per ADR 0024.** No compat shim, no graceful proto fallback. Workflow + workflow-plugin-aws/gcp/azure/digitalocean all ship Phase 2-conformant `ApplyResponse` shape simultaneously. Plugins on workflow ≥ Phase-2-tag receive the new contract; plugins on older workflow tags are NOT supported (operator upgrades workflow + all 4 plugins together). + +**Phase 3 plugin migration is bifurcated.** workflow-plugin-digitalocean (canonical-form delegate) gets codemod-driven rewrite once iac-codemod's `applyCanonicalCallExpr` constant + `rewriteApplyBody` + `isAlreadyDelegatedApplyBody` + `runAssertApplyDelegatesToHelper` AST functions are updated in lockstep (Phase 3 work). The other 3 plugins (aws/gcp/azure) need MANUAL migration since their custom Apply loops are not codemod-rewritable. + +## Consequences + +- **Phase 2 design pass MUST cite this ADR as a constraint.** The Phase 2 writing-plans agent MUST read ADR 0040 first and reject any compat-shim or graceful-fallback approach. Recorded explicitly here so the consequence survives team rotation per user mandate "recreate your agent team for each task." + +- **Phase 2 release coordination cost is real.** Same shape as the strict-contracts cutover (referenced in `feedback_force_strict_contracts_no_compat`): 5 repos must release in coordination, each pinned to the same workflow tag. Operator workflow during the cutover: install/upgrade workflow Phase-2 tag → install/upgrade aws+gcp+azure+DO Phase-2 tags simultaneously. + +- **Manifest validation gap on `cmd/wfctl/deploy_providers.go::findIaCPluginDir`** (per Phase 1 design Assumption 8) MUST be addressed in Phase 2 scope. `dispatch.go`'s own warning ("DO NOT rely on the manifest-validation guarantee in callers") means a typo in plugin's manifest `computePlanVersion` field SILENTLY falls to v1 dispatch. Phase 2 adds runtime validation. + +- **Phase 3 plugin migration is bifurcated** (codemod for DO, manual for aws/gcp/azure). Phase 3 design must split into 4 sub-plans, not assume uniform codemod-fix-it run. + +- **Cost of the per-action ActionResult proto field** is small (additive proto change) but the COORDINATION cost (5 repos simultaneous release) is large. Prior cloud-SDK extraction + plugin sweep precedents (this session) show the team can execute coordinated multi-repo cascades in a single autonomous pipeline run. + +- **Phase 4 conformance scenario migration SHIPPED in the same PR as this ADR** (4 call sites total: 3 conformance scenarios + 1 test file in cmd/wfctl). Folded into Phase 1 because staticcheck SA1019 (from the new godoc Deprecated marker) required immediate migration. Removes the Phase 5 prerequisite gating. + +- **Phase 5 ApplyPlan removal** now gates ONLY on Phase 2 + Phase 3 (since Phase 4 shipped with this PR). Plugin canonical-form propagation is the remaining blocker: DO already canonical; aws/gcp/azure must migrate to either canonical OR Phase-2-direct path. + +## Alternatives rejected + +- **Graceful proto-evolution fallback** — DIRECTLY contradicts ADR 0024. Would silently degrade plugin Apply outcomes to "all succeeded" when extended fields absent, exact failure mode ADR 0024 forbids. +- **Per-plugin opt-in via manifest flag** — re-introduces the dual-path bug class. ADR 0024 explicitly rejects build-tag/feature-flag dual-paths. +- **Single codemod-fix-it run across all 4 plugins** — would silently rewrite aws/gcp/azure custom Apply loops INCORRECTLY (codemod assumes canonical pattern). Phase 1 finding rules this out. diff --git a/docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md b/docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md new file mode 100644 index 00000000..0db2ef9e --- /dev/null +++ b/docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md @@ -0,0 +1,89 @@ +# V2 Action Lifecycle — Phase 1 Inventory + +**Status:** Phase 1 complete 2026-05-16 +**Tracking issue:** GoCodeAlone/workflow#640 +**Phase 1 design:** `docs/plans/2026-05-16-v2-lifecycle-phase1-design.md` +**ADR:** `decisions/0040-v2-action-lifecycle-provider-compatibility.md` + +## Background + +PR #639 landed the v2 action lifecycle hook path (`wfctlhelpers.ApplyPlanWithHooks`) — wfctl can persist state at each successful cloud-mutation boundary instead of waiting for whole-plan completion. The pre-existing `wfctlhelpers.ApplyPlan` is preserved for backwards compatibility but is now legacy debt. + +#640 tracks the multi-phase migration: +- **Phase 1 (this document) — SHIPPED**: inventory + provider-compatibility ADR + Deprecated marker +- **Phase 2 — pending**: design + ship v2-hooks-over-gRPC contract (HARD-CUTOVER per ADR 0024) +- **Phase 3 — pending**: migrate plugins to v2 contract (per-plugin manual + codemod for canonical-form plugins) +- ~~Phase 4~~ — **SHIPPED in this PR**: migrated 3 conformance scenarios + 1 test file from `ApplyPlan` to `ApplyPlanWithHooks(..., ApplyPlanHooks{})`. Folded into Phase 1 because staticcheck SA1019 (from the new godoc marker) required it +- **Phase 5 — pending**: remove `wfctlhelpers.ApplyPlan` entirely (after Phase 2 + Phase 3 land) + +## Workflow-side caller inventory + +### v1 `wfctlhelpers.ApplyPlan` callers (production) + +| File:Line | Classification | Notes | +|-----------|----------------|-------| +| `iac/conformance/scenario_upsert_on_already_exists.go:88` | MIGRATED in this PR (Phase 4 folded) | Renamed to `ApplyPlanWithHooks(..., ApplyPlanHooks{})`. Empty-hooks form has zero semantic difference. | +| `iac/conformance/scenario_delete_action.go:74` | MIGRATED in this PR | Same | +| `iac/conformance/scenario_replace_cascade_preserves_dependents.go:92` | MIGRATED in this PR | Same | +| `cmd/wfctl/infra_apply_in_process_test.go:77` | MIGRATED in this PR (was missing from initial inventory; surfaced by staticcheck SA1019) | Same | + +### iac-codemod tool references (NOT runtime callers) + +| File:Line | Notes | +|-----------|-------| +| `cmd/iac-codemod/refactor_apply.go:29` (`applyCanonicalCallExpr` constant, `//nolint:unused`) | **Documentation-only constant; NOT consumed by AST rewriter.** Phase 3 lockstep update bumps this constant TOGETHER with `rewriteApplyBody` (line 1231 hardcoded `ast.NewIdent("ApplyPlan")`) + `isAlreadyDelegatedApplyBody` (line 630 hardcoded `sel.Sel.Name != "ApplyPlan"`) + `runAssertApplyDelegatesToHelper` + `refactor_apply_test.go:593`. Phase 1 does NOT touch this — bumping just the constant would create internal inconsistency. | +| `cmd/iac-codemod/refactor_apply.go:1208` (doc comment) | Same Phase 3 lockstep update | +| `cmd/iac-codemod/lint.go:54` (comment) + `lint.go:641` (matcher consumer) | Same | + +### v1 `provider.Apply(ctx, &plan)` direct callers (workflow side, NOT through wfctlhelpers) + +| File:Line | Classification | Notes | +|-----------|----------------|-------| +| `cmd/wfctl/infra_apply.go:486` | MIGRATE-NEEDED (Phase 2 — v1 dispatch path removal) | `else` branch sister to L473 `applyV2ApplyPlanWithHooksFn`. Selected by `DispatchVersionFor(provider) == DispatchVersionV2` predicate. v2 path uses `provider.ResourceDriver(action.Resource.Type)` per action — NOT `provider.Apply`. Phase 2 removes this v1 path entirely after all 4 plugins ship Phase 2-conformant Apply RPC + manifest declaration. | +| `cmd/wfctl/infra_apply.go:1615` | MIGRATE-NEEDED (Phase 2) | Same — second occurrence (likely refresh path). | +| `cmd/wfctl/iac_typed_adapter.go:350` | NOT IN V2 HOT PATH (Phase 2 wire-format work) | `typedIaCAdapter.Apply` is called ONLY on the v1 dispatch path. Per-action wire format change happens in `applyResultFromPB` (the typed adapter's response decoder) when Phase 2 extends `ApplyResponse` proto with per-action `Actions []ActionResult` field. Adapter dispatch shape doesn't need to change; response decoder does. | + +## Plugin-side IaCProvider.Apply implementation inventory (verified 2026-05-16 via `gh api`) + +| Plugin | File:Line | Pattern | Phase 3 path | +|--------|-----------|---------|--------------| +| workflow-plugin-aws | `provider/provider.go:237` `AWSProvider.Apply` | **NON-CANONICAL** — own loop with `p.mu.RLock` + init check + custom dispatch | **MIGRATE-NEEDED (Phase 3 MANUAL)** — codemod cannot rewrite | +| workflow-plugin-gcp | `provider/provider.go:226` `GCPProvider.Apply` | **NON-CANONICAL** — own `for _, action := range plan.Actions` with `p.ResourceDriver(action.Resource.Type)` per-action | **MIGRATE-NEEDED (Phase 3 MANUAL)** — codemod cannot rewrite | +| workflow-plugin-azure | `internal/provider.go:138` `AzureProvider.Apply` | **NON-CANONICAL** — own loop with `p.mu.RLock` | **MIGRATE-NEEDED (Phase 3 MANUAL)** — codemod cannot rewrite | +| workflow-plugin-digitalocean | `internal/provider.go:274-275` `DOProvider.Apply` | **CANONICAL** — `result, err := wfctlhelpers.ApplyPlan(ctx, p, plan)` delegate (with custom post-flush wrapper) | LEAVE-AS-IS for Phase 1; Phase 3 codemod CAN rewrite (after AST functions updated in lockstep with constant) | + +## Major architectural finding + +**3 of 4 IaC plugins do NOT use the iac-codemod canonical pattern (`return wfctlhelpers.ApplyPlan(ctx, p, plan)`).** The canonical-form constant in `cmd/iac-codemod/refactor_apply.go:29` is aspirational, not reality. + +Phase 2 + Phase 3 implications: +1. **Phase 3 is NOT a single codemod-fix-it run.** 3 plugins need MANUAL migration; 1 plugin (DO) can be codemod-rewritten. +2. **Phase 2 v2 hooks contract design** must accommodate two plugin implementation paths: + - **(a) Canonical delegate**: `provider.Apply` → `wfctlhelpers.ApplyPlan(ctx, p, plan)` → `applyPlanWithEnvProviderAndHooks(ctx, p, plan, nil, hooks)` — wfctl-side hooks fire automatically at each `dispatchAction` boundary + - **(b) Custom loop**: `provider.Apply` runs its own `for _, action := range plan.Actions` loop, calling `p.ResourceDriver(action.Resource.Type)` per-action. The plugin must EMIT per-action outcome via the Phase 2 extended `ApplyResponse` proto so wfctl-side reconstructs the hook events +3. **Phase 2 contract MUST be a hard-cutover per ADR 0024.** No graceful proto fallback — workflow + 4 plugin repos ship the new ApplyResponse shape simultaneously, same coordination pattern as the strict-contracts cutover. + +## Provider compatibility expectations (Phase 2 contract preview) + +Per `decisions/0040-v2-action-lifecycle-provider-compatibility.md`, plugins MUST satisfy these 5 invariants at the IaCProviderRequiredServer.Apply RPC boundary: + +1. **Per-action success evidence** — ApplyResponse MUST include per-action outcome (success/error per `PlanAction`) +2. **Failed-delete preservation** — Apply MUST flag failed-delete actions distinctly so wfctl `OnResourceDeleted` does NOT fire +3. **Compensation evidence** — Apply MUST include compensation outcome when create/replace persistence/routing fails +4. **Update-failure non-deletion** — Plugins MUST NOT pre-emptively delete on update failure (engine-side enforced; plugin must not override) +5. **ResourceReplacer advertisement** — Plugin manifest MUST advertise ResourceReplacer usage so wfctl pre-mutation gates correctly + +## Out of scope for Phase 1 + +- Phase 2 gRPC contract design + implementation — separate design pass +- Phase 3 plugin migration — separate per-plugin design + execution passes +- Phase 4 conformance scenario migration — separate trivial PR +- Phase 5 ApplyPlan removal — gates on Phase 4 completion + plugin canonical-form propagation + +## References + +- ADR 0024 (`decisions/0024-iac-typed-force-cutover.md`) — strict-contracts cutover precedent (no compat shim) +- ADR 0040 (`decisions/0040-v2-action-lifecycle-provider-compatibility.md`) — provider-side compatibility contract +- PR #639 — v2 hooks engine landing +- `iac/wfctlhelpers/apply.go` — engine source; package-level doc comment now embeds the v1/v2 contract pointer; `ApplyPlan` symbol carries `// Deprecated:` marker per Phase 1 +- Prior product design: `docs/plans/2026-04-25-wfctl-lifecycle-product-design.md` diff --git a/docs/plans/2026-05-16-v2-lifecycle-phase1-design.md b/docs/plans/2026-05-16-v2-lifecycle-phase1-design.md new file mode 100644 index 00000000..906fd72c --- /dev/null +++ b/docs/plans/2026-05-16-v2-lifecycle-phase1-design.md @@ -0,0 +1,172 @@ +# V2 Action Lifecycle — Phase 1 Inventory + Provider Compatibility Expectations — Design + +**Status:** Draft +**Date:** 2026-05-16 +**Operator:** Jon (autonomous-mode mandate 2026-05-16: "continue with follow-ups, you'll probably need a new brainstorm/design pass before implementation to ensure the accuracy of your plans. continue autonomously" + "#640 is worth tracking as well") +**Tracking issue:** GoCodeAlone/workflow#640 +**Related:** PR #639 (v2 hook path landed), `iac/wfctlhelpers/apply.go` (engine), `interfaces/iac.go` (IaCProvider), `docs/plans/2026-04-25-wfctl-lifecycle-product-design.md` (prior product design) + +## Goal + +Phase 1 of #640. Produces an analysis-and-deprecation-signal deliverable: +1. Inventories every v1 ApplyPlan caller across workflow + all IaC plugins (4 repos: aws/gcp/azure/DO) — verified via per-repo grep at Task 0 (NOT speculative). +2. Classifies each caller as MIGRATE-NEEDED (must adopt v2 hooks for #640's invariants) or LEAVE-AS-IS (no semantic difference; trivial empty-hooks rename suffices). +3. Defines provider compatibility expectations for v2 — what plugins must do at the gRPC IaCProvider.Apply boundary so wfctl-side hooks fire correctly. +4. Lands ADR 0040 recording the per-caller classification, the provider-side expectation contract, **and the consequence that Phase 2 is a coordinated hard-cutover per ADR 0024 (no compat-shim path).** +5. **Adds `// Deprecated: use ApplyPlanWithHooks` godoc marker to the `iac/wfctlhelpers.ApplyPlan` symbol** (grep `^func ApplyPlan` in `iac/wfctlhelpers/apply.go`) — surfaced by `gopls`/`staticcheck` to every caller; mechanically delivers #640's "Add deprecation warnings" milestone. + +**Phase 4 (3 conformance scenarios + 1 test file) FOLDED INTO THIS PR** — staticcheck SA1019 fired on 4 callers when the godoc marker landed; rather than ship a separate trivial-rename PR, the 4 single-line `ApplyPlan(...)` → `ApplyPlanWithHooks(..., ApplyPlanHooks{})` migrations are committed here. + +**Explicitly out of scope for Phase 1** (deferred to Phase 2-5 design passes): +- Phase 2: design + ship the v2-hooks-over-gRPC contract — must be a HARD-CUTOVER coordinated PR cascade across workflow + 4 plugin repos per ADR 0024 (no compat shim, no graceful fallback) +- Phase 3: migrate plugins to emit hooks-aware Apply responses (per-repo PRs). **Phase 3 also bumps `iac-codemod`'s `applyCanonicalCallExpr` constant + updates `rewriteApplyBody` + `isAlreadyDelegatedApplyBody` + `runAssertApplyDelegatesToHelper` AST functions in lockstep** so the codemod becomes the migration driver. (Cycle-2 review correctly flagged that bumping ONLY the constant in Phase 1 is theatre — constant is `//nolint:unused`, not consumed by rewriter; risks leaving codemod internally inconsistent.) +- ~~Phase 4~~: SHIPPED in this PR — see Phase 1 scope item 5 above (staticcheck SA1019 forced the migration when the godoc marker landed; folded in to avoid a trivial separate PR) +- Phase 5: remove `wfctlhelpers.ApplyPlan` entirely (after Phase 2 + Phase 3 plugin canonical-form propagation; Phase 4 already shipped) + +Phase 1 ships engine docs + the deprecation marker. Light mechanical changes; no runtime behavior change. + +## Phase 2 constraint reference + +ADR 0040 (landed by this Phase 1 PR) explicitly records that Phase 2 ships as a coordinated PR cascade — same shape as the original strict-contracts cutover. The Phase 2 design pass MUST cite ADR 0040 as a constraint and reject any compat-shim or graceful-fallback approach. Phase 2 writing-plans agent should read ADR 0040 first. + +## Architecture + +Single-repo (workflow), 1-PR deliverable. Four artifacts: +- `docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md` — the inventory document (per-caller table + classification rationale) +- `decisions/0040-v2-action-lifecycle-provider-compatibility.md` — ADR recording: (a) the provider-side compatibility contract; (b) the explicit consequence that Phase 2 is a coordinated hard-cutover per ADR 0024 +- `iac/wfctlhelpers/doc.go` — Go doc.go file embedding a SHORT version of the inventory + provider-expectation pointer to the migration doc +- **`iac/wfctlhelpers.ApplyPlan` symbol — add `// Deprecated: use ApplyPlanWithHooks` godoc** (grep `^func ApplyPlan` in `iac/wfctlhelpers/apply.go`). The EXPORTED `ApplyPlan` function only — `applyPlanWithEnvProvider` + `applyPlanWithEnvProviderAndHooks` helpers are unexported and `gopls`/`staticcheck` ignore Deprecated markers on unexported identifiers. + +Production code changes are MINIMAL: 1 godoc comment line in `apply.go`. Build clean. No iac-codemod changes in Phase 1 — those move to Phase 3 in lockstep with the AST-rewriter updates that actually consume the constant. + +No runtime behavior change. The deprecation marker surfaces through static analysis to every caller of `wfctlhelpers.ApplyPlan` (the 4 callers migrated in this PR + future regressions), giving immediate signal to any future caller that adds a v1 ApplyPlan call. + +## Inventory (preliminary — verified during Task 1) + +**v1 ApplyPlan callers in workflow (origin/main since c68a56cc):** + +| # | File:Line | Classification | Rationale | +|---|-----------|----------------|-----------| +| 1 | `cmd/iac-codemod/refactor_apply.go:29` (`applyCanonicalCallExpr` constant, `//nolint:unused`) | TOOL — Phase 3 lockstep update | Documentation-only constant; not consumed by AST rewriter. Phase 3 bumps it together with `rewriteApplyBody` (line 1231 hardcoded `ast.NewIdent("ApplyPlan")`) + `isAlreadyDelegatedApplyBody` (line 630 hardcoded `sel.Sel.Name != "ApplyPlan"`) + `runAssertApplyDelegatesToHelper` + test assertions (`refactor_apply_test.go:593`). Phase 1 does NOT touch this — bumping just the constant would create internal inconsistency. | +| 2 | `cmd/iac-codemod/refactor_apply.go:1208` (doc comment) | DOC | Same Phase 3 lockstep update | +| 3 | `cmd/iac-codemod/lint.go:54+641` (comment + matcher) | TOOL — Phase 3 lockstep | Same | +| 4 | `iac/conformance/scenario_upsert_on_already_exists.go:88` | MIGRATED in this PR (Phase 4 folded) | Renamed to `ApplyPlanWithHooks(..., ApplyPlanHooks{})`. Empty-hooks form has zero semantic difference. | +| 5 | `iac/conformance/scenario_delete_action.go:74` | MIGRATED in this PR | Same | +| 6 | `iac/conformance/scenario_replace_cascade_preserves_dependents.go:92` | MIGRATED in this PR | Same | +| 6b | `cmd/wfctl/infra_apply_in_process_test.go:77` | MIGRATED in this PR (was missing from initial inventory; surfaced by staticcheck SA1019) | Same | + +**v1 callers in workflow via `provider.Apply` (NOT wfctlhelpers.ApplyPlan):** + +| # | File:Line | Classification | Rationale | +|---|-----------|----------------|-----------| +| 7 | `cmd/wfctl/infra_apply.go:486` | MIGRATE-NEEDED (Phase 2 — v1 dispatch path) | wfctl's v1 dispatch path — calls `provider.Apply(ctx, &plan)` directly. Sister `else` branch to L473 (v2 path uses `applyV2ApplyPlanWithHooksFn` which internally dispatches via `provider.ResourceDriver(action.Resource.Type)` per action — NOT through `provider.Apply`). Branch selection: `DispatchVersionFor(provider) == DispatchVersionV2` (or similar `dispatch.go` predicate). Phase 2 removes this v1 path entirely after all 4 plugins ship Phase 2-conformant Apply RPC + manifest declaration. | +| 8 | `cmd/wfctl/infra_apply.go:1615` | MIGRATE-NEEDED (Phase 2) | Same as above — second occurrence (likely refresh path). | +| 9 | `cmd/wfctl/iac_typed_adapter.go:350` | NOT IN V2 HOT PATH (Phase 2 wire-format work) | `typedIaCAdapter.Apply` is called ONLY on the v1 dispatch path (Item #7 above). Per-action wire format change happens in `applyResultFromPB` (the typed adapter's response decoder) when Phase 2 extends `ApplyResponse` proto with per-action `Actions []ActionResult` field. The adapter's Apply RPC dispatch shape doesn't need to change; the response decoder does. **Reclassified from cycle 1 to fix architectural misunderstanding** — v2 dispatch is per-resource via ResourceDriver, not per-plan via provider.Apply. | + +**v1 callers in IaC plugin repos (per ADR 0034 cross-repo inventory):** + +**Task 0 (COMPLETED 2026-05-16 inline during design — actual gh api results):** + +| # | Plugin | File:Line | Caller pattern | Classification | +|---|--------|-----------|----------------|----------------| +| 10 | workflow-plugin-aws | `provider/provider.go:237` `AWSProvider.Apply` | **NON-CANONICAL** — implements own loop with `p.mu.RLock`, init check, custom dispatch | **MIGRATE-NEEDED (Phase 3 MANUAL)** — codemod cannot rewrite; needs hand migration to v2 hooks contract | +| 11 | workflow-plugin-gcp | `provider/provider.go:226` `GCPProvider.Apply` | **NON-CANONICAL** — own `for _, action := range plan.Actions` loop with `p.ResourceDriver(action.Resource.Type)` per-action | **MIGRATE-NEEDED (Phase 3 MANUAL)** — codemod cannot rewrite; needs hand migration | +| 12 | workflow-plugin-azure | `internal/provider.go:138` `AzureProvider.Apply` | **NON-CANONICAL** — own loop with `p.mu.RLock` + custom dispatch | **MIGRATE-NEEDED (Phase 3 MANUAL)** — codemod cannot rewrite; needs hand migration | +| 13 | workflow-plugin-digitalocean | `internal/provider.go:274-275` `DOProvider.Apply` | **CANONICAL** — `result, err := wfctlhelpers.ApplyPlan(ctx, p, plan)` delegate (with custom post-flush wrapper) | LEAVE-AS-IS for Phase 1; Phase 3 codemod CAN rewrite (after AST functions updated in lockstep with constant) | + +**Major architectural finding:** **3 of 4 plugins do NOT use the canonical pattern.** iac-codemod's `applyCanonicalCallExpr` constant is aspirational, not reality. Phase 3 scope expands significantly: +- Phase 3 cannot be a single codemod-fix-it run across all 4 plugins +- 3 plugins need MANUAL migration to either canonical form OR (preferred) directly to v2 hooks contract +- Phase 2 v2 contract design must accommodate two implementation paths per provider: (a) delegating to `wfctlhelpers.ApplyPlanWithHooks` (DO pattern), (b) per-plugin custom loop with its own action-boundary surfacing (aws/gcp/azure pattern) + +This finding gets recorded in the inventory doc + ADR 0040 as a Phase 2 / Phase 3 input. It does NOT change Phase 1's deliverable shape — Phase 1 still ships 4 artifacts (inventory + ADR + doc.go + godoc marker). It DOES change the signaling: the inventory doc + ADR need to call out the 3 non-canonical plugins so future Phase 2/3 design passes don't assume codemod-driven uniform migration. + +## Provider compatibility expectations (Phase 1 ADR scope) + +Document what plugins MUST do at the IaCProviderRequiredServer.Apply boundary so wfctl-side v2 hooks fire correctly. This is the contract Phase 2 will implement. + +**Required behaviors (per #640's 5 invariants):** + +1. **Per-action success evidence:** Apply RPC response MUST include per-action outcome (success / error per `PlanAction`), not just whole-plan result. Today `ApplyResponse` likely returns `ApplyResult` with aggregated `Errors []` field — invariant #1 requires per-action granularity for the wfctl-side `OnResourceApplied` hook to fire correctly. Phase 2 contract decision: add `Actions []ActionResult` to `ApplyResponse` proto, where each ActionResult has `{action_index, status, output_keys, error?}`. + +2. **Failed-delete must NOT prune state:** Apply RPC response MUST flag failed-delete actions distinctly so wfctl's `OnResourceDeleted` hook does not fire (and state is preserved). Today this is detected via `ApplyResult.Errors` array but with no per-action granularity — invariant #2 requires action-level tagging. + +3. **Compensation evidence on create/replace failure:** Apply RPC response MUST include the COMPENSATION result when create/replace persistence/routing fails — wfctl needs to know whether the cloud-side resource was successfully torn down (so state DOESN'T leak) or whether compensation itself failed (so state SHOULD leak with operator alert). Today this is opaque. + +4. **Update-failure DOES NOT delete:** This is engine-side behavior (already enforced by ApplyPlanWithHooks per #639). Phase 2 contract just needs to confirm plugins don't override this behavior with their own pre-emptive cleanup. + +5. **Provider-owned-replace gating:** Provider's `ResourceReplacer` interface usage MUST be advertised in plugin manifest so wfctl can decide pre-mutation whether to abort (delete-hook-active case) or allow (no-hook case). Today provider just implements ResourceReplacer; wfctl finds out at dispatch time. + +**ADR 0040 records:** these 5 expectations are normative; Phase 2 implements; plugins ship updated `IaCProviderRequiredServer.Apply` per the contract. + +## Self-challenge round + cycle 1 adversarial findings (addressed in revision) + +**Cycle 1 adversarial-design-review findings — all addressed:** + +- **C-1 (Assumption 5 contradicts ADR 0024):** REVOKED Assumption 5; replaced with "Phase 2 is hard-cutover per ADR 0024" recorded as ADR 0040 consequence + scope expansion documented. +- **C-2 (typed_adapter.Apply misclassified):** Fixed Item #9 reclassification — typed_adapter.Apply is in v1 hot path only; v2 dispatch via ResourceDriver. Phase 2 wire-format work happens in `applyResultFromPB` decoder, not adapter dispatch. +- **I-1 (KEEP-ON-V1 conformance scenarios block Phase 5):** Reclassified all 3 scenarios as MIGRATE-NEEDED (Phase 4) — TRIVIAL. Empty-hooks rename. Phase 5 prerequisite documented. +- **I-2 (plugin inventory bootstrap problem):** Added Task 0 (PRE-PLAN-AUTHORING grep across 4 plugin repos) before ADR commits speculative file:line. Plugin rows show `(Task 0 result)` placeholders that the actual Task 0 work fills in. +- **I-3 (manifest-validation gap on deploy_providers path):** New Assumption 8 records the silent-v1-fallback risk; Phase 2 scope includes adding the validation gate. +- **Minor m-1 (ADR number gap):** Verified at execution: 0039 is highest; 0040 is correct. +- **Minor m-2 (KEEP-ON-V1 terminology confusion):** Term replaced by MIGRATE-NEEDED-TRIVIAL / LEAVE-AS-IS in the table. +- **Minor m-3 (4th doubt missing):** Added: "Phase 2 hard-cutover coordination cost" — surfaced in revoked-Assumption-5 + Phase 2 scope statement. + +**Top remaining doubts:** + +1. **Phase 1's mechanical scope (godoc Deprecated marker + iac-codemod constant bump) is correct only if the codemod's existing call sites in `cmd/iac-codemod/refactor_apply.go:1208` + `cmd/iac-codemod/lint.go:54+641` (doc/comment/lint refs) update consistently.** Risk: missing one of those leaves the codemod inconsistent. Mitigation: Task 2 (codemod constant bump) explicitly enumerates ALL `applyCanonicalCallExpr` references for the rewrite. + +2. **Phase 4 conformance scenario migration shipped in this PR (folded).** Decision changed mid-execution: staticcheck SA1019 (from the new godoc Deprecated marker) failed CI when the marker landed against the 4 still-on-v1 callers. Rather than ship a separate PR for 4 single-line renames, folded Phase 4 into Phase 1. + +3. **The `iac-codemod` canonical-form bump might break already-rewritten plugins** — if a plugin already shipped using the OLD canonical form (`wfctlhelpers.ApplyPlan(ctx, p, plan)`), running the new codemod against it would change the call to `wfctlhelpers.ApplyPlanWithHooks(ctx, p, plan, wfctlhelpers.ApplyPlanHooks{})`. That's actually the desired behavior — but the codemod must be idempotent (re-running against already-bumped code shouldn't keep adding ApplyPlanHooks{} args). Verification: Task 2 includes idempotency test. + +## Assumptions + +1. **iac-codemod's `applyCanonicalCallExpr` is the canonical pattern every plugin uses to delegate Apply to the engine.** Verified by grepping the constant value in `cmd/iac-codemod/refactor_apply.go:29`. If false, plugin-side migration may diverge per-plugin instead of being a single AST rewrite. + +2. **All 4 IaC plugins (aws/gcp/azure/DO) implement `IaCProvider.Apply` via the canonical delegation form.** Verified during Task 1 via per-repo grep. If a plugin has a non-canonical implementation, it gets called out in the inventory table + adds a Phase 2 sub-task. + +3. **`docs/plans/2026-04-25-wfctl-lifecycle-product-design.md` is the prior-product-design source of truth.** Phase 1 doc.go should cite it. If it's stale or contradicts #639's invariants, Phase 1 surfaces the conflict for resolution. + +4. **No external (non-GoCodeAlone-org) consumers depend on `wfctlhelpers.ApplyPlan` being exported.** This is a workflow-internal helper; external consumers go through `IaCProvider.Apply` (gRPC). If false, deprecation in Phase 5 needs cross-org coordination. + +5. **REVOKED — Phase 2 is a HARD-CUTOVER per ADR 0024.** Cycle-1 adversarial review correctly flagged: a "graceful proto fallback" approach is exactly the compat-shim ADR 0024 forbids ("preserves the bug-class surface"). The CONSEQUENCE recorded in ADR 0040: **Phase 2 ships as a coordinated PR cascade across workflow + 4 plugin repos, all in same release window** — same shape as the original strict-contracts cutover (referenced in `feedback_force_strict_contracts_no_compat`). No graceful degradation path. wfctl Phase 2 release must require all 4 plugins ship Phase 2-conformant Apply RPC simultaneously. This expands Phase 2 scope but eliminates the silent-fallback failure mode. + +6. **The 3 conformance scenarios using v1 ApplyPlan don't need state-persistence semantics.** Read of the scenario code in Task 1 confirms (or refutes) — if a scenario actually verifies hook-fired behavior implicitly, classification flips to MIGRATE-NEEDED. + +7. **`cmd/wfctl/infra_apply.go:486 + :1615` (provider.Apply direct calls) are the v1 dispatch dual to the v2 ApplyPlanWithHooks at L473 + L1557 — selected by `DispatchVersionFor(provider) == DispatchVersionV2`** (see `dispatch.go`). Verified during Task 0/1. + +8. **`computePlanVersion` manifest field has no runtime validation gate on the `cmd/wfctl/deploy_providers.go::findIaCPluginDir` path** — uses `json.Unmarshal` without schema validation. Per dispatch.go's own warning ("DO NOT rely on the manifest-validation guarantee in callers"), a typo in a plugin's manifest `computePlanVersion` field SILENTLY falls to v1 dispatch. Phase 2 scope must include adding manifest validation on the deploy_providers path to prevent silent-v1-fallback. Recorded in ADR 0040 as a Phase 2 prerequisite. + +## Tech Stack + +- Markdown (inventory doc + ADR) +- Go doc.go (in-package documentation embedding) +- No production code changes; no tests beyond `go build ./... && go vet ./...` + +## Base branch + +`main` (workflow repo only — Phase 1 is workflow-side docs-only) + +## Rollback + +Trivial: revert the docs PR. No runtime side effects. ADR 0040 deletion via PR (stays in git history per ADR convention but loses Accepted status). + +## Decisions to record + +ADR 0040 — "v2 action lifecycle provider compatibility expectations": +- Status: Accepted +- Context: PR #639 landed v2 hooks engine-side; #640 tracks migration; provider-side contract not yet defined +- Decision: 5 normative expectations (per-action success evidence, failed-delete-no-prune, compensation evidence, update-failure-no-delete, provider-owned-replace-advertised) +- Consequences: Phase 2 implements; plugins must ship updated Apply gRPC per contract; wfctl can route accordingly + +## Out of scope (intentional non-goals — separate future design passes) + +- Phase 2: design + ship `IaCProviderRequiredServer.Apply` proto extension carrying action-boundary outcomes +- Phase 3: migrate plugins to emit v2-aware Apply responses (per-repo PRs) +- ~~Phase 4: migrate the 3 conformance scenarios~~ — SHIPPED in this PR +- Phase 5: deprecate `wfctlhelpers.ApplyPlan` (log warning on call) → remove after compatibility window + +## Memory updates (post-Phase-1 execution) + +Update MEMORY.md / `project_cloud_sdk_extraction_complete.md` Deferred section: mark "#640 v2 action lifecycle Phase 1" COMPLETE; flag Phase 2 (gRPC contract extension) as the next #640 design pass. diff --git a/iac/conformance/scenario_delete_action.go b/iac/conformance/scenario_delete_action.go index ab86e12a..0284ced2 100644 --- a/iac/conformance/scenario_delete_action.go +++ b/iac/conformance/scenario_delete_action.go @@ -71,7 +71,7 @@ func scenarioDeleteActionInApplyInvokesDriverDelete(t *testing.T, cfg Config) { }, } - result, err := wfctlhelpers.ApplyPlan(context.Background(), p, plan) + result, err := wfctlhelpers.ApplyPlanWithHooks(context.Background(), p, plan, wfctlhelpers.ApplyPlanHooks{}) if err != nil { t.Fatalf("ApplyPlan returned top-level error: %v", err) } diff --git a/iac/conformance/scenario_replace_cascade_preserves_dependents.go b/iac/conformance/scenario_replace_cascade_preserves_dependents.go index f750137a..a4d57138 100644 --- a/iac/conformance/scenario_replace_cascade_preserves_dependents.go +++ b/iac/conformance/scenario_replace_cascade_preserves_dependents.go @@ -89,7 +89,7 @@ func scenarioReplaceCascadePreservesDependents(t *testing.T, cfg Config) { }, } - result, err := wfctlhelpers.ApplyPlan(context.Background(), p, plan) + result, err := wfctlhelpers.ApplyPlanWithHooks(context.Background(), p, plan, wfctlhelpers.ApplyPlanHooks{}) if err != nil { t.Fatalf("ApplyPlan returned top-level error: %v", err) } diff --git a/iac/conformance/scenario_upsert_on_already_exists.go b/iac/conformance/scenario_upsert_on_already_exists.go index d55caf3b..89900ae9 100644 --- a/iac/conformance/scenario_upsert_on_already_exists.go +++ b/iac/conformance/scenario_upsert_on_already_exists.go @@ -85,7 +85,7 @@ func scenarioUpsertOnAlreadyExists(t *testing.T, cfg Config) { }, } - result, err := wfctlhelpers.ApplyPlan(context.Background(), p, plan) + result, err := wfctlhelpers.ApplyPlanWithHooks(context.Background(), p, plan, wfctlhelpers.ApplyPlanHooks{}) if err != nil { t.Fatalf("ApplyPlan returned top-level error: %v", err) } diff --git a/iac/wfctlhelpers/apply.go b/iac/wfctlhelpers/apply.go index dc603dec..3ef7eb0c 100644 --- a/iac/wfctlhelpers/apply.go +++ b/iac/wfctlhelpers/apply.go @@ -1,9 +1,25 @@ // Package wfctlhelpers hosts the wfctl-side dispatch helper for v2 IaC -// plugins. wfctl calls [ApplyPlan] when a plugin manifest declares -// iacProvider.computePlanVersion: v2 (see plugin/sdk.IaCProvider). The -// helper iterates plan.Actions, fetches the matching ResourceDriver from -// the provider, and dispatches each action to a per-action sub-function -// (doCreate, doUpdate, doReplace, doDelete). +// plugins. wfctl calls [ApplyPlanWithHooks] (or the legacy [ApplyPlan]) when +// a plugin manifest declares iacProvider.computePlanVersion: v2 (see +// plugin/sdk.IaCProvider). The helper iterates plan.Actions, fetches the +// matching ResourceDriver from the provider, and dispatches each action to +// a per-action sub-function (doCreate, doUpdate, doReplace, doDelete). +// +// # Action lifecycle versions (workflow#640 migration) +// +// Two callers can drive plan execution: +// +// - [ApplyPlan] (legacy, marked Deprecated) — empty-hooks equivalent +// of [ApplyPlanWithHooks]. State persistence happens at whole-plan +// completion only. +// +// - [ApplyPlanWithHooks] (v2, recommended) — caller-supplied per-action +// OnResourceApplied / OnResourceDeleted hooks fire at each successful +// cloud-mutation boundary. Required for #640's invariants. +// +// See docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md and +// decisions/0040-v2-action-lifecycle-provider-compatibility.md for the +// migration contract; ApplyPlan will be removed in Phase 5. // // Lifecycle inside W-3a: // @@ -75,6 +91,15 @@ import ( // // T3.1 ships the dispatch skeleton; T3.1.5 added the postcondition above; // T3.2/T3.3/T3.4 fill the per-action sub-functions with their full bodies. +// +// Deprecated: use [ApplyPlanWithHooks] instead. ApplyPlan is the empty-hooks +// equivalent of ApplyPlanWithHooks and is preserved only for backwards +// compatibility during the workflow#640 v2 action lifecycle migration. New +// callers MUST use ApplyPlanWithHooks so per-action state-persistence hooks +// can fire at each cloud-mutation boundary. See +// docs/migrations/2026-05-16-v2-lifecycle-phase1-inventory.md and +// decisions/0040-v2-action-lifecycle-provider-compatibility.md for the +// migration contract; ApplyPlan will be removed in Phase 5. func ApplyPlan(ctx context.Context, p interfaces.IaCProvider, plan *interfaces.IaCPlan) (*interfaces.ApplyResult, error) { return applyPlanWithEnvProvider(ctx, p, plan, nil) }