Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- **`interfaces.DriftClass`** enum with constants `DriftClassUnknown` (zero value, omitempty-safe),
`DriftClassInSync`, `DriftClassGhost`, `DriftClassConfig`. Additive `Class DriftClass` field on
`DriftResult` with `json:"class,omitempty"` — backwards-compatible with all existing consumers.
- **`wfctl infra apply --refresh`** flag: detect drift first, prune ghost-in-state entries (cloud
returns 404 but state has the resource) before running the normal plan+apply phase.
Default is dry-run (prints "would prune" without mutating); pass `--auto-approve` to execute.
- **`wfctl infra apply --allow-protected-prune`** flag: a second, explicit opt-in (two-key safeguard)
required to prune resources with `protected: true` in their state Outputs. Without it, protected
ghosts cause an immediate error with a clear message.
- **`wfctl infra drift`** CLI output now prints drift class column (GHOST / CONFIG / IN-SYNC)
for actionable operator feedback. Providers returning `DriftClassUnknown` fall through to the
legacy Drifted-bool behavior.
- **`docs/wfctl/drift-recovery.md`** operator procedure covering detect→dry-run→approve flow,
protected-resource handling, audit-log format, and CI integration examples.

### Changed

- `driftInfraModules` uses `DriftClass` constants for output classification; drift-found message
updated to suggest `wfctl infra apply --refresh`.
- `DriftResult.Expected`, `DriftResult.Actual`, and `DriftResult.Fields` now carry `omitempty`
tags (additive — previously these fields serialised as `null` / `[]` in JSON; they are now
omitted entirely when empty, which is what most consumers expect).

## [0.18.11.1] - 2026-04-25

### Fixed
Expand Down
44 changes: 44 additions & 0 deletions cmd/wfctl/infra.go
Original file line number Diff line number Diff line change
Expand Up @@ -944,13 +944,24 @@ func runInfraApply(args []string) error {
fs.StringVar(&envName, "env", "", "Environment name (resolves per-module environments: overrides)")
var planFile string
fs.StringVar(&planFile, "plan", "", "Apply from a pre-emitted plan.json (skips ComputePlan)")
var refreshFlag bool
fs.BoolVar(&refreshFlag, "refresh", false, "Detect drift and prune ghost-in-state entries before applying")
var allowProtectedPruneFlag bool
fs.BoolVar(&allowProtectedPruneFlag, "allow-protected-prune", false, "Allow pruning state entries for resources marked protected: true (requires --refresh)")
Comment on lines +947 to +950
autoApprove := &autoApproveVal
showSensitive := showSensitiveVal
if err := fs.Parse(args); err != nil {
return err
}
_ = showSensitive // used in apply progress output when provider integration is complete

// Pre-flight: --allow-protected-prune is only meaningful with --refresh.
// Without this check the flag would be silently ignored when --refresh is
// absent, which could mislead operators into believing they have authorized
// a dangerous prune operation.
if allowProtectedPruneFlag && !refreshFlag {
return fmt.Errorf("--allow-protected-prune requires --refresh")
}

cfgFile := configFlag
if cfgFile == "" {
var err error
Expand Down Expand Up @@ -1006,6 +1017,39 @@ func runInfraApply(args []string) error {
}
}

// --refresh: detect drift first and prune ghost-in-state entries (cloud 404s)
// before running the normal plan + apply. Only applicable for infra.* configs;
// silently skipped for legacy platform.* configs.
if refreshFlag && hasInfraModules(cfgFile) {
fmt.Println("Refreshing state (detecting drift)...")
store, storeErr := resolveStateStore(cfgFile, envName)
if storeErr != nil {
return fmt.Errorf("open state store for refresh: %w", storeErr)
}
states, statesErr := store.ListResources(ctx)
if statesErr != nil {
return fmt.Errorf("list state for refresh: %w", statesErr)
}
groups, groupOrder := groupStatesByProvider(states, cfgFile, envName)
for _, moduleRef := range groupOrder {
g := groups[moduleRef]
provider, closer, provErr := resolveIaCProvider(ctx, g.provType, g.provCfg)
if provErr != nil {
return fmt.Errorf("refresh: load provider %q: %w", moduleRef, provErr)
}
refreshErr := runInfraApplyRefreshPhase(ctx, provider, g.refs, store,
*autoApprove, allowProtectedPruneFlag, states, os.Stdout, os.Stderr)
if closer != nil {
if cerr := closer.Close(); cerr != nil {
fmt.Fprintf(os.Stderr, "warning: provider %q shutdown: %v\n", g.provType, cerr)
}
}
if refreshErr != nil {
return fmt.Errorf("refresh phase: %w", refreshErr)
}
}
}

fmt.Printf("Applying infrastructure from %s...\n", cfgFile)

// --plan: dispatch actions from a pre-emitted plan file, skipping ComputePlan.
Expand Down
118 changes: 118 additions & 0 deletions cmd/wfctl/infra_apply_refresh.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package main

import (
"context"
"fmt"
"io"
"strings"
"time"

"github.com/GoCodeAlone/workflow/interfaces"
)

// runInfraApplyRefreshPhase detects drift against the given provider and prunes
// ghost-in-state entries (where cloud Read returned ErrResourceNotFound). It is
// called by runInfraApply when --refresh is set.
//
// Behavior:
// - For each DriftClassGhost result: if autoApprove is false, prints a dry-run
// "would prune" line. If autoApprove is true, calls store.DeleteResource and
// emits an audit log line to stderr.
// - Protected resources (protected: true in state Outputs) are blocked unless
// allowProtectedPrune is also set. Without that flag, an error is returned and
// no prunes happen.
// - DriftClassConfig and DriftClassInSync results are left for the regular plan
// - apply phase; this function does not touch them.
// - If provider.DetectDrift returns a non-nil error, the error is propagated
// immediately and no pruning happens (transient API errors must NOT cause
// state loss).
//
// All parameters must be non-nil except states (nil is valid = no state for
// protected-resource lookup). stdout receives human-readable progress; stderr
// receives audit log lines.
func runInfraApplyRefreshPhase(
ctx context.Context,
provider interfaces.IaCProvider,
refs []interfaces.ResourceRef,
store infraStateStore,
autoApprove bool,
allowProtectedPrune bool,
states []interfaces.ResourceState,
stdout io.Writer,
stderr io.Writer,
) error {
if len(refs) == 0 {
fmt.Fprintln(stdout, "Refresh: no state to check.")
return nil
}

results, err := provider.DetectDrift(ctx, refs)
if err != nil {
// Transient or auth error — propagate; do NOT prune anything.
return fmt.Errorf("detect drift: %w", err)
}

// First pass: pre-scan ALL ghost results for protected resources without the
// override flag. Collecting all blocked names before any mutation ensures the
// operator sees the complete list and that no partial state mutation occurs.
var blocked []string
for _, r := range results {
if r.Class != interfaces.DriftClassGhost {
continue
}
if isRefProtected(states, r.Name) && !allowProtectedPrune {
blocked = append(blocked, r.Name)
}
}
if len(blocked) > 0 {
for _, name := range blocked {
fmt.Fprintf(stderr, "wfctl: BLOCKED: %s is protected; cannot prune without --allow-protected-prune\n", name)
}
return fmt.Errorf("refresh blocked: %d protected resource(s) require --allow-protected-prune: %s",
len(blocked), strings.Join(blocked, ", "))
}

// Second pass: all pre-validation passed — execute mutations.
for _, r := range results {
if r.Class != interfaces.DriftClassGhost {
// In-sync or config-drift: leave for regular plan/apply phase.
continue
}

isProtected := isRefProtected(states, r.Name)

Comment on lines +82 to +83
if !autoApprove {
// Dry-run: report what would happen without mutating.
fmt.Fprintf(stdout, "Refresh: would prune ghost %s (%s) — cloud reports not found.\n", r.Name, r.Type)
continue
}

// Emit audit log before mutation so the log entry is always present,
// even if DeleteResource fails.
fmt.Fprintf(stderr, "wfctl: state mutation prune %s (type=%s protected=%v) reason=ghost-in-state at %s\n",
r.Name, r.Type, isProtected, time.Now().UTC().Format(time.RFC3339))

if err := store.DeleteResource(ctx, r.Name); err != nil {
return fmt.Errorf("refresh: prune %s: %w", r.Name, err)
}
fmt.Fprintf(stdout, "Refresh: pruned ghost %s (%s)\n", r.Name, r.Type)
}
return nil
}

// isRefProtected reports whether any entry in states named name carries a
// boolean true under the "protected" key of its Outputs.
//
// The check is deliberately strict: only a real bool counts. A value of any
// other type (for example the string "true") fails the type assertion and is
// treated as not protected, without any warning. A missing key, a name that
// matches no entry, and a nil or empty states slice all yield false.
func isRefProtected(states []interfaces.ResourceState, name string) bool {
	for idx := range states {
		entry := &states[idx]
		if entry.Name != name {
			continue
		}
		flag, isBool := entry.Outputs["protected"].(bool)
		if isBool && flag {
			return true
		}
	}
	return false
}
Loading
Loading