Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions e2e/reliability_contract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,15 @@ var auditConsumerSpec = map[string]auditConsumerExpectation{
// Billing — internal alerts, no customer email
"billing.charge_undeliverable": {IntentionallyNoConsumer: true},

// MR-P0-3 (BugBash 2026-05-20): fires from finalizeProvision when the
// backend provision RPC succeeded but a post-RPC persistence step failed.
// Internal operator-alert kind, mirroring billing.charge_undeliverable and
// propagation.dead_lettered — NOT wired into the customer-email forwarder
// because the appropriate response is human-eyes-on, not an automated
// template. The emit site (provision_helper.go) accompanies the row with
// an ERROR-level slog line so NR alerts can key on either.
"provision.persistence_failed": {IntentionallyNoConsumer: true},

// Promote workflow — admin actions, no customer email
"promote.approval_requested": {IntentionallyNoConsumer: true},
"promote.approved": {IntentionallyNoConsumer: true},
Expand Down
12 changes: 10 additions & 2 deletions internal/handlers/agent_action_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,20 @@ func TestRespondError_KnownCode_PopulatesAgentAction(t *testing.T) {
// request_id"). upgrade_url stays absent because the remedy is not an
// upgrade.
func TestRespondError_UnknownCode_5xx_FallsBackToContactSupport(t *testing.T) {
// Pick a 5xx code that is deliberately NOT in codeToAgentAction so the
// W7G fallback branch fires. Previously this test used `provision_failed`,
// but MR-P0-3 (BugBash 2026-05-20) added an explicit retry-with-backoff
// entry for that code — its 503 must instruct the agent to retry, not
// contact support, because the backend object was atomically rolled back.
// `db_error` is documented in helpers.go's curation principles as one of
// the "pure plumbing errors deliberately omitted" — exactly the shape this
// test is meant to exercise.
status, body := doErrorRequest(t, func(c *fiber.Ctx) error {
return respondError(c, fiber.StatusServiceUnavailable, "provision_failed", "transient failure")
return respondError(c, fiber.StatusServiceUnavailable, "db_error", "transient failure")
})
assert.Equal(t, fiber.StatusServiceUnavailable, status)
assert.Equal(t, false, body["ok"])
assert.Equal(t, "provision_failed", body["error"])
assert.Equal(t, "db_error", body["error"])
assert.Equal(t, "transient failure", body["message"])

action, _ := body["agent_action"].(string)
Expand Down
20 changes: 13 additions & 7 deletions internal/handlers/error_envelope_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,21 @@ func decodeEnvelope(t *testing.T, resp *http.Response) map[string]any {
}

// TestErrorEnvelope_503_AllFieldsAndHeader covers the canonical 503 case
// called out in the W7G brief: provisioner failure / transient infra.
// The envelope must carry request_id, retry_after_seconds=30, the
// AgentActionContactSupport fallback (because "provision_failed" isn't in
// codeToAgentAction), AND the matching Retry-After: 30 header.
// called out in the W7G brief: a transient-infra failure with NO registry
// entry. The envelope must carry request_id, retry_after_seconds=30, the
// AgentActionContactSupport fallback, AND the matching Retry-After: 30 header.
//
// Uses `db_error` as the fixture code: it's documented in helpers.go's
// curation principles as deliberately omitted from codeToAgentAction, so
// the W7G fallback branch fires deterministically. (Previously this test
// used `provision_failed`, but MR-P0-3 added an explicit retry-with-backoff
// entry for that code — its 503 must instruct the agent to retry, not
// contact support.)
func TestErrorEnvelope_503_AllFieldsAndHeader(t *testing.T) {
app := envelopeApp(t)
app.Get("/x", func(c *fiber.Ctx) error {
return respondError(c, fiber.StatusServiceUnavailable,
"provision_failed", "Failed to provision webhook resource")
"db_error", "Failed to query platform database")
})

req := httptest.NewRequest(http.MethodGet, "/x", nil)
Expand All @@ -106,8 +112,8 @@ func TestErrorEnvelope_503_AllFieldsAndHeader(t *testing.T) {

body := decodeEnvelope(t, resp)
assert.Equal(t, false, body["ok"])
assert.Equal(t, "provision_failed", body["error"])
assert.Equal(t, "Failed to provision webhook resource", body["message"])
assert.Equal(t, "db_error", body["error"])
assert.Equal(t, "Failed to query platform database", body["message"])
assert.Equal(t, "rid-fixed-123", body["request_id"],
"request_id must echo X-Request-ID so agents quoting it to support don't have to read headers")
assert.Equal(t, float64(30), body["retry_after_seconds"],
Expand Down
24 changes: 24 additions & 0 deletions internal/handlers/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,27 @@ func RunFinalizeProvisionForTest(
helper := provisionHelper{db: dbConn, cfg: cfg}
return helper.finalizeProvision(ctx, res, connectionURL, keyPrefix, providerResourceID, requestID, logPrefix, cleanup)
}

// CodeToAgentActionMetaForTest is the test-facing view of one
// codeToAgentAction entry, introduced for the MR-P0-3 coverage tests.
//
// It is declared as its own struct rather than as an alias of the
// unexported errorCodeMeta so that errorCodeMeta itself never appears in an
// exported signature. Only the two fields the tests read are carried over:
// the agent_action sentence and the upgrade URL.
type CodeToAgentActionMetaForTest struct {
	AgentAction, UpgradeURL string
}

// LookupCodeToAgentActionForTest reports the agent_action metadata that
// codeToAgentAction holds for code.
//
// When code is registered, the entry's AgentAction and UpgradeURL are copied
// into a CodeToAgentActionMetaForTest and returned with true. When it is not,
// the zero value is returned with false.
//
// NOTE(review): this is intended to read the same map respondError consults
// when building the envelope — confirm against respondError in helpers.go.
func LookupCodeToAgentActionForTest(code string) (CodeToAgentActionMetaForTest, bool) {
	if entry, found := codeToAgentAction[code]; found {
		mirror := CodeToAgentActionMetaForTest{
			AgentAction: entry.AgentAction,
			UpgradeURL:  entry.UpgradeURL,
		}
		return mirror, true
	}
	return CodeToAgentActionMetaForTest{}, false
}
14 changes: 14 additions & 0 deletions internal/handlers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,20 @@ var codeToAgentAction = map[string]errorCodeMeta{
AgentAction: "Tell the user the provisioner is temporarily unavailable. Retry in 30 seconds — see live status at https://instanode.dev/status.",
UpgradeURL: "https://instanode.dev/status",
},

// MR-P0-3 (BugBash 2026-05-20): explicit agent_action for the catch-all
// `provision_failed` 503 — historically omitted here so the response fell
// back to AgentActionContactSupport ("email support"). For an atomic-
// persistence-failure landing this code, that fallback is wrong: the
// backend object was just torn down (best-effort) and the row soft-
// deleted, so the right action is "retry the provision with backoff,"
// NOT "email support." Sentence keeps the U3 contract (opens with
// "Tell the user", names the reason, names the action, full
// https://instanode.dev URL, < 280 chars). The Retry-After header and the
// retry_after_seconds body field on a 503 also signal the backoff window.
"provision_failed": {
AgentAction: "Tell the user provisioning hit a transient platform-persistence error and no charge or resource was created. Retry the same request with exponential backoff (start at 5s, cap at 60s) — see https://instanode.dev/status if it persists.",
},
"billing_provider_unavailable": {
AgentAction: "Tell the user the billing provider is temporarily unavailable. Retry the upgrade in 60 seconds — see status at https://instanode.dev/status.",
UpgradeURL: "https://instanode.dev/status",
Expand Down
Loading
Loading