From e835aa1bdd5b3456410affbc10efcfb6b79e8b46 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sat, 6 Jun 2026 00:03:10 +0530 Subject: [PATCH 1/2] feat(plans): flag-gated per-service resource-count caps (Task #55) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the strict-≥80%-margin hole where only queue_count was capped: a tenant could create MANY postgres/vector/redis/mongodb/storage resources each at the per-resource size cap and blow the saturated-COGS bound (Redis binding at $6.50/GB). Adds a per-tier active-resource COUNT cap per service, enforced like the existing queue_count A6 block — but FLAG-GATED, default OFF. Flag: RESOURCE_COUNT_CAPS_ENABLED (config.go, default false). When off, the new enforceResourceCountCap helper returns immediately and runs NO count query — zero behavior change, proven inert by TestResourceCountCap_FlagOffIsInert + TestEnforceResourceCountCap_FlagOffInert_Whitebox. Operator enables after a usage audit so no current tenant is retroactively over a cap. Enforcement sites (mirror queue.go's A6 block; 402 + agent_action + metric): db.go (postgres), vector.go, cache.go (redis), nosql.go (mongodb), storage.go. Shared helper: internal/handlers/resource_count_cap.go (one call site per handler, not a copy-pasted block). Count cap fails CLOSED on a count-query error when enabled (a cheap indexed COUNT; must not silently bypass a cost cap). Per-tier numbers (api/plans.yaml; mirrors common defaultYAML, depends on common#47 which is merged): anon/free=1 each; hobby=2; hobby_plus=3; pro pg/vec/mongo/storage=5 redis=3; growth=6 redis=3; team pg=5 vec=8 redis=4 mongo=6 storage=6. redis_count is the most conservative line everywhere (binding COGS). Derived so count×size×unit-COGS ≤ tier 20%-of-price budget per service. 
Surfaces (rule 22): /api/v1/capabilities resource_count_limit map; /api/v1/billing/usage count+count_limit on storage services; openapi.go schemas; content/llms.txt + instanode-web public/llms.txt (separate PRs). Metric (rule 25): instant_resource_count_limit_blocked_total{service,team_tier} (metrics.go). Alert + Prom rule + dashboard tile + catalog row in infra PR. Tests: registry-iterating flag-on guard (rule 18, TestResourceCountCap_FlagOnAtLimitRejects) so service N+1 can't ship uncapped; under-limit pass; whitebox edge branches (unlimited, count-error, nil cfg) → enforceResourceCountCap 100%; capabilities surface guard; config flag test; strict_margin guard extended to the new *_count fields. make gate green (the one unrelated pre-existing failure, models.TestLinkGitHubID, reproduces with this change stashed and touches no files in this diff). Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/config/config.go | 22 ++ internal/config/config_test.go | 16 ++ internal/handlers/billing_usage.go | 13 + internal/handlers/cache.go | 6 + internal/handlers/capabilities.go | 52 ++-- internal/handlers/capabilities_test.go | 39 +++ internal/handlers/db.go | 6 + internal/handlers/nosql.go | 5 + internal/handlers/openapi.go | 6 +- internal/handlers/resource_count_cap.go | 84 +++++++ internal/handlers/resource_count_cap_test.go | 225 ++++++++++++++++++ .../resource_count_cap_whitebox_test.go | 123 ++++++++++ internal/handlers/storage.go | 8 + internal/handlers/vector.go | 5 + internal/metrics/metrics.go | 12 + .../plans/strict_margin_finite_limits_test.go | 8 + plans.yaml | 87 +++++++ 17 files changed, 700 insertions(+), 17 deletions(-) create mode 100644 internal/handlers/resource_count_cap.go create mode 100644 internal/handlers/resource_count_cap_test.go create mode 100644 internal/handlers/resource_count_cap_whitebox_test.go diff --git a/internal/config/config.go b/internal/config/config.go index 901f6b0f..13fa3e96 100644 --- a/internal/config/config.go +++ 
b/internal/config/config.go @@ -197,6 +197,17 @@ type Config struct { // Off → /deploy/new rejects source=git with 501; tarball/image unaffected. DeploySourceGitEnabled bool + // ResourceCountCapsEnabled gates per-service resource-count enforcement + // (Task #55). Default FALSE: when off, the count-check block in every + // provision handler (db/vector/cache/nosql/storage) is skipped entirely — + // zero behavior change, so shipping the caps cannot surprise-break an + // existing heavy tenant with a 402. When on, each handler counts the team's + // active resources of that type and rejects over-cap provisions with 402 + + // agent_action, mirroring the always-on queue_count cap. Enabling it is an + // operator action (kubectl set env RESOURCE_COUNT_CAPS_ENABLED=true) after a + // usage audit so no current tenant is over the new per-tier caps. + ResourceCountCapsEnabled bool + // GitHub App (P4) — install-once push-to-deploy + short-lived installation // tokens for private-repo clones. Distinct from the GitHub OAuth *login* app // above (GitHubClientID/Secret). GitHubAppEnabled gates the whole feature: @@ -501,6 +512,17 @@ func Load() *Config { cfg.DeploySourceGitEnabled = false } + // RESOURCE_COUNT_CAPS_ENABLED: default FALSE (Task #55). Off → the per-service + // count-check block in every provision handler is skipped (zero behavior + // change). On → over-cap provisions get 402. Operator action after a usage + // audit so no current tenant is retroactively over a new per-tier cap. + switch strings.ToLower(strings.TrimSpace(os.Getenv("RESOURCE_COUNT_CAPS_ENABLED"))) { + case "true", "1", "yes": + cfg.ResourceCountCapsEnabled = true + default: + cfg.ResourceCountCapsEnabled = false + } + // GITHUB_APP_ENABLED: default FALSE (off until the operator registers the // App and provisions GITHUB_APP_* secrets — see infra/GITHUB-APP-RUNBOOK.md). 
switch strings.ToLower(strings.TrimSpace(os.Getenv("GITHUB_APP_ENABLED"))) { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index fe4483e8..d8554abb 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -63,6 +63,7 @@ func allKeys() []string { "METRICS_TOKEN", "DASHBOARD_BASE_URL", "API_PUBLIC_URL", "DELETION_CONFIRMATION_TTL_MINUTES", "FAMILY_BINDINGS_ENABLED", "DEPLOY_SOURCE_IMAGE_ENABLED", "DEPLOY_SOURCE_GIT_ENABLED", + "RESOURCE_COUNT_CAPS_ENABLED", "GITHUB_APP_ENABLED", "GITHUB_APP_ID", "GITHUB_APP_SLUG", "GITHUB_APP_PRIVATE_KEY", "GITHUB_APP_WEBHOOK_SECRET", "GITHUB_APP_CLIENT_ID", "GITHUB_APP_CLIENT_SECRET", "BREVO_WEBHOOK_SECRET", "SES_SNS_SUBSCRIPTION_ARN", @@ -387,6 +388,21 @@ func TestLoad_DeploySourceGitEnabled(t *testing.T) { } } +func TestLoad_ResourceCountCapsEnabled(t *testing.T) { + for _, val := range []string{"true", "1", "yes", "TRUE", " Yes "} { + applyBaselineEnv(t, map[string]string{"RESOURCE_COUNT_CAPS_ENABLED": val}) + if !Load().ResourceCountCapsEnabled { + t.Errorf("RESOURCE_COUNT_CAPS_ENABLED=%q should enable", val) + } + } + for _, val := range []string{"false", "0", "no", "maybe", ""} { + applyBaselineEnv(t, map[string]string{"RESOURCE_COUNT_CAPS_ENABLED": val}) + if Load().ResourceCountCapsEnabled { + t.Errorf("RESOURCE_COUNT_CAPS_ENABLED=%q should stay disabled (default OFF)", val) + } + } +} + func TestLoad_GitHubAppEnabled(t *testing.T) { // When enabling the App, Load() fails closed unless the webhook secret + // private key + app id are present (review HIGH-1), so set them here. 
diff --git a/internal/handlers/billing_usage.go b/internal/handlers/billing_usage.go index 0bd84e3a..cb8f8667 100644 --- a/internal/handlers/billing_usage.go +++ b/internal/handlers/billing_usage.go @@ -68,6 +68,13 @@ type usageMetric struct { LimitBytes int64 `json:"limit_bytes,omitempty"` Count int `json:"count,omitempty"` Limit int `json:"limit,omitempty"` + // CountLimit is the per-tier resource-COUNT cap (Task #55) for the + // byte-metered storage services (postgres/redis/mongodb), where `Limit` + // already carries no value (those use LimitBytes). It lets the dashboard + // show "3 / 5 databases" alongside "120 MB / 1024 MB". For the + // count-metered services (deployments/webhooks/vault/members) the existing + // Count/Limit pair is unchanged. -1 means unlimited. + CountLimit int `json:"count_limit,omitempty"` } // GetUsage handles GET /api/v1/billing/usage. @@ -152,9 +159,15 @@ func (h *BillingUsageHandler) computeUsage(ctx context.Context, teamID uuid.UUID return usageSummary{}, sumErr } limitMB := h.plans.StorageLimitMB(tier, svc) + // Task #55: also surface the active-resource COUNT + per-tier count cap + // so the dashboard can render "3 / 5 databases" next to the byte gauge. + // Best-effort: a count error must not fail the byte rows. + count, _ := models.CountActiveResourcesByTeamAndType(ctx, h.db, teamID, svc) usage[svc] = usageMetric{ Bytes: bytes, LimitBytes: mbToBytes(limitMB), + Count: count, + CountLimit: h.plans.ResourceCountLimit(tier, svc), } } diff --git a/internal/handlers/cache.go b/internal/handlers/cache.go index 84c14948..1248d184 100644 --- a/internal/handlers/cache.go +++ b/internal/handlers/cache.go @@ -333,6 +333,12 @@ func (h *CacheHandler) newCacheAuthenticated( tier = "growth" } + // Task #55: per-tier redis count cap (flag-gated, default OFF). Redis is the + // binding COGS constraint ($6.50/GB) so this is the most-conservative cap. 
+ if handled, capErr := h.enforceResourceCountCap(c, teamUUID, team.PlanTier, models.ResourceTypeRedis, requestID); handled { + return capErr + } + parentRootID, perr := resolveFamilyParent(c, h.db, parentResourceID, teamUUID, models.ResourceTypeRedis, env) if perr != nil { return perr diff --git a/internal/handlers/capabilities.go b/internal/handlers/capabilities.go index 8bfdec72..df1666da 100644 --- a/internal/handlers/capabilities.go +++ b/internal/handlers/capabilities.go @@ -32,23 +32,30 @@ func NewCapabilitiesHandler(p *plans.Registry) *CapabilitiesHandler { } type tierCapabilities struct { - Tier string `json:"tier"` - DisplayName string `json:"display_name"` - PriceUSDMonthly int `json:"price_usd_monthly"` - PaidFromDayOne bool `json:"paid_from_day_one"` - StorageLimitMB map[string]int `json:"storage_limit_mb"` - ConnectionsLimit map[string]int `json:"connections_limit"` - Deployments int `json:"deployments_apps"` - BackupRetentionDays int `json:"backup_retention_days"` - BackupRestoreEnabled bool `json:"backup_restore_enabled"` - ManualBackupsPerDay int `json:"manual_backups_per_day"` + Tier string `json:"tier"` + DisplayName string `json:"display_name"` + PriceUSDMonthly int `json:"price_usd_monthly"` + PaidFromDayOne bool `json:"paid_from_day_one"` + StorageLimitMB map[string]int `json:"storage_limit_mb"` + ConnectionsLimit map[string]int `json:"connections_limit"` + // ResourceCountLimit is the per-service max number of active resources a + // team may hold (Task #55). Keyed by the same service strings as + // StorageLimitMB. -1 means unlimited; a positive value is the hard cap. + // Enforcement is flag-gated (RESOURCE_COUNT_CAPS_ENABLED) — this surface + // always advertises the cap so an agent can plan around it even while the + // operator hasn't yet flipped enforcement on. 
+ ResourceCountLimit map[string]int `json:"resource_count_limit"` + Deployments int `json:"deployments_apps"` + BackupRetentionDays int `json:"backup_retention_days"` + BackupRestoreEnabled bool `json:"backup_restore_enabled"` + ManualBackupsPerDay int `json:"manual_backups_per_day"` // RPOMinutes / RTOMinutes — FIX-H #Q50 (B36). 0 means // "not promised" (no scheduled backups / no self-serve restore on // the tier). Lets an agent reason about durability requirements // per-tier without a second round-trip. - RPOMinutes int `json:"rpo_minutes"` - RTOMinutes int `json:"rto_minutes"` - AnnualDiscountPercent int `json:"annual_discount_percent"` + RPOMinutes int `json:"rpo_minutes"` + RTOMinutes int `json:"rto_minutes"` + AnnualDiscountPercent int `json:"annual_discount_percent"` // UpgradeURL — pointer so the terminal tier (Team — there is nothing // to upgrade to) emits an explicit JSON `null` instead of the pricing // URL. DOG-26 (QA 2026-05-29): every tier including Team used to @@ -56,12 +63,12 @@ type tierCapabilities struct { // rendering an "Upgrade" CTA on the Team plan with no destination. // `null` is the contract-stable terminal-tier marker; a non-null // string is the "click here to upgrade" signal. - UpgradeURL *string `json:"upgrade_url"` + UpgradeURL *string `json:"upgrade_url"` // IsTerminalTier — explicit boolean so clients don't have to encode // the "is upgrade_url null" check at every render site. True for the // top tier (Team today), false for everything below. Pairs with // UpgradeURL — when IsTerminalTier=true, UpgradeURL is null. - IsTerminalTier bool `json:"is_terminal_tier"` + IsTerminalTier bool `json:"is_terminal_tier"` } // capabilityResourceTypes is the list of service types the /capabilities @@ -71,6 +78,13 @@ var capabilityResourceTypes = []string{ "postgres", "redis", "mongodb", "queue", "storage", "webhook", "vector", } +// countCapResourceTypes is the set of services that carry a per-tier +// resource-COUNT cap (Task #55). 
Webhook is omitted — it is byte/request-capped +// via webhook_requests_stored, not count-capped. Order is contract-stable. +var countCapResourceTypes = []string{ + "postgres", "vector", "redis", "mongodb", "storage", "queue", +} + // upgradeURL is the marketing pricing page that every tier row in the // /capabilities response points back to. Hoisted to a package const so // the URL fragment isn't scattered as a string literal across the handler. @@ -158,10 +172,17 @@ func (h *CapabilitiesHandler) Get(c *fiber.Ctx) error { for _, e := range entries { storage := map[string]int{} conns := map[string]int{} + counts := map[string]int{} for _, rt := range capabilityResourceTypes { storage[rt] = h.plans.StorageLimitMB(e.name, rt) conns[rt] = h.plans.ConnectionsLimit(e.name, rt) } + // Task #55: per-service resource-count caps. Only the count-capped + // services appear (webhook is byte-capped via webhook_requests_stored, + // not count-capped). ResourceCountLimit returns -1 for unlimited. + for _, rt := range countCapResourceTypes { + counts[rt] = h.plans.ResourceCountLimit(e.name, rt) + } priceUSD := e.plan.PriceMonthly / 100 // cents → dollars // DOG-26: terminal tier marker — top of the rank ladder has // nothing to upgrade to. 
upgrade_url is null + is_terminal_tier @@ -179,6 +200,7 @@ func (h *CapabilitiesHandler) Get(c *fiber.Ctx) error { PaidFromDayOne: priceUSD > 0, StorageLimitMB: storage, ConnectionsLimit: conns, + ResourceCountLimit: counts, Deployments: h.plans.DeploymentsAppsLimit(e.name), BackupRetentionDays: h.plans.BackupRetentionDays(e.name), BackupRestoreEnabled: h.plans.BackupRestoreEnabled(e.name), diff --git a/internal/handlers/capabilities_test.go b/internal/handlers/capabilities_test.go index 3efc62aa..5079e26a 100644 --- a/internal/handlers/capabilities_test.go +++ b/internal/handlers/capabilities_test.go @@ -34,6 +34,7 @@ type capabilityTier struct { PaidFromDayOne bool `json:"paid_from_day_one"` StorageLimitMB map[string]int `json:"storage_limit_mb"` ConnectionsLimit map[string]int `json:"connections_limit"` + ResourceCountLimit map[string]int `json:"resource_count_limit"` Deployments int `json:"deployments_apps"` BackupRetentionDays int `json:"backup_retention_days"` BackupRestoreEnabled bool `json:"backup_restore_enabled"` @@ -214,6 +215,44 @@ func TestCapabilities_LimitsResolveFromRegistry(t *testing.T) { assert.Equal(t, 5, hp.ManualBackupsPerDay, "hobby_plus manual backups/day") } +// TestCapabilities_SurfacesResourceCountLimit is the Task #55 rule-18 surface +// guard: GET /api/v1/capabilities must expose resource_count_limit for EVERY +// count-capped service on every paid tier, with the value matching the live +// registry. Iterates the registry rather than hand-typing tiers so a new tier or +// service can't silently ship without the cap appearing on the public matrix. 
+func TestCapabilities_SurfacesResourceCountLimit(t *testing.T) { + reg := plans.Default() + app := newCapabilitiesApp(t, reg) + _, body := callCapabilities(t, app) + require.NotEmpty(t, body.Tiers) + + countServices := []string{"postgres", "vector", "redis", "mongodb", "storage", "queue"} + for _, tier := range body.Tiers { + require.NotNil(t, tier.ResourceCountLimit, + "tier %q must carry resource_count_limit", tier.Tier) + for _, svc := range countServices { + got, ok := tier.ResourceCountLimit[svc] + require.True(t, ok, "tier %q resource_count_limit must include %q", tier.Tier, svc) + assert.Equal(t, reg.ResourceCountLimit(tier.Tier, svc), got, + "tier %q %s count limit must match the registry", tier.Tier, svc) + } + // Webhook is request-capped, not count-capped — must NOT appear. + _, hasWebhook := tier.ResourceCountLimit["webhook"] + assert.False(t, hasWebhook, "webhook must not appear in resource_count_limit (it is request-capped)") + } + + // Spot-pin a couple of binding values so a loosened cap is a visible diff. + for _, tier := range body.Tiers { + switch tier.Tier { + case "pro": + assert.Equal(t, 3, tier.ResourceCountLimit["redis"], "pro redis_count") + assert.Equal(t, 5, tier.ResourceCountLimit["postgres"], "pro postgres_count") + case "team": + assert.Equal(t, 4, tier.ResourceCountLimit["redis"], "team redis_count") + } + } +} + // TestCapabilities_PlansUnavailable — when the registry pointer is nil // (boot-time failure in dev with no fallback), the handler must return // 503 instead of panicking. Lifted contract from the original handler. diff --git a/internal/handlers/db.go b/internal/handlers/db.go index 6f8f0fb5..583ef4cf 100644 --- a/internal/handlers/db.go +++ b/internal/handlers/db.go @@ -381,6 +381,12 @@ func (h *DBHandler) newDBAuthenticated( tier = "growth" } + // Task #55: per-tier postgres count cap (flag-gated, default OFF — inert + // unless RESOURCE_COUNT_CAPS_ENABLED). Mirrors queue.go's A6 block. 
+ if handled, capErr := h.enforceResourceCountCap(c, teamUUID, team.PlanTier, models.ResourceTypePostgres, requestID); handled { + return capErr + } + // Family-link validation runs BEFORE provisioning so a cross-team / // cross-type / duplicate-twin parent_resource_id never causes us to // create-then-fail (which would leak a database we can't link). diff --git a/internal/handlers/nosql.go b/internal/handlers/nosql.go index fe47409e..fb4bccd6 100644 --- a/internal/handlers/nosql.go +++ b/internal/handlers/nosql.go @@ -326,6 +326,11 @@ func (h *NoSQLHandler) newNoSQLAuthenticated( tier = "growth" } + // Task #55: per-tier mongodb count cap (flag-gated, default OFF). Mirrors queue.go. + if handled, capErr := h.enforceResourceCountCap(c, teamUUID, team.PlanTier, models.ResourceTypeMongoDB, requestID); handled { + return capErr + } + parentRootID, perr := resolveFamilyParent(c, h.db, parentResourceID, teamUUID, models.ResourceTypeMongoDB, env) if perr != nil { return perr diff --git a/internal/handlers/openapi.go b/internal/handlers/openapi.go index b2619521..e6b31c12 100644 --- a/internal/handlers/openapi.go +++ b/internal/handlers/openapi.go @@ -3277,8 +3277,9 @@ const openAPISpec = `{ "properties": { "bytes": { "type": "integer", "format": "int64", "description": "Current storage usage in bytes. Present on postgres/redis/mongodb." }, "limit_bytes": { "type": "integer", "format": "int64", "description": "Storage cap in bytes (plans.yaml storage_mb × 1024 × 1024). -1 = unlimited." }, - "count": { "type": "integer", "description": "Current count. Present on deployments/webhooks/vault/members." }, - "limit": { "type": "integer", "description": "Count cap from plans.yaml. -1 = unlimited." } + "count": { "type": "integer", "description": "Current count. Present on deployments/webhooks/vault/members, and (Task #55) on postgres/redis/mongodb as the active-resource count alongside bytes." }, + "limit": { "type": "integer", "description": "Count cap from plans.yaml. 
-1 = unlimited." }, + "count_limit": { "type": "integer", "description": "Task #55: per-tier resource-COUNT cap for the byte-metered storage services (postgres/redis/mongodb), where the limit field is unused. -1 = unlimited. Enforcement is flag-gated (RESOURCE_COUNT_CAPS_ENABLED) but the cap is always advertised." } } }, "TeamSummaryResponse": { @@ -3368,6 +3369,7 @@ const openAPISpec = `{ "paid_from_day_one": { "type": "boolean", "description": "True iff price_usd_monthly > 0. Mirrors project policy: no trial — paid tiers are paid from signup." }, "storage_limit_mb": { "type": "object", "additionalProperties": { "type": "integer" }, "description": "Per-service storage cap in MB. Keys: postgres, redis, mongodb, queue, storage, webhook, vector. -1 sentinel means 'unlimited'." }, "connections_limit": { "type": "object", "additionalProperties": { "type": "integer" }, "description": "Per-service concurrent-connection cap. Keys mirror storage_limit_mb. -1 = unlimited." }, + "resource_count_limit": { "type": "object", "additionalProperties": { "type": "integer" }, "description": "Task #55: per-service max number of active resources a team may hold. Keys: postgres, vector, redis, mongodb, storage, queue (webhook is request-capped, not count-capped). -1 = unlimited. Enforcement is flag-gated (RESOURCE_COUNT_CAPS_ENABLED) but the cap is always advertised so an agent can plan around it." }, "deployments_apps": { "type": "integer", "description": "Max number of /deploy/new apps allowed. -1 = unlimited." }, "backup_retention_days": { "type": "integer" }, "backup_restore_enabled": { "type": "boolean" }, diff --git a/internal/handlers/resource_count_cap.go b/internal/handlers/resource_count_cap.go new file mode 100644 index 00000000..dbba28dd --- /dev/null +++ b/internal/handlers/resource_count_cap.go @@ -0,0 +1,84 @@ +package handlers + +// resource_count_cap.go — shared per-service resource-count cap enforcement +// (Task #55). 
Closes the strict-≥80%-margin hole where only queue_count was +// capped: a tenant could create MANY postgres/redis/mongo/etc. resources each +// at the per-resource size cap and blow the saturated-COGS bound. +// +// The check mirrors the always-on queue_count A6 block in queue.go exactly +// (count active resources of the type for the team, compare to the per-tier cap +// from plans.yaml, return 402 + agent_action + increment a *LimitBlocked +// metric) — but is FLAG-GATED behind cfg.ResourceCountCapsEnabled +// (RESOURCE_COUNT_CAPS_ENABLED, default OFF). When the flag is off the function +// returns (false, nil) immediately and NO count query runs, so enforcement is fully +// inert and shipping it cannot surprise-break an existing heavy tenant. +// +// queue.go keeps its own inline block (always-on, predates this flag) — this +// helper covers the five newly-capped services (postgres/vector/redis/mongodb/ +// storage) so adding a service is one call site, not a copy-pasted block. + +import ( + "fmt" + "log/slog" + + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "instant.dev/internal/metrics" + "instant.dev/internal/models" +) + +// enforceResourceCountCap rejects a provision when the team is already at or +// above its per-tier active-resource count for the given service. +// +// Behaviour: +// - flag OFF (default) → returns (false, nil): caller proceeds, NO query runs. +// - limit < 0 (unlimited / fail-open) → returns (false, nil). +// - count query error → returns (true, 503): fail CLOSED on the count is +// intentional here (unlike Redis rate-limit fail-open) because the count is +// a cheap indexed COUNT on platform_db; a DB outage already fails the +// provision downstream, and we must not let a count-query blip silently +// bypass a cost cap when the operator has explicitly enabled enforcement. +// - existing >= limit → returns (true, 402 + agent_action) and increments the +// metric. +// - under cap → returns (false, nil): caller proceeds. 
+// +// The bool is "handled": when true the caller MUST return the returned error +// (which is the fiber response) and not continue provisioning. +// +// service is the resources.resource_type value AND the plans.ResourceCountLimit +// key (postgres/vector/redis/mongodb/storage) — they are the same string set. +func (h *provisionHelper) enforceResourceCountCap( + c *fiber.Ctx, teamID uuid.UUID, planTier, service, requestID string, +) (handled bool, err error) { + // Flag OFF (or a misconfigured handler with no cfg/registry) → fully inert. + // No query, no behavior change. This is the proven-inert path + // (TestResourceCountCap_FlagOffIsInert). + if h.cfg == nil || !h.cfg.ResourceCountCapsEnabled || h.plans == nil { + return false, nil + } + + limit := h.plans.ResourceCountLimit(planTier, service) + if limit < 0 { + // Unlimited (or fail-open zero-fallback / unknown tier) — no cap. + return false, nil + } + + ctx := c.UserContext() + existing, countErr := models.CountActiveResourcesByTeamAndType(ctx, h.db, teamID, service) + if countErr != nil { + slog.Error("provision.count_cap.count_failed", + "error", countErr, "service", service, "team_id", teamID.String(), "request_id", requestID) + return true, respondError(c, fiber.StatusServiceUnavailable, "quota_check_failed", + fmt.Sprintf("Failed to check %s quota", service)) + } + if existing >= limit { + metrics.ResourceCountLimitBlocked.WithLabelValues(service, planTier).Inc() + return true, respondErrorWithAgentAction(c, fiber.StatusPaymentRequired, + service+"_limit_reached", + fmt.Sprintf("Your %s plan allows %d %s resource(s). Upgrade at %s", planTier, limit, service, DefaultPricingURL), + fmt.Sprintf("Tell the user they've hit the %s tier %s cap (%d). 
Upgrade at %s for a higher limit.", planTier, service, limit, DefaultPricingURL), + DefaultPricingURL, + ) + } + return false, nil +} diff --git a/internal/handlers/resource_count_cap_test.go b/internal/handlers/resource_count_cap_test.go new file mode 100644 index 00000000..d2b5873e --- /dev/null +++ b/internal/handlers/resource_count_cap_test.go @@ -0,0 +1,225 @@ +package handlers_test + +// resource_count_cap_test.go — Task #55 per-service resource-count cap. +// +// These exercise the FLAG-GATED enforcement added to db/vector/cache/nosql/ +// storage handlers. The load-bearing tests: +// - flag OFF (default) → a team AT its cap is NOT rejected (proves inert), +// - flag ON → a team AT its cap gets 402 _limit_reached, +// - a registry-iterating guard (rule 18) asserting every count-capped service +// enforces when the flag is on (so service N+1 can't ship uncapped). +// +// Run: +// TEST_DATABASE_URL=postgres://instant:instant@localhost:5432/instant_platform?sslmode=disable \ +// go test ./internal/handlers/... -run 'ResourceCountCap' -v -count=1 + +import ( + "context" + "database/sql" + "errors" + "fmt" + "net/http" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "instant.dev/internal/config" + "instant.dev/internal/handlers" + "instant.dev/internal/middleware" + "instant.dev/internal/plans" + "instant.dev/internal/testhelpers" +) + +// sqlExec is the subset of *sql.DB the seed helper needs. +type sqlExec interface { + ExecContext(context.Context, string, ...any) (sql.Result, error) +} + +// countCappedServices is the service set with a per-tier resource-count cap. +// Mirrors handlers.countCapResourceTypes minus queue (queue has its own always- +// on A6 block + its own route name). These are the five flag-gated services. 
+var countCappedServices = []struct { + service string // resources.resource_type AND plans key + route string // POST route + errCode string // expected 402 error code +}{ + {"postgres", "/db/new", "postgres_limit_reached"}, + {"vector", "/vector/new", "vector_limit_reached"}, + {"redis", "/cache/new", "redis_limit_reached"}, + {"mongodb", "/nosql/new", "mongodb_limit_reached"}, + {"storage", "/storage/new", "storage_limit_reached"}, +} + +// newCountCapApp builds a Fiber app exercising the given count-capped service's +// POST route with the count-caps flag set to `flagOn`. +// +// Most services route through the shared NewTestAppWithServices test app. Storage +// is special-cased: its handler 503s at the top when the storage provider is nil +// (the shared app passes nil), so the count cap — which fires BEFORE any backend +// call — would never be reached. We give storage an OFFLINE do-spaces provider +// (newDOSpacesProvider) purely to get past the nil-provider guard; the cap still +// returns 402 before the provider is ever touched. +func newCountCapApp(t *testing.T, db *sql.DB, service string, flagOn bool) (*fiber.App, func()) { + t.Helper() + mut := func(c *config.Config) { c.ResourceCountCapsEnabled = flagOn } + if service != "storage" { + return testhelpers.NewTestAppWithServices(t, db, nil, + "postgres,vector,redis,mongodb,storage,queue", mut) + } + + // Storage: real handler + offline provider so newStorageAuthenticated runs. 
+ rdb, rcleanup := testhelpers.SetupTestRedis(t) + cfg := testStorageCapConfig(flagOn) + storageH := handlers.NewStorageHandler(db, rdb, cfg, newDOSpacesProvider(t), plans.Default()) + app := fiber.New(fiber.Config{ + ProxyHeader: "X-Forwarded-For", + ErrorHandler: func(c *fiber.Ctx, err error) error { + if errors.Is(err, handlers.ErrResponseWritten) { + return nil + } + return c.SendStatus(fiber.StatusInternalServerError) + }, + }) + app.Use(middleware.RequestID()) + app.Use(middleware.Fingerprint()) + app.Post("/storage/new", middleware.OptionalAuth(cfg), storageH.NewStorage) + return app, func() { rcleanup() } +} + +// testStorageCapConfig builds a config for the storage cap app with the flag set. +func testStorageCapConfig(flagOn bool) *config.Config { + cfg := storageProvConfig(false) // reuse the offline-storage config from storage_provarms_test.go + cfg.EnabledServices = "storage" + cfg.ResourceCountCapsEnabled = flagOn + return cfg +} + +// seedN inserts n active resources of resourceType for the team via raw SQL. +func seedN(t *testing.T, db sqlExec, teamID, resourceType, tier string, n int) { + t.Helper() + for i := 0; i < n; i++ { + _, err := db.ExecContext(context.Background(), + `INSERT INTO resources (team_id, resource_type, name, tier, status) + VALUES ($1, $2, $3, $4, 'active')`, + teamID, resourceType, fmt.Sprintf("seed-%s-%d", resourceType, i), tier) + require.NoError(t, err, "seed %s #%d", resourceType, i) + } +} + +// TestResourceCountCap_FlagOffIsInert is THE inert-path proof: with the flag +// unset (default), a hobby team already AT its per-service cap is NOT rejected +// with a *_limit_reached 402. Zero behavior change when off. 
+func TestResourceCountCap_FlagOffIsInert(t *testing.T) { + requireTestDB(t) + + planReg := plans.Default() + for _, svc := range countCappedServices { + t.Run(svc.service, func(t *testing.T) { + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + ensureStackTables(t, db) + + limit := planReg.ResourceCountLimit("hobby", svc.service) + require.Greater(t, limit, 0, "hobby %s_count must be positive", svc.service) + + // Flag OFF (default). + app, cleanup := newCountCapApp(t, db, svc.service, false) + defer cleanup() + + teamID := testhelpers.MustCreateTeamDB(t, db, "hobby") + jwt := testhelpers.MustSignSessionJWT(t, "u-off-"+svc.service, teamID, "off-"+svc.service+"@example.com") + + // Seed the team AT its cap. + seedN(t, db, teamID, svc.service, "hobby", limit) + + resp := postWithAuthJSONTier(t, app, svc.route, jwt, `{"name":"over-cap"}`) + defer resp.Body.Close() + + // The provision may 503 (no real backend in tests), but it must + // NEVER be the count-cap 402 while the flag is off. + if resp.StatusCode == http.StatusPaymentRequired { + b := decodeTierErrBody(t, resp) + assert.NotEqual(t, svc.errCode, b.Error, + "flag OFF: %s at cap must NOT be rejected by the count cap (inert)", svc.service) + } + }) + } +} + +// TestResourceCountCap_FlagOnAtLimitRejects is the rule-18 registry-iterating +// guard: with the flag ON, a hobby team AT its cap gets 402 _limit_reached +// for EVERY count-capped service. If a new count-capped service ships without +// wiring the enforcement block, its sub-test fails here. +func TestResourceCountCap_FlagOnAtLimitRejects(t *testing.T) { + requireTestDB(t) + + planReg := plans.Default() + for _, svc := range countCappedServices { + t.Run(svc.service, func(t *testing.T) { + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + ensureStackTables(t, db) + + limit := planReg.ResourceCountLimit("hobby", svc.service) + require.Greater(t, limit, 0, "hobby %s_count must be positive", svc.service) + + // Flag ON. 
+ app, cleanup := newCountCapApp(t, db, svc.service, true) + defer cleanup() + + teamID := testhelpers.MustCreateTeamDB(t, db, "hobby") + jwt := testhelpers.MustSignSessionJWT(t, "u-on-"+svc.service, teamID, "on-"+svc.service+"@example.com") + + seedN(t, db, teamID, svc.service, "hobby", limit) + + resp := postWithAuthJSONTier(t, app, svc.route, jwt, `{"name":"over-cap"}`) + defer resp.Body.Close() + + require.Equal(t, http.StatusPaymentRequired, resp.StatusCode, + "flag ON: %s team AT cap (%d) must get 402", svc.service, limit) + b := decodeTierErrBody(t, resp) + assert.False(t, b.OK) + assert.Equal(t, svc.errCode, b.Error, + "flag ON: %s at cap must return %s", svc.service, svc.errCode) + }) + } +} + +// TestResourceCountCap_FlagOnUnderLimitPasses verifies the count cap does NOT +// reject a team under its cap when the flag is on (the provision may still 503 +// for lack of a real backend, but not with a count-cap 402). +func TestResourceCountCap_FlagOnUnderLimitPasses(t *testing.T) { + requireTestDB(t) + + planReg := plans.Default() + for _, svc := range countCappedServices { + t.Run(svc.service, func(t *testing.T) { + db, cleanDB := testhelpers.SetupTestDB(t) + defer cleanDB() + ensureStackTables(t, db) + + limit := planReg.ResourceCountLimit("hobby", svc.service) + require.Greater(t, limit, 1, "hobby %s_count must be > 1 for an under-limit state", svc.service) + + app, cleanup := newCountCapApp(t, db, svc.service, true) + defer cleanup() + + teamID := testhelpers.MustCreateTeamDB(t, db, "hobby") + jwt := testhelpers.MustSignSessionJWT(t, "u-under-"+svc.service, teamID, "under-"+svc.service+"@example.com") + + // Seed UNDER the cap. 
+ seedN(t, db, teamID, svc.service, "hobby", limit-1) + + resp := postWithAuthJSONTier(t, app, svc.route, jwt, `{"name":"under-cap"}`) + defer resp.Body.Close() + + if resp.StatusCode == http.StatusPaymentRequired { + b := decodeTierErrBody(t, resp) + assert.NotEqual(t, svc.errCode, b.Error, + "flag ON: %s under cap must NOT be rejected by the count cap", svc.service) + } + }) + } +} diff --git a/internal/handlers/resource_count_cap_whitebox_test.go b/internal/handlers/resource_count_cap_whitebox_test.go new file mode 100644 index 00000000..d5acd817 --- /dev/null +++ b/internal/handlers/resource_count_cap_whitebox_test.go @@ -0,0 +1,123 @@ +package handlers + +// resource_count_cap_whitebox_test.go — in-package (white-box) branch coverage +// for enforceResourceCountCap's edge paths that the HTTP-level +// resource_count_cap_test.go can't reach (the cap helper is unexported and some +// branches are only reachable with a synthetic registry / closed DB): +// - flag OFF / nil cfg / nil plans → inert, +// - limit < 0 (unlimited) → inert, +// - count-query error → 503 quota_check_failed (fail-closed when enabled). +// +// The happy "at cap → 402" and "under cap → pass" paths are covered end-to-end +// via the HTTP tests; here we exercise only the defensive/edge arms. + +import ( + "context" + "database/sql" + "os" + "path/filepath" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/valyala/fasthttp" + + "instant.dev/internal/config" + "instant.dev/internal/plans" +) + +// ctxForCap returns a fiber.Ctx bound to a throwaway app for direct handler +// calls. UserContext is set to context.Background() so database/sql doesn't call +// (*fasthttp.RequestCtx).Done() (which panics on a bare RequestCtx). 
+func ctxForCap(t *testing.T) (*fiber.Ctx, func()) { + t.Helper() + app := fiber.New() + c := app.AcquireCtx(&fasthttp.RequestCtx{}) + c.SetUserContext(context.Background()) + return c, func() { app.ReleaseCtx(c) } +} + +func TestEnforceResourceCountCap_FlagOffInert_Whitebox(t *testing.T) { + c, done := ctxForCap(t) + defer done() + + h := &provisionHelper{ + cfg: &config.Config{ResourceCountCapsEnabled: false}, + plans: plans.Default(), + } + handled, err := h.enforceResourceCountCap(c, uuid.New(), "hobby", "postgres", "rq") + assert.False(t, handled, "flag OFF must be inert") + assert.NoError(t, err) + + // nil cfg → also inert (defensive). + h2 := &provisionHelper{cfg: nil, plans: plans.Default()} + handled2, err2 := h2.enforceResourceCountCap(c, uuid.New(), "hobby", "postgres", "rq") + assert.False(t, handled2) + assert.NoError(t, err2) + + // nil plans → inert (defensive). + h3 := &provisionHelper{cfg: &config.Config{ResourceCountCapsEnabled: true}, plans: nil} + handled3, err3 := h3.enforceResourceCountCap(c, uuid.New(), "hobby", "postgres", "rq") + assert.False(t, handled3) + assert.NoError(t, err3) +} + +func TestEnforceResourceCountCap_UnlimitedIsInert_Whitebox(t *testing.T) { + c, done := ctxForCap(t) + defer done() + + // Build a registry whose hobby postgres_count is -1 (unlimited) so the + // limit<0 branch is reached. ResourceCountLimit returns the field verbatim + // when negative. 
+ const unlimitedYAML = ` +plans: + anonymous: + display_name: "Anonymous" + price_monthly_cents: 0 + limits: + provisions_per_day: 5 + hobby: + display_name: "Hobby" + price_monthly_cents: 900 + limits: + provisions_per_day: -1 + postgres_count: -1 +` + dir := t.TempDir() + path := filepath.Join(dir, "unlimited.yaml") + require.NoError(t, os.WriteFile(path, []byte(unlimitedYAML), 0o600)) + reg, err := plans.Load(path) + require.NoError(t, err) + + h := &provisionHelper{ + cfg: &config.Config{ResourceCountCapsEnabled: true}, + plans: reg, + db: nil, // never reached — unlimited returns before the DB query + } + handled, capErr := h.enforceResourceCountCap(c, uuid.New(), "hobby", "postgres", "rq") + assert.False(t, handled, "unlimited (-1) cap must be inert and NOT query the DB") + assert.NoError(t, capErr) +} + +func TestEnforceResourceCountCap_CountErrorFailsClosed_Whitebox(t *testing.T) { + c, done := ctxForCap(t) + defer done() + + // A closed *sql.DB makes CountActiveResourcesByTeamAndType error → the + // helper fails CLOSED with handled=true (the caller returns the 503). 
+ closed, err := sql.Open("postgres", "postgres://invalid:invalid@127.0.0.1:1/none?sslmode=disable") + require.NoError(t, err) + require.NoError(t, closed.Close()) + + h := &provisionHelper{ + cfg: &config.Config{ResourceCountCapsEnabled: true}, + plans: plans.Default(), + db: closed, + } + handled, capErr := h.enforceResourceCountCap(c, uuid.New(), "hobby", "postgres", "rq") + assert.True(t, handled, "a count-query error with the flag on must be handled (fail closed)") + assert.ErrorIs(t, capErr, ErrResponseWritten, "respondError returns the written sentinel") + assert.Equal(t, fiber.StatusServiceUnavailable, c.Response().StatusCode()) +} diff --git a/internal/handlers/storage.go b/internal/handlers/storage.go index ff0a2634..7b07578d 100644 --- a/internal/handlers/storage.go +++ b/internal/handlers/storage.go @@ -476,6 +476,14 @@ func (h *StorageHandler) newStorageAuthenticated( } } + // Task #55: per-tier storage *count* cap (flag-gated, default OFF). This is + // distinct from the storage-bytes quota above (total bytes across buckets): + // it caps the NUMBER of storage resources so a tenant can't open many + // prefix-scoped buckets each near the byte cap. Mirrors queue.go. + if handled, capErr := h.enforceResourceCountCap(c, teamUUID, team.PlanTier, models.ResourceTypeStorage, requestID); handled { + return capErr + } + resource, err := models.CreateResource(ctx, h.db, models.CreateResourceParams{ TeamID: &teamUUID, ResourceType: "storage", diff --git a/internal/handlers/vector.go b/internal/handlers/vector.go index 835f9ccf..71365929 100644 --- a/internal/handlers/vector.go +++ b/internal/handlers/vector.go @@ -469,6 +469,11 @@ func (h *VectorHandler) newVectorAuthenticated( tier = "growth" } + // Task #55: per-tier vector count cap (flag-gated, default OFF). Mirrors queue.go. 
+ if handled, capErr := h.enforceResourceCountCap(c, teamUUID, team.PlanTier, models.ResourceTypeVector, requestID); handled { + return capErr + } + parentRootID, perr := resolveFamilyParent(c, h.db, parentResourceID, teamUUID, models.ResourceTypeVector, env) if perr != nil { return perr diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 75374ed8..c2723d2d 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -209,6 +209,18 @@ var ( Help: "Queue provision attempts rejected by per-tier queue_count cap", }, []string{"team_tier"}) + // ResourceCountLimitBlocked counts provision attempts rejected by the + // per-tier per-service resource-count cap (Task #55). One CounterVec with a + // `service` label (postgres/vector/redis/mongodb/storage) rather than five + // near-identical metrics — a single dashboard tile + alert covers them all. + // Lazy *Vec: a label pair only appears at /metrics after its first block, + // which only happens when RESOURCE_COUNT_CAPS_ENABLED is on and a team is + // at or over its cap; expect zero series until the operator enables the flag. + ResourceCountLimitBlocked = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "instant_resource_count_limit_blocked_total", + Help: "Provision attempts rejected by per-tier per-service resource-count cap (Task #55, flag-gated)", + }, []string{"service", "team_tier"}) + // DeployTeardownMarkFailed counts teardown-reconciler sweeps where the // compute was destroyed but MarkDeploymentTornDown failed to flip the // row to 'deleted'. 
The row is then retried forever — a persistently diff --git a/internal/plans/strict_margin_finite_limits_test.go b/internal/plans/strict_margin_finite_limits_test.go index 6506af3d..e8511f2c 100644 --- a/internal/plans/strict_margin_finite_limits_test.go +++ b/internal/plans/strict_margin_finite_limits_test.go @@ -56,6 +56,14 @@ func TestStrictMargin_NoUnlimitedResourceLimits(t *testing.T) { "mongodb_ops_per_minute": l.MongoOpsPerMinute, "queue_storage_mb": l.QueueStorageMB, "queue_count": l.QueueCount, + // Task #55: per-service resource-count caps must also be finite + // (>= 0). A -1 here would re-open the unbounded-COGS hole this + // task closed for non-queue services. + "postgres_count": l.PostgresCount, + "vector_count": l.VectorCount, + "redis_count": l.RedisCount, + "mongodb_count": l.MongoCount, + "storage_count": l.StorageCount, "storage_storage_mb": l.StorageStorageMB, "webhook_requests_stored": l.WebhookRequestsStored, "team_members": l.TeamMembers, diff --git a/plans.yaml b/plans.yaml index ed01a199..0fec1219 100644 --- a/plans.yaml +++ b/plans.yaml @@ -30,6 +30,14 @@ plans: # mean "none" — anon/free get exactly 1 queue, bounded by the 64 MB slice. # Hard-cap-only; no metered overage. queue_count: 1 + # Task #55 resource-count caps (flag-gated, default OFF via + # RESOURCE_COUNT_CAPS_ENABLED). anonymous/free are also fingerprint-dedup- + # gated; 1 each keeps the saturated-COGS bound. Mirror common defaultYAML. + postgres_count: 1 + vector_count: 1 + redis_count: 1 + mongodb_count: 1 + storage_count: 1 storage_storage_mb: 10 webhook_requests_stored: 100 team_members: 1 @@ -77,6 +85,14 @@ plans: queue_storage_mb: 64 # queue_count 1 (was -1) — mirrors anonymous. 0 means unlimited, so 1. queue_count: 1 + # Task #55 resource-count caps (flag-gated, default OFF via + # RESOURCE_COUNT_CAPS_ENABLED). anonymous/free are also fingerprint-dedup- + # gated; 1 each keeps the saturated-COGS bound. Mirror common defaultYAML. 
+ postgres_count: 1 + vector_count: 1 + redis_count: 1 + mongodb_count: 1 + storage_count: 1 storage_storage_mb: 10 webhook_requests_stored: 100 team_members: 1 @@ -117,6 +133,13 @@ plans: # strict-80% margin redesign (2026-06-05): queue trimmed 5120 → 2048 MB. queue_storage_mb: 2048 queue_count: 3 + # Task #55 resource-count caps. hobby budget=$1.80 (20% of $9); redis + # 2×50MB×$6.50/GB=$0.65 — well within budget, redis kept conservative. + postgres_count: 2 + vector_count: 2 + redis_count: 2 + mongodb_count: 2 + storage_count: 2 storage_storage_mb: 512 webhook_requests_stored: 1000 team_members: 1 @@ -188,6 +211,13 @@ plans: mongodb_ops_per_minute: 1000 queue_storage_mb: 5120 queue_count: 5 + # Task #55 resource-count caps. hobby_plus budget=$3.80; redis + # 3×50MB×$6.50/GB=$0.98 — conservative; every service ≤ its budget max. + postgres_count: 3 + vector_count: 3 + redis_count: 3 + mongodb_count: 3 + storage_count: 3 storage_storage_mb: 5120 webhook_requests_stored: 5000 team_members: 1 @@ -241,6 +271,13 @@ plans: mongodb_ops_per_minute: 1000 queue_storage_mb: 5120 queue_count: 5 + # Task #55 resource-count caps. hobby_plus budget=$3.80; redis + # 3×50MB×$6.50/GB=$0.98 — conservative; every service ≤ its budget max. + postgres_count: 3 + vector_count: 3 + redis_count: 3 + mongodb_count: 3 + storage_count: 3 storage_storage_mb: 5120 webhook_requests_stored: 5000 team_members: 1 @@ -285,6 +322,13 @@ plans: # strict-80% margin redesign (2026-06-05): queue trimmed 5120 → 2048 MB (mirror hobby). queue_storage_mb: 2048 queue_count: 3 + # Task #55 resource-count caps. hobby budget=$1.80 (20% of $9); redis + # 2×50MB×$6.50/GB=$0.65 — well within budget, redis kept conservative. + postgres_count: 2 + vector_count: 2 + redis_count: 2 + mongodb_count: 2 + storage_count: 2 storage_storage_mb: 512 webhook_requests_stored: 1000 team_members: 1 @@ -331,6 +375,14 @@ plans: # strict-80% margin redesign (2026-06-05): queue trimmed 10240 → 5120 MB. 
queue_storage_mb: 5120 queue_count: 20 + # Task #55 resource-count caps. pro budget=$9.80; redis is binding: + # 512MB×$6.50/GB=$3.25/res → max 3 in budget, so redis_count=3. pg/vec + # 10GB×$0.15=$1.50/res → 5 ≤ 6.5 budget-max. storage 50GB×$0.02=$1/res → 5. + postgres_count: 5 + vector_count: 5 + redis_count: 3 + mongodb_count: 5 + storage_count: 5 storage_storage_mb: 51200 webhook_requests_stored: 10000 team_members: 5 @@ -376,6 +428,14 @@ plans: # strict-80% margin redesign (2026-06-05): queue trimmed 10240 → 5120 MB (mirror pro). queue_storage_mb: 5120 queue_count: 20 + # Task #55 resource-count caps. pro budget=$9.80; redis is binding: + # 512MB×$6.50/GB=$3.25/res → max 3 in budget, so redis_count=3. pg/vec + # 10GB×$0.15=$1.50/res → 5 ≤ 6.5 budget-max. storage 50GB×$0.02=$1/res → 5. + postgres_count: 5 + vector_count: 5 + redis_count: 3 + mongodb_count: 5 + storage_count: 5 storage_storage_mb: 51200 webhook_requests_stored: 10000 team_members: 5 @@ -426,6 +486,15 @@ plans: mongodb_ops_per_minute: 50000 queue_storage_mb: 40960 queue_count: 100 + # Task #55 resource-count caps. team budget=$39.80; redis binding: + # 1.5GB×$6.50/GB=$9.75/res → max 4 in budget, redis_count=4. pg 50GB× + # $0.15=$7.50/res → 5 ≤ 5.3 budget-max; mongo 40GB×$0.15=$6 → 6 ≤ 6.6; + # storage 300GB×$0.02=$6 → 6 ≤ 6.6; vector 30GB×$0.15=$4.5 → 8 ≤ 8.8. + postgres_count: 5 + vector_count: 8 + redis_count: 4 + mongodb_count: 6 + storage_count: 6 storage_storage_mb: 307200 webhook_requests_stored: 100000 team_members: 25 @@ -474,6 +543,15 @@ plans: mongodb_ops_per_minute: 50000 queue_storage_mb: 40960 queue_count: 100 + # Task #55 resource-count caps. team budget=$39.80; redis binding: + # 1.5GB×$6.50/GB=$9.75/res → max 4 in budget, redis_count=4. pg 50GB× + # $0.15=$7.50/res → 5 ≤ 5.3 budget-max; mongo 40GB×$0.15=$6 → 6 ≤ 6.6; + # storage 300GB×$0.02=$6 → 6 ≤ 6.6; vector 30GB×$0.15=$4.5 → 8 ≤ 8.8. 
+ postgres_count: 5 + vector_count: 8 + redis_count: 4 + mongodb_count: 6 + storage_count: 6 storage_storage_mb: 307200 webhook_requests_stored: 100000 team_members: 25 @@ -540,6 +618,15 @@ plans: mongodb_ops_per_minute: 50000 queue_storage_mb: 20480 queue_count: 50 + # Task #55 resource-count caps. growth budget=$19.80; redis binding: + # 1GB×$6.50/GB=$6.50/res → max 3 in budget, redis_count=3. pg 20GB× + # $0.15=$3/res → 6 ≤ 6.6 budget-max; mongo 20GB×$0.15=$3 → 6 ≤ 6.6; + # storage 150GB×$0.02=$3 → 6 ≤ 6.6; vector 10GB×$0.15=$1.5 → 6 ≤ 13.2. + postgres_count: 6 + vector_count: 6 + redis_count: 3 + mongodb_count: 6 + storage_count: 6 storage_storage_mb: 153600 webhook_requests_stored: 100000 team_members: 10 From e452f671c8c0097e97c89705f191ff1e52d1a6b9 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Sat, 6 Jun 2026 00:07:07 +0530 Subject: [PATCH 2/2] chore(openapi): regenerate snapshot for resource_count_limit + count_limit (Task #55) Co-Authored-By: Claude Opus 4.8 (1M context) --- openapi.snapshot.json | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/openapi.snapshot.json b/openapi.snapshot.json index 1fbb25b7..502e7375 100644 --- a/openapi.snapshot.json +++ b/openapi.snapshot.json @@ -2206,6 +2206,13 @@ "description": "Monthly price in whole USD (cents/100). 0 for free/anonymous tiers.", "type": "integer" }, + "resource_count_limit": { + "additionalProperties": { + "type": "integer" + }, + "description": "Task #55: per-service max number of active resources a team may hold. Keys: postgres, vector, redis, mongodb, storage, queue (webhook is request-capped, not count-capped). -1 = unlimited. Enforcement is flag-gated (RESOURCE_COUNT_CAPS_ENABLED) but the cap is always advertised so an agent can plan around it.", + "type": "object" + }, "rpo_minutes": { "description": "Recovery Point Objective in minutes — the maximum window of data loss a restore can incur. 
0 means no backup/RPO guarantee for the tier.", "type": "integer" @@ -2256,7 +2263,11 @@ "type": "integer" }, "count": { - "description": "Current count. Present on deployments/webhooks/vault/members.", + "description": "Current count. Present on deployments/webhooks/vault/members, and (Task #55) on postgres/redis/mongodb as the active-resource count alongside bytes.", + "type": "integer" + }, + "count_limit": { + "description": "Task #55: per-tier resource-COUNT cap for the byte-metered storage services (postgres/redis/mongodb), where the limit field is unused. -1 = unlimited. Enforcement is flag-gated (RESOURCE_COUNT_CAPS_ENABLED) but the cap is always advertised.", "type": "integer" }, "limit": {