InstaNode-dev · mastermanas805 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/internal/config/config.go b/internal/config/config.go
@@ -197,6 +197,15 @@ type Config struct {
 	// Off → /deploy/new rejects source=git with 501; tarball/image unaffected.
 	DeploySourceGitEnabled bool
 
+	// DeployScaleToZeroEnabled gates scale-to-zero (idle descheduling, Task #54).
+	// Default FALSE. On → the worker idle-scaler patches idle Deployments to
+	// replicas=0 and the api wake path (POST /deploy/:id/wake) brings them back.
+	// Off → the wake endpoint returns 501 and nothing in the api scales an app.
+	// The worker idle-scaler is gated separately by its own
+	// DEPLOY_SCALE_TO_ZERO_ENABLED env; the two services only share the flag name.
+	// Enabling it is an operator action (see infra runbook) after a canary.
+	DeployScaleToZeroEnabled bool
+
 	// ResourceCountCapsEnabled gates per-service resource-count enforcement
 	// (Task #55). Default FALSE: when off, the count-check block in every
 	// provision handler (db/vector/cache/nosql/storage) is skipped entirely —
@@ -512,6 +521,16 @@ func Load() *Config {
 		cfg.DeploySourceGitEnabled = false
 	}
 
+	// DEPLOY_SCALE_TO_ZERO_ENABLED: default FALSE (off until operator canary).
+	// Shared flag name with the worker idle-scaler; the api half gates the wake
+	// endpoint + any api-initiated scale, the worker half gates the idle sweep.
+	switch strings.ToLower(strings.TrimSpace(os.Getenv("DEPLOY_SCALE_TO_ZERO_ENABLED"))) {
+	case "true", "1", "yes":
+		cfg.DeployScaleToZeroEnabled = true
+	default:
+		cfg.DeployScaleToZeroEnabled = false
+	}
+
 	// RESOURCE_COUNT_CAPS_ENABLED: default FALSE (Task #55). Off → the per-service
 	// count-check block in every provision handler is skipped (zero behavior
 	// change). On → over-cap provisions get 402. Operator action after a usage

diff --git a/internal/config/config_test.go b/internal/config/config_test.go
@@ -63,6 +63,7 @@ func allKeys() []string {
 		"METRICS_TOKEN", "DASHBOARD_BASE_URL", "API_PUBLIC_URL",
 		"DELETION_CONFIRMATION_TTL_MINUTES", "FAMILY_BINDINGS_ENABLED",
 		"DEPLOY_SOURCE_IMAGE_ENABLED", "DEPLOY_SOURCE_GIT_ENABLED",
+		"DEPLOY_SCALE_TO_ZERO_ENABLED",
 		"RESOURCE_COUNT_CAPS_ENABLED",
 		"GITHUB_APP_ENABLED", "GITHUB_APP_ID", "GITHUB_APP_SLUG", "GITHUB_APP_PRIVATE_KEY",
 		"GITHUB_APP_WEBHOOK_SECRET", "GITHUB_APP_CLIENT_ID", "GITHUB_APP_CLIENT_SECRET",
@@ -388,6 +389,21 @@ func TestLoad_DeploySourceGitEnabled(t *testing.T) {
 	}
 }
 
+// TestLoad_DeployScaleToZeroEnabled verifies how Load parses the
+// DEPLOY_SCALE_TO_ZERO_ENABLED env var: "true", "1" and "yes" (in any case,
+// with surrounding whitespace) enable the flag; every other value, including
+// the empty string, leaves it disabled.
+func TestLoad_DeployScaleToZeroEnabled(t *testing.T) {
+	// loadWith applies the baseline env with the flag set to raw and reports
+	// the value Load parsed for it.
+	loadWith := func(raw string) bool {
+		applyBaselineEnv(t, map[string]string{"DEPLOY_SCALE_TO_ZERO_ENABLED": raw})
+		return Load().DeployScaleToZeroEnabled
+	}
+
+	truthy := []string{"true", "1", "yes", "TRUE", "  Yes  "}
+	for _, raw := range truthy {
+		if enabled := loadWith(raw); !enabled {
+			t.Errorf("DEPLOY_SCALE_TO_ZERO_ENABLED=%q should enable", raw)
+		}
+	}
+
+	falsy := []string{"false", "0", "no", "maybe", ""}
+	for _, raw := range falsy {
+		if enabled := loadWith(raw); enabled {
+			t.Errorf("DEPLOY_SCALE_TO_ZERO_ENABLED=%q should stay disabled", raw)
+		}
+	}
+}
+
 func TestLoad_ResourceCountCapsEnabled(t *testing.T) {
 	for _, val := range []string{"true", "1", "yes", "TRUE", "  Yes  "} {
 		applyBaselineEnv(t, map[string]string{"RESOURCE_COUNT_CAPS_ENABLED": val})

diff --git a/internal/db/migrations/068_deploy_scale_to_zero.sql b/internal/db/migrations/068_deploy_scale_to_zero.sql
@@ -0,0 +1,68 @@
+-- 068_deploy_scale_to_zero.sql — scale-to-zero (idle descheduling) state columns.
+--
+-- WHY: a deployed-but-idle app costs a full pod's worth of compute even when it
+-- serves zero requests. Scale-to-zero (Task #54) lets the worker patch an idle
+-- Deployment to replicas=0 (~$0 compute) and wake it back to replicas=1 on
+-- demand. This migration adds the per-deployment state the idle-scaler and the
+-- wake path read/write. The whole feature is gated behind the
+-- DEPLOY_SCALE_TO_ZERO_ENABLED worker env flag (default OFF), so these columns
+-- are inert — populated at create-time but acted upon only when an operator
+-- enables the flag.
+--
+-- Columns:
+--   last_activity_at  TIMESTAMPTZ — floor "last known activity" marker. Set to
+--                                   now() at create-time, bumped on every wake
+--                                   and on redeploy. The idle-scaler deschedules
+--                                   a Deployment only when
+--                                   now() - last_activity_at > idle_threshold.
+--
+--                                   v1 NOTE: the api is NOT in the request path
+--                                   (apps are served by k8s Ingress straight to
+--                                   the per-app Service), and no nginx-ingress
+--                                   request-total scrape is wired yet, so the
+--                                   honest "activity" signal v1 captures is
+--                                   deploy / redeploy / explicit-wake events —
+--                                   NOT per-HTTP-request traffic. A follow-up
+--                                   (documented in the worker job header) will
+--                                   wire an ingress request-counter to bump this
+--                                   column on real traffic for true
+--                                   traffic-based idle detection.
+--
+--   scaled_to_zero    BOOLEAN     — true while the app is currently descheduled
+--                                   (replicas=0). The wake path reads this to
+--                                   decide whether a scale-up is needed; the
+--                                   dashboard/agent reads it to show "sleeping".
+--                                   The idle-scaler sets it true on scale-down,
+--                                   the wake path sets it false on scale-up.
+--
+--   always_on         BOOLEAN     — per-app opt-out. A pinned app (an operator
+--                                   or Pro+ user who wants zero cold-starts) is
+--                                   never descheduled by the idle-scaler. Default
+--                                   false → eligible for scale-to-zero.
+--
+-- Idempotent + forward-only. Existing rows get last_activity_at backfilled from
+-- updated_at (their most recent known activity) so the idle-scaler does not
+-- immediately deschedule every pre-existing deploy the first time the flag is
+-- turned on; scaled_to_zero / always_on default to false.
+
+-- Add the three scale-to-zero state columns. IF NOT EXISTS makes a re-run a
+-- no-op. last_activity_at is added nullable with no default, which lets the
+-- backfill below select exactly the rows that have not been seeded yet; the
+-- two boolean flags are NOT NULL and default to false.
+ALTER TABLE deployments
+    ADD COLUMN IF NOT EXISTS last_activity_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS scaled_to_zero   BOOLEAN NOT NULL DEFAULT false,
+    ADD COLUMN IF NOT EXISTS always_on        BOOLEAN NOT NULL DEFAULT false;
+
+-- Backfill: seed last_activity_at from updated_at for every pre-existing row so
+-- the very first idle-scaler tick after the flag is enabled treats existing
+-- deploys as "recently active" rather than immediately idle. New rows set
+-- last_activity_at = now() at INSERT time (see CreateDeployment).
+-- COALESCE falls back to created_at, then now(), when updated_at is NULL.
+-- The WHERE clause restricts the write to rows still NULL, so re-running the
+-- migration does not overwrite an activity stamp that was already set.
+UPDATE deployments
+SET    last_activity_at = COALESCE(updated_at, created_at, now())
+WHERE  last_activity_at IS NULL;
+
+-- Partial index: the idle-scaler scans for healthy, eligible, not-yet-zeroed
+-- deployments ordered by activity. Excluding always_on + already-zeroed +
+-- terminal rows keeps the index narrow and the scan cheap.
+-- NOTE(review): a query can only use this index if its WHERE clause implies
+-- all three predicates below — confirm the idle-scaler query matches them.
+CREATE INDEX IF NOT EXISTS idx_deployments_idle_candidates
+    ON deployments (last_activity_at)
+    WHERE status = 'healthy'
+      AND scaled_to_zero = false
+      AND always_on = false;
diff --git a/internal/handlers/deploy.go b/internal/handlers/deploy.go
@@ -572,6 +572,11 @@ func deploymentToMapWithDB(d *models.Deployment, db *sql.DB) fiber.Map {
 		// image_ref is echoed (caller-supplied, no secret); registry_creds is
 		// NEVER returned — only registry_creds_set lifecycle metadata.
 		"source": deploymentSourceOrDefault(d.Source),
+		// Scale-to-zero state (migration 068). scaled_to_zero=true → the app is
+		// asleep (replicas=0); the dashboard/agent surfaces "sleeping — wake"
+		// and POSTs /deploy/:id/wake. always_on=true → pinned (never descheduled).
+		"scaled_to_zero": d.ScaledToZero,
+		"always_on":      d.AlwaysOn,
 	}
 	if d.Source == "image" {
 		m["image_ref"] = d.ImageRef

diff --git a/internal/handlers/deploy_buildfailed_autopsy_test.go b/internal/handlers/deploy_buildfailed_autopsy_test.go
@@ -56,6 +56,9 @@ func (m *mockProvider) Redeploy(_ context.Context, _ string, _ []byte, _ map[str
 func (m *mockProvider) UpdateAccessControl(_ context.Context, _ string, _ bool, _ []string) error {
 	panic("mockProvider.UpdateAccessControl: not expected in this test")
 }
+// Scale is a stub that panics: the tests using mockProvider never expect a
+// scale call, so reaching this method fails loudly instead of passing silently.
+// NOTE(review): presumably added so mockProvider keeps satisfying the compute
+// provider interface after Scale was introduced — confirm against that interface.
+func (m *mockProvider) Scale(_ context.Context, _ string, _ int32) error {
+	panic("mockProvider.Scale: not expected in this test")
+}
 
 // mockBuildLogFetcher wraps mockProvider and adds FetchBuildLogs so the handler
 // code can type-assert to compute.BuildLogFetcher.

diff --git a/internal/handlers/deploy_redeploy_inplace_mock_test.go b/internal/handlers/deploy_redeploy_inplace_mock_test.go
@@ -77,6 +77,7 @@ var deploymentColumnsList = []string{
 	"expires_at", "ttl_policy", "reminders_sent", "last_reminder_at",
 	"source", "image_ref", "registry_creds_enc",
 	"git_url", "git_ref", "git_token_enc",
+	"last_activity_at", "scaled_to_zero", "always_on",
 }
 
 // redeployMockApp wires a minimal Fiber app that drives DeployHandler.New
@@ -256,6 +257,7 @@ func TestDeployNew_Redeploy_WrongTeam_DefenceInDepth(t *testing.T) {
 			sql.NullTime{}, "permanent", 0, sql.NullTime{}, // ttl_*
 			"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
 			"", "", "", // git_url, git_ref, git_token_enc (mig 065)
+			sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
 		))
 
 	body, ct := multipartRedeployMockBody(t, map[string]string{
@@ -328,14 +330,15 @@ func TestDeployNew_Redeploy_UpdateStatusError_StillAccepts(t *testing.T) {
 			sql.NullTime{}, "permanent", 0, sql.NullTime{},
 			"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
 			"", "", "", // git_url, git_ref, git_token_enc (mig 065)
+			sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
 		))
 
 	// MarkDeploymentBuilding (guarded CAS) → driver error. The handler must
 	// slog.Warn and CONTINUE (NOT return 5xx) — a driver error is
 	// non-determinate (we can't tell whether the flip landed), and
 	// runRedeployAsync will reconcile the row later. Only an explicit 0-row
 	// CAS miss means "reaped concurrently, return 409".
-	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
+	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
 		WithArgs(rowID).
 		WillReturnError(errMockRedeployDriver)
 
@@ -420,6 +423,7 @@ func TestDeployNew_Redeploy_EmptyProviderID_Returns409(t *testing.T) {
 			sql.NullTime{}, "permanent", 0, sql.NullTime{},
 			"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
 			"", "", "", // git_url, git_ref, git_token_enc (mig 065)
+			sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
 		))
 
 	body, ct := multipartRedeployMockBody(t, map[string]string{
@@ -486,10 +490,11 @@ func TestDeployNew_Redeploy_CASMiss_Returns409(t *testing.T) {
 			sql.NullTime{}, "permanent", 0, sql.NullTime{},
 			"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
 			"", "", "", // git_url, git_ref, git_token_enc (mig 065)
+			sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
 		))
 
 	// Guarded CAS matches 0 rows — the reaper won the race. Handler 409s.
-	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
+	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
 		WithArgs(rowID).
 		WillReturnResult(sqlmock.NewResult(0, 0))
 
@@ -597,10 +602,11 @@ func TestDeployRedeploy_ByID_CASMiss_Returns409(t *testing.T) {
 			sql.NullTime{}, "permanent", 0, sql.NullTime{},
 			"tarball", "", "",
 			"", "", "",
+			sql.NullTime{}, false, false,
 		))
 
 	// Guarded CAS matches 0 rows — the reaper won the race after the read.
-	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
+	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
 		WithArgs(rowID).
 		WillReturnResult(sqlmock.NewResult(0, 0))
 
@@ -652,9 +658,10 @@ func TestDeployRedeploy_ByID_CASSuccess_Returns202(t *testing.T) {
 			sql.NullString{}, sql.NullString{}, "unset", 0,
 			sql.NullTime{}, "permanent", 0, sql.NullTime{},
 			"tarball", "", "", "", "", "",
+			sql.NullTime{}, false, false,
 		))
 
-	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
+	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
 		WithArgs(rowID).
 		WillReturnResult(sqlmock.NewResult(0, 1))
 
@@ -702,10 +709,11 @@ func TestDeployRedeploy_ByID_CASDriverError_StillAccepts(t *testing.T) {
 			sql.NullString{}, sql.NullString{}, "unset", 0,
 			sql.NullTime{}, "permanent", 0, sql.NullTime{},
 			"tarball", "", "", "", "", "",
+			sql.NullTime{}, false, false,
 		))
 
 	// Guarded CAS → driver error (non-determinate). Handler logs + continues.
-	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
+	mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
 		WithArgs(rowID).
 		WillReturnError(errMockRedeployDriver)
 

diff --git a/internal/handlers/deploy_stack_internal_coverage_test.go b/internal/handlers/deploy_stack_internal_coverage_test.go
@@ -64,6 +64,9 @@ func (covPanicProvider) Redeploy(context.Context, string, []byte, map[string]str
 func (covPanicProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
 	panic("covPanicProvider.UpdateAccessControl: not expected")
 }
+// Scale is a stub that panics: covPanicProvider is used where no scale call
+// is expected, so reaching this method aborts the test immediately.
+// NOTE(review): presumably added so covPanicProvider keeps satisfying the
+// compute provider interface after Scale was introduced — confirm.
+func (covPanicProvider) Scale(context.Context, string, int32) error {
+	panic("covPanicProvider.Scale: not expected")
+}
 
 // covFailProvider's Deploy/Redeploy return a configurable error. It does NOT
 // implement BuildLogFetcher, so fetchBuildLogsForAutopsy returns nil

diff --git a/internal/handlers/deploy_teardown_reconciler_test.go b/internal/handlers/deploy_teardown_reconciler_test.go
@@ -75,6 +75,9 @@ func (f *fakeTeardownProvider) Redeploy(context.Context, string, []byte, map[str
 func (f *fakeTeardownProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
 	return nil
 }
+// Scale is a no-op stub: it ignores all arguments and always reports success.
+// NOTE(review): presumably added so fakeTeardownProvider keeps satisfying the
+// compute provider interface after Scale was introduced — confirm.
+func (f *fakeTeardownProvider) Scale(context.Context, string, int32) error {
+	return nil
+}
 
 func reconcilerRequireDB(t *testing.T) {
 	t.Helper()

diff --git a/internal/handlers/deploy_wake.go b/internal/handlers/deploy_wake.go
@@ -0,0 +1,116 @@
+package handlers
+
+// deploy_wake.go — explicit wake path for scale-to-zero (Task #54).
+//
+// WHY AN EXPLICIT WAKE (v1 design decision)
+//
+// instanode.dev serves a deployed app via a k8s Ingress on
+// *.deployment.instanode.dev that routes straight to the per-app Service in
+// the instant-deploy-<appID> namespace. The api process is NOT in the request
+// path. Transparent wake-on-request (a request to a sleeping app
+// auto-scales it and holds the connection until ready) therefore requires an
+// ACTIVATOR proxy in front of every app — KEDA http-add-on or a Knative-style
+// activator. That is a significant new dependency and is explicitly out of
+// scope for the scale-to-zero v1.
+//
+// v1 ships scale-DOWN (worker idle-scaler) + this fast EXPLICIT wake:
+//
+//   POST /deploy/:id/wake → scales the app back to replicas=1 and returns once
+//   the scale patch is accepted by k8s. The pod still needs its normal startup
+//   time before it serves traffic, so a request that races the wake gets the
+//   app's own cold-start latency (a brief 502/503 from the ingress until the
+//   pod is Ready), exactly as a fresh rollout would. Callers/dashboard/agents
+//   surface "sleeping — wake" and retry the app URL after waking.
+//
+// COLD-START CONTRACT (documented v1 limitation)
+//
+//   - While scaled_to_zero, the app URL returns the ingress's upstream-down
+//     response (502/503) because there is no pod. This is the documented v1
+//     trade-off of explicit wake vs a transparent activator.
+//   - POST /deploy/:id/wake is idempotent: waking an already-awake app just
+//     refreshes last_activity_at (so it won't be re-descheduled immediately).
+//   - The endpoint is gated by DEPLOY_SCALE_TO_ZERO_ENABLED. With the flag OFF
+//     it returns 501 and performs NO scaling and NO DB writes (flag-off inert).
+
+import (
+	"errors"
+	"log/slog"
+
+	"github.com/gofiber/fiber/v2"
+
+	"instant.dev/internal/middleware"
+	"instant.dev/internal/models"
+)
+
+// Wake handles POST /deploy/:id/wake. It scales a (possibly scaled-to-zero)
+// deployment back to replicas=1 and clears the scaled_to_zero flag, returning
+// the refreshed deployment. See the file header for the cold-start contract.
+//
+// Responses:
+//   - 501 scale_to_zero_disabled: the DeployScaleToZeroEnabled flag is off; no
+//     scale call and no DB write are performed.
+//   - whatever requireTeam returns when the caller has no resolvable team.
+//   - 404 not_found: the deployment does not exist or belongs to another team.
+//   - 503 fetch_failed / wake_failed: the lookup, the scale call, or the DB
+//     write failed; the caller is expected to retry.
+//   - 200 with {ok, message, deployment} on success.
+//
+// Order matters: the compute scale runs before the DB write, so a failed scale
+// never leaves the row claiming the app is awake.
+func (h *DeployHandler) Wake(c *fiber.Ctx) error {
+	if !h.cfg.DeployScaleToZeroEnabled {
+		// Flag OFF → fully inert: no scale call, no DB write.
+		return respondError(c, fiber.StatusNotImplemented, "scale_to_zero_disabled",
+			"Scale-to-zero is not enabled on this platform")
+	}
+
+	team, err := h.requireTeam(c)
+	if err != nil {
+		return err
+	}
+
+	appID := c.Params("id")
+	d, err := models.GetDeploymentByAppID(c.Context(), h.db, appID)
+	if err != nil {
+		var notFound *models.ErrDeploymentNotFound
+		if errors.As(err, &notFound) {
+			return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found")
+		}
+		return respondError(c, fiber.StatusServiceUnavailable, "fetch_failed", "Failed to fetch deployment")
+	}
+
+	if d.TeamID != team.ID {
+		// 404 not 403: never confirm the existence of another team's deployment.
+		return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found")
+	}
+
+	// Resolved once; attached to every log line emitted below.
+	requestID := middleware.GetRequestID(c)
+
+	// Scale the k8s Deployment back to 1 replica. A NotFound Deployment is a
+	// no-op inside compute.Scale (the row may have been torn down), so this only
+	// errors on a real k8s transport failure — surface it so the caller retries.
+	// Rows without a provider ID have nothing to scale and skip straight to the
+	// DB half.
+	if d.ProviderID != "" {
+		if scaleErr := h.compute.Scale(c.Context(), appID, 1); scaleErr != nil {
+			slog.Warn("deploy.wake.scale_failed",
+				"app_id", appID, "provider_id", d.ProviderID, "error", scaleErr,
+				"request_id", requestID)
+			return respondError(c, fiber.StatusServiceUnavailable, "wake_failed",
+				"Failed to wake deployment; please retry")
+		}
+	}
+
+	// DB half: clear scaled_to_zero + bump last_activity_at so the idle-scaler
+	// doesn't immediately re-deschedule the just-woken app.
+	if _, dbErr := models.WakeDeployment(c.Context(), h.db, d.ID); dbErr != nil {
+		slog.Error("deploy.wake.db_failed",
+			"app_id", appID, "error", dbErr,
+			"request_id", requestID)
+		return respondError(c, fiber.StatusServiceUnavailable, "wake_failed",
+			"Failed to record wake; please retry")
+	}
+
+	// Re-read so the response reflects the cleared flag + new activity stamp.
+	fresh, err := models.GetDeploymentByID(c.Context(), h.db, d.ID)
+	if err != nil {
+		// The scale + DB write already succeeded; a re-read failure shouldn't
+		// fail the wake. Log it (previously swallowed silently) and fall back to
+		// the pre-read row with scaled_to_zero cleared. The other fields on the
+		// fallback row are the pre-wake values.
+		slog.Warn("deploy.wake.reread_failed",
+			"app_id", appID, "error", err,
+			"request_id", requestID)
+		d.ScaledToZero = false
+		fresh = d
+	}
+
+	slog.Info("deploy.woke",
+		"app_id", appID, "team_id", team.ID,
+		"request_id", requestID)
+
+	return c.JSON(fiber.Map{
+		"ok":         true,
+		"message":    "Deployment woken — the app will be reachable once its pod is Ready (cold start).",
+		"deployment": deploymentToMapWithDB(fresh, h.db),
+	})
+}