Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ type Config struct {
// Off → /deploy/new rejects source=git with 501; tarball/image unaffected.
DeploySourceGitEnabled bool

// DeployScaleToZeroEnabled gates scale-to-zero (idle descheduling, Task #54).
// Default FALSE. When on, the worker idle-scaler patches idle Deployments to
// replicas=0 and the api wake path (POST /deploy/:id/wake) brings them back.
// Off → the wake endpoint returns 501 and nothing in the api scales an app;
// the worker idle-scaler is independently gated by its own
// DEPLOY_SCALE_TO_ZERO_ENABLED env so the two services share the flag name.
// Enabling it is an operator action (see infra runbook) after a canary.
DeployScaleToZeroEnabled bool

// ResourceCountCapsEnabled gates per-service resource-count enforcement
// (Task #55). Default FALSE: when off, the count-check block in every
// provision handler (db/vector/cache/nosql/storage) is skipped entirely —
Expand Down Expand Up @@ -512,6 +521,16 @@ func Load() *Config {
cfg.DeploySourceGitEnabled = false
}

// DEPLOY_SCALE_TO_ZERO_ENABLED: default FALSE (off until operator canary).
// Shared flag name with the worker idle-scaler; the api half gates the wake
// endpoint + any api-initiated scale, the worker half gates the idle sweep.
switch strings.ToLower(strings.TrimSpace(os.Getenv("DEPLOY_SCALE_TO_ZERO_ENABLED"))) {
case "true", "1", "yes":
cfg.DeployScaleToZeroEnabled = true
default:
cfg.DeployScaleToZeroEnabled = false
}

// RESOURCE_COUNT_CAPS_ENABLED: default FALSE (Task #55). Off → the per-service
// count-check block in every provision handler is skipped (zero behavior
// change). On → over-cap provisions get 402. Operator action after a usage
Expand Down
16 changes: 16 additions & 0 deletions internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ func allKeys() []string {
"METRICS_TOKEN", "DASHBOARD_BASE_URL", "API_PUBLIC_URL",
"DELETION_CONFIRMATION_TTL_MINUTES", "FAMILY_BINDINGS_ENABLED",
"DEPLOY_SOURCE_IMAGE_ENABLED", "DEPLOY_SOURCE_GIT_ENABLED",
"DEPLOY_SCALE_TO_ZERO_ENABLED",
"RESOURCE_COUNT_CAPS_ENABLED",
"GITHUB_APP_ENABLED", "GITHUB_APP_ID", "GITHUB_APP_SLUG", "GITHUB_APP_PRIVATE_KEY",
"GITHUB_APP_WEBHOOK_SECRET", "GITHUB_APP_CLIENT_ID", "GITHUB_APP_CLIENT_SECRET",
Expand Down Expand Up @@ -388,6 +389,21 @@ func TestLoad_DeploySourceGitEnabled(t *testing.T) {
}
}

// TestLoad_DeployScaleToZeroEnabled checks how Load parses the
// DEPLOY_SCALE_TO_ZERO_ENABLED env var: "true", "1" and "yes" (any case,
// surrounding whitespace ignored) turn the flag on; every other value,
// including the empty string, leaves it off.
func TestLoad_DeployScaleToZeroEnabled(t *testing.T) {
	cases := []struct {
		raw  string
		want bool
	}{
		{"true", true},
		{"1", true},
		{"yes", true},
		{"TRUE", true},
		{" Yes ", true},
		{"false", false},
		{"0", false},
		{"no", false},
		{"maybe", false},
		{"", false},
	}
	for _, tc := range cases {
		applyBaselineEnv(t, map[string]string{"DEPLOY_SCALE_TO_ZERO_ENABLED": tc.raw})
		got := Load().DeployScaleToZeroEnabled
		switch {
		case tc.want && !got:
			t.Errorf("DEPLOY_SCALE_TO_ZERO_ENABLED=%q should enable", tc.raw)
		case !tc.want && got:
			t.Errorf("DEPLOY_SCALE_TO_ZERO_ENABLED=%q should stay disabled", tc.raw)
		}
	}
}

func TestLoad_ResourceCountCapsEnabled(t *testing.T) {
for _, val := range []string{"true", "1", "yes", "TRUE", " Yes "} {
applyBaselineEnv(t, map[string]string{"RESOURCE_COUNT_CAPS_ENABLED": val})
Expand Down
68 changes: 68 additions & 0 deletions internal/db/migrations/068_deploy_scale_to_zero.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
-- 068_deploy_scale_to_zero.sql — scale-to-zero (idle descheduling) state columns.
--
-- WHY: a deployed-but-idle app costs a full pod's worth of compute even when it
-- serves zero requests. Scale-to-zero (Task #54) lets the worker patch an idle
-- Deployment to replicas=0 (~$0 compute) and wake it back to replicas=1 on
-- demand. This migration adds the per-deployment state the idle-scaler and the
-- wake path read/write. The whole feature is gated behind the
-- DEPLOY_SCALE_TO_ZERO_ENABLED worker env flag (default OFF), so these columns
-- are inert — populated at create-time but acted upon only when an operator
-- enables the flag.
--
-- Columns:
-- last_activity_at TIMESTAMPTZ — floor "last known activity" marker. Set to
-- now() at create-time, bumped on every wake
-- and on redeploy. The idle-scaler deschedules
-- a Deployment only when
-- now() - last_activity_at > idle_threshold.
--
-- v1 NOTE: the api is NOT in the request path
-- (apps are served by k8s Ingress straight to
-- the per-app Service), and no nginx-ingress
-- request-total scrape is wired yet, so the
-- honest "activity" signal v1 captures is
-- deploy / redeploy / explicit-wake events —
-- NOT per-HTTP-request traffic. A follow-up
-- (documented in the worker job header) will
-- wire an ingress request-counter to bump this
-- column on real traffic for true
-- traffic-based idle detection.
--
-- scaled_to_zero BOOLEAN — true while the app is currently descheduled
-- (replicas=0). The wake path reads this to
-- decide whether a scale-up is needed; the
-- dashboard/agent reads it to show "sleeping".
-- The idle-scaler sets it true on scale-down,
-- the wake path sets it false on scale-up.
--
-- always_on BOOLEAN — per-app opt-out. A pinned app (an operator
-- or Pro+ user who wants zero cold-starts) is
-- never descheduled by the idle-scaler. Default
-- false → eligible for scale-to-zero.
--
-- Idempotent + forward-only. Existing rows get last_activity_at backfilled from
-- updated_at (their most recent known activity) so the idle-scaler does not
-- immediately deschedule every pre-existing deploy the first time the flag is
-- turned on; scaled_to_zero / always_on default to false.

-- Add the three scale-to-zero state columns. last_activity_at is added
-- nullable with no default, so pre-existing rows start out NULL until the
-- backfill below fills them; scaled_to_zero and always_on are NOT NULL with a
-- default of false. IF NOT EXISTS keeps the statement safe to re-run.
ALTER TABLE deployments
ADD COLUMN IF NOT EXISTS last_activity_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS scaled_to_zero BOOLEAN NOT NULL DEFAULT false,
ADD COLUMN IF NOT EXISTS always_on BOOLEAN NOT NULL DEFAULT false;

-- Backfill: seed last_activity_at from updated_at for every pre-existing row so
-- the very first idle-scaler tick after the flag is enabled treats existing
-- deploys as "recently active" rather than immediately idle. New rows set
-- last_activity_at = now() at INSERT time (see CreateDeployment).
-- COALESCE falls back to created_at, then now(), when updated_at is NULL.
-- The WHERE clause touches only rows still NULL, so a re-run does not
-- overwrite values that have already been set.
UPDATE deployments
SET last_activity_at = COALESCE(updated_at, created_at, now())
WHERE last_activity_at IS NULL;

-- Partial index: the idle-scaler scans for healthy, eligible, not-yet-zeroed
-- deployments ordered by activity. Excluding always_on + already-zeroed +
-- terminal rows keeps the index narrow and the scan cheap.
-- Only rows with status = 'healthy' are indexed, which is what leaves every
-- other status out.
CREATE INDEX IF NOT EXISTS idx_deployments_idle_candidates
ON deployments (last_activity_at)
WHERE status = 'healthy'
AND scaled_to_zero = false
AND always_on = false;
5 changes: 5 additions & 0 deletions internal/handlers/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,11 @@ func deploymentToMapWithDB(d *models.Deployment, db *sql.DB) fiber.Map {
// image_ref is echoed (caller-supplied, no secret); registry_creds is
// NEVER returned — only registry_creds_set lifecycle metadata.
"source": deploymentSourceOrDefault(d.Source),
// Scale-to-zero state (migration 068). scaled_to_zero=true → the app is
// asleep (replicas=0); the dashboard/agent surfaces "sleeping — wake"
// and POSTs /deploy/:id/wake. always_on=true → pinned (never descheduled).
"scaled_to_zero": d.ScaledToZero,
"always_on": d.AlwaysOn,
}
if d.Source == "image" {
m["image_ref"] = d.ImageRef
Expand Down
3 changes: 3 additions & 0 deletions internal/handlers/deploy_buildfailed_autopsy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ func (m *mockProvider) Redeploy(_ context.Context, _ string, _ []byte, _ map[str
func (m *mockProvider) UpdateAccessControl(_ context.Context, _ string, _ bool, _ []string) error {
panic("mockProvider.UpdateAccessControl: not expected in this test")
}
// Scale is a stub that panics when called: the tests using mockProvider do
// not expect the handler to scale a deployment, so any call is a test failure.
// NOTE(review): presumably present so mockProvider satisfies the compute
// provider interface after Scale was added to it — confirm against the
// interface definition.
func (m *mockProvider) Scale(_ context.Context, _ string, _ int32) error {
panic("mockProvider.Scale: not expected in this test")
}

// mockBuildLogFetcher wraps mockProvider and adds FetchBuildLogs so the handler
// code can type-assert to compute.BuildLogFetcher.
Expand Down
18 changes: 13 additions & 5 deletions internal/handlers/deploy_redeploy_inplace_mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ var deploymentColumnsList = []string{
"expires_at", "ttl_policy", "reminders_sent", "last_reminder_at",
"source", "image_ref", "registry_creds_enc",
"git_url", "git_ref", "git_token_enc",
"last_activity_at", "scaled_to_zero", "always_on",
}

// redeployMockApp wires a minimal Fiber app that drives DeployHandler.New
Expand Down Expand Up @@ -256,6 +257,7 @@ func TestDeployNew_Redeploy_WrongTeam_DefenceInDepth(t *testing.T) {
sql.NullTime{}, "permanent", 0, sql.NullTime{}, // ttl_*
"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
"", "", "", // git_url, git_ref, git_token_enc (mig 065)
sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
))

body, ct := multipartRedeployMockBody(t, map[string]string{
Expand Down Expand Up @@ -328,14 +330,15 @@ func TestDeployNew_Redeploy_UpdateStatusError_StillAccepts(t *testing.T) {
sql.NullTime{}, "permanent", 0, sql.NullTime{},
"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
"", "", "", // git_url, git_ref, git_token_enc (mig 065)
sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
))

// MarkDeploymentBuilding (guarded CAS) → driver error. The handler must
// slog.Warn and CONTINUE (NOT return 5xx) — a driver error is
// non-determinate (we can't tell whether the flip landed), and
// runRedeployAsync will reconcile the row later. Only an explicit 0-row
// CAS miss means "reaped concurrently, return 409".
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
WithArgs(rowID).
WillReturnError(errMockRedeployDriver)

Expand Down Expand Up @@ -420,6 +423,7 @@ func TestDeployNew_Redeploy_EmptyProviderID_Returns409(t *testing.T) {
sql.NullTime{}, "permanent", 0, sql.NullTime{},
"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
"", "", "", // git_url, git_ref, git_token_enc (mig 065)
sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
))

body, ct := multipartRedeployMockBody(t, map[string]string{
Expand Down Expand Up @@ -486,10 +490,11 @@ func TestDeployNew_Redeploy_CASMiss_Returns409(t *testing.T) {
sql.NullTime{}, "permanent", 0, sql.NullTime{},
"tarball", "", "", // source, image_ref, registry_creds_enc (mig 064)
"", "", "", // git_url, git_ref, git_token_enc (mig 065)
sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on (mig 068)
))

// Guarded CAS matches 0 rows — the reaper won the race. Handler 409s.
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
WithArgs(rowID).
WillReturnResult(sqlmock.NewResult(0, 0))

Expand Down Expand Up @@ -597,10 +602,11 @@ func TestDeployRedeploy_ByID_CASMiss_Returns409(t *testing.T) {
sql.NullTime{}, "permanent", 0, sql.NullTime{},
"tarball", "", "",
"", "", "",
sql.NullTime{}, false, false,
))

// Guarded CAS matches 0 rows — the reaper won the race after the read.
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
WithArgs(rowID).
WillReturnResult(sqlmock.NewResult(0, 0))

Expand Down Expand Up @@ -652,9 +658,10 @@ func TestDeployRedeploy_ByID_CASSuccess_Returns202(t *testing.T) {
sql.NullString{}, sql.NullString{}, "unset", 0,
sql.NullTime{}, "permanent", 0, sql.NullTime{},
"tarball", "", "", "", "", "",
sql.NullTime{}, false, false,
))

mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
WithArgs(rowID).
WillReturnResult(sqlmock.NewResult(0, 1))

Expand Down Expand Up @@ -702,10 +709,11 @@ func TestDeployRedeploy_ByID_CASDriverError_StillAccepts(t *testing.T) {
sql.NullString{}, sql.NullString{}, "unset", 0,
sql.NullTime{}, "permanent", 0, sql.NullTime{},
"tarball", "", "", "", "", "",
sql.NullTime{}, false, false,
))

// Guarded CAS → driver error (non-determinate). Handler logs + continues.
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL, updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
mock.ExpectExec(`UPDATE deployments\s+SET status = 'building', error_message = NULL,\s+scaled_to_zero = false, last_activity_at = now\(\),\s+updated_at = now\(\)\s+WHERE id = \$1 AND status IN`).
WithArgs(rowID).
WillReturnError(errMockRedeployDriver)

Expand Down
3 changes: 3 additions & 0 deletions internal/handlers/deploy_stack_internal_coverage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ func (covPanicProvider) Redeploy(context.Context, string, []byte, map[string]str
func (covPanicProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
panic("covPanicProvider.UpdateAccessControl: not expected")
}
// Scale is a stub that panics when called; a scale request is not expected in
// the tests that use covPanicProvider, so reaching it is a test failure.
func (covPanicProvider) Scale(context.Context, string, int32) error {
panic("covPanicProvider.Scale: not expected")
}

// covFailProvider's Deploy/Redeploy return a configurable error. It does NOT
// implement BuildLogFetcher, so fetchBuildLogsForAutopsy returns nil
Expand Down
3 changes: 3 additions & 0 deletions internal/handlers/deploy_teardown_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ func (f *fakeTeardownProvider) Redeploy(context.Context, string, []byte, map[str
func (f *fakeTeardownProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
return nil
}
// Scale is a no-op that ignores its arguments and always reports success.
func (f *fakeTeardownProvider) Scale(context.Context, string, int32) error {
return nil
}

func reconcilerRequireDB(t *testing.T) {
t.Helper()
Expand Down
116 changes: 116 additions & 0 deletions internal/handlers/deploy_wake.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package handlers

// deploy_wake.go — explicit wake path for scale-to-zero (Task #54).
//
// WHY AN EXPLICIT WAKE (v1 design decision)
//
// instanode.dev serves a deployed app via a k8s Ingress on
// *.deployment.instanode.dev that routes straight to the per-app Service in
// the instant-deploy-<appID> namespace. The api process is NOT in the request
// path. Transparent wake-on-request (a request to a sleeping app
// auto-scales it and holds the connection until ready) therefore requires an
// ACTIVATOR proxy in front of every app — KEDA http-add-on or a Knative-style
// activator. That is a significant new dependency and is explicitly out of
// scope for the scale-to-zero v1.
//
// v1 ships scale-DOWN (worker idle-scaler) + this fast EXPLICIT wake:
//
// POST /deploy/:id/wake → scales the app back to replicas=1 and returns once
// the scale patch is accepted by k8s. The pod still needs its normal startup
// time before it serves traffic, so a request that races the wake gets the
// app's own cold-start latency (a brief 502/503 from the ingress until the
// pod is Ready), exactly as a fresh rollout would. Callers/dashboard/agents
// surface "sleeping — wake" and retry the app URL after waking.
//
// COLD-START CONTRACT (documented v1 limitation)
//
// - While scaled_to_zero, the app URL returns the ingress's upstream-down
// response (502/503) because there is no pod. This is the documented v1
// trade-off of explicit wake vs a transparent activator.
// - POST /deploy/:id/wake is idempotent: waking an already-awake app just
// refreshes last_activity_at (so it won't be re-descheduled immediately).
// - The endpoint is gated by DEPLOY_SCALE_TO_ZERO_ENABLED. With the flag OFF
// it returns 501 and performs NO scaling and NO DB writes (flag-off inert).

import (
"errors"
"log/slog"

"github.com/gofiber/fiber/v2"

"instant.dev/internal/middleware"
"instant.dev/internal/models"
)

// Wake handles POST /deploy/:id/wake. It scales a (possibly scaled-to-zero)
// deployment back to replicas=1 and clears the scaled_to_zero flag, returning
// the refreshed deployment. See the file header for the cold-start contract.
//
// Outcomes, in the order they are checked:
//   - 501 scale_to_zero_disabled: cfg.DeployScaleToZeroEnabled is false. No
//     scale call and no DB write happen in this case.
//   - the error returned by requireTeam, passed through unchanged.
//   - 404 not_found: the deployment does not exist, or it belongs to another
//     team (404 rather than 403 so existence is never confirmed).
//   - 503 fetch_failed: any other error while loading the deployment.
//   - 503 wake_failed: the compute Scale call or the WakeDeployment DB write
//     failed; the caller is expected to retry.
//   - success: JSON body with "ok", "message" and the "deployment" map.
//
// The compute scale happens BEFORE the DB write. If the DB write then fails the
// app may already be running while the row still says scaled_to_zero; a retry
// of this endpoint repeats both steps and repairs the row.
func (h *DeployHandler) Wake(c *fiber.Ctx) error {
	if !h.cfg.DeployScaleToZeroEnabled {
		// Flag OFF → fully inert: no scale call, no DB write.
		return respondError(c, fiber.StatusNotImplemented, "scale_to_zero_disabled",
			"Scale-to-zero is not enabled on this platform")
	}

	team, err := h.requireTeam(c)
	if err != nil {
		return err
	}

	// Looked up once and reused by every log line below.
	requestID := middleware.GetRequestID(c)

	appID := c.Params("id")
	d, err := models.GetDeploymentByAppID(c.Context(), h.db, appID)
	if err != nil {
		var notFound *models.ErrDeploymentNotFound
		if errors.As(err, &notFound) {
			return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found")
		}
		// Log the underlying error: the response carries only a generic
		// message, so without this the cause of the 503 is lost.
		slog.Error("deploy.wake.fetch_failed",
			"app_id", appID, "error", err,
			"request_id", requestID)
		return respondError(c, fiber.StatusServiceUnavailable, "fetch_failed", "Failed to fetch deployment")
	}

	if d.TeamID != team.ID {
		// 404 not 403: never confirm the existence of another team's deployment.
		return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found")
	}

	// Scale the k8s Deployment back to 1 replica. A NotFound Deployment is a
	// no-op inside compute.Scale (the row may have been torn down), so this only
	// errors on a real k8s transport failure — surface it so the caller retries.
	// Rows without a ProviderID skip the scale call and only get the DB update.
	if d.ProviderID != "" {
		if scaleErr := h.compute.Scale(c.Context(), appID, 1); scaleErr != nil {
			slog.Warn("deploy.wake.scale_failed",
				"app_id", appID, "provider_id", d.ProviderID, "error", scaleErr,
				"request_id", requestID)
			return respondError(c, fiber.StatusServiceUnavailable, "wake_failed",
				"Failed to wake deployment; please retry")
		}
	}

	// DB half: clear scaled_to_zero + bump last_activity_at so the idle-scaler
	// doesn't immediately re-deschedule the just-woken app.
	if _, dbErr := models.WakeDeployment(c.Context(), h.db, d.ID); dbErr != nil {
		slog.Error("deploy.wake.db_failed",
			"app_id", appID, "error", dbErr,
			"request_id", requestID)
		return respondError(c, fiber.StatusServiceUnavailable, "wake_failed",
			"Failed to record wake; please retry")
	}

	// Re-read so the response reflects the cleared flag + new activity stamp.
	fresh, err := models.GetDeploymentByID(c.Context(), h.db, d.ID)
	if err != nil {
		// The scale + DB write already succeeded; a re-read failure shouldn't
		// fail the wake. Log it rather than dropping it, then fall back to the
		// pre-read row with scaled_to_zero cleared. Every other field of the
		// fallback row is the value read BEFORE the wake.
		slog.Warn("deploy.wake.reread_failed",
			"app_id", appID, "error", err,
			"request_id", requestID)
		d.ScaledToZero = false
		fresh = d
	}

	slog.Info("deploy.woke",
		"app_id", appID, "team_id", team.ID,
		"request_id", requestID)

	return c.JSON(fiber.Map{
		"ok":         true,
		"message":    "Deployment woken — the app will be reachable once its pod is Ready (cold start).",
		"deployment": deploymentToMapWithDB(fresh, h.db),
	})
}
Loading
Loading