Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions internal/backend/postgres/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,25 @@ type Backend interface {
Provision(ctx context.Context, token, tier string) (*Credentials, error)
StorageBytes(ctx context.Context, token, providerResourceID string) (int64, error)
Deprovision(ctx context.Context, token, providerResourceID string) error
// Regrade re-applies the tier's per-role CONNECTION LIMIT to an already
// provisioned resource (e.g. after a plan upgrade). Idempotent.
//
// connLimit is the connection cap to apply (-1 = unlimited). Backends that
// own a dedicated pod per resource (k8s) ALTER ROLE in place. The shared
// local/dedicated/neon backends set no per-role cap at provision time, so
// they return RegradeResult{Applied:false, SkipReason:"..."} without error.
//
// A non-error RegradeResult{Applied:false} means "nothing to do / not
// reachable" — the caller can safely retry on the next sweep. An error is
// reserved for unexpected failures.
Regrade(ctx context.Context, token, providerResourceID string, connLimit int) (RegradeResult, error)
}

// RegradeResult reports what a Backend.Regrade call did.
type RegradeResult struct {
	// Applied is true if the new connection cap was applied.
	Applied bool
	// AppliedConnLimit is the cap that is now in effect (-1 = unlimited).
	AppliedConnLimit int
	// SkipReason is populated when Applied is false.
	SkipReason string
}

// Credentials returned by Provision.
Expand Down
8 changes: 8 additions & 0 deletions internal/backend/postgres/dedicated.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ func (p *DedicatedProvider) Deprovision(ctx context.Context, token, providerReso
return p.deprovisionLocal(ctx, token)
}

// Regrade does nothing for the dedicated provider and always reports a skip.
// On the Neon path connection limits are managed through the Neon project
// plan rather than a per-role CONNECTION LIMIT, and the local-admin path sets
// no per-role cap at provision time, so in both cases there is no cap to
// re-apply. All arguments are ignored and the error is always nil.
func (p *DedicatedProvider) Regrade(ctx context.Context, token, providerResourceID string, connLimit int) (RegradeResult, error) {
	// Applied is left at its zero value (false); only the reason is set.
	skipped := RegradeResult{SkipReason: "backend has no per-role connection cap"}
	return skipped, nil
}

// --- Neon API path ---

func (p *DedicatedProvider) provisionNeon(ctx context.Context, token, tier string) (*Credentials, error) {
Expand Down
59 changes: 59 additions & 0 deletions internal/backend/postgres/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,65 @@ func (b *K8sBackend) Deprovision(ctx context.Context, token, providerResourceID
return nil
}

// Regrade re-applies a connection cap to the customer's app Postgres role.
// Used after a plan upgrade: the role's CONNECTION LIMIT was set at provision
// time from the old (lower) tier and nothing else re-applies the new cap.
//
// The namespace is providerResourceID, or k8sNsPrefix+token when that is
// empty. The admin credentials are read from the "postgres-admin" Secret and
// the address from the "postgres" Service in that namespace, then
// ALTER ROLE ... CONNECTION LIMIT is executed for the role
// "usr_"+k8sShort(token). Re-applying the same value is harmless, so the call
// is idempotent.
//
// connLimit is passed to Postgres verbatim (-1 = unlimited).
//
// Every failure on this path — Secret or Service lookup, connect, ALTER ROLE —
// is reported as RegradeResult{Applied:false} with a SkipReason and a nil
// error; this method currently never returns a non-nil error. On success it
// returns RegradeResult{Applied:true, AppliedConnLimit:connLimit}.
func (b *K8sBackend) Regrade(ctx context.Context, token, providerResourceID string, connLimit int) (RegradeResult, error) {
	// skip builds the non-error "not applied" result shared by every failure
	// path below.
	skip := func(reason string) (RegradeResult, error) {
		return RegradeResult{Applied: false, SkipReason: reason}, nil
	}

	ns := providerResourceID
	if ns == "" {
		ns = k8sNsPrefix + token
	}

	// Resolve the admin connection. A missing Secret/Service (e.g. a legacy
	// row whose pod is gone) is non-actionable, so it is reported as a skip
	// rather than an error.
	secret, err := b.cs.CoreV1().Secrets(ns).Get(ctx, "postgres-admin", metav1.GetOptions{})
	if err != nil {
		if k8serrors.IsNotFound(err) {
			return skip("resource not reachable: postgres-admin secret not found")
		}
		return skip(fmt.Sprintf("resource not reachable: get secret: %v", err))
	}
	svc, err := b.cs.CoreV1().Services(ns).Get(ctx, "postgres", metav1.GetOptions{})
	if err != nil {
		if k8serrors.IsNotFound(err) {
			return skip("resource not reachable: postgres service not found")
		}
		return skip(fmt.Sprintf("resource not reachable: get service: %v", err))
	}

	adminUser := string(secret.Data["POSTGRES_USER"])
	adminPass := string(secret.Data["POSTGRES_PASSWORD"])
	// The app role name is derived from the token.
	appUser := "usr_" + k8sShort(token)

	// NOTE(review): the credentials are interpolated into the URL unescaped;
	// this assumes they contain no URL-reserved characters — TODO confirm
	// against how Provision generates them.
	dsn := fmt.Sprintf("postgres://%s:%s@%s:5432/postgres?sslmode=disable", adminUser, adminPass, svc.Spec.ClusterIP)
	conn, err := pgx.Connect(ctx, dsn)
	if err != nil {
		return skip(fmt.Sprintf("resource not reachable: connect: %v", err))
	}
	defer conn.Close(ctx)

	// Quote the role name as a SQL identifier (embedded double quotes are
	// doubled). Go's %q verb applies Go string escaping, which is not SQL
	// identifier quoting, so it must not be used to build the statement.
	role := pgx.Identifier{appUser}.Sanitize()
	stmt := fmt.Sprintf(`ALTER ROLE %s CONNECTION LIMIT %d`, role, connLimit)
	if _, err := conn.Exec(ctx, stmt); err != nil {
		// A missing role on a live pod is non-actionable too — treat as skip.
		return skip(fmt.Sprintf("resource not reachable: alter role: %v", err))
	}

	return RegradeResult{Applied: true, AppliedConnLimit: connLimit}, nil
}

// --- private resource creators ---

func (b *K8sBackend) applyNamespace(ctx context.Context, ns string) error {
Expand Down
7 changes: 7 additions & 0 deletions internal/backend/postgres/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ func (b *LocalBackend) Deprovision(ctx context.Context, token, providerResourceI
return nil
}

// Regrade does nothing for the shared local backend and always reports a
// skip: Provision sets no per-role CONNECTION LIMIT on the shared cluster, so
// a plan upgrade leaves no cap to re-apply. All arguments are ignored and the
// error is always nil.
func (b *LocalBackend) Regrade(ctx context.Context, token, providerResourceID string, connLimit int) (RegradeResult, error) {
	// Applied is left at its zero value (false); only the reason is set.
	skipped := RegradeResult{SkipReason: "backend has no per-role connection cap"}
	return skipped, nil
}

// buildDBURL constructs the user-facing connection URL for the provisioned database.
// sslmode=disable is explicit because the shared postgres-customers cluster does not
// have SSL configured. Without it, lib/pq defaults to sslmode=prefer and fails with
Expand Down
7 changes: 7 additions & 0 deletions internal/backend/postgres/neon.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,10 @@ func (b *NeonBackend) Deprovision(ctx context.Context, token, providerResourceID
slog.Info("db.neon.Deprovision: deprovisioned", "token", token, "project_id", providerResourceID)
return nil
}

// Regrade does nothing for the Neon backend and always reports a skip:
// connection limits are governed by the Neon project plan rather than a
// per-role CONNECTION LIMIT, so a plan upgrade leaves nothing to re-apply.
// All arguments are ignored and the error is always nil.
func (b *NeonBackend) Regrade(ctx context.Context, token, providerResourceID string, connLimit int) (RegradeResult, error) {
	// Applied is left at its zero value (false); only the reason is set.
	skipped := RegradeResult{SkipReason: "backend has no per-role connection cap"}
	return skipped, nil
}
89 changes: 89 additions & 0 deletions internal/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"instant.dev/common/plans"
commonv1 "instant.dev/proto/common/v1"
provisionerv1 "instant.dev/proto/provisioner/v1"

Expand Down Expand Up @@ -546,6 +547,94 @@ func (s *Server) GetStorageBytes(ctx context.Context, req *provisionerv1.Storage
}
}

// regradeConnLimits is the source of truth for the per-tier Postgres
// connection caps used by RegradeResource. It is the shared plans registry
// returned by plans.Default() — the same source the agent API uses via
// plans.Registry.ConnectionsLimit(tier, "postgres") — so the cap re-applied
// here stays platform-consistent.
var regradeConnLimits = plans.Default()

// RegradeResource re-applies the tier's per-role connection cap to an
// already-provisioned resource. It exists because a plan upgrade does not, on
// its own, re-apply the higher CONNECTION LIMIT to the customer's Postgres
// role — the role keeps the old (lower) cap until this RPC runs.
//
// Phase 1 is Postgres-only. Non-Postgres resource types and backends with no
// per-role connection cap (the shared local/dedicated/neon backends) return
// {applied:false} with a skip reason rather than an error.
//
// Errors:
//   - codes.InvalidArgument when the token is empty (including a nil request).
//   - whatever mapError produces for an error returned by the backend.
//
// Request fields are read through the generated getters so a nil request is
// rejected as an empty token instead of panicking.
func (s *Server) RegradeResource(ctx context.Context, req *provisionerv1.RegradeRequest) (*provisionerv1.RegradeResponse, error) {
	token := req.GetToken()
	tier := req.GetTier()
	providerID := req.GetProviderResourceId()
	requestID := req.GetRequestId()

	if token == "" {
		return nil, status.Error(codes.InvalidArgument, "token is required")
	}

	ctx, span := otel.Tracer("instant.dev/provisioner").Start(ctx, "RegradeResource",
		trace.WithAttributes(
			attribute.String("resource_type", req.GetResourceType().String()),
			attribute.String("tier", tier),
			attribute.String("resource.token", token),
		),
	)
	defer span.End()

	// skip logs and returns the non-error "not applied" response shared by
	// both early-exit paths below.
	skip := func(reason string) (*provisionerv1.RegradeResponse, error) {
		slog.Info("server.RegradeResource",
			"token", token, "tier", tier,
			"applied", false, "skip_reason", reason,
			"request_id", requestID)
		return &provisionerv1.RegradeResponse{
			Applied:    false,
			SkipReason: reason,
		}, nil
	}

	// Phase 1: Postgres only.
	if req.GetResourceType() != commonv1.ResourceType_RESOURCE_TYPE_POSTGRES {
		return skip("unsupported resource type for regrade")
	}

	// Select the backend that actually owns this resource. k8s namespace IDs
	// (prefix "instant-customer-") and empty IDs go through the regular
	// postgresBackend; any other ID goes to the dedicated backend when one is
	// configured.
	backend := s.postgresBackend
	if s.dedicatedPostgresBackend != nil && providerID != "" &&
		!strings.HasPrefix(providerID, "instant-customer-") {
		backend = s.dedicatedPostgresBackend
	}

	// Only the k8s backend applies a per-role CONNECTION LIMIT at provision
	// time. Every other backend would return the same skip via Regrade, but
	// checking here keeps the contract explicit and avoids a needless k8s/DB
	// round-trip. The comma-ok assertion also yields a skip for a nil backend.
	if _, ok := backend.(*postgres.K8sBackend); !ok {
		return skip("backend has no per-role connection cap")
	}

	// Connection cap comes from the shared plans registry. -1 = unlimited;
	// the value is passed through verbatim.
	// NOTE(review): what ConnectionsLimit returns for an unknown or empty
	// tier is not visible here — confirm it can never yield a cap (e.g. 0)
	// that would lock the role out.
	connLimit := regradeConnLimits.ConnectionsLimit(tier, "postgres")

	result, err := backend.Regrade(ctx, token, providerID, connLimit)
	if err != nil {
		return nil, mapError("RegradeResource.postgres", err)
	}

	slog.Info("server.RegradeResource",
		"token", token, "tier", tier,
		"applied", result.Applied,
		"applied_conn_limit", result.AppliedConnLimit,
		"skip_reason", result.SkipReason,
		"request_id", requestID)

	return &provisionerv1.RegradeResponse{
		Applied:          result.Applied,
		AppliedConnLimit: int32(result.AppliedConnLimit),
		SkipReason:       result.SkipReason,
	}, nil
}

// mapError converts backend errors to appropriate gRPC status codes.
func mapError(op string, err error) error {
if err == nil {
Expand Down
57 changes: 57 additions & 0 deletions internal/server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type mockPostgresBackend struct {
provision func(ctx context.Context, token, tier string) (*postgres.Credentials, error)
storageBytes func(ctx context.Context, token, providerResourceID string) (int64, error)
deprovision func(ctx context.Context, token, providerResourceID string) error
regrade func(ctx context.Context, token, providerResourceID string, connLimit int) (postgres.RegradeResult, error)
}

func (m *mockPostgresBackend) Provision(ctx context.Context, token, tier string) (*postgres.Credentials, error) {
Expand Down Expand Up @@ -52,6 +53,13 @@ func (m *mockPostgresBackend) Deprovision(ctx context.Context, token, id string)
return nil
}

// Regrade delegates to the regrade hook when the test has set one; otherwise
// it returns the same skip result the backends without a per-role cap return.
func (m *mockPostgresBackend) Regrade(ctx context.Context, token, id string, connLimit int) (postgres.RegradeResult, error) {
	if m.regrade == nil {
		// Applied is left at its zero value (false).
		return postgres.RegradeResult{SkipReason: "backend has no per-role connection cap"}, nil
	}
	return m.regrade(ctx, token, id, connLimit)
}

type mockRedisBackend struct {
provision func(ctx context.Context, token, tier string) (*redis.Credentials, error)
storageBytes func(ctx context.Context, token, providerResourceID string) (int64, error)
Expand Down Expand Up @@ -357,6 +365,55 @@ func TestGetStorageBytes_Storage_NilMinIOBackend_ReturnsZero(t *testing.T) {
}
}

// --- RegradeResource tests ---

func TestRegradeResource_EmptyToken_ReturnsInvalidArgument(t *testing.T) {
srv := newTestServer()
_, err := srv.RegradeResource(context.Background(), &provisionerv1.RegradeRequest{
ResourceType: commonv1.ResourceType_RESOURCE_TYPE_POSTGRES,
Tier: "pro",
})
assertCode(t, err, codes.InvalidArgument)
}

func TestRegradeResource_NonPostgres_SkipsWithReason(t *testing.T) {
srv := newTestServer()
resp, err := srv.RegradeResource(context.Background(), &provisionerv1.RegradeRequest{
Token: "tok-123",
ResourceType: commonv1.ResourceType_RESOURCE_TYPE_REDIS,
Tier: "pro",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if resp.Applied {
t.Fatal("expected applied=false for non-postgres resource")
}
if resp.SkipReason != "unsupported resource type for regrade" {
t.Fatalf("unexpected skip_reason: %q", resp.SkipReason)
}
}

func TestRegradeResource_NonK8sBackend_SkipsWithReason(t *testing.T) {
// newTestServer wires the shared mockPostgresBackend, which is not a
// *postgres.K8sBackend — the server should skip without touching it.
srv := newTestServer()
resp, err := srv.RegradeResource(context.Background(), &provisionerv1.RegradeRequest{
Token: "tok-123",
ResourceType: commonv1.ResourceType_RESOURCE_TYPE_POSTGRES,
Tier: "pro",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if resp.Applied {
t.Fatal("expected applied=false for non-k8s backend")
}
if resp.SkipReason != "backend has no per-role connection cap" {
t.Fatalf("unexpected skip_reason: %q", resp.SkipReason)
}
}

// --- helper ---

func assertCode(t *testing.T, err error, want codes.Code) {
Expand Down