Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions internal/experiments/experiments.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Package experiments holds the server-side variant selector for A/B tests.
//
// Design goals:
//
// - Deterministic per-identifier bucketing — the same caller always
// lands in the same variant for a given experiment, so analytics can
// be reconstructed retroactively from the audit log alone (no extra
// "assignment" row needed).
//
// - Salt = experiment name. This keeps two experiments running in
// parallel statistically independent even when bucketed by the same
// identifier (e.g. a team_id seeing both UpgradeButton and a future
// PricingHeadline experiment lands in uncorrelated buckets).
//
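// - Zero external state. The "registry" is an in-process map that is
// populated once in init() and only read afterwards; the bucket
// function is pure: the first 8 bytes of
// SHA256(identifier + "|" + salt), read as a big-endian uint64, mod N.
// No DB round-trip, no Redis, no cache invalidation story to maintain.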
//
// The first experiment registered here is UpgradeButton — the dashboard
// reads its variant out of GET /auth/me's `experiments` field and
// renders one of three button label/color combinations. Conversion is
// recorded via POST /api/v1/experiments/converted writing an audit_log
// row, which is the only assignment-time signal we keep.
package experiments

import (
"crypto/sha256"
"encoding/binary"
)

// ExperimentUpgradeButton names the A/B test of the upgrade CTA label
// and color across {control, urgent, value} — P1 of the pricing
// experiments track.
//
// Experiment names serve as both the registry key and the salt input
// to Pick. They are exported as constants so callers (handlers, tests,
// the dashboard's audit-event filter) all reference the same string.
const ExperimentUpgradeButton = "upgrade_button"

// Variant labels for the UpgradeButton experiment. They are exported
// so that tests and the dashboard assert against the same labels
// instead of re-typing string literals that could drift apart.
const (
	// VariantControl is the "control" label.
	VariantControl = "control"
	// VariantUrgent is the "urgent" label.
	VariantUrgent = "urgent"
	// VariantValue is the "value" label.
	VariantValue = "value"
)

// Experiment describes a single A/B test.
//
// Variants are listed in a stable order — Pick maps the SHA256 modulus
// onto this slice by index, so reordering variants reshuffles existing
// users. Don't reorder a live experiment.
//
// NOTE: the bucket index is computed modulo len(Variants), so changing
// the number of variants (including appending one at the tail) changes
// the modulus and can move identifiers between the existing variants
// as well. Treat any edit to a live experiment's variant list as a
// re-bucketing event.
type Experiment struct {
// Name identifies the experiment; register uses it as the registry
// key.
Name string
// Variants holds the variant labels in bucket order. register
// panics when this list is empty.
Variants []string
// Salt is mixed into the hash input together with the identifier
// (the hashed string is identifier + "|" + Salt). By convention
// this equals Name so two experiments stay independent even when
// sharing one identifier. Kept as a separate field so a future
// experiment can override (e.g. "re-bucket everyone after a fix"
// by rotating the salt).
Salt string
}

// registry is the name → Experiment table for every experiment the
// server knows about. It is filled by register calls made from init()
// and is treated as read-only after startup, which is why readers
// iterate it without taking a lock.
var registry = make(map[string]Experiment)

func init() {
register(Experiment{
Name: ExperimentUpgradeButton,
Variants: []string{VariantControl, VariantUrgent, VariantValue},
Salt: ExperimentUpgradeButton,
})
}

// register adds an experiment to the registry.
//
// It panics — always a programmer error that should fail loudly at
// startup rather than be silently accepted — when:
//
//   - an experiment with the same Name is already registered;
//   - Variants is empty;
//   - any variant label is the empty string, because Pick returns ""
//     to signal an unknown experiment and callers could not tell the
//     two cases apart.
//
// The Variants slice is copied before it is stored, so the caller
// keeping (and later modifying) its own slice cannot change the
// bucket → variant mapping of a registered experiment.
func register(e Experiment) {
	if _, dup := registry[e.Name]; dup {
		panic("experiments: duplicate registration: " + e.Name)
	}
	if len(e.Variants) == 0 {
		panic("experiments: variants empty: " + e.Name)
	}
	for _, v := range e.Variants {
		if v == "" {
			panic("experiments: empty variant name: " + e.Name)
		}
	}
	// Detach from the caller's backing array; e is already a copy of
	// the struct, but the slice header still aliases the caller's data.
	e.Variants = append([]string(nil), e.Variants...)
	registry[e.Name] = e
}

// All returns the registered experiments keyed by name. Used by the
// /auth/me handler to bucket the caller into every active experiment
// in one pass.
//
// The result is a deep copy: both the map and each experiment's
// Variants slice are freshly allocated, so callers can neither add
// entries to the registry nor rewrite a variant label through the
// returned value.
func All() map[string]Experiment {
	out := make(map[string]Experiment, len(registry))
	for name, e := range registry {
		// e is a struct copy, but its Variants header still points at
		// the registry's backing array — clone it before handing out.
		e.Variants = append([]string(nil), e.Variants...)
		out[name] = e
	}
	return out
}

// Get returns an experiment by name. The second return value is false
// when the name is unknown; callers should treat that as "no
// experiment running" and skip the bucket step. On a miss the returned
// Experiment is the zero value.
//
// The returned Experiment carries its own copy of Variants, so
// modifying it does not affect the registered experiment.
func Get(name string) (Experiment, bool) {
	e, ok := registry[name]
	if !ok {
		return Experiment{}, false
	}
	// Clone so the caller cannot write through to the registry's
	// backing array.
	e.Variants = append([]string(nil), e.Variants...)
	return e, true
}

// Pick resolves the variant assigned to identifier within the named
// experiment. The result is deterministic: identical inputs always
// yield the identical variant. When the experiment is not registered
// Pick returns "" — callers must check for that.
//
// Any string that is stable per caller works as the identifier —
// team_id for claimed users, fingerprint for anonymous ones. Both
// kinds may be mixed within one experiment; they go through the same
// hash-and-modulus step.
func Pick(experiment, identifier string) string {
	if exp, found := registry[experiment]; found {
		return pickFromVariants(exp.Variants, exp.Salt, identifier)
	}
	return ""
}

// pickFromVariants is the pure hashing core behind Pick and PickAll.
// It is separate from the registry lookup so tests can drive it with
// arbitrary variant lists and salts without touching global state.
//
// The bucket is derived from SHA256(identifier + "|" + salt): the
// first 8 bytes of the digest are read as a big-endian uint64 and
// reduced modulo len(variants). An empty variants list yields "".
func pickFromVariants(variants []string, salt, identifier string) string {
	count := uint64(len(variants))
	if count == 0 {
		return ""
	}

	// Assemble identifier|salt in a single pre-sized buffer.
	input := make([]byte, 0, len(identifier)+1+len(salt))
	input = append(input, identifier...)
	input = append(input, '|')
	input = append(input, salt...)

	digest := sha256.Sum256(input)
	// 64 bits of the digest are far more than needed to spread evenly
	// over a handful of variants, and avoid big-integer arithmetic.
	bucket := binary.BigEndian.Uint64(digest[:8]) % count
	return variants[int(bucket)]
}

// PickAll assigns identifier a variant in every registered experiment
// and returns the result as an experiment-name → variant map. GET
// /auth/me embeds this map as the response's `experiments` field so
// the dashboard learns every active assignment in a single round
// trip.
func PickAll(identifier string) map[string]string {
	assignments := make(map[string]string, len(registry))
	for name := range registry {
		exp := registry[name]
		assignments[name] = pickFromVariants(exp.Variants, exp.Salt, identifier)
	}
	return assignments
}
153 changes: 153 additions & 0 deletions internal/experiments/experiments_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package experiments

import (
"fmt"
"math"
"testing"
)

// TestPick_Determinism checks that repeated Pick calls for one
// (experiment, identifier) pair keep returning the variant from the
// first call. This is the load-bearing property — if it breaks, every
// existing bucket reshuffles and the conversion data goes incoherent.
func TestPick_Determinism(t *testing.T) {
	const repeats = 20
	identifiers := []string{
		"team-uuid-aaa",
		"team-uuid-bbb",
		"fp:abcdef0123",
		// Degenerate but legal: an unauthenticated request that has
		// no fingerprint yet. It must still land in a stable bucket.
		"",
		// Non-ASCII input: the hash must be bytewise stable, with no
		// surprise normalization.
		"team-üñîçødé-🚀",
	}
	for _, id := range identifiers {
		first := Pick(ExperimentUpgradeButton, id)
		for i := 0; i < repeats; i++ {
			if got := Pick(ExperimentUpgradeButton, id); got != first {
				t.Fatalf("Pick(%q) non-deterministic: first=%q got=%q on iter %d",
					id, first, got, i)
			}
		}
	}
}

// TestPick_UnknownExperiment checks that an unregistered experiment
// name yields "" instead of panicking, so callers can detect a typo.
func TestPick_UnknownExperiment(t *testing.T) {
	if got := Pick("definitely_not_registered", "team-1"); got != "" {
		t.Fatalf("unknown experiment should return empty string, got %q", got)
	}
}

// TestPick_ReturnsValidVariant protects against the modulus math
// drifting off-by-one and yielding a bogus index: for 1000 distinct
// identifiers, every Pick result must be one of the variants
// registered for the UpgradeButton experiment.
func TestPick_ReturnsValidVariant(t *testing.T) {
	exp, registered := Get(ExperimentUpgradeButton)
	if !registered {
		t.Fatal("UpgradeButton experiment must be registered")
	}
	allowed := make(map[string]bool, len(exp.Variants))
	for _, name := range exp.Variants {
		allowed[name] = true
	}
	for i := 0; i < 1000; i++ {
		id := fmt.Sprintf("team-%d", i)
		if v := Pick(ExperimentUpgradeButton, id); !allowed[v] {
			t.Fatalf("Pick(%q) returned non-registered variant %q", id, v)
		}
	}
}

// TestPick_DistributionRoughly33 checks the bucket distribution is
// within tolerance of even thirds across a 1000-id sample. A real
// SHA256 won't be exactly 333/333/334 but it will be close; we allow a
// generous +/-5% to keep the test from flaking on sample-size variance
// while still catching a regression where one variant gets >50% of
// traffic.
func TestPick_DistributionRoughly33(t *testing.T) {
const N = 1000
counts := map[string]int{}
for i := 0; i < N; i++ {
id := fmt.Sprintf("identifier-%d", i)
v := Pick(ExperimentUpgradeButton, id)
counts[v]++
}
e, _ := Get(ExperimentUpgradeButton)
want := float64(N) / float64(len(e.Variants))
tolerance := want * 0.15 // 15% — generous for N=1000
for _, v := range e.Variants {
got := float64(counts[v])
if math.Abs(got-want) > tolerance {
t.Errorf("variant %q: got %d, want ~%.0f (±%.0f) — distribution skew",
v, counts[v], want, tolerance)
}
}
// Sanity: counts must sum to N (no identifier dropped).
sum := 0
for _, c := range counts {
sum += c
}
if sum != N {
t.Fatalf("counts sum to %d, want %d (bucket leak)", sum, N)
}
}

// TestPickAll_HasEveryRegistered checks the one-shot helper used by
// /auth/me: PickAll must return exactly one entry per registered
// experiment, and each entry must equal what Pick returns for that
// experiment and the same identifier.
func TestPickAll_HasEveryRegistered(t *testing.T) {
	const id = "team-pickall-test"
	assigned, registered := PickAll(id), All()
	if len(assigned) != len(registered) {
		t.Fatalf("PickAll returned %d entries, registered %d", len(assigned), len(registered))
	}
	for name := range registered {
		if single := Pick(name, id); assigned[name] != single {
			t.Errorf("PickAll[%s]=%q, Pick(%s,id)=%q — disagreement",
				name, assigned[name], name, single)
		}
	}
}

// TestAll_IsCopy checks that the map returned by All() is detached
// from the registry: inserting a key into it must not make that key
// visible through Get.
func TestAll_IsCopy(t *testing.T) {
	snapshot := All()
	snapshot["injected"] = Experiment{Name: "injected"}

	_, leaked := Get("injected")
	if leaked {
		t.Fatal("All() returned the live registry; callers can corrupt it")
	}
}

// TestSaltIsolation_DifferentSaltsDiffer checks that the salt actually
// feeds the hash: with one variant list and two different salts, the
// same id should frequently land in different variants. Over 200 ids
// the two assignments must disagree at least 40% of the time; with
// truly independent hashes the expected disagreement rate is
// (k-1)/k = 66.7% for k=3 variants.
func TestSaltIsolation_DifferentSaltsDiffer(t *testing.T) {
	const (
		N           = 200
		minDisagree = N * 40 / 100
	)
	variants := []string{"a", "b", "c"}

	var disagree int
	for i := 0; i < N; i++ {
		id := fmt.Sprintf("salt-test-%d", i)
		withOne := pickFromVariants(variants, "salt-one", id)
		withTwo := pickFromVariants(variants, "salt-two", id)
		if withOne != withTwo {
			disagree++
		}
	}
	if disagree < minDisagree {
		t.Fatalf("salt isolation weak: only %d/%d disagreements; expected >= %d",
			disagree, N, minDisagree)
	}
}
13 changes: 13 additions & 0 deletions internal/handlers/cli_auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/google/uuid"
"github.com/redis/go-redis/v9"
"instant.dev/internal/config"
"instant.dev/internal/experiments"
"instant.dev/internal/middleware"
"instant.dev/internal/models"
"instant.dev/internal/plans"
Expand Down Expand Up @@ -252,13 +253,25 @@ func (h *CLIAuthHandler) GetCurrentUser(c *fiber.Ctx) error {

plan := h.planRegistry.Get(team.PlanTier)

// Experiment bucketing — identifier is team_id for claimed
// users (always set here since RequireAuth has already run and
// populated GetTeamID). This keeps every authenticated session
// for the same team in the same variant, which is what the
// "Upgrade to Pro" copy test needs (a user must not see two
// labels in one session). Anonymous bucketing uses the
// fingerprint at the unauthenticated provision endpoints —
// /auth/me is auth-only so there's no fingerprint fallback
// path to consider here.
exps := experiments.PickAll(team.ID.String())

resp := fiber.Map{
"ok": true,
"user_id": user.ID,
"team_id": team.ID,
"email": user.Email,
"tier": team.PlanTier,
"plan_display_name": plan.DisplayName,
"experiments": exps,
}
if team.TrialEndsAt.Valid {
resp["trial_ends_at"] = team.TrialEndsAt.Time
Expand Down
Loading