From 38b29c599c3c2890b8c33fb14cd2813a78b9685d Mon Sep 17 00:00:00 2001
From: couragehong <redcourage@cryptolab.co.kr>
Date: Wed, 29 Apr 2026 09:12:51 +0900
Subject: [PATCH] =?UTF-8?q?test(go):=20policy=20rerank=20+=20novelty=20?=
 =?UTF-8?q?=E2=80=94=2019=20fn=20/=2077=20subtest?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 internal/policy/novelty_test.go | 256 +++++++++++++
 internal/policy/rerank_test.go  | 652 ++++++++++++++++++++++++++++++++
 2 files changed, 908 insertions(+)
 create mode 100644 internal/policy/novelty_test.go
 create mode 100644 internal/policy/rerank_test.go

diff --git a/internal/policy/novelty_test.go b/internal/policy/novelty_test.go
new file mode 100644
index 0000000..8224338
--- /dev/null
+++ b/internal/policy/novelty_test.go
@@ -0,0 +1,256 @@
+package policy_test
+
+// Tests for ClassifyNovelty + DefaultNoveltyThresholds.
+//
+// Python parity baseline: agents/tests/test_novelty_check.py (5 cases —
+// one per class + empty memory). Go ports those, then adds:
+//
+//   - all 4 class boundaries with just-below / at / just-above probes
+//   - custom thresholds (Python module constants {0.4, 0.7, 0.93}, which
+//     diverge from runtime D11 values {0.3, 0.7, 0.95}; both must work)
+//   - score formula: round(1.0 - sim, 4) — including the inverted polarity
+//     contract (higher score = more novel)
+//   - DefaultNoveltyThresholds value lock (silent constant drift would
+//     change recall behavior on every capture; gate it explicitly)
+//   - NoveltyClass enum string values lock (wire format)
+//
+// Black-box style — only the public Parse-equivalent (ClassifyNovelty) and
+// exported constants are exercised.
+
+import (
+	"math"
+	"testing"
+
+	"github.com/envector/rune-go/internal/domain"
+	"github.com/envector/rune-go/internal/policy"
+)
+
+// classification — covers all 4 classes with default D11 runtime thresholds
+// {0.3, 0.7, 0.95}. Python parity (test_novelty_check.py:5-42) plus
+// just-below / at-boundary probes that Python does not gate.
+//
+// Boundary contract (policy/novelty.go:37-49):
+//
+//	sim <  0.3   → novel
+//	0.3 ≤ sim <  0.7   → evolution
+//	0.7 ≤ sim <  0.95  → related
+//	sim ≥  0.95  → near_duplicate
+//
+// The boundaries are LEFT-CLOSED, RIGHT-OPEN — i.e., 0.3 itself is
+// evolution, not novel. Tested explicitly because Python uses the same
+// rule but never asserts the boundary.
+func TestClassifyNovelty_DefaultThresholds(t *testing.T) {
+	cases := []struct {
+		name string
+		sim  float64
+		want domain.NoveltyClass
+	}{
+		// novel — sim < 0.3
+		{"novel_zero", 0.0, domain.NoveltyClassNovel},
+		{"novel_python_parity", 0.2, domain.NoveltyClassNovel},
+		{"novel_just_below_boundary", 0.299, domain.NoveltyClassNovel},
+		// evolution — 0.3 ≤ sim < 0.7
+		{"evolution_at_lower_boundary", 0.3, domain.NoveltyClassEvolution},
+		{"evolution_python_parity", 0.5, domain.NoveltyClassEvolution},
+		{"evolution_just_below_upper", 0.6999, domain.NoveltyClassEvolution},
+		// related — 0.7 ≤ sim < 0.95
+		{"related_at_lower_boundary", 0.7, domain.NoveltyClassRelated},
+		{"related_python_parity", 0.85, domain.NoveltyClassRelated},
+		{"related_just_below_upper", 0.9499, domain.NoveltyClassRelated},
+		// near_duplicate — sim ≥ 0.95
+		{"near_duplicate_at_boundary", 0.95, domain.NoveltyClassNearDuplicate},
+		{"near_duplicate_python_parity", 0.97, domain.NoveltyClassNearDuplicate},
+		{"near_duplicate_one", 1.0, domain.NoveltyClassNearDuplicate},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, _ := policy.ClassifyNovelty(tc.sim, policy.DefaultNoveltyThresholds)
+			if got != tc.want {
+				t.Errorf("ClassifyNovelty(%v).class = %q, want %q", tc.sim, got, tc.want)
+			}
+		})
+	}
+}
+
+// score — inverted polarity (1.0 - sim) rounded to 4 decimals. Python
+// parity (test_novelty_check.py:10, 18, 26, 34, 42).
+//
+// Polarity contract (policy/novelty.go:30-31): "higher score means more
+// novel" — intentionally inverted from raw similarity. The tests below
+// pin the contract at both extremes (sim=0 → score=1.0, sim=1 → score=0.0)
+// and at non-trivial midpoints to prevent a future "I thought score was
+// just sim" refactor.
+//
+// The chosen sim values do NOT need to be exactly representable in
+// binary float — round(_, 4) absorbs the natural drift (e.g.,
+// 1.0 - 0.97 = 0.030000000000000027 → *10000 = 300.0000... → round → 300
+// → /10000 = 0.03). What we DO avoid is the Python banker's rounding
+// vs Go round-half-away-from-zero divergence at exact .x...5 boundaries.
+// None of the values below land on such a boundary; the dedicated
+// TestClassifyNovelty_ScoreRounding probes near-boundary cases.
+func TestClassifyNovelty_Score(t *testing.T) {
+	cases := []struct {
+		name string
+		sim  float64
+		want float64
+	}{
+		{"sim_zero_max_novelty", 0.0, 1.0},
+		{"sim_one_zero_novelty", 1.0, 0.0},
+		{"sim_python_parity_near_duplicate_097", 0.97, 0.03},
+		{"sim_half", 0.5, 0.5},
+		{"sim_python_parity_related_085", 0.85, 0.15},
+		{"sim_python_parity_novel_02", 0.2, 0.8},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			_, got := policy.ClassifyNovelty(tc.sim, policy.DefaultNoveltyThresholds)
+			// Score is 4-decimal rounded; compare with epsilon for the
+			// Python pytest.approx equivalent.
+			if math.Abs(got-tc.want) > 1e-9 {
+				t.Errorf("ClassifyNovelty(%v).score = %v, want %v", tc.sim, got, tc.want)
+			}
+		})
+	}
+}
+
+// score rounding — Python uses banker's rounding (round half to even);
+// Go math.Round rounds half away from zero. For score = 1.0 - sim where
+// the resulting (1-sim)*10000 lands on a half boundary, the two
+// languages can disagree by 1e-4.
+//
+// Verified empirically (python3 + Go math.Round):
+//
+//	sim=0.12345 → 1-sim ≈ 0.87654999999... → *10000 ≈ 8765.5 (just over)
+//	  Python round(_, 4) = 0.8765   (banker's rounds half to even)
+//	  Go     0.8766                 (away-from-zero)
+//	sim=0.12355 → 1-sim ≈ 0.87645    → *10000 = 8764.5 (exact half)
+//	  Python = 0.8764, Go = 0.8765
+//
+// The "banker_round_diverges_from_python" case below witnesses the
+// divergence; the other cases stay on Go's contract without ambiguity.
+//
+// TODO(yg): if a future bit-identity audit insists on round-half-to-
+// even parity, swap math.Round for a custom banker's helper in
+// novelty.go and update the divergence case below.
+func TestClassifyNovelty_ScoreRounding(t *testing.T) {
+	cases := []struct {
+		name string
+		sim  float64
+		want float64
+	}{
+		// 1.0 - 0.123456 = 0.876544 → *10000 = 8765.44 → round → 8765 → 0.8765
+		// (5th decimal is 4, no half-boundary, both langs agree)
+		{"rounds_down_below_half", 0.123456, 0.8765},
+		// 1.0 - 0.987654 = 0.012346 (post-format) → *10000 ≈ 123.46 → round → 123 → 0.0123
+		// (float32 reality: 0.012345999... — still < 0.5 fractional)
+		{"rounds_down_far_from_half", 0.987654, 0.0123},
+		// 1.0 - 0.99996 ≈ 4e-5 → *10000 ≈ 0.4 → round → 0 → 0.0
+		{"underflow_to_zero", 0.99996, 0.0},
+		// **Python ↔ Go divergence witness** — sim=0.12355 makes
+		// (1-sim)*10000 = 8764.5 EXACTLY (verified via python3). Python's
+		// banker's rounding picks the even neighbor (8764 → 0.8764); Go's
+		// math.Round picks away-from-zero (8765 → 0.8765). We assert the
+		// Go value here. If this case starts failing because Go gives
+		// 0.8764, that means novelty.go switched to banker's — update the
+		// production-code TODO above.
+		{"banker_round_diverges_from_python", 0.12355, 0.8765},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			_, got := policy.ClassifyNovelty(tc.sim, policy.DefaultNoveltyThresholds)
+			if math.Abs(got-tc.want) > 1e-9 {
+				t.Errorf("score rounding: ClassifyNovelty(%v).score = %v, want %v",
+					tc.sim, got, tc.want)
+			}
+		})
+	}
+}
+
+// custom thresholds — exercises the threshold parameter rather than the
+// runtime D11 default. The values used here are Python module constants
+// from embedding.py:L16-18 (NOVELTY_THRESHOLD_{NOVEL,RELATED,NEAR_DUPLICATE}
+// = 0.4 / 0.7 / 0.93), which Python tests do NOT use as a set
+// (test_novelty_check.py only mixes them per-call).
+//
+// The runtime D11 mismatch (server.py:L102-104 passes 0.3/0.7/0.95
+// explicitly) is part of the agent-delegated SOT (architecture.md §Scope).
+// This test gates that ClassifyNovelty respects the per-call threshold
+// argument — which is the only way the runtime override works.
+func TestClassifyNovelty_CustomThresholds(t *testing.T) {
+	pyModuleThresholds := policy.NoveltyThresholds{
+		Novel:   0.4,
+		Related: 0.7,
+		NearDup: 0.93,
+	}
+
+	cases := []struct {
+		name string
+		sim  float64
+		want domain.NoveltyClass
+	}{
+		// 0.35 is novel under {0.4, ...} but evolution under default {0.3, ...}.
+		{"novel_under_higher_novel_threshold", 0.35, domain.NoveltyClassNovel},
+		// 0.4 is the new evolution boundary under module thresholds.
+		{"evolution_at_module_lower_boundary", 0.4, domain.NoveltyClassEvolution},
+		// 0.93 is near_duplicate under module thresholds but related under default.
+		{"near_duplicate_at_module_boundary", 0.93, domain.NoveltyClassNearDuplicate},
+		// 0.94 is near_duplicate under module but related under default {<0.95}.
+		{"near_duplicate_above_module_boundary", 0.94, domain.NoveltyClassNearDuplicate},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, _ := policy.ClassifyNovelty(tc.sim, pyModuleThresholds)
+			if got != tc.want {
+				t.Errorf("ClassifyNovelty(%v, module).class = %q, want %q",
+					tc.sim, got, tc.want)
+			}
+		})
+	}
+}
+
+// DefaultNoveltyThresholds — runtime D11 values (server.py:L102-104).
+// A silent change here would shift every capture's classification —
+// gate the exact bytes.
+func TestDefaultNoveltyThresholds_LockedToD11Values(t *testing.T) {
+	if got := policy.DefaultNoveltyThresholds.Novel; got != 0.3 {
+		t.Errorf("DefaultNoveltyThresholds.Novel = %v, want 0.3 (D11)", got)
+	}
+	if got := policy.DefaultNoveltyThresholds.Related; got != 0.7 {
+		t.Errorf("DefaultNoveltyThresholds.Related = %v, want 0.7 (D11)", got)
+	}
+	if got := policy.DefaultNoveltyThresholds.NearDup; got != 0.95 {
+		t.Errorf("DefaultNoveltyThresholds.NearDup = %v, want 0.95 (D11)", got)
+	}
+}
+
+// NoveltyClass enum — wire format gate. These string values appear in
+// the capture response JSON and capture_log.jsonl entries (D20). Any
+// change is a breaking schema change — lock them at the test layer.
+//
+// Each constant is its own subtest so a paired-swap mistake (someone
+// reorders BOTH the constant and the want literal together) is visible
+// in the test output as a renamed subtest, and a single-side swap
+// (e.g., changing only the const value) fails the matching subtest.
+func TestNoveltyClass_WireValues(t *testing.T) {
+	cases := []struct {
+		name string
+		got  string
+		want string
+	}{
+		{"novel", string(domain.NoveltyClassNovel), "novel"},
+		{"evolution", string(domain.NoveltyClassEvolution), "evolution"},
+		{"related", string(domain.NoveltyClassRelated), "related"},
+		{"near_duplicate", string(domain.NoveltyClassNearDuplicate), "near_duplicate"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if tc.got != tc.want {
+				t.Errorf("NoveltyClass enum: got %q, want %q", tc.got, tc.want)
+			}
+		})
+	}
+}
diff --git a/internal/policy/rerank_test.go b/internal/policy/rerank_test.go
new file mode 100644
index 0000000..e2f3405
--- /dev/null
+++ b/internal/policy/rerank_test.go
@@ -0,0 +1,652 @@
+package policy_test
+
+// Tests for ApplyRecencyWeighting + FilterByTime + rerank constants.
+//
+// Python parity: agents/retriever/searcher.py:L273-300 (rerank) +
+// L523-559 (filter). Python has NO test file for these — Go is
+// establishing first-time coverage. Every case below either ports a
+// behavioral assertion implicit in the Python code or gates a
+// Go-specific contract (math.Floor age, type-switch on metadata
+// timestamp, sort.SliceStable).
+//
+// Black-box style — exercises only the public surface. Internal
+// arithmetic is gated through observable AdjustedScore values, computed
+// by hand from the documented formula:
+//
+//	adjusted = (SimilarityWeight × raw + RecencyWeight × decay) × statusMul
+//	decay    = 0.5 ^ (ageDays / HalfLifeDays)
+//	ageDays  = max(0, floor((now - ts).hours / 24))
+
+import (
+	"math"
+	"testing"
+	"time"
+
+	"github.com/envector/rune-go/internal/domain"
+	"github.com/envector/rune-go/internal/policy"
+)
+
+// fixedNow — a deterministic wall clock for all timestamp arithmetic.
+// Chosen to be far past anchored UTC midnight so the only floating-point
+// surprise comes from the formula itself, not from clock alignment.
+var fixedNow = time.Date(2026, 4, 29, 0, 0, 0, 0, time.UTC)
+
+// hitWithTS builds a SearchHit whose metadata contains a "timestamp"
+// field of the given concrete type. The two metadata types
+// ApplyRecencyWeighting handles are:
+//
+//	string  — RFC3339 (Python parity: datetime.fromisoformat)
+//	float64 — Unix seconds (Python parity: datetime.fromtimestamp)
+//
+// Anything else (int, nil, missing) leaves ageDays = 0 / decay = 1.0.
+func hitWithTS(score float64, status string, tsValue any) domain.SearchHit {
+	meta := map[string]any{}
+	if tsValue != nil {
+		meta["timestamp"] = tsValue
+	}
+	return domain.SearchHit{
+		RecordID: "dec_test",
+		Score:    score,
+		Status:   status,
+		Metadata: meta,
+	}
+}
+
+// almostEqual — float comparison with epsilon tight enough to catch a
+// single mis-multiplied weight while loose enough to absorb the natural
+// drift of 0.5^(integer/90) compounded with 0.7/0.3 weights.
+func almostEqual(a, b float64) bool { return math.Abs(a-b) < 1e-9 }
+
+// Constants are part of the public contract — silent change shifts every
+// reranked recall result. Lock the bytes.
+//
+// HalfLifeDays / SimilarityWeight / RecencyWeight are defined in
+// rerank.go:L14-18. Python: searcher.py:L31-33.
+func TestRerankConstants_LockedToPythonValues(t *testing.T) {
+	if got := policy.HalfLifeDays; got != 90.0 {
+		t.Errorf("HalfLifeDays = %v, want 90.0 (Python searcher.py:L31)", got)
+	}
+	if got := policy.SimilarityWeight; got != 0.7 {
+		t.Errorf("SimilarityWeight = %v, want 0.7 (Python searcher.py:L32)", got)
+	}
+	if got := policy.RecencyWeight; got != 0.3 {
+		t.Errorf("RecencyWeight = %v, want 0.3 (Python searcher.py:L33)", got)
+	}
+}
+
+// StatusMultiplier — wire-significant: capture/recall behavior depends
+// on these exact multipliers. Lock all 4 keys + values. Python:
+// searcher.py:L36-39.
+func TestStatusMultiplier_AllFourEntriesLocked(t *testing.T) {
+	want := map[string]float64{
+		"accepted":   1.0,
+		"proposed":   0.9,
+		"superseded": 0.5,
+		"reverted":   0.3,
+	}
+	if got := len(policy.StatusMultiplier); got != len(want) {
+		t.Fatalf("StatusMultiplier has %d entries, want %d (silent addition/removal)",
+			got, len(want))
+	}
+	for status, mult := range want {
+		got, ok := policy.StatusMultiplier[status]
+		if !ok {
+			t.Errorf("StatusMultiplier[%q] missing", status)
+			continue
+		}
+		if got != mult {
+			t.Errorf("StatusMultiplier[%q] = %v, want %v", status, got, mult)
+		}
+	}
+}
+
+// TimeRanges — gate every TimeScope key + duration. Python:
+// searcher.py:L532-535. AllTime is intentionally absent (filter
+// short-circuits via the missing-key path).
+func TestTimeRanges_AllFourEntriesLocked(t *testing.T) {
+	want := map[domain.TimeScope]time.Duration{
+		domain.TimeScopeLastWeek:    7 * 24 * time.Hour,
+		domain.TimeScopeLastMonth:   30 * 24 * time.Hour,
+		domain.TimeScopeLastQuarter: 90 * 24 * time.Hour,
+		domain.TimeScopeLastYear:    365 * 24 * time.Hour,
+	}
+	if got := len(policy.TimeRanges); got != len(want) {
+		t.Fatalf("TimeRanges has %d entries, want %d", got, len(want))
+	}
+	for scope, dur := range want {
+		got, ok := policy.TimeRanges[scope]
+		if !ok {
+			t.Errorf("TimeRanges[%v] missing", scope)
+			continue
+		}
+		if got != dur {
+			t.Errorf("TimeRanges[%v] = %v, want %v", scope, got, dur)
+		}
+	}
+	// AllTime is the implicit default — not in the map, filter falls
+	// through to "return all". Lock that contract negatively too.
+	if _, ok := policy.TimeRanges[domain.TimeScopeAllTime]; ok {
+		t.Errorf("TimeRanges[AllTime] must NOT be present (filter relies on absence)")
+	}
+}
+
+// ApplyRecencyWeighting — empty input must not panic and must return an
+// empty (or nil) slice. Cheap but it's the only test that gates the
+// guard against `range hits` on nil.
+func TestApplyRecencyWeighting_EmptyInputDoesNotPanic(t *testing.T) {
+	got := policy.ApplyRecencyWeighting(nil, fixedNow)
+	if len(got) != 0 {
+		t.Errorf("len(got) = %d, want 0", len(got))
+	}
+	got = policy.ApplyRecencyWeighting([]domain.SearchHit{}, fixedNow)
+	if len(got) != 0 {
+		t.Errorf("len(got) = %d, want 0", len(got))
+	}
+}
+
+// ApplyRecencyWeighting — formula gate. Each subtest pins one moving
+// part: the timestamp parser branch (string vs float64 vs missing vs
+// invalid), the half-life decay, and the math.Floor age semantics.
+//
+// Hand-computed expected values are inlined as comments so a reviewer
+// can verify the arithmetic without re-running the formula.
+func TestApplyRecencyWeighting_FormulaGate(t *testing.T) {
+	cases := []struct {
+		name     string
+		hit      domain.SearchHit
+		wantAdj  float64
+		wantNote string
+	}{
+		{
+			name: "no_timestamp_treats_age_as_zero",
+			hit: domain.SearchHit{
+				Score: 1.0, Status: "accepted",
+				Metadata: map[string]any{}, // no "timestamp" key
+			},
+			// decay = 0.5^0 = 1.0
+			// adj = (0.7×1.0 + 0.3×1.0) × 1.0 = 1.0
+			wantAdj:  1.0,
+			wantNote: "missing-ts path: ageDays = 0",
+		},
+		{
+			name: "nil_metadata_treats_age_as_zero",
+			hit: domain.SearchHit{
+				Score: 1.0, Status: "accepted",
+				Metadata: nil,
+			},
+			// Reading from a nil map in Go yields zero value, ok=false —
+			// same path as missing key. This case locks the contract
+			// against a future "metadata = make(map) before read"
+			// refactor that would silently change nil-input semantics.
+			wantAdj:  1.0,
+			wantNote: "nil-metadata path: zero-value read, ageDays = 0",
+		},
+		{
+			name: "rfc3339_same_day_decay_one",
+			hit: hitWithTS(1.0, "accepted",
+				fixedNow.Format(time.RFC3339)),
+			// ageDays = floor(0/24) = 0 → decay = 1.0
+			// adj = (0.7 + 0.3) × 1.0 = 1.0
+			wantAdj:  1.0,
+			wantNote: "RFC3339 path",
+		},
+		{
+			name: "rfc3339_exactly_one_half_life_decay_half",
+			hit: hitWithTS(1.0, "accepted",
+				fixedNow.Add(-90*24*time.Hour).Format(time.RFC3339)),
+			// ageDays = 90 → decay = 0.5^(90/90) = 0.5
+			// adj = (0.7×1.0 + 0.3×0.5) × 1.0 = 0.85
+			wantAdj:  0.85,
+			wantNote: "half-life boundary: 90d → decay 0.5",
+		},
+		{
+			name: "rfc3339_two_half_lives_decay_quarter",
+			hit: hitWithTS(1.0, "accepted",
+				fixedNow.Add(-180*24*time.Hour).Format(time.RFC3339)),
+			// ageDays = 180 → decay = 0.5^2 = 0.25
+			// adj = (0.7×1.0 + 0.3×0.25) × 1.0 = 0.775
+			wantAdj:  0.775,
+			wantNote: "two half-lives",
+		},
+		{
+			name: "future_timestamp_clamps_age_to_zero",
+			hit: hitWithTS(1.0, "accepted",
+				fixedNow.Add(48*time.Hour).Format(time.RFC3339)),
+			// (now - ts) = -48h → -2.0 days → floor = -2 → max(0, -2) = 0
+			// decay = 1.0; adj = 1.0
+			wantAdj:  1.0,
+			wantNote: "future ts: math.Max(0, ...) clamps Python parity",
+		},
+		{
+			name: "partial_day_age_floors_down",
+			hit: hitWithTS(1.0, "accepted",
+				fixedNow.Add(-36*time.Hour).Format(time.RFC3339)),
+			// (now - ts).hours/24 = 1.5 → floor = 1.0 (not 1.5)
+			// decay = 0.5^(1/90) ≈ 0.9923225751
+			// adj = (0.7×1.0 + 0.3×0.9923225751) × 1.0 ≈ 0.9976967725
+			//
+			// wantAdj uses LITERAL 0.7 / 0.3 / 0.5 / 1.0 / 90.0 — not the
+			// exported policy.* constants — so a typo in any constant
+			// (e.g., HalfLifeDays → 100) makes impl and want diverge.
+			// The formula form is intentional: it documents the spec
+			// inline, and binary float drift between this expression and
+			// the impl's math.Pow call is identical (same operands), so
+			// almostEqual passes only when the spec arithmetic matches.
+			// A swap of math.Floor → math.Round would also fail
+			// (1.5 rounds to 2, decay base shifts to 0.5^(2/90)).
+			wantAdj:  0.7 + 0.3*math.Pow(0.5, 1.0/90.0),
+			wantNote: "math.Floor matches Python timedelta.days int truncation",
+		},
+		{
+			name: "float64_unix_timestamp_path",
+			hit: hitWithTS(1.0, "accepted",
+				float64(fixedNow.Add(-90*24*time.Hour).Unix())),
+			// Same as "rfc3339_exactly_one_half_life_decay_half" via
+			// the float64 branch.
+			wantAdj:  0.85,
+			wantNote: "float64 path: datetime.fromtimestamp parity",
+		},
+		{
+			name: "invalid_rfc3339_string_treats_age_as_zero",
+			hit:  hitWithTS(1.0, "accepted", "not-a-timestamp"),
+			// time.Parse fails → ageDays stays 0 → decay = 1.0
+			wantAdj:  1.0,
+			wantNote: "Python except (ValueError, TypeError): ageDays unchanged",
+		},
+		{
+			name: "int_timestamp_is_skipped_go_specific_zero_age_masks_divergence",
+			hit:  hitWithTS(1.0, "accepted", int(fixedNow.Unix())),
+			// Same-day int — both Python (coerce → 0d) and Go (skip → 0d)
+			// converge on ageDays=0. Locks the Go contract on this safe path.
+			// See "int_timestamp_90d_diverges_from_python" below for the
+			// case where the divergence becomes observable.
+			wantAdj:  1.0,
+			wantNote: "same-day int: both langs reach age=0",
+		},
+		{
+			name: "int_timestamp_90d_diverges_from_python",
+			hit:  hitWithTS(1.0, "accepted", int(fixedNow.Add(-90*24*time.Hour).Unix())),
+			// **Python ↔ Go DIVERGENCE** (Phase-A documented gap).
+			// Python (searcher.py:L286-292): if ts_str is not str, runs
+			// `datetime.fromtimestamp(float(ts_str), tz=...)` which coerces
+			// int → 90d age → decay 0.5 → adj 0.85.
+			// Go (rerank.go:L49-56): type-switch only matches string and
+			// float64; int falls through to ageDays=0 → decay=1.0 → adj=1.0.
+			// Locked at Go semantics here. In production, JSON-decoded
+			// maps yield float64 (encoding/json), so this divergence is
+			// unreachable on the wire — but raw Go callers passing int
+			// would silently lose recency weighting.
+			// TODO(yg): if a future bit-identity audit insists on parity,
+			// extend rerank.go's type switch to handle int / int64.
+			wantAdj:  1.0,
+			wantNote: "Python would yield 0.85 here (90d coerced); Go locks 1.0",
+		},
+		{
+			name: "rfc3339_with_explicit_offset_zero",
+			hit: hitWithTS(1.0, "accepted",
+				fixedNow.Add(-90*24*time.Hour).Format("2006-01-02T15:04:05-07:00")),
+			// Python uses `ts_str.replace("Z", "+00:00")` then fromisoformat;
+			// Go's time.RFC3339 layout natively accepts both `Z` and
+			// `+00:00`. Same expected adj as the same-90d-`Z` case (0.85).
+			wantAdj:  0.85,
+			wantNote: "RFC3339 `+00:00` suffix variant parity",
+		},
+		{
+			name: "empty_string_timestamp_treats_age_as_zero",
+			hit:  hitWithTS(1.0, "accepted", ""),
+			// Python `if ts_str:` short-circuits on empty (falsy); Go's
+			// type assertion to string of "" succeeds, time.Parse("")
+			// fails → ageDays unchanged = 0. Both paths reach decay=1.0.
+			wantAdj:  1.0,
+			wantNote: "empty string: same outcome via different code paths",
+		},
+		{
+			name: "wrong_type_metadata_treats_age_as_zero",
+			hit:  hitWithTS(1.0, "accepted", []string{"weird"}),
+			// Symmetric with the FilterByTime "wrong_type_ts" case.
+			// Type-switch matches neither string nor float64 → ageDays=0.
+			// Python coerces via `float(["weird"])` → TypeError caught → age=0.
+			wantAdj:  1.0,
+			wantNote: "wrong type: type-switch fall-through symmetry with FilterByTime",
+		},
+		{
+			name: "score_half_with_180d_decay_gates_raw_score_scaling",
+			hit: hitWithTS(0.5, "accepted",
+				fixedNow.Add(-180*24*time.Hour).Format(time.RFC3339)),
+			// ageDays = 180 → decay = 0.25
+			// adj = (0.7×0.5 + 0.3×0.25) × 1.0 = 0.35 + 0.075 = 0.425
+			// Without this case, mutations like `SimilarityWeight*r.Score`
+			// → `*math.Sqrt(r.Score)` are invisible (1^x == 1 makes
+			// score=1.0 cases tautological).
+			wantAdj:  0.425,
+			wantNote: "score≠1.0 + decay≠1.0: gates raw-score multiplication",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			hits := []domain.SearchHit{tc.hit}
+			policy.ApplyRecencyWeighting(hits, fixedNow)
+			got := hits[0].AdjustedScore
+			if !almostEqual(got, tc.wantAdj) {
+				t.Errorf("AdjustedScore = %v, want %v (%s)", got, tc.wantAdj, tc.wantNote)
+			}
+		})
+	}
+}
+
+// status multiplier — gate every status (4 known + unknown default + empty).
+// Uses Score=0.5 (NOT 1.0) and decay=1.0 so the bracket evaluates to
+//
+//	(0.7×0.5 + 0.3×1.0) = 0.65
+//
+// not 1.0 — preventing a swap mutation like
+// `(RecencyWeight*raw + SimilarityWeight*decay)` from passing
+// coincidentally (which it would if both raw and decay were 1.0).
+// Expected adjusted = 0.65 × statusMul.
+func TestApplyRecencyWeighting_StatusMultiplierAllValues(t *testing.T) {
+	const bracket = 0.65 // 0.7×0.5 + 0.3×1.0
+	cases := []struct {
+		name   string
+		status string
+		want   float64
+	}{
+		{"accepted", "accepted", bracket * 1.0},
+		{"proposed", "proposed", bracket * 0.9},
+		{"superseded", "superseded", bracket * 0.5},
+		{"reverted", "reverted", bracket * 0.3},
+		// unknown status defaults to 1.0 (Python: dict.get(s, 1.0))
+		{"unknown_value", "weird_unknown_value", bracket * 1.0},
+		{"empty_string", "", bracket * 1.0},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			hits := []domain.SearchHit{
+				hitWithTS(0.5, tc.status, fixedNow.Format(time.RFC3339)),
+			}
+			policy.ApplyRecencyWeighting(hits, fixedNow)
+			if !almostEqual(hits[0].AdjustedScore, tc.want) {
+				t.Errorf("status %q: AdjustedScore = %v, want %v",
+					tc.status, hits[0].AdjustedScore, tc.want)
+			}
+		})
+	}
+}
+
+// sort — descending by AdjustedScore. Python: list.sort(key=..., reverse=True).
+//
+// Critical contract: sort key is AdjustedScore, NOT raw Score. The case
+// below picks values so a "sort by raw Score desc" mutation produces a
+// DIFFERENT order than the spec — gates the sort key, not just the
+// sort direction.
+//
+//	old_high_raw    Score=0.99, age=10·90d → decay=2^-10≈0.000977
+//	                adj = (0.7×0.99 + 0.3×0.000977)×1.0 ≈ 0.69329
+//	fresh_med_raw   Score=0.50, age=0      → decay=1.0
+//	                adj = (0.7×0.50 + 0.3×1.0)×1.0 = 0.65
+//	fresh_low_raw   Score=0.30, age=0      → decay=1.0
+//	                adj = (0.7×0.30 + 0.3×1.0)×1.0 = 0.51
+//
+// Sort by raw Score desc would give [old_high_raw, fresh_med_raw,
+// fresh_low_raw]. Sort by adjusted desc gives the SAME order here too.
+// So we add a 4th hit whose adjusted is BETWEEN old_high_raw (0.69329)
+// and fresh_low_raw (0.51) but whose RAW is greater than old_high_raw —
+// impossible because 0.99 is the cap. Use a different lever:
+//
+//	old_top_raw     Score=0.99, age=10·90d → adj ≈ 0.69329 (rank 2)
+//	fresh_top_raw   Score=0.99, age=0      → adj = 0.993    (rank 1)
+//
+// "sort by raw" sees both as 0.99 → tied. Stable sort would preserve
+// input order [old_top_raw first → fresh_top_raw second]. Spec wants
+// fresh_top_raw first. This isolates the sort key from the sort
+// direction.
+//
+// All hits have status="accepted" so status_mul=1.0 doesn't move ranks.
+func TestApplyRecencyWeighting_SortsDescending(t *testing.T) {
+	hits := []domain.SearchHit{
+		// (input pos 0) Old, high raw — would be first if sort-by-raw.
+		// adj = (0.7×0.99 + 0.3×2^-10)×1.0 ≈ 0.6932930...
+		{RecordID: "old_top_raw", Score: 0.99, Status: "accepted",
+			Metadata: map[string]any{"timestamp": fixedNow.Add(-900 * 24 * time.Hour).Format(time.RFC3339)}},
+		// (input pos 1) Fresh, medium raw.
+		// adj = (0.7×0.50 + 0.3×1.0)×1.0 = 0.65
+		{RecordID: "fresh_med_raw", Score: 0.50, Status: "accepted",
+			Metadata: map[string]any{"timestamp": fixedNow.Format(time.RFC3339)}},
+		// (input pos 2) Fresh, identical raw to old_top_raw.
+		// Sort-by-raw ties with old_top_raw; spec wants this FIRST.
+		// adj = (0.7×0.99 + 0.3×1.0)×1.0 = 0.993
+		{RecordID: "fresh_top_raw", Score: 0.99, Status: "accepted",
+			Metadata: map[string]any{"timestamp": fixedNow.Format(time.RFC3339)}},
+	}
+	policy.ApplyRecencyWeighting(hits, fixedNow)
+
+	// Expected by adjusted desc: fresh_top_raw (0.993) > old_top_raw
+	// (~0.6933) > fresh_med_raw (0.65). A "sort by raw" mutation would
+	// instead tie fresh_top_raw and old_top_raw at 0.99 and (with stable
+	// sort) leave them in input order: [old_top_raw, fresh_top_raw, ...].
+	wantOrder := []string{"fresh_top_raw", "old_top_raw", "fresh_med_raw"}
+	for i, want := range wantOrder {
+		if hits[i].RecordID != want {
+			t.Errorf("position %d: got %q, want %q (full order: %v)",
+				i, hits[i].RecordID, want, recordIDs(hits))
+		}
+	}
+}
+
+// stable sort — when two hits have identical adjusted_score, their
+// relative input order must survive. Python's list.sort is stable;
+// Go uses sort.SliceStable. Without this test, a future maintainer
+// could swap to sort.Slice (unstable) and silently re-order ties.
+func TestApplyRecencyWeighting_StableSortPreservesInputOrderOnTies(t *testing.T) {
+	mkSame := func(id string) domain.SearchHit {
+		return domain.SearchHit{
+			RecordID: id, Score: 0.5, Status: "accepted",
+			Metadata: map[string]any{"timestamp": fixedNow.Format(time.RFC3339)},
+		}
+	}
+	// All three hits produce identical adjusted_score = 0.65.
+	hits := []domain.SearchHit{mkSame("first"), mkSame("second"), mkSame("third")}
+	policy.ApplyRecencyWeighting(hits, fixedNow)
+
+	for i, want := range []string{"first", "second", "third"} {
+		if hits[i].RecordID != want {
+			t.Errorf("stable sort lost input order: pos %d got %q, want %q (order: %v)",
+				i, hits[i].RecordID, want, recordIDs(hits))
+		}
+	}
+}
+
+// in-place mutation — ApplyRecencyWeighting mutates the slice it was
+// given AND returns it (Python: searcher.py:L299 results.sort(...);
+// return results — same semantics). Both behaviors are part of the
+// contract; the returned reference must share the backing array.
+//
+// Uses 2 hits with DIFFERENT scores so a "make + copy + sort + return"
+// refactor (which would still produce position-0 alias by accident if
+// ranks happened to land that way) is detected by both element address
+// AND backing-array identity.
+func TestApplyRecencyWeighting_MutatesAndReturnsSameSlice(t *testing.T) {
+	hits := []domain.SearchHit{
+		hitWithTS(0.5, "accepted", fixedNow.Format(time.RFC3339)),
+		hitWithTS(0.9, "accepted", fixedNow.Format(time.RFC3339)),
+	}
+	// Capture address of the underlying array BEFORE the call. After
+	// sort.SliceStable swaps elements in place, &hits[0] still refers
+	// to the same array slot, but the SearchHit there may have moved.
+	// Backing-array identity is what we want to assert.
+	wantBacking := &hits[0]
+
+	got := policy.ApplyRecencyWeighting(hits, fixedNow)
+
+	if &got[0] != wantBacking {
+		t.Errorf("returned slice does not share backing array with input " +
+			"— caller relying on in-place mutation breaks")
+	}
+	if len(got) != len(hits) {
+		t.Errorf("len(got)=%d, len(hits)=%d — copy semantics suspected", len(got), len(hits))
+	}
+	// Expected post-sort order: 0.9 first (adj 0.93), 0.5 second (adj 0.65).
+	if hits[0].Score != 0.9 || hits[1].Score != 0.5 {
+		t.Errorf("input slice not sorted in place: %v", []float64{hits[0].Score, hits[1].Score})
+	}
+	if !almostEqual(hits[0].AdjustedScore, 0.93) {
+		t.Errorf("AdjustedScore at [0] = %v, want 0.93", hits[0].AdjustedScore)
+	}
+}
+
+// FilterByTime — every scope keeps fresh records and drops old ones at
+// the configured cutoff. Boundary precision is asserted in a separate
+// test below.
+func TestFilterByTime_EachScopeCutoffWorks(t *testing.T) {
+	cases := []struct {
+		scope    domain.TimeScope
+		freshAge time.Duration // age that should be kept
+		oldAge   time.Duration // age that should be dropped
+	}{
+		{domain.TimeScopeLastWeek, 1 * 24 * time.Hour, 8 * 24 * time.Hour},
+		{domain.TimeScopeLastMonth, 5 * 24 * time.Hour, 31 * 24 * time.Hour},
+		{domain.TimeScopeLastQuarter, 30 * 24 * time.Hour, 91 * 24 * time.Hour},
+		{domain.TimeScopeLastYear, 100 * 24 * time.Hour, 366 * 24 * time.Hour},
+	}
+
+	for _, tc := range cases {
+		t.Run(string(tc.scope), func(t *testing.T) {
+			hits := []domain.SearchHit{
+				{RecordID: "fresh", Metadata: map[string]any{
+					"timestamp": fixedNow.Add(-tc.freshAge).Format(time.RFC3339)}},
+				{RecordID: "old", Metadata: map[string]any{
+					"timestamp": fixedNow.Add(-tc.oldAge).Format(time.RFC3339)}},
+			}
+			got := policy.FilterByTime(hits, tc.scope, fixedNow)
+			if len(got) != 1 || got[0].RecordID != "fresh" {
+				t.Errorf("%v: got %v, want [fresh]", tc.scope, recordIDs(got))
+			}
+		})
+	}
+}
+
+// AllTime + unknown scope — both must return input unchanged. AllTime
+// is the documented default; unknown is defensive (invalid input
+// shouldn't drop records silently).
+func TestFilterByTime_AllTimeAndUnknownScopeReturnUnchanged(t *testing.T) {
+	hits := []domain.SearchHit{
+		{RecordID: "ancient", Metadata: map[string]any{
+			"timestamp": fixedNow.Add(-10 * 365 * 24 * time.Hour).Format(time.RFC3339)}},
+	}
+
+	gotAllTime := policy.FilterByTime(hits, domain.TimeScopeAllTime, fixedNow)
+	if len(gotAllTime) != 1 {
+		t.Errorf("AllTime: dropped record, got %d hits", len(gotAllTime))
+	}
+
+	gotUnknown := policy.FilterByTime(hits, domain.TimeScope("not_a_scope"), fixedNow)
+	if len(gotUnknown) != 1 {
+		t.Errorf("unknown scope: dropped record, got %d hits", len(gotUnknown))
+	}
+}
+
+// FilterByTime keeps records with no timestamp / invalid timestamp.
+// Python parity (searcher.py:L546-557): the explicit "else: append"
+// branches for both missing-key and parse-failure paths.
+//
+// Rationale: dropping unparseable records would silently lose data on
+// schema drift; keeping them surfaces it via downstream search results.
+func TestFilterByTime_KeepsRecordsWithMissingOrInvalidTimestamp(t *testing.T) {
+	hits := []domain.SearchHit{
+		{RecordID: "no_ts", Metadata: map[string]any{}},
+		{RecordID: "invalid_string_ts", Metadata: map[string]any{
+			"timestamp": "not-a-timestamp"}},
+		{RecordID: "wrong_type_ts", Metadata: map[string]any{
+			"timestamp": []string{"weird"}}},
+	}
+	got := policy.FilterByTime(hits, domain.TimeScopeLastWeek, fixedNow)
+	if len(got) != 3 {
+		t.Errorf("expected all 3 kept (Python parity), got %d (%v)", len(got), recordIDs(got))
+	}
+}
+
+// FilterByTime cutoff boundary — `ts.Before(cutoff)` is strict. A
+// record with ts == cutoff is kept; one nanosecond older is dropped.
+// This is the exact boundary the Python code asserts via `ts >= cutoff`.
+//
+// Uses time.RFC3339Nano on both sides — plain time.RFC3339 truncates
+// sub-second precision (the older record would round-trip as exactly
+// 1 second older, and a regression in the boundary handling at
+// nanosecond precision would slip through silently).
+func TestFilterByTime_CutoffBoundaryIsInclusiveOfCutoffTime(t *testing.T) {
+	// LastWeek scope → cutoff = fixedNow - 7d.
+	cutoff := fixedNow.Add(-7 * 24 * time.Hour)
+	hits := []domain.SearchHit{
+		{RecordID: "at_cutoff", Metadata: map[string]any{
+			"timestamp": cutoff.Format(time.RFC3339Nano)}},
+		{RecordID: "one_ns_older", Metadata: map[string]any{
+			"timestamp": cutoff.Add(-time.Nanosecond).Format(time.RFC3339Nano)}},
+	}
+	got := policy.FilterByTime(hits, domain.TimeScopeLastWeek, fixedNow)
+
+	if len(got) != 1 || got[0].RecordID != "at_cutoff" {
+		t.Errorf("boundary contract: ts==cutoff must be kept, ts<cutoff dropped; got %v",
+			recordIDs(got))
+	}
+}
+
+// FilterByTime — future timestamps are kept. The cutoff branch only
+// drops when ts.Before(cutoff); a future ts is past cutoff in the
+// other direction, so it stays.
+//
+// Uses 30-day-FUTURE timestamps against LastWeek (7d range). A
+// regression that swapped the cutoff comparison to `abs(now-ts) >
+// range → drop` would drop these (|30d| > 7d), so the test fails on
+// that mutation. A 48h-future against LastWeek would be vacuous since
+// |2d| < 7d — kept either way.
+func TestFilterByTime_FutureTimestampIsKept(t *testing.T) {
+	farFuture := 30 * 24 * time.Hour
+	hits := []domain.SearchHit{
+		{RecordID: "future_string", Metadata: map[string]any{
+			"timestamp": fixedNow.Add(farFuture).Format(time.RFC3339)}},
+		{RecordID: "future_float64", Metadata: map[string]any{
+			"timestamp": float64(fixedNow.Add(farFuture).Unix())}},
+	}
+	got := policy.FilterByTime(hits, domain.TimeScopeLastWeek, fixedNow)
+	if len(got) != 2 {
+		t.Errorf("expected both future records kept (|30d-future| > 7d range "+
+			"would still be dropped under abs-distance regression), got %d (%v)",
+			len(got), recordIDs(got))
+	}
+}
+
+// FilterByTime — float64 unix timestamp path also filters correctly.
+// Same scenarios as the RFC3339 case above but routed through the
+// float64 branch.
+func TestFilterByTime_Float64UnixTimestampPath(t *testing.T) {
+	hits := []domain.SearchHit{
+		{RecordID: "fresh", Metadata: map[string]any{
+			"timestamp": float64(fixedNow.Add(-1 * 24 * time.Hour).Unix())}},
+		{RecordID: "old", Metadata: map[string]any{
+			"timestamp": float64(fixedNow.Add(-8 * 24 * time.Hour).Unix())}},
+	}
+	got := policy.FilterByTime(hits, domain.TimeScopeLastWeek, fixedNow)
+	if len(got) != 1 || got[0].RecordID != "fresh" {
+		t.Errorf("float64 ts: got %v, want [fresh]", recordIDs(got))
+	}
+}
+
+// FilterByTime empty input — no panic, returns empty.
+func TestFilterByTime_EmptyInput(t *testing.T) {
+	if got := policy.FilterByTime(nil, domain.TimeScopeLastWeek, fixedNow); len(got) != 0 {
+		t.Errorf("nil input: got %d hits, want 0", len(got))
+	}
+	if got := policy.FilterByTime([]domain.SearchHit{}, domain.TimeScopeLastWeek, fixedNow); len(got) != 0 {
+		t.Errorf("empty input: got %d hits, want 0", len(got))
+	}
+}
+
+// recordIDs — small helper for cleaner failure messages.
+func recordIDs(hits []domain.SearchHit) []string {
+	out := make([]string, len(hits))
+	for i, h := range hits {
+		out[i] = h.RecordID
+	}
+	return out
+}