Skip to content

Commit 8d84282

Browse files
committed
feat: add experiment evaluation framework
Add benchmarking and evaluation infrastructure: baseline expansion algorithms (BFS, random, frontier-balanced); statistical testing (Wilcoxon, t-test, bootstrap, effect sizes); IR metrics (NDCG, MAP, MRR, precision/recall@k); path planting for ground truth generation; benchmark dataset loaders and fixtures; and an experiment runner with report generation.
1 parent 7b2d6d0 commit 8d84282

50 files changed

Lines changed: 10680 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
/**
2+
* Unit tests for expansion baseline algorithms
3+
*/
4+
import {describe, expect, it } from "vitest";
5+
6+
import type { GraphExpander, Neighbor } from "../../interfaces/graph-expander";
7+
import { FrontierBalancedExpansion } from "./frontier-balanced";
8+
import { RandomPriorityExpansion } from "./random-priority";
9+
import { StandardBfsExpansion } from "./standard-bfs";
10+
11+
/**
 * In-memory graph expander used as a test double.
 *
 * The graph is built once from an edge list in the constructor. `addEdge` is a
 * no-op, so the adjacency lists are never modified by this class afterwards.
 */
class MockExpander implements GraphExpander<{ id: string }> {
  /** Outgoing neighbours per node; every endpoint seen in `edges` has an entry. */
  private readonly adjacency = new Map<string, Neighbor[]>();

  /**
   * @param edges - `[source, target]` pairs. Duplicate pairs are kept as-is.
   * @param directed - When `false` (the default) every pair is also recorded
   *   in the reverse direction.
   */
  constructor(edges: Array<[string, string]>, directed = false) {
    for (const [source, target] of edges) {
      this.neighborsOf(source).push({ targetId: target, relationshipType: "edge" });

      // Always look the target up so it is registered as a node even when the
      // graph is directed and it has no outgoing edges.
      const targetNeighbors = this.neighborsOf(target);
      if (!directed) {
        targetNeighbors.push({ targetId: source, relationshipType: "edge" });
      }
    }
  }

  /** Returns the stored neighbour list for `nodeId`, or an empty list for an unknown node. */
  async getNeighbors(nodeId: string): Promise<Neighbor[]> {
    return this.adjacency.get(nodeId) ?? [];
  }

  /** Returns the number of stored neighbour entries for `nodeId` (0 for an unknown node). */
  getDegree(nodeId: string): number {
    return this.adjacency.get(nodeId)?.length ?? 0;
  }

  /** Returns `{ id }` when the node appeared in the edge list, otherwise `null`. */
  async getNode(nodeId: string): Promise<{ id: string } | null> {
    return this.adjacency.has(nodeId) ? { id: nodeId } : null;
  }

  /** Intentionally does nothing; the tests do not need edge recording. */
  addEdge(): void {
    // No-op for tests
  }

  /** Returns the neighbour list for `nodeId`, creating an empty one on first use. */
  private neighborsOf(nodeId: string): Neighbor[] {
    let neighbors = this.adjacency.get(nodeId);
    if (neighbors === undefined) {
      neighbors = [];
      this.adjacency.set(nodeId, neighbors);
    }
    return neighbors;
  }
}
59+
60+
// Tests for StandardBfsExpansion, run against small hand-built graphs.
describe("StandardBfsExpansion", () => {
  describe("Basic functionality", () => {
    it("should find paths between two connected nodes", async () => {
      // Simple path: A -- B -- C
      const expander = new MockExpander([
        ["A", "B"],
        ["B", "C"],
      ]);

      const expansion = new StandardBfsExpansion(expander, ["A", "C"]);
      const result = await expansion.run();

      expect(result.paths.length).toBeGreaterThan(0);
      // Both seeds and the intermediate node must have been sampled.
      expect(result.sampledNodes.has("A")).toBe(true);
      expect(result.sampledNodes.has("B")).toBe(true);
      expect(result.sampledNodes.has("C")).toBe(true);
    });

    it("should handle single seed (N=1)", async () => {
      const expander = new MockExpander([
        ["A", "B"],
        ["A", "C"],
        ["B", "D"],
      ]);

      const expansion = new StandardBfsExpansion(expander, ["A"]);
      const result = await expansion.run();

      expect(result.paths).toHaveLength(0); // No paths with single seed
      expect(result.sampledNodes.size).toBeGreaterThan(0);
      expect(result.sampledNodes.has("A")).toBe(true);
    });

    it("should throw for empty seeds", () => {
      const expander = new MockExpander([["A", "B"]]);

      // The constructor itself is expected to reject an empty seed list.
      expect(() => new StandardBfsExpansion(expander, [])).toThrow(
        "At least one seed node is required"
      );
    });

    it("should track expansion statistics", async () => {
      const expander = new MockExpander([
        ["A", "B"],
        ["B", "C"],
      ]);

      const expansion = new StandardBfsExpansion(expander, ["A", "C"]);
      const result = await expansion.run();

      expect(result.stats.nodesExpanded).toBeGreaterThan(0);
      expect(result.stats.edgesTraversed).toBeGreaterThan(0);
      expect(result.stats.iterations).toBeGreaterThan(0);
    });
  });

  describe("Path discovery", () => {
    it("should find multiple paths in a graph with alternatives", async () => {
      // Diamond graph: A -- B -- D, A -- C -- D
      const expander = new MockExpander([
        ["A", "B"],
        ["A", "C"],
        ["B", "D"],
        ["C", "D"],
      ]);

      const expansion = new StandardBfsExpansion(expander, ["A", "D"]);
      const result = await expansion.run();

      // Only the presence of at least one path is asserted here; the number of
      // alternative paths reported is not pinned by this test.
      expect(result.paths.length).toBeGreaterThan(0);
    });

    it("should handle disconnected seeds", async () => {
      // Two disconnected components: A -- B, C -- D
      const expander = new MockExpander([
        ["A", "B"],
        ["C", "D"],
      ]);

      const expansion = new StandardBfsExpansion(expander, ["A", "D"]);
      const result = await expansion.run();

      // No paths between disconnected nodes
      expect(result.paths).toHaveLength(0);
      // But should still sample nodes
      expect(result.sampledNodes.size).toBeGreaterThan(0);
    });
  });
});
150+
151+
// Tests for FrontierBalancedExpansion, run against small hand-built graphs.
describe("FrontierBalancedExpansion", () => {
  describe("Basic functionality", () => {
    it("should find paths between connected nodes", async () => {
      // Simple path: A -- B -- C
      const expander = new MockExpander([
        ["A", "B"],
        ["B", "C"],
      ]);

      const expansion = new FrontierBalancedExpansion(expander, ["A", "C"]);
      const result = await expansion.run();

      expect(result.paths.length).toBeGreaterThan(0);
    });

    it("should track frontier switches", async () => {
      // Path of four nodes: A -- B -- C -- D
      const expander = new MockExpander([
        ["A", "B"],
        ["B", "C"],
        ["C", "D"],
      ]);

      const expansion = new FrontierBalancedExpansion(expander, ["A", "D"]);
      const result = await expansion.run();

      // This only verifies that the statistic is reported as a non-negative
      // number; it does not require that a switch actually occurred.
      expect(result.stats.frontierSwitches).toBeGreaterThanOrEqual(0);
    });

    it("should balance by expanding smaller frontier", async () => {
      // Asymmetric graph - one side has more nodes
      const expander = new MockExpander([
        ["A", "B"],
        ["A", "C"],
        ["A", "D"],
        ["D", "E"],
        ["E", "Z"],
      ]);

      const expansion = new FrontierBalancedExpansion(expander, ["A", "Z"]);
      const result = await expansion.run();

      // Smoke check: the run completes and expands at least one node. The
      // order in which the frontiers are expanded is not asserted.
      expect(result.stats.nodesExpanded).toBeGreaterThan(0);
    });
  });
});
197+
198+
// Tests for RandomPriorityExpansion; the third constructor argument is the RNG seed.
describe("RandomPriorityExpansion", () => {
  describe("Basic functionality", () => {
    it("should find paths between connected nodes", async () => {
      // Simple path: A -- B -- C
      const expander = new MockExpander([
        ["A", "B"],
        ["B", "C"],
      ]);

      const expansion = new RandomPriorityExpansion(expander, ["A", "C"], 42);
      const result = await expansion.run();

      expect(result.paths.length).toBeGreaterThan(0);
    });

    it("should produce reproducible results with same seed", async () => {
      // Diamond graph: A -- B -- C, A -- D -- C. The edge list is shared, but
      // each run gets its own expander instance.
      const edges: Array<[string, string]> = [
        ["A", "B"],
        ["B", "C"],
        ["A", "D"],
        ["D", "C"],
      ];
      const expander1 = new MockExpander(edges);
      const expander2 = new MockExpander(edges);

      const expansion1 = new RandomPriorityExpansion(expander1, ["A", "C"], 42);
      const expansion2 = new RandomPriorityExpansion(expander2, ["A", "C"], 42);

      const result1 = await expansion1.run();
      const result2 = await expansion2.run();

      // Same seed should produce same iteration count
      expect(result1.stats.iterations).toBe(result2.stats.iterations);
    });

    it("should produce different results with different seeds", async () => {
      // Chain N0 -- N1 -- ... -- N10. MockExpander is undirected by default,
      // so each edge is listed once; also listing the reverse pair would
      // duplicate every neighbour entry and double the reported degrees.
      const edges: Array<[string, string]> = [];
      for (let index = 0; index < 10; index++) {
        edges.push([`N${index}`, `N${index + 1}`]);
      }

      const results: number[] = [];
      for (const seed of [1, 2, 3]) {
        const expander = new MockExpander(edges);
        const expansion = new RandomPriorityExpansion(expander, ["N0", "N10"], seed);
        const result = await expansion.run();
        results.push(result.stats.iterations);
      }

      // Variation between seeds is plausible but not guaranteed, so it is
      // deliberately not asserted. This only checks that all three seeded
      // runs completed and reported an iteration count.
      expect(results.length).toBe(3);
    });
  });
});
258+
259+
// Cross-checks the three baseline expansions on the same trivial graph.
describe("Baseline comparison", () => {
  it("all methods should find the same paths in a simple graph", async () => {
    // Simple path: A -- B -- C
    const edges: Array<[string, string]> = [
      ["A", "B"],
      ["B", "C"],
    ];

    const seeds: [string, string] = ["A", "C"];

    // Each algorithm gets its own expander instance built from the same edges.
    const bfsResult = await new StandardBfsExpansion(
      new MockExpander(edges),
      seeds
    ).run();

    const balancedResult = await new FrontierBalancedExpansion(
      new MockExpander(edges),
      seeds
    ).run();

    const randomResult = await new RandomPriorityExpansion(
      new MockExpander(edges),
      seeds,
      42
    ).run();

    // All should find at least one path
    expect(bfsResult.paths.length).toBeGreaterThan(0);
    expect(balancedResult.paths.length).toBeGreaterThan(0);
    expect(randomResult.paths.length).toBeGreaterThan(0);

    // All should sample the same number of nodes in this simple graph. Only
    // the set sizes are compared, not the node ids or the paths themselves.
    expect(bfsResult.sampledNodes.size).toBe(balancedResult.sampledNodes.size);
    expect(bfsResult.sampledNodes.size).toBe(randomResult.sampledNodes.size);
  });
});

0 commit comments

Comments
 (0)