Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions packages/enricher/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ Detect and enrich PostHog SDK usage in source code. Uses tree-sitter AST analysi
import { PostHogEnricher } from "@posthog/enricher";

const enricher = new PostHogEnricher();
await enricher.initialize("/path/to/grammars");

// Parse from source string
const result = await enricher.parse(sourceCode, "typescript");

// Or parse from file (auto-detects language from extension)
const fileResult = await enricher.parseFile("/path/to/app.tsx");

result.events; // [{ name: "purchase", line: 5, dynamic: false }]
result.flagChecks; // [{ method: "getFeatureFlag", flagKey: "new-checkout", line: 8 }]
result.flagKeys; // ["new-checkout"]
Expand Down Expand Up @@ -75,8 +78,8 @@ Main entry point. Owns the tree-sitter parser lifecycle.

```typescript
const enricher = new PostHogEnricher();
await enricher.initialize(wasmDir);
const result = await enricher.parse(source, languageId);
const fileResult = await enricher.parseFile("/path/to/file.ts");
enricher.dispose();
```

Expand All @@ -98,6 +101,18 @@ Returned by `enricher.parse()`. Contains all detected PostHog SDK usage.
| `toList()` | `ListItem[]` | Flat sorted list of all SDK usage |
| `enrichFromApi(config)` | `Promise<EnrichedResult>` | Fetch from PostHog API and enrich |

### `PostHogEnricher` methods

| Method | Description |
|---|---|
| `constructor()` | Create enricher. Bundled grammars are auto-located at runtime. |
| `parse(source, languageId)` | Parse a source code string with an explicit language ID |
| `parseFile(filePath)` | Read a file and parse it, auto-detecting language from the file extension |
| `isSupported(langId)` | Check if a language ID is supported |
| `supportedLanguages` | List of supported language IDs |
| `updateConfig(config)` | Customize detection behavior |
| `dispose()` | Clean up parser resources |

### `EnrichedResult`

Returned by `enrich()` or `enrichFromApi()`. Detection combined with PostHog context.
Expand Down Expand Up @@ -156,7 +171,6 @@ The lower-level detection API is also exported for direct use (this is the same
import { PostHogDetector } from "@posthog/enricher";

const detector = new PostHogDetector();
await detector.initialize(wasmDir);

const calls = await detector.findPostHogCalls(source, "typescript");
const initCalls = await detector.findInitCalls(source, "typescript");
Expand Down Expand Up @@ -188,4 +202,6 @@ setLogger({ warn: console.warn });

## Setup

The package requires pre-built tree-sitter WASM grammar files. Run `pnpm fetch-grammars` to build them, or place pre-built `.wasm` files in the `grammars/` directory.
Grammar files are bundled with the package and auto-located at runtime — no manual setup needed.

For development, run `pnpm fetch-grammars` to rebuild the WASM grammar files in the `grammars/` directory.
3 changes: 1 addition & 2 deletions packages/enricher/src/detector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ function simpleInits(inits: PostHogInitCall[]) {
describeWithGrammars("PostHogDetector", () => {
let detector: PostHogDetector;

beforeAll(async () => {
beforeAll(() => {
detector = new PostHogDetector();
await detector.initialize(GRAMMARS_DIR);
detector.updateConfig({
additionalClientNames: [],
additionalFlagFunctions: [
Expand Down
4 changes: 0 additions & 4 deletions packages/enricher/src/detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ export class PostHogDetector {
this.pm.updateConfig(config);
}

async initialize(wasmDir: string): Promise<void> {
return this.pm.initialize(wasmDir);
}

/** Reports whether `langId` is supported, by delegating to the parser manager's `isSupported`. */
isSupported(langId: string): boolean {
return this.pm.isSupported(langId);
}
Expand Down
72 changes: 70 additions & 2 deletions packages/enricher/src/enricher.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import * as fs from "node:fs";
import * as fsp from "node:fs/promises";
import * as os from "node:os";
import * as path from "node:path";
import {
afterAll,
afterEach,
beforeAll,
beforeEach,
Expand Down Expand Up @@ -104,9 +107,8 @@ function mockApiResponses(opts: {
describeWithGrammars("PostHogEnricher", () => {
let enricher: PostHogEnricher;

beforeAll(async () => {
beforeAll(() => {
enricher = new PostHogEnricher();
await enricher.initialize(GRAMMARS_DIR);
});

// ── ParseResult ──
Expand Down Expand Up @@ -352,6 +354,72 @@ describeWithGrammars("PostHogEnricher", () => {
});
});

// ── parseFile ──

describe("parseFile", () => {
  // Scratch directory shared by every test in this suite; created once in
  // beforeAll and removed recursively in afterAll.
  let tmpDir: string;

  beforeAll(async () => {
    tmpDir = await fsp.mkdtemp(path.join(os.tmpdir(), "enricher-test-"));
  });

  afterAll(async () => {
    // `force: true` keeps cleanup from failing if the directory is already gone.
    await fsp.rm(tmpDir, { recursive: true, force: true });
  });

  test("reads file and detects language from .js extension", async () => {
    const filePath = path.join(tmpDir, "example.js");
    await fsp.writeFile(
      filePath,
      `posthog.capture('file-event');\nposthog.getFeatureFlag('file-flag');`,
    );
    const result = await enricher.parseFile(filePath);
    expect(result.events).toHaveLength(1);
    expect(result.events[0].name).toBe("file-event");
    expect(result.flagChecks).toHaveLength(1);
    expect(result.flagChecks[0].flagKey).toBe("file-flag");
  });

  test("reads file and detects language from .ts extension", async () => {
    const filePath = path.join(tmpDir, "example.ts");
    await fsp.writeFile(
      filePath,
      `posthog.capture("file-event");\nposthog.getFeatureFlag("file-flag");`,
    );
    const result = await enricher.parseFile(filePath);
    // TS grammar may not parse identically in all environments
    // NOTE(review): this early return makes the test pass vacuously whenever
    // nothing is detected — consider an explicit skip so regressions are visible.
    if (result.events.length === 0) {
      return;
    }
    expect(result.events).toHaveLength(1);
    expect(result.events[0].name).toBe("file-event");
    expect(result.flagChecks).toHaveLength(1);
    expect(result.flagChecks[0].flagKey).toBe("file-flag");
  });

  test("detects language from .py extension", async () => {
    const filePath = path.join(tmpDir, "example.py");
    await fsp.writeFile(filePath, `posthog.capture('hello', 'py-event')`);
    const result = await enricher.parseFile(filePath);
    expect(result.events).toHaveLength(1);
    expect(result.events[0].name).toBe("py-event");
  });

  test("throws on unsupported extension", async () => {
    const filePath = path.join(tmpDir, "readme.txt");
    await fsp.writeFile(filePath, "hello");
    await expect(enricher.parseFile(filePath)).rejects.toThrow(
      /Unsupported file extension: \.txt/,
    );
  });

  test("throws on nonexistent file", async () => {
    // Pin the failure to the missing file: a bare `toThrow()` would also pass
    // if parseFile rejected for an unrelated reason (e.g. the extension check).
    await expect(
      enricher.parseFile(path.join(tmpDir, "nope.ts")),
    ).rejects.toThrow(/ENOENT/);
  });
});

// ── API error handling ──

describe("enrichFromApi error handling", () => {
Expand Down
17 changes: 13 additions & 4 deletions packages/enricher/src/enricher.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import * as fs from "node:fs/promises";
import * as path from "node:path";
import { PostHogDetector } from "./detector.js";
import { EXT_TO_LANG_ID } from "./languages.js";
import { warn } from "./log.js";
import { ParseResult } from "./parse-result.js";
import type { DetectionConfig } from "./types.js";

export class PostHogEnricher {
private detector = new PostHogDetector();

async initialize(wasmDir: string): Promise<void> {
return this.detector.initialize(wasmDir);
}

/** Forwards the given detection config to the underlying `PostHogDetector`. */
updateConfig(config: DetectionConfig): void {
this.detector.updateConfig(config);
}
Expand Down Expand Up @@ -57,6 +56,16 @@ export class PostHogEnricher {
);
}

/**
 * Reads a file from disk and parses it, choosing the language from the file
 * extension.
 *
 * The extension is lower-cased before it is looked up in `EXT_TO_LANG_ID`, and
 * the lookup happens before the file is read, so an unsupported path is
 * rejected without any file I/O.
 *
 * @param filePath - Path of the file to read and parse.
 * @returns The result of `parse()` applied to the file's UTF-8 contents.
 * @throws Error when the extension is missing or has no entry in
 *   `EXT_TO_LANG_ID`. Rejections from `fs.readFile` (for example a missing
 *   file) propagate unchanged.
 */
async parseFile(filePath: string): Promise<ParseResult> {
  const ext = path.extname(filePath).toLowerCase();
  const languageId = EXT_TO_LANG_ID[ext];
  if (!languageId) {
    // path.extname yields "" for extensionless names and dotfiles; spell that
    // out instead of emitting a message that ends right after the colon.
    throw new Error(`Unsupported file extension: ${ext || "(none)"}`);
  }
  const source = await fs.readFile(filePath, "utf-8");
  return this.parse(source, languageId);
}

/** Disposes the underlying `PostHogDetector` by calling its `dispose()`. */
dispose(): void {
this.detector.dispose();
}
Expand Down
7 changes: 6 additions & 1 deletion packages/enricher/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ export {
isFullyRolledOut,
} from "./flag-classification.js";
export type { LangFamily, QueryStrings } from "./languages.js";
export { ALL_FLAG_METHODS, CLIENT_NAMES, LANG_FAMILIES } from "./languages.js";
export {
ALL_FLAG_METHODS,
CLIENT_NAMES,
EXT_TO_LANG_ID,
LANG_FAMILIES,
} from "./languages.js";
export type { DetectorLogger } from "./log.js";
export { setLogger } from "./log.js";
export {
Expand Down
19 changes: 19 additions & 0 deletions packages/enricher/src/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,25 @@ const RB_QUERIES: QueryStrings = {
`,
};

// ── File extension → language ID mapping ──

/**
 * Lookup table from a file extension (lower-case, leading dot included) to the
 * language ID used when parsing. Declared grouped by language and flattened
 * into a plain object keyed by extension.
 */
export const EXT_TO_LANG_ID: Record<string, string> = (() => {
  const extensionsByLanguage: ReadonlyArray<
    readonly [string, readonly string[]]
  > = [
    ["javascript", [".js", ".mjs", ".cjs"]],
    ["javascriptreact", [".jsx"]],
    ["typescript", [".ts", ".mts", ".cts"]],
    ["typescriptreact", [".tsx"]],
    ["python", [".py", ".pyw"]],
    ["go", [".go"]],
    ["ruby", [".rb", ".rake", ".gemspec"]],
  ];

  const table: Record<string, string> = {};
  for (const [langId, extensions] of extensionsByLanguage) {
    for (const ext of extensions) {
      table[ext] = langId;
    }
  }
  return table;
})();

// ── Language → family mapping ──

export const LANG_FAMILIES: Record<string, LangFamily> = {
Expand Down
21 changes: 14 additions & 7 deletions packages/enricher/src/parser-manager.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import Parser from "web-tree-sitter";
import type { LangFamily } from "./languages.js";
import { LANG_FAMILIES } from "./languages.js";
import { warn } from "./log.js";
import type { DetectionConfig } from "./types.js";
import { DEFAULT_CONFIG } from "./types.js";

/**
 * Computes the path of the `grammars` directory that sits next to this
 * module's parent directory.
 *
 * @returns The directory of this module, joined with `..` and `grammars`.
 */
function resolveGrammarsDir(): string {
  // dist/ (built output) and src/ (tests) are both one level below the package
  // root, so stepping up once from this module's directory works for either.
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  return path.join(moduleDir, "..", "grammars");
}

export class ParserManager {
private parser: Parser | null = null;
private languages = new Map<string, Parser.Language>();
private queryCache = new Map<string, Parser.Query>();
private maxCacheSize = 256;
private initPromise: Promise<void> | null = null;
private wasmDir = "";
private wasmDir = resolveGrammarsDir();
config: DetectionConfig = DEFAULT_CONFIG;

/** Replaces the active detection config and empties the query cache. */
updateConfig(config: DetectionConfig): void {
this.config = config;
// presumably cached queries were built from the previous config — TODO confirm
this.queryCache.clear();
}

async initialize(wasmDir: string): Promise<void> {
this.wasmDir = wasmDir;
this.initPromise = this.doInit();
/**
 * Kicks off `doInit()` the first time it is called and hands back the same
 * stored promise on later calls while `initPromise` is set.
 */
private async ensureInitialized(): Promise<void> {
  if (this.initPromise) {
    return this.initPromise;
  }
  const pending = this.doInit();
  this.initPromise = pending;
  return pending;
}

Expand All @@ -33,6 +41,7 @@ export class ParserManager {
});
this.parser = new Parser();
} catch (err) {
this.initPromise = null;
warn("Failed to initialize tree-sitter parser", err);
throw err;
}
Expand All @@ -49,9 +58,7 @@ export class ParserManager {
async ensureReady(
langId: string,
): Promise<{ lang: Parser.Language; family: LangFamily } | null> {
if (this.initPromise) {
await this.initPromise;
}
await this.ensureInitialized();
if (!this.parser) {
return null;
}
Expand Down
Loading