Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"exports": {
".": "./src/index.ts"
},
"imports": {
"#src/*": "./src/*"
},
"bin": {
"bytebell": "./src/index.ts"
},
Expand Down
16 changes: 2 additions & 14 deletions packages/cli/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,4 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "./src",
"outDir": "./dist",
"jsx": "react-jsx"
},
"include": ["src/**/*"],
"references": [
{ "path": "../config" },
{ "path": "../errors" },
{ "path": "../ingest-github" },
{ "path": "../logger" },
{ "path": "../types" }
]
"extends": "../../../../tsconfig.base.json",
"include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"]
}
12 changes: 10 additions & 2 deletions packages/config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,17 @@ function seedConfig(value: unknown): BytebellConfig
function __isSeeded(): boolean
class ConfigSeededError extends Error

function setBytebellHomeResolver(fn: (() => string | null) | null): void

function __resetSeedForTests(): void // test-only
function __setBytebellHomeForTests(home: string | null): void // test-only
```

`setBytebellHomeResolver` registers an override function invoked on every
`getBytebellHome()` call (no caching). The resolver returns the home directory
to use for the current invocation, or `null` to fall through to the
`~/.bytebell` default. Pass `null` to clear.

`seedConfig` injects a pre-parsed config object into the in-memory cache,
validated through `configSchema.parse`. When seeded, `loadConfig()` returns
the seeded values and **does not** call `ensureBytebellHome()` or read
Expand Down Expand Up @@ -89,8 +96,9 @@ This package does **not** own:

1. **No env var reads.** Source files contain no `process.env` references.
Enforced at lint time ([eslint.config.mjs:71-94](../../eslint.config.mjs#L71-L94)).
2. **No `.env` / `dotenv` / `BYTEBELL_HOME`.** The only test seam is the
programmatic `__setBytebellHomeForTests`.
2. **No `.env` / `dotenv` / `BYTEBELL_HOME`.** Programmatic override seams
are `__setBytebellHomeForTests` (test-only, static) and
`setBytebellHomeResolver` (per-call function).
3. **Strict schema.** Unknown keys in `config.json` cause `loadConfig()` to
throw — typo defense.
4. **Defaults always present.** `loadConfig()` never returns a partial config;
Expand Down
3 changes: 3 additions & 0 deletions packages/config/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"exports": {
".": "./src/index.ts"
},
"imports": {
"#src/*": "./src/*"
},
"dependencies": {
"@bb/types": "workspace:*",
"zod": "^4.3.6"
Expand Down
8 changes: 5 additions & 3 deletions packages/config/src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ package-level contract; this file documents how the source tree is split.
- **[index.ts](index.ts)** — public re-exports. The only entry point other
packages may import. Anything not re-exported here is internal.
- **[paths.ts](paths.ts)** — `getBytebellHome`, `getConfigPath`, and the
cache-invalidator registry. Holds the `testHomeOverride` state used by
`__setBytebellHomeForTests`. Pure: imports nothing from the rest of the
package.
cache-invalidator registry. Holds the `testHomeOverride` slot set by
`__setBytebellHomeForTests` and the `homeResolver` slot set by
`setBytebellHomeResolver`. `getBytebellHome` consults the test override
first, then the resolver (if set and returning non-null), then falls back
to `~/.bytebell`. Pure: imports nothing from the rest of the package.
- **[schema.ts](schema.ts)** — Zod `configSchema`, `BytebellConfig` type,
`ConfigValueMap`, `DEFAULT_CONFIG`, `REQUIRED_KEYS` (infra-always),
`requiredKeysFor(provider)` (combines infra + provider-specific keys
Expand Down
8 changes: 7 additions & 1 deletion packages/config/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ export type { ConfigCompletenessResult } from "./loader.ts";

export { setConfigValue, ensureBytebellHome, ConfigSeededError } from "./writer.ts";

export { getBytebellHome, getConfigPath, isDevMode, __setBytebellHomeForTests } from "./paths.ts";
export {
getBytebellHome,
getConfigPath,
isDevMode,
setBytebellHomeResolver,
__setBytebellHomeForTests,
} from "./paths.ts";
18 changes: 18 additions & 0 deletions packages/config/src/paths.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,33 @@ import os from "node:os";
import path from "node:path";

// Static home-directory override; when non-null it wins over everything else.
let testHomeOverride: string | null = null;
// Optional per-call resolver; consulted only when no static override is set.
let homeResolver: (() => string | null) | null = null;
// Registry of cache-invalidation callbacks.
const cacheInvalidators: Array<() => void> = [];

/**
 * Resolves the Bytebell home directory for the current call.
 *
 * Precedence:
 * 1. the static override (`testHomeOverride`), when set;
 * 2. the registered resolver, when one is set and it returns a non-null value;
 * 3. `.bytebell` under the OS home directory.
 *
 * Nothing is cached here: the resolver is invoked on every call that reaches
 * step 2.
 *
 * @returns The directory path selected by the rules above.
 */
export function getBytebellHome(): string {
  if (testHomeOverride !== null) {
    return testHomeOverride;
  }

  const fromResolver = homeResolver === null ? null : homeResolver();
  return fromResolver !== null ? fromResolver : path.join(os.homedir(), ".bytebell");
}

/**
* Register (or clear) the override resolver consulted by `getBytebellHome()`.
*
* The resolver runs on every `getBytebellHome()` call that is not already
* answered by the static test override (no caching), so it may return
* different values across invocations. Returning `null` from the resolver
* falls through to the `~/.bytebell` default.
*
* @param fn - Resolver to install, or `null` to clear the current resolver.
*/
export function setBytebellHomeResolver(fn: (() => string | null) | null): void {
homeResolver = fn;
// The slot is updated before notifying, so anything reacting to the
// notification already sees the new resolver.
// NOTE(review): `__notifyConfigChanged` is defined outside this view;
// presumably it runs the registered cache invalidators — confirm.
__notifyConfigChanged();
}

/**
 * Path of `config.json` inside the directory currently reported by
 * `getBytebellHome()`. Recomputed on every call, so it follows any active
 * home override.
 */
export function getConfigPath(): string {
  const home = getBytebellHome();
  return path.join(home, "config.json");
}
Expand Down
11 changes: 2 additions & 9 deletions packages/config/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "./src",
"outDir": "./dist",
"noEmit": false,
"emitDeclarationOnly": true
},
"include": ["src/**/*"],
"references": [{ "path": "../types" }]
"extends": "../../../../tsconfig.base.json",
"include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"]
}
3 changes: 3 additions & 0 deletions packages/errors/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"exports": {
".": "./src/index.ts"
},
"imports": {
"#src/*": "./src/*"
},
"dependencies": {
"@bb/types": "workspace:*"
}
Expand Down
9 changes: 2 additions & 7 deletions packages/errors/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"rootDir": "./src",
"outDir": "./dist"
},
"include": ["src/**/*"],
"references": [{ "path": "../types" }]
"extends": "../../../../tsconfig.base.json",
"include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"]
}
54 changes: 54 additions & 0 deletions packages/ingest-business-context/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# `@bb/ingest-business-context` — context

## Tier

Domain. Depends on Kernel (`@bb/types`, `@bb/errors`), Infrastructure (`@bb/config`, `@bb/neo4j`),
Cross-cutting (`@bb/llm`), and Strategy (`@bb/queue`). One horizontal Domain→Domain dependency on
`@bb/ingest-github` (read-only path helpers + the on-disk layout it owns). May be imported by
Binaries (`@bb/server` calls `registerBusinessContextWorker()` once at boot). Never by `@bb/cli`.

## Responsibility

Attaches human-authored business-context notes to a specific indexed commit of a GitHub knowledge.
The package consumes `JobType.BusinessContextProcessing` jobs. For each job it:

1. Validates the commit is indexed (Neo4j contains either `:File {knowledgeId}` or
`:FileVersion {knowledgeId, commitHash}`).
2. Reads optional enrichment from disk (`metaRoot/repo-summary.json`, `metaRoot/org/<orgId>/*.json`).
3. Runs one LLM call to generate a concise title, then three parallel LLM calls covering
product fields, technical fields, and the shared overview.
4. Persists the result to disk at
`metaRoot/commits/<commitHash>/business-context/<sanitizedTitle>/{original.txt,analysis.json}`.
5. Projects the analysis into Neo4j as a `:BusinessContext` node plus a `:BusinessContextVersion`
snapshot keyed by `(knowledgeId, commitHash)`. The version node `:DESCRIBES` every
`:FileVersion {knowledgeId, commitHash}` that exists for the same commit; if none exist yet
(BC authored before the commit was snapshotted), zero edges are created and a later run will
backfill them via the same idempotent MERGE.
6. Creates `:OrgKeyword` nodes for each array field (10 typed relationship classes such as
`HAS_DOMAIN_KEYWORD`, `HAS_STAKEHOLDER`, `HAS_AFFECTED_MODULE`) connected to the parent
`:BusinessContext` via `:APPEARS_IN_BUSINESS_CONTEXT`.

## Public exports

- `registerBusinessContextWorker(deps?)` — boots the worker. Called by the deployable at startup.
- `executeBusinessContextStrategy(input)` — the disk pipeline (validate → enrichment → title →
analysis → save). Returns the resolved storage paths and the title. Safe to call directly from
HTTP for synchronous flows.
- `storeBusinessContextToNeo4j(input, analysis, sanitizedTitle)` — graph persistence. Separated
so callers can run it inline or defer it.
- `BUSINESS_CONTEXT_FIELD_DEFS` — single source of truth for the 16-field LLM analysis schema.
- Types: `BusinessContextInput`, `BusinessContextAnalysis`, `BusinessContextStorageResult`,
`BusinessContextNeo4jResult`, `BusinessContextAnalysisMetadata`, `CommitNotIndexedError`.

## Invariants

- Single LLM call surface — never bypass `@bb/llm`. Outputs are validated against the field-defs
schema before persistence.
- `:BusinessContext` and `:BusinessContextVersion` are addressed by `(knowledgeId, nodeId)` /
`(knowledgeId, nodeId, commitHash)`; all MERGEs are idempotent and re-runnable.
- `nodeId` is the sanitized title (kebab-case, ≤80 chars). Two BC submissions that LLM-title to the
same string will MERGE onto the same node — by design.
- No outbound calls. No GitHub-API lookups. The strategy never clones or pulls — it operates on
the meta-output already produced by `@bb/ingest-github` for the indexed commit.
- All disk writes scoped under `metaRootFor(knowledgeId)/commits/<commitHash>/business-context/`
via the `@bb/ingest-github` path helpers — this package never invents its own layout.
24 changes: 24 additions & 0 deletions packages/ingest-business-context/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "@bb/ingest-business-context",
"version": "0.0.0",
"private": true,
"type": "module",
"main": "./src/index.ts",
"types": "./src/index.ts",
"exports": {
".": "./src/index.ts"
},
"imports": {
"#src/*": "./src/*"
},
"dependencies": {
"@bb/config": "workspace:*",
"@bb/errors": "workspace:*",
"@bb/ingest-github": "workspace:*",
"@bb/llm": "workspace:*",
"@bb/logger": "workspace:*",
"@bb/neo4j": "workspace:*",
"@bb/queue": "workspace:*",
"@bb/types": "workspace:*"
}
}
37 changes: 37 additions & 0 deletions packages/ingest-business-context/src/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# `@bb/ingest-business-context/src` — implementation map

See [../README.md](../README.md) for the package contract.

## Layout

```
src/
README.md
index.ts Public barrel
field-defs.ts 16-field analysis schema (single source of truth)
types.ts Input / output / metadata interfaces
errors.ts CommitNotIndexedError, BusinessContextAnalysisFailedError

prompt/ System + user prompt builders (title, analysis, user-message)
disk/ Disk persistence (sanitize-title, save-original, save-analysis, load-cached)
llm/ Enrichment-reader, enrichment-format, call-builder, merge, title, analyze-parallel
neo4j/ Indexes, relationship-types, serialize, write-node, write-version, write-keywords
strategy/ commit-validator, execute, store-graph
worker/ handler, register
```

## Import rules

- Cross-folder within the package → `#src/folder/file.ts` (the `#src/*` subpath import declared in `package.json`).
- Sibling within the same folder → `./file.ts`.
- Cross-package → `@bb/foo`.
- **Never** `../` parent traversal.

## Module-graph rules

- `disk/**` depends only on `node:fs`, `@bb/ingest-github` (paths), `@bb/logger`, and `src/types.ts`.
- `llm/**` depends only on `@bb/llm`, `@bb/logger`, `@bb/ingest-github` (paths), and `src/prompt/`, `src/field-defs.ts`, `src/types.ts`.
- `neo4j/**` depends only on `@bb/neo4j`, `@bb/logger`, and `src/types.ts`.
- `strategy/**` depends on `disk/`, `llm/`, `neo4j/`, `src/errors.ts`, `@bb/ingest-github` (paths), `@bb/logger`, `@bb/neo4j`.
- `worker/**` depends on `strategy/`, `@bb/queue`, `@bb/types`, `@bb/config`, `@bb/logger`.
- No layer skips another. The public API (`index.ts`) re-exports from each layer.
17 changes: 17 additions & 0 deletions packages/ingest-business-context/src/disk/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# `disk/` — context

Persists business-context artefacts under
`metaRoot/commits/<commitHash>/business-context/<sanitizedTitle>/`. Paths come
from `@bb/ingest-github`'s `businessContextDir()` — this folder never builds
its own paths.

| File | Responsibility |
| ------------------- | ------------------------------------------------------------------------------- |
| `sanitize-title.ts` | LLM title → kebab-case filesystem-safe slug (≤80 chars). Also the Neo4j nodeId. |
| `save-original.ts` | Writes `original.txt` (raw user-authored text, mode 0600). |
| `save-analysis.ts` | Wraps the analysis in a metadata envelope and writes `analysis.json`. |
| `load-cached.ts` | Reads back a saved envelope; tolerant of missing / malformed files. |

Cache key is the sanitized title alone. Two BC submissions whose LLM titles
sanitise to the same slug share the same cached analysis (intentional — same
idea, same node).
41 changes: 41 additions & 0 deletions packages/ingest-business-context/src/disk/load-cached.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { readFile } from "node:fs/promises";
import path from "node:path";
import { businessContextDir } from "@bb/ingest-github";
import { logger } from "@bb/logger";
import type { BusinessContextAnalysisMetadata } from "#src/types.ts";

/**
 * Returns a previously-saved analysis envelope if one exists for this title,
 * otherwise `null`. The cache key is the sanitized title — same title across
 * re-runs returns the same envelope and skips a fresh LLM call.
 *
 * Never throws on a missing, unreadable, or malformed `analysis.json`:
 * callers treat `null` as a cache miss. A file that simply does not exist
 * (`ENOENT`) is the ordinary miss and stays silent; any other read failure
 * (permissions, path is a directory, I/O error) is logged as a warning so a
 * broken cache does not silently trigger repeated LLM runs.
 *
 * @param knowledgeId - Knowledge the business context belongs to.
 * @param commitHash - Commit the business context is attached to.
 * @param sanitizedTitle - Slug identifying the business-context directory.
 * @returns The parsed envelope when it carries an `analysis` value, else `null`.
 */
export async function loadCachedAnalysis(
  knowledgeId: string,
  commitHash: string,
  sanitizedTitle: string,
): Promise<BusinessContextAnalysisMetadata | null> {
  const filePath = path.join(businessContextDir(knowledgeId, commitHash, sanitizedTitle), "analysis.json");
  let content: string;
  try {
    content = await readFile(filePath, "utf-8");
  } catch (err) {
    // Only "file does not exist" is an expected miss; surface everything else.
    const code = (err as { code?: unknown } | null | undefined)?.code;
    if (code !== "ENOENT") {
      const reason = err instanceof Error ? err.message : String(err);
      logger.warn(`business-context: failed to read cached analysis ${filePath}: ${reason}`);
    }
    return null;
  }
  try {
    // `JSON.parse` can legitimately yield `null`; guard before touching fields
    // so that case is reported as a missing analysis, not as a parse failure.
    const parsed = JSON.parse(content) as BusinessContextAnalysisMetadata | null;
    if (parsed === null || parsed.analysis === undefined || parsed.analysis === null) {
      logger.warn(`business-context: cached envelope at ${filePath} has no analysis field; ignoring`);
      return null;
    }
    logger.info(
      `business-context: cache HIT at ${filePath} (generated ${parsed.generatedAt}, model ${parsed.modelName})`,
    );
    return parsed;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(`business-context: failed to parse cached analysis ${filePath}: ${reason}`);
    return null;
  }
}
25 changes: 25 additions & 0 deletions packages/ingest-business-context/src/disk/sanitize-title.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Anything that is not a lowercase ASCII letter, digit, whitespace, or hyphen.
const NON_ALNUM_DASH = /[^a-z0-9\s-]/gu;
// One or more consecutive whitespace characters.
const WHITESPACE_RUN = /\s+/gu;
// Two or more consecutive hyphens.
const DASH_RUN = /-{2,}/gu;
// A single hyphen at the very start or very end of the string.
const LEADING_OR_TRAILING_DASH = /^-|-$/gu;

/**
 * Turns an LLM-generated title into a filesystem-safe, URL-safe slug.
 *
 * Steps, in order:
 * 1. lowercase the title;
 * 2. drop every character that is not `a-z`, `0-9`, whitespace, or `-`
 *    (punctuation is removed, not replaced);
 * 3. turn each whitespace run into a hyphen and squash hyphen runs to one;
 * 4. strip a leading and a trailing hyphen;
 * 5. cap the result at 80 characters and strip a hyphen left dangling by the cut.
 *
 * The result is an empty string when no character survives step 2.
 *
 * The slug serves as both the on-disk directory name and the Neo4j `nodeId`,
 * so two submissions whose titles sanitise to the same slug MERGE onto the
 * same `:BusinessContext` node (by design — same idea, same node).
 *
 * @param title - Raw title text.
 * @returns The kebab-case slug, at most 80 characters long.
 */
export function sanitizeTitle(title: string): string {
  const lowered = title.toLowerCase();
  const allowedOnly = lowered.replace(NON_ALNUM_DASH, "");
  const hyphenated = allowedOnly.replace(WHITESPACE_RUN, "-").replace(DASH_RUN, "-");
  const trimmed = hyphenated.replace(LEADING_OR_TRAILING_DASH, "");
  const capped = trimmed.slice(0, 80);
  return capped.endsWith("-") ? capped.slice(0, -1) : capped;
}
Loading
Loading