diff --git a/packages/code-map/__tests__/parse.test.ts b/packages/code-map/__tests__/parse.test.ts index a15d881c0..ce4bc3156 100644 --- a/packages/code-map/__tests__/parse.test.ts +++ b/packages/code-map/__tests__/parse.test.ts @@ -51,6 +51,31 @@ describe('parse module', () => { expect(mockQuery.captures).toHaveBeenCalledWith(mockTree.rootNode) }) + it('should skip parsing source larger than the byte limit', () => { + const mockParser = createMockTreeSitterParser() + const mockLanguageConfig: LanguageConfig = { + extensions: ['.ts'], + wasmFile: 'tree-sitter-typescript.wasm', + queryText: 'mock query', + parser: mockParser, + query: createMockTreeSitterQuery(), + } + + const result = parseTokens( + 'test.ts', + mockLanguageConfig, + () => 'x'.repeat(20), + { maxBytes: 10 }, + ) + + expect(result).toEqual({ + numLines: 0, + identifiers: [], + calls: [], + }) + expect(mockParser.parse).not.toHaveBeenCalled() + }) + it('should handle null file content gracefully', () => { const mockLanguageConfig: LanguageConfig = { extensions: ['.ts'], @@ -594,5 +619,22 @@ console.log('Total:', formatCurrency(total)); expect(typeof result.tokenScores).toBe('object') expect(typeof result.tokenCallers).toBe('object') }) + + it('should continue scoring when a provided reader rejects for one file', async () => { + const result = await getFileTokenScores( + '/tmp/test-project', + ['src/unreadable.ts', 'src/readable.ts'], + async (filePath: string) => { + if (filePath === 'src/unreadable.ts') { + throw new Error('permission denied') + } + + return 'export function readable() { return helper() }\nfunction helper() { return 1 }\n' + }, + ) + + expect(result.tokenScores).toBeDefined() + expect(result.tokenCallers).toBeDefined() + }) }) }) diff --git a/packages/code-map/src/parse.ts b/packages/code-map/src/parse.ts index 09c1866a2..8ddf3337a 100644 --- a/packages/code-map/src/parse.ts +++ b/packages/code-map/src/parse.ts @@ -3,12 +3,51 @@ import * as path from 'path' import { getLanguageConfig } from './languages' -import type { LanguageConfig } from './languages'; +import type { LanguageConfig } from './languages' import type { Parser, Query } from 'web-tree-sitter' export const DEBUG_PARSING = false const IGNORE_TOKENS = ['__init__', '__post_init__', '__call__', 'constructor'] const MAX_CALLERS = 25 +const DEFAULT_MAX_PARSE_FILES = 10_000 +const DEFAULT_MAX_PARSE_FILE_BYTES = 1_000_000 +const DEFAULT_MAX_TOTAL_PARSE_BYTES = 500_000_000 + +const MAX_PARSE_FILES = getPositiveIntegerEnv( + 'CODEBUFF_MAX_PARSE_FILES', + DEFAULT_MAX_PARSE_FILES, +) +const MAX_PARSE_FILE_BYTES = getPositiveIntegerEnv( + 'CODEBUFF_MAX_PARSE_FILE_BYTES', + DEFAULT_MAX_PARSE_FILE_BYTES, +) +const MAX_TOTAL_PARSE_BYTES = getPositiveIntegerEnv( + 'CODEBUFF_MAX_TOTAL_PARSE_BYTES', + DEFAULT_MAX_TOTAL_PARSE_BYTES, +) + +type ParseTokensOptions = { + maxBytes?: number + remainingBytes?: number +} + +type ParsedTokens = { + numLines: number + identifiers: string[] + calls: string[] +} + +type ParsedTokensForScoring = ParsedTokens & { + bytes: number + skipped: boolean +} + +type SourceReader = (filePath: string) => string | null | Promise + +type FileCallData = { + calls: string[] + scores: Record +} export interface TokenCallerMap { [filePath: string]: { @@ -24,110 +63,52 @@ export interface FileTokenData { export async function getFileTokenScores( projectRoot: string, filePaths: string[], - readFile?: (filePath: string) => string | null, + readFile?: SourceReader, ): Promise { const startTime = Date.now() - const tokenScores: { [filePath: string]: { [token: string]: number } } = {} - const externalCalls: { [token: string]: number } = {} + const tokenScores: Record> = {} + const externalCalls: Record = {} const fileCallsMap = new Map() + let parsedFiles = 0 + let totalParsedBytes = 0 - // First pass: collect all identifiers and calls for (const filePath of filePaths) { + if ( + parsedFiles >= MAX_PARSE_FILES || + totalParsedBytes >= MAX_TOTAL_PARSE_BYTES + ) { + break + } + const fullPath = path.join(projectRoot, filePath) const languageConfig = await getLanguageConfig(fullPath) - if (languageConfig) { - let parseResults - if (readFile) { - // When readFile is provided, use relative filePath - parseResults = parseTokens(filePath, languageConfig, readFile) - } else { - // When readFile is not provided, use full path to read from file system - parseResults = parseTokens(fullPath, languageConfig) - } - const { identifiers, calls, numLines } = parseResults - - const tokenScoresForFile: { [token: string]: number } = {} - tokenScores[filePath] = tokenScoresForFile - - const dirs = path.dirname(fullPath).split(path.sep) - const depth = dirs.length - const tokenBaseScore = - 0.8 ** depth * Math.sqrt(numLines / (identifiers.length + 1)) - - // Store defined tokens - for (const identifier of identifiers) { - if (!IGNORE_TOKENS.includes(identifier)) { - tokenScoresForFile[identifier] = tokenBaseScore - } - } + if (!languageConfig) continue - // Store calls for this file - fileCallsMap.set(filePath, calls) + const parsed = await parseTokensForScoring({ + filePath, + fullPath, + languageConfig, + readFile, + remainingBytes: MAX_TOTAL_PARSE_BYTES - totalParsedBytes, + }) + if (parsed.skipped) continue - // Track external calls - for (const call of calls) { - if (!tokenScoresForFile[call]) { - externalCalls[call] = (externalCalls[call] ?? 0) + 1 - } - } - } - } - // Build a map of tokens to their defining files for O(1) lookup - const tokenDefinitionMap = new Map() - const highestScores = new Map() - for (const [filePath, scores] of Object.entries(tokenScores)) { - for (const [token, score] of Object.entries(scores)) { - const currentHighestScore = highestScores.get(token) ?? -Infinity - // Keep the file with the higher score for this token - if (score > currentHighestScore) { - highestScores.set(token, score) - tokenDefinitionMap.set(token, filePath) - } - } - } + parsedFiles++ + totalParsedBytes += parsed.bytes - const tokenCallers: TokenCallerMap = {} + const { scores, calls } = scoreFileTokens(fullPath, parsed) + tokenScores[filePath] = scores + fileCallsMap.set(filePath, calls) - // For each file's calls, add it as a caller to the defining file's tokens - for (const [callingFile, calls] of fileCallsMap.entries()) { for (const call of calls) { - const definingFile = tokenDefinitionMap.get(call) - if (!definingFile || callingFile === definingFile) { - continue - } - - // Skip token names in default objects, e.g. toString, hasOwnProperty - if (call in {}) { - continue - } - - if (!tokenCallers[definingFile]) { - tokenCallers[definingFile] = {} - } - - if (!tokenCallers[definingFile][call]) { - tokenCallers[definingFile][call] = [] - } - const callerFiles = tokenCallers[definingFile][call] - if ( - callerFiles.length < MAX_CALLERS && - !callerFiles.includes(callingFile) - ) { - callerFiles.push(callingFile) + if (!scores[call]) { + externalCalls[call] = (externalCalls[call] ?? 0) + 1 } } } - // Apply call frequency boost to token scores - for (const scores of Object.values(tokenScores)) { - for (const token of Object.keys(scores)) { - const numCalls = externalCalls[token] ?? 0 - if (typeof numCalls !== 'number') continue - scores[token] *= 1 + Math.log(1 + numCalls) - // Round to 3 decimal places - scores[token] = Math.round(scores[token] * 1000) / 1000 - } - } + const tokenCallers = buildTokenCallers(tokenScores, fileCallsMap) + boostScoresByExternalCalls(tokenScores, externalCalls) if (DEBUG_PARSING) { const endTime = Date.now() @@ -155,25 +136,79 @@ export function parseTokens( filePath: string, languageConfig: LanguageConfig, readFile?: (filePath: string) => string | null, -) { + options: ParseTokensOptions = {}, +): ParsedTokens { + const { numLines, identifiers, calls } = parseTokensWithLimits( + filePath, + languageConfig, + readFile, + options, + ) + return { numLines, identifiers, calls } +} + +async function parseTokensForScoring(params: { + filePath: string + fullPath: string + languageConfig: LanguageConfig + readFile?: SourceReader + remainingBytes: number +}): Promise { + const { filePath, fullPath, languageConfig, readFile, remainingBytes } = + params + + if (!readFile) { + return parseTokensWithLimits(fullPath, languageConfig, undefined, { + maxBytes: MAX_PARSE_FILE_BYTES, + remainingBytes, + }) + } + + try { + const source = await readFile(filePath) + return parseTokensWithLimits(filePath, languageConfig, () => source, { + maxBytes: MAX_PARSE_FILE_BYTES, + remainingBytes, + }) + } catch (e) { + if (DEBUG_PARSING) { + console.error(`Error reading source: ${e}`) + console.log(filePath) + } + return emptyParsedTokens(false) + } +} + +function parseTokensWithLimits( + filePath: string, + languageConfig: LanguageConfig, + readFile: ((filePath: string) => string | null) | undefined, + options: ParseTokensOptions, +): ParsedTokensForScoring { const { parser, query } = languageConfig try { - const sourceCode = readFile - ? readFile(filePath) - : fs.readFileSync(filePath, 'utf8') - if (sourceCode === null) { - return { - numLines: 0, - identifiers: [] as string[], - calls: [] as string[], - } + const maxBytes = options.maxBytes ?? MAX_PARSE_FILE_BYTES + const remainingBytes = options.remainingBytes ?? MAX_TOTAL_PARSE_BYTES + if (remainingBytes <= 0) { + return emptyParsedTokens(true) + } + + const source = loadSourceWithinLimits({ + filePath, + readFile, + maxBytes, + remainingBytes, + }) + if (!source) { + return emptyParsedTokens(true) } - const numLines = (sourceCode.match(/\n/g)?.length ?? 0) + 1 + if (!parser || !query) { throw new Error('Parser or query not found') } - const parseResults = parseFile(parser, query, sourceCode) + + const parseResults = parseFile(parser, query, source.code) const identifiers = Array.from(new Set(parseResults.identifier)) const calls = Array.from(new Set(parseResults['call.identifier'])) @@ -184,21 +219,136 @@ export function parseTokens( } return { - numLines, + numLines: countLines(source.code), identifiers: identifiers ?? [], calls: calls ?? [], + bytes: source.bytes, + skipped: false, } } catch (e) { if (DEBUG_PARSING) { console.error(`Error parsing query: ${e}`) console.log(filePath) } + return emptyParsedTokens(false) + } +} + +function loadSourceWithinLimits(params: { + filePath: string + readFile?: (filePath: string) => string | null + maxBytes: number + remainingBytes: number +}): { code: string; bytes: number } | null { + const { filePath, readFile, maxBytes, remainingBytes } = params + + if (!readFile) { + const bytes = fs.statSync(filePath).size + if (bytes > maxBytes || bytes > remainingBytes) return null + return { - numLines: 0, - identifiers: [] as string[], - calls: [] as string[], + code: fs.readFileSync(filePath, 'utf8'), + bytes, + } + } + + const code = readFile(filePath) + if (code === null) return null + + const bytes = Buffer.byteLength(code, 'utf8') + if (bytes > maxBytes || bytes > remainingBytes) return null + + return { code, bytes } +} + +function scoreFileTokens(fullPath: string, parsed: ParsedTokens): FileCallData { + const scores: Record = {} + const dirs = path.dirname(fullPath).split(path.sep) + const depth = dirs.length + const tokenBaseScore = + 0.8 ** depth * Math.sqrt(parsed.numLines / (parsed.identifiers.length + 1)) + + for (const identifier of parsed.identifiers) { + if (!IGNORE_TOKENS.includes(identifier)) { + scores[identifier] = tokenBaseScore } } + + return { scores, calls: parsed.calls } +} + +function buildTokenCallers( + tokenScores: Record>, + fileCallsMap: Map, +): TokenCallerMap { + const tokenDefinitionMap = new Map() + const highestScores = new Map() + + for (const [filePath, scores] of Object.entries(tokenScores)) { + for (const [token, score] of Object.entries(scores)) { + const currentHighestScore = highestScores.get(token) ?? -Infinity + if (score > currentHighestScore) { + highestScores.set(token, score) + tokenDefinitionMap.set(token, filePath) + } + } + } + + const tokenCallers: TokenCallerMap = {} + for (const [callingFile, calls] of fileCallsMap.entries()) { + for (const call of calls) { + const definingFile = tokenDefinitionMap.get(call) + if (!definingFile || callingFile === definingFile || call in {}) { + continue + } + + const callersByToken = (tokenCallers[definingFile] ??= {}) + const callerFiles = (callersByToken[call] ??= []) + if ( + callerFiles.length < MAX_CALLERS && + !callerFiles.includes(callingFile) + ) { + callerFiles.push(callingFile) + } + } + } + + return tokenCallers +} + +function boostScoresByExternalCalls( + tokenScores: Record>, + externalCalls: Record, +): void { + for (const scores of Object.values(tokenScores)) { + for (const token of Object.keys(scores)) { + const numCalls = externalCalls[token] ?? 0 + scores[token] *= 1 + Math.log(1 + numCalls) + scores[token] = Math.round(scores[token] * 1000) / 1000 + } + } +} + +function emptyParsedTokens(skipped: boolean): ParsedTokensForScoring { + return { + numLines: 0, + identifiers: [], + calls: [], + bytes: 0, + skipped, + } +} + +function countLines(sourceCode: string): number { + return (sourceCode.match(/\n/g)?.length ?? 0) + 1 +} + +function getPositiveIntegerEnv(name: string, fallback: number): number { + const raw = process.env[name] + if (!raw) return fallback + + const parsed = Number.parseInt(raw, 10) + return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback } function parseFile( @@ -210,16 +360,20 @@ function parseFile( if (!tree) { return {} } - const captures = query.captures(tree.rootNode) - const result: { [key: string]: string[] } = {} + try { + const captures = query.captures(tree.rootNode) + const result: { [key: string]: string[] } = {} - for (const capture of captures) { - const { name, node } = capture - if (!result[name]) { - result[name] = [] + for (const capture of captures) { + const { name, node } = capture + if (!result[name]) { + result[name] = [] + } + result[name].push(node.text) } - result[name].push(node.text) - } - return result + return result + } finally { + ;(tree as { delete?: () => void }).delete?.() + } } diff --git a/sdk/src/__tests__/initial-session-state.test.ts b/sdk/src/__tests__/initial-session-state.test.ts index e8e1ac5d5..d8e8d2abb 100644 --- a/sdk/src/__tests__/initial-session-state.test.ts +++ b/sdk/src/__tests__/initial-session-state.test.ts @@ -116,6 +116,31 @@ describe('Initial Session State', () => { }) test('discovers project files automatically when projectFiles is undefined', async () => { + mockFs.readdir = (async (dirPath: string) => { + if (dirPath === '/test-project') { + return ['src', '.git', 'knowledge.md', 'README.md', '.gitignore'] + } + if (dirPath === '/test-project/src') { + return ['index.ts', 'utils.ts', 'generated.ts'] + } + return [] + }) as CodebuffFileSystem['readdir'] + mockFs.stat = (async (filePath: string) => + ({ + isDirectory: () => + filePath === '/test-project/src' || filePath === '/test-project/.git', + isFile: () => + filePath !== '/test-project/src' && filePath !== '/test-project/.git', + size: filePath.endsWith('generated.ts') ? 1_000_001 : 100, + }) as MockStatResult & { size: number }) as CodebuffFileSystem['stat'] + + const readFilePaths: string[] = [] + const originalReadFile = mockFs.readFile + mockFs.readFile = (async (filePath: string, encoding?: BufferEncoding) => { + readFilePaths.push(filePath) + return originalReadFile(filePath, encoding) + }) as CodebuffFileSystem['readFile'] + const sessionState = await initialSessionState({ cwd: '/test-project', projectFiles: undefined, @@ -126,6 +151,13 @@ describe('Initial Session State', () => { expect(sessionState.fileContext.fileTree).toBeDefined() expect(sessionState.mainAgentState.agentId).toBe('main-agent') expect(sessionState.mainAgentState.messageHistory).toEqual([]) + expect(readFilePaths.some((p) => p.endsWith('src/index.ts'))).toBe(true) + expect(readFilePaths.some((p) => p.endsWith('src/utils.ts'))).toBe(true) + expect(readFilePaths.some((p) => p.endsWith('src/generated.ts'))).toBe( + false, + ) + expect(readFilePaths.some((p) => p.endsWith('README.md'))).toBe(false) + expect(readFilePaths.some((p) => p.endsWith('knowledge.md'))).toBe(true) }) test('derives knowledgeFiles from projectFiles when not provided', async () => { diff --git a/sdk/src/run-state.ts b/sdk/src/run-state.ts index f2ea5af7a..86f19b838 100644 --- a/sdk/src/run-state.ts +++ b/sdk/src/run-state.ts @@ -53,9 +53,7 @@ export function selectHighestPriorityKnowledgeFile( ): string | undefined { // Loop through priorities and find the first match directly for (const priorityName of KNOWLEDGE_FILE_NAMES_LOWERCASE) { - const match = candidates.find((f) => - f.toLowerCase().endsWith(priorityName), - ) + const match = candidates.find((f) => f.toLowerCase().endsWith(priorityName)) if (match) return match } return undefined @@ -136,26 +134,27 @@ function processCustomToolDefinitions( /** * Computes project file indexes (file tree and token scores) */ -async function computeProjectIndex( - cwd: string, - projectFiles: Record, -): Promise<{ +type ProjectIndexInput = { + cwd: string + fileTree: FileTreeNode[] + filePaths: string[] + readFile?: (filePath: string) => string | null | Promise +} + +const MAX_DISCOVERED_PROJECT_READ_BYTES = 1_000_000 + +async function computeProjectIndex(params: ProjectIndexInput): Promise<{ fileTree: FileTreeNode[] fileTokenScores: Record tokenCallers: Record }> { - const filePaths = Object.keys(projectFiles).sort() - const fileTree = buildFileTree(filePaths) + const { cwd, fileTree, filePaths, readFile } = params let fileTokenScores = {} let tokenCallers = {} if (filePaths.length > 0) { try { - const tokenData = await getFileTokenScores( - cwd, - filePaths, - (filePath: string) => projectFiles[filePath] || null, - ) + const tokenData = await getFileTokenScores(cwd, filePaths, readFile) fileTokenScores = tokenData.tokenScores tokenCallers = tokenData.tokenCallers } catch (error) { @@ -167,6 +166,68 @@ async function computeProjectIndex( return { fileTree, fileTokenScores, tokenCallers } } +function getProjectIndexInput(params: { + cwd: string + fs?: CodebuffFileSystem + logger?: Logger + projectFiles?: Record + discoveredProject?: { fileTree: FileTreeNode[]; filePaths: string[] } +}): ProjectIndexInput | undefined { + const { cwd, fs, logger, projectFiles, discoveredProject } = params + + if (projectFiles) { + const filePaths = Object.keys(projectFiles).sort() + return { + cwd, + fileTree: buildFileTree(filePaths), + filePaths, + readFile: (filePath: string) => projectFiles[filePath] || null, + } + } + + if (discoveredProject) { + if (!fs || !logger) return undefined + + return { + cwd, + fileTree: discoveredProject.fileTree, + filePaths: discoveredProject.filePaths.sort(), + readFile: createDiscoveredProjectReader({ cwd, fs, logger }), + } + } + + return undefined +} + +function createDiscoveredProjectReader(params: { + cwd: string + fs: CodebuffFileSystem + logger: Logger +}): (filePath: string) => Promise { + const { cwd, fs, logger } = params + + return async (filePath: string) => { + const fullPath = path.join(cwd, filePath) + try { + const stats = await fs.stat(fullPath) + if (getFileSize(stats) > MAX_DISCOVERED_PROJECT_READ_BYTES) { + return null + } + return await fs.readFile(fullPath, 'utf8') + } catch (error) { + logger.debug?.( + { filePath, error: getErrorObject(error) }, + 'Failed to read discovered project file for symbol scoring', + ) + return null + } + } +} + +function getFileSize(stats: Awaited>) { + return typeof stats.size === 'number' ? stats.size : 0 +} + /** * Helper to convert ChildProcess to Promise with stdout/stderr */ @@ -261,43 +322,20 @@ async function getGitChanges(params: { } /** - * Discovers project files using .gitignore patterns when projectFiles is undefined + * Discovers project paths using .gitignore patterns when projectFiles is undefined. + * This intentionally does not read every file into memory; large repositories can + * contain generated or binary files that are expensive to retain before parsing. */ -async function discoverProjectFiles(params: { +async function discoverProjectPaths(params: { cwd: string fs: CodebuffFileSystem - logger: Logger -}): Promise> { - const { cwd, fs, logger } = params +}): Promise<{ fileTree: FileTreeNode[]; filePaths: string[] }> { + const { cwd, fs } = params const fileTree = await getProjectFileTree({ projectRoot: cwd, fs }) const filePaths = getAllFilePaths(fileTree) - let error - - // Create projectFiles with empty content - the token scorer will read from disk - const projectFilePromises = Object.fromEntries( - filePaths.map((filePath) => [ - filePath, - fs.readFile(path.join(cwd, filePath), 'utf8').catch((err) => { - error = err - return '[ERROR_READING_FILE]' - }), - ]), - ) - if (error) { - logger.warn( - { error: getErrorObject(error) }, - 'Failed to discover some project files', - ) - } - const projectFilesResolved: Record = {} - for (const [filePath, contentPromise] of Object.entries( - projectFilePromises, - )) { - projectFilesResolved[filePath] = await contentPromise - } - return projectFilesResolved + return { fileTree, filePaths } } /** @@ -322,7 +360,10 @@ export async function loadUserKnowledgeFiles(params: { try { entries = await fs.readdir(homeDir) } catch (error) { - logger.debug?.({ homeDir, error: getErrorObject(error) }, 'Failed to read home directory') + logger.debug?.( + { homeDir, error: getErrorObject(error) }, + 'Failed to read home directory', + ) return userKnowledgeFiles } @@ -351,7 +392,10 @@ export async function loadUserKnowledgeFiles(params: { // Only use the first file found (highest priority) break } catch (error) { - logger.debug?.({ filePath, error: getErrorObject(error) }, 'Failed to read user knowledge file') + logger.debug?.( + { filePath, error: getErrorObject(error) }, + 'Failed to read user knowledge file', + ) } } } @@ -407,6 +451,32 @@ function deriveKnowledgeFiles( return knowledgeFiles } +async function loadKnowledgeFilesFromPaths(params: { + cwd: string + filePaths: string[] + fs: CodebuffFileSystem + logger: Logger +}): Promise> { + const { cwd, filePaths, fs, logger } = params + const selectedFilePaths = selectKnowledgeFilePaths(filePaths) + + const knowledgeFiles: Record = {} + for (const filePath of selectedFilePaths) { + try { + knowledgeFiles[filePath] = await fs.readFile( + path.join(cwd, filePath), + 'utf8', + ) + } catch (error) { + logger.debug?.( + { filePath, error: getErrorObject(error) }, + 'Failed to read project knowledge file', + ) + } + } + return knowledgeFiles +} + export async function initialSessionState( params: InitialSessionStateOptions, ): Promise { @@ -443,12 +513,27 @@ export async function initialSessionState( } } + let discoveredProject: + | { fileTree: FileTreeNode[]; filePaths: string[] } + | undefined + // Auto-discover project files if not provided and cwd is available if (projectFiles === undefined && cwd) { - projectFiles = await discoverProjectFiles({ cwd, fs, logger }) + discoveredProject = await discoverProjectPaths({ cwd, fs }) } if (knowledgeFiles === undefined) { - knowledgeFiles = projectFiles ? deriveKnowledgeFiles(projectFiles) : {} + if (projectFiles) { + knowledgeFiles = deriveKnowledgeFiles(projectFiles) + } else if (cwd && discoveredProject) { + knowledgeFiles = await loadKnowledgeFilesFromPaths({ + cwd, + filePaths: discoveredProject.filePaths, + fs, + logger, + }) + } else { + knowledgeFiles = {} + } } let processedAgentTemplates: Record = {} @@ -461,13 +546,15 @@ export async function initialSessionState( customToolDefinitions, ) - // Generate file tree and token scores from projectFiles if available let fileTree: FileTreeNode[] = [] let fileTokenScores: Record = {} let tokenCallers: Record = {} - if (cwd && projectFiles) { - const result = await computeProjectIndex(cwd, projectFiles) + const projectIndex = cwd + ? getProjectIndexInput({ cwd, fs, logger, projectFiles, discoveredProject }) + : undefined + if (projectIndex) { + const result = await computeProjectIndex(projectIndex) fileTree = result.fileTree fileTokenScores = result.fileTokenScores tokenCallers = result.tokenCallers @@ -491,7 +578,11 @@ export async function initialSessionState( } // Load skills from project and home directories - const skills = await loadSkills({ cwd: cwd ?? process.cwd(), skillsPath: skillsDir, verbose: false }) + const skills = await loadSkills({ + cwd: cwd ?? process.cwd(), + skillsPath: skillsDir, + verbose: false, + }) const initialState = getInitialSessionState({ projectRoot: cwd ?? process.cwd(), @@ -618,11 +709,17 @@ export async function applyOverridesToSessionState( // Apply projectFiles override (recomputes file tree and token scores) if (overrides.projectFiles !== undefined) { if (cwd) { - const { fileTree, fileTokenScores, tokenCallers } = - await computeProjectIndex(cwd, overrides.projectFiles) - sessionState.fileContext.fileTree = fileTree - sessionState.fileContext.fileTokenScores = fileTokenScores - sessionState.fileContext.tokenCallers = tokenCallers + const projectIndex = getProjectIndexInput({ + cwd, + projectFiles: overrides.projectFiles, + }) + if (projectIndex) { + const { fileTree, fileTokenScores, tokenCallers } = + await computeProjectIndex(projectIndex) + sessionState.fileContext.fileTree = fileTree + sessionState.fileContext.fileTokenScores = fileTokenScores + sessionState.fileContext.tokenCallers = tokenCallers + } } else { // If projectFiles are provided but no cwd, reset file context fields sessionState.fileContext.fileTree = []