diff --git a/packages/code-map/__tests__/parse.test.ts b/packages/code-map/__tests__/parse.test.ts
index a15d881c0..ce4bc3156 100644
--- a/packages/code-map/__tests__/parse.test.ts
+++ b/packages/code-map/__tests__/parse.test.ts
@@ -51,6 +51,31 @@ describe('parse module', () => {
       expect(mockQuery.captures).toHaveBeenCalledWith(mockTree.rootNode)
     })
 
+    it('should skip parsing source larger than the byte limit', () => {
+      // 20 one-byte characters against a 10-byte cap: the source must be rejected.
+      const oversizedSource = 'x'.repeat(20)
+      const parser = createMockTreeSitterParser()
+      const languageConfig: LanguageConfig = {
+        extensions: ['.ts'],
+        wasmFile: 'tree-sitter-typescript.wasm',
+        queryText: 'mock query',
+        parser,
+        query: createMockTreeSitterQuery(),
+      }
+      const limits = { maxBytes: 10 }
+
+      const tokens = parseTokens(
+        'test.ts',
+        languageConfig,
+        () => oversizedSource,
+        limits,
+      )
+
+      // An over-limit file yields the empty result and never reaches the parser.
+      expect(parser.parse).not.toHaveBeenCalled()
+      expect(tokens).toEqual({ numLines: 0, identifiers: [], calls: [] })
+    })
+
     it('should handle null file content gracefully', () => {
       const mockLanguageConfig: LanguageConfig = {
         extensions: ['.ts'],
@@ -594,5 +619,22 @@ console.log('Total:', formatCurrency(total));
       expect(typeof result.tokenScores).toBe('object')
       expect(typeof result.tokenCallers).toBe('object')
     })
+
+    it('should continue scoring when a provided reader rejects for one file', async () => {
+      const unreadablePath = 'src/unreadable.ts'
+      const readableSource =
+        'export function readable() { return helper() }\nfunction helper() { return 1 }\n'
+      // Reader that rejects for exactly one path and resolves for every other.
+      const flakyReader = async (filePath: string) => {
+        if (filePath === unreadablePath) throw new Error('permission denied')
+        return readableSource
+      }
+
+      const projectRoot = '/tmp/test-project'
+      const filePaths = [unreadablePath, 'src/readable.ts']
+      const { tokenScores, tokenCallers } = await getFileTokenScores(projectRoot, filePaths, flakyReader)
+      expect(tokenScores).toBeDefined()
+      expect(tokenCallers).toBeDefined()
+    })
   })
 })
diff --git a/packages/code-map/src/parse.ts b/packages/code-map/src/parse.ts
index 09c1866a2..8ddf3337a 100644
--- a/packages/code-map/src/parse.ts
+++ b/packages/code-map/src/parse.ts
@@ -3,12 +3,51 @@ import * as path from 'path'
 
 import { getLanguageConfig } from './languages'
 
-import type { LanguageConfig } from './languages';
+import type { LanguageConfig } from './languages'
 import type { Parser, Query } from 'web-tree-sitter'
 
 export const DEBUG_PARSING = false
 const IGNORE_TOKENS = ['__init__', '__post_init__', '__call__', 'constructor']
 const MAX_CALLERS = 25
+const DEFAULT_MAX_PARSE_FILES = 10_000 // files parsed per scoring run
+const DEFAULT_MAX_PARSE_FILE_BYTES = 1_000_000 // size cap for a single file
+const DEFAULT_MAX_TOTAL_PARSE_BYTES = 500_000_000 // byte budget for a whole run
+
+const MAX_PARSE_FILES = getPositiveIntegerEnv(
+  'CODEBUFF_MAX_PARSE_FILES', // env override; the default applies when unset or invalid
+  DEFAULT_MAX_PARSE_FILES,
+)
+const MAX_PARSE_FILE_BYTES = getPositiveIntegerEnv(
+  'CODEBUFF_MAX_PARSE_FILE_BYTES', // env override; the default applies when unset or invalid
+  DEFAULT_MAX_PARSE_FILE_BYTES,
+)
+const MAX_TOTAL_PARSE_BYTES = getPositiveIntegerEnv(
+  'CODEBUFF_MAX_TOTAL_PARSE_BYTES', // env override; the default applies when unset or invalid
+  DEFAULT_MAX_TOTAL_PARSE_BYTES,
+)
+
+type ParseTokensOptions = {
+  maxBytes?: number // per-file cap; defaults to MAX_PARSE_FILE_BYTES
+  remainingBytes?: number // budget left; defaults to MAX_TOTAL_PARSE_BYTES
+}
+
+type ParsedTokens = {
+  numLines: number
+  identifiers: string[] // unique 'identifier' captures
+  calls: string[] // unique 'call.identifier' captures
+}
+
+type ParsedTokensForScoring = ParsedTokens & {
+  bytes: number // size of the parsed source in bytes; 0 for empty results
+  skipped: boolean // true when the source was null or exceeded a byte limit
+}
+
+type SourceReader = (filePath: string) => string | null | Promise<string | null>
+
+type FileCallData = {
+  calls: string[]
+  scores: Record<string, number> // token name → score
+}
 
 export interface TokenCallerMap {
   [filePath: string]: {
@@ -24,110 +63,52 @@ export interface FileTokenData {
 export async function getFileTokenScores(
   projectRoot: string,
   filePaths: string[],
-  readFile?: (filePath: string) => string | null,
+  readFile?: SourceReader,
 ): Promise<FileTokenData> {
   const startTime = Date.now()
-  const tokenScores: { [filePath: string]: { [token: string]: number } } = {}
-  const externalCalls: { [token: string]: number } = {}
+  const tokenScores: Record<string, Record<string, number>> = {}
+  const externalCalls: Record<string, number> = {}
   const fileCallsMap = new Map<string, string[]>()
+  let parsedFiles = 0
+  let totalParsedBytes = 0
 
-  // First pass: collect all identifiers and calls
   for (const filePath of filePaths) {
+    if (
+      parsedFiles >= MAX_PARSE_FILES ||
+      totalParsedBytes >= MAX_TOTAL_PARSE_BYTES
+    ) {
+      break
+    }
+
     const fullPath = path.join(projectRoot, filePath)
     const languageConfig = await getLanguageConfig(fullPath)
-    if (languageConfig) {
-      let parseResults
-      if (readFile) {
-        // When readFile is provided, use relative filePath
-        parseResults = parseTokens(filePath, languageConfig, readFile)
-      } else {
-        // When readFile is not provided, use full path to read from file system
-        parseResults = parseTokens(fullPath, languageConfig)
-      }
-      const { identifiers, calls, numLines } = parseResults
-
-      const tokenScoresForFile: { [token: string]: number } = {}
-      tokenScores[filePath] = tokenScoresForFile
-
-      const dirs = path.dirname(fullPath).split(path.sep)
-      const depth = dirs.length
-      const tokenBaseScore =
-        0.8 ** depth * Math.sqrt(numLines / (identifiers.length + 1))
-
-      // Store defined tokens
-      for (const identifier of identifiers) {
-        if (!IGNORE_TOKENS.includes(identifier)) {
-          tokenScoresForFile[identifier] = tokenBaseScore
-        }
-      }
+    if (!languageConfig) continue
 
-      // Store calls for this file
-      fileCallsMap.set(filePath, calls)
+    const parsed = await parseTokensForScoring({
+      filePath,
+      fullPath,
+      languageConfig,
+      readFile,
+      remainingBytes: MAX_TOTAL_PARSE_BYTES - totalParsedBytes,
+    })
+    if (parsed.skipped) continue
 
-      // Track external calls
-      for (const call of calls) {
-        if (!tokenScoresForFile[call]) {
-          externalCalls[call] = (externalCalls[call] ?? 0) + 1
-        }
-      }
-    }
-  }
-  // Build a map of tokens to their defining files for O(1) lookup
-  const tokenDefinitionMap = new Map<string, string>()
-  const highestScores = new Map<string, number>()
-  for (const [filePath, scores] of Object.entries(tokenScores)) {
-    for (const [token, score] of Object.entries(scores)) {
-      const currentHighestScore = highestScores.get(token) ?? -Infinity
-      // Keep the file with the higher score for this token
-      if (score > currentHighestScore) {
-        highestScores.set(token, score)
-        tokenDefinitionMap.set(token, filePath)
-      }
-    }
-  }
+    parsedFiles++
+    totalParsedBytes += parsed.bytes
 
-  const tokenCallers: TokenCallerMap = {}
+    const { scores, calls } = scoreFileTokens(fullPath, parsed)
+    tokenScores[filePath] = scores
+    fileCallsMap.set(filePath, calls)
 
-  // For each file's calls, add it as a caller to the defining file's tokens
-  for (const [callingFile, calls] of fileCallsMap.entries()) {
     for (const call of calls) {
-      const definingFile = tokenDefinitionMap.get(call)
-      if (!definingFile || callingFile === definingFile) {
-        continue
-      }
-
-      // Skip token names in default objects, e.g. toString, hasOwnProperty
-      if (call in {}) {
-        continue
-      }
-
-      if (!tokenCallers[definingFile]) {
-        tokenCallers[definingFile] = {}
-      }
-
-      if (!tokenCallers[definingFile][call]) {
-        tokenCallers[definingFile][call] = []
-      }
-      const callerFiles = tokenCallers[definingFile][call]
-      if (
-        callerFiles.length < MAX_CALLERS &&
-        !callerFiles.includes(callingFile)
-      ) {
-        callerFiles.push(callingFile)
+      if (!scores[call]) {
+        externalCalls[call] = (externalCalls[call] ?? 0) + 1
       }
     }
   }
 
-  // Apply call frequency boost to token scores
-  for (const scores of Object.values(tokenScores)) {
-    for (const token of Object.keys(scores)) {
-      const numCalls = externalCalls[token] ?? 0
-      if (typeof numCalls !== 'number') continue
-      scores[token] *= 1 + Math.log(1 + numCalls)
-      // Round to 3 decimal places
-      scores[token] = Math.round(scores[token] * 1000) / 1000
-    }
-  }
+  const tokenCallers = buildTokenCallers(tokenScores, fileCallsMap)
+  boostScoresByExternalCalls(tokenScores, externalCalls)
 
   if (DEBUG_PARSING) {
     const endTime = Date.now()
@@ -155,25 +136,79 @@ export function parseTokens(
   filePath: string,
   languageConfig: LanguageConfig,
   readFile?: (filePath: string) => string | null,
-) {
+  options: ParseTokensOptions = {},
+): ParsedTokens {
+  const { numLines, identifiers, calls } = parseTokensWithLimits(
+    filePath,
+    languageConfig,
+    readFile,
+    options,
+  )
+  return { numLines, identifiers, calls }
+}
+
+async function parseTokensForScoring(params: {
+  filePath: string
+  fullPath: string
+  languageConfig: LanguageConfig
+  readFile?: SourceReader
+  remainingBytes: number
+}): Promise<ParsedTokensForScoring> {
+  // Resolves one file's tokens under the per-file and run-wide byte limits.
+  const { filePath, fullPath, languageConfig, readFile, remainingBytes } =
+    params
+  const limits = { maxBytes: MAX_PARSE_FILE_BYTES, remainingBytes }
+
+  // No reader supplied: let the parser read fullPath from disk itself.
+  if (!readFile) {
+    return parseTokensWithLimits(fullPath, languageConfig, undefined, limits)
+  }
+
+  try {
+    // The reader may be async; resolve it once and hand the parser a sync thunk.
+    const source = await readFile(filePath)
+    const readResolved = () => source
+    return parseTokensWithLimits(filePath, languageConfig, readResolved, limits)
+  } catch (e) {
+    // Any failure here degrades to an empty result that is not marked skipped.
+    if (DEBUG_PARSING) {
+      console.error(`Error reading source: ${e}`)
+      console.log(filePath)
+    }
+    return emptyParsedTokens(false)
+  }
+}
+
+function parseTokensWithLimits(
+  filePath: string,
+  languageConfig: LanguageConfig,
+  readFile: ((filePath: string) => string | null) | undefined,
+  options: ParseTokensOptions,
+): ParsedTokensForScoring {
   const { parser, query } = languageConfig
 
   try {
-    const sourceCode = readFile
-      ? readFile(filePath)
-      : fs.readFileSync(filePath, 'utf8')
-    if (sourceCode === null) {
-      return {
-        numLines: 0,
-        identifiers: [] as string[],
-        calls: [] as string[],
-      }
+    const maxBytes = options.maxBytes ?? MAX_PARSE_FILE_BYTES
+    const remainingBytes = options.remainingBytes ?? MAX_TOTAL_PARSE_BYTES
+    if (remainingBytes <= 0) {
+      return emptyParsedTokens(true)
+    }
+
+    const source = loadSourceWithinLimits({
+      filePath,
+      readFile,
+      maxBytes,
+      remainingBytes,
+    })
+    if (!source) {
+      return emptyParsedTokens(true)
     }
-    const numLines = (sourceCode.match(/\n/g)?.length ?? 0) + 1
+
     if (!parser || !query) {
       throw new Error('Parser or query not found')
     }
-    const parseResults = parseFile(parser, query, sourceCode)
+
+    const parseResults = parseFile(parser, query, source.code)
     const identifiers = Array.from(new Set(parseResults.identifier))
     const calls = Array.from(new Set(parseResults['call.identifier']))
 
@@ -184,21 +219,136 @@ export function parseTokens(
     }
 
     return {
-      numLines,
+      numLines: countLines(source.code),
       identifiers: identifiers ?? [],
       calls: calls ?? [],
+      bytes: source.bytes,
+      skipped: false,
     }
   } catch (e) {
     if (DEBUG_PARSING) {
       console.error(`Error parsing query: ${e}`)
       console.log(filePath)
     }
+    return emptyParsedTokens(false)
+  }
+}
+
+function loadSourceWithinLimits(params: {
+  filePath: string
+  readFile?: (filePath: string) => string | null
+  maxBytes: number
+  remainingBytes: number
+}): { code: string; bytes: number } | null {
+  const { filePath, readFile, maxBytes, remainingBytes } = params
+
+  if (!readFile) {
+    const bytes = fs.statSync(filePath).size
+    if (bytes > maxBytes || bytes > remainingBytes) return null
+
     return {
-      numLines: 0,
-      identifiers: [] as string[],
-      calls: [] as string[],
+      code: fs.readFileSync(filePath, 'utf8'),
+      bytes,
+    }
+  }
+
+  const code = readFile(filePath)
+  if (code === null) return null
+
+  const bytes = Buffer.byteLength(code, 'utf8')
+  if (bytes > maxBytes || bytes > remainingBytes) return null
+
+  return { code, bytes }
+}
+
+function scoreFileTokens(fullPath: string, parsed: ParsedTokens): FileCallData {
+  // One shared base score per file: 0.8^(path segments) * sqrt(lines per identifier).
+  const { identifiers, numLines, calls } = parsed
+  const depth = path.dirname(fullPath).split(path.sep).length
+  const density = Math.sqrt(numLines / (identifiers.length + 1))
+  const tokenBaseScore = 0.8 ** depth * density
+  const scores: Record<string, number> = {}
+  for (const identifier of identifiers) {
+    if (!IGNORE_TOKENS.includes(identifier)) {
+      scores[identifier] = tokenBaseScore
     }
   }
+
+  return { scores, calls }
+}
+
+function buildTokenCallers(
+  tokenScores: Record<string, Record<string, number>>,
+  fileCallsMap: Map<string, string[]>,
+): TokenCallerMap {
+  // Pass 1: for every token, remember the file that scores it highest. On a
+  // tie the file seen first wins because the comparison below is strict.
+  const bestDefinition = new Map<string, { file: string; score: number }>()
+  for (const [filePath, scores] of Object.entries(tokenScores)) {
+    for (const [token, score] of Object.entries(scores)) {
+      const bestSoFar = bestDefinition.get(token)?.score ?? -Infinity
+      if (score > bestSoFar) {
+        bestDefinition.set(token, { file: filePath, score })
+      }
+    }
+  }
+
+  // Pass 2: register each calling file under the token's defining file.
+  const tokenCallers: TokenCallerMap = {}
+  for (const [callingFile, calls] of fileCallsMap) {
+    for (const call of calls) {
+      const definingFile = bestDefinition.get(call)?.file
+      // Ignore unknown tokens and calls made from the defining file itself.
+      if (!definingFile || definingFile === callingFile) continue
+      // Ignore names every object inherits, e.g. toString, hasOwnProperty.
+      if (call in {}) continue
+
+      const callersByToken = (tokenCallers[definingFile] ??= {})
+      const callerFiles = (callersByToken[call] ??= [])
+      // Each token lists a file at most once and at most MAX_CALLERS files.
+      const isFull = callerFiles.length >= MAX_CALLERS
+      if (!isFull && !callerFiles.includes(callingFile)) {
+        callerFiles.push(callingFile)
+      }
+    }
+  }
+
+  return tokenCallers
+}
+/** Boosts each score by 1 + ln(1 + external calls), rounded to 3 decimals, in place. */
+function boostScoresByExternalCalls(
+  tokenScores: Record<string, Record<string, number>>,
+  externalCalls: Record<string, number>,
+): void {
+  for (const scores of Object.values(tokenScores)) {
+    for (const token of Object.keys(scores)) {
+      const numCalls = externalCalls[token] ?? 0
+      if (typeof numCalls !== 'number') continue // inherited key (e.g. toString) would give NaN
+      scores[token] = Math.round(scores[token] * (1 + Math.log(1 + numCalls)) * 1000) / 1000
+    }
+  }
+}
+
+function emptyParsedTokens(skipped: boolean): ParsedTokensForScoring {
+  // Zero-valued result for files that were skipped or failed to read/parse;
+  // fresh arrays are created on every call.
+  const identifiers: string[] = []
+  const calls: string[] = []
+  const placeholder = { numLines: 0, identifiers, calls, bytes: 0, skipped }
+
+  return placeholder
+}
+
+function countLines(sourceCode: string): number {
+  return (sourceCode.match(/\n/g)?.length ?? 0) + 1 // newline count + 1, so '' counts as 1 line
+}
+
+function getPositiveIntegerEnv(name: string, fallback: number): number {
+  // Reads env var `name` as a positive integer, else returns `fallback`. Only
+  // plain digits count: parseInt would truncate "1e6" to 1 and "10MB" to 10.
+  const raw = process.env[name]?.trim() ?? ''
+  const parsed = /^\d+$/.test(raw) ? Number(raw) : Number.NaN
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
 }
 
 function parseFile(
@@ -210,16 +360,20 @@ function parseFile(
   if (!tree) {
     return {}
   }
-  const captures = query.captures(tree.rootNode)
-  const result: { [key: string]: string[] } = {}
+  try {
+    const captures = query.captures(tree.rootNode)
+    const result: { [key: string]: string[] } = {}
 
-  for (const capture of captures) {
-    const { name, node } = capture
-    if (!result[name]) {
-      result[name] = []
+    for (const capture of captures) {
+      const { name, node } = capture
+      if (!result[name]) {
+        result[name] = []
+      }
+      result[name].push(node.text)
     }
-    result[name].push(node.text)
-  }
 
-  return result
+    return result
+  } finally {
+    ;(tree as { delete?: () => void }).delete?.()
+  }
 }
diff --git a/sdk/src/__tests__/initial-session-state.test.ts b/sdk/src/__tests__/initial-session-state.test.ts
index e8e1ac5d5..d8e8d2abb 100644
--- a/sdk/src/__tests__/initial-session-state.test.ts
+++ b/sdk/src/__tests__/initial-session-state.test.ts
@@ -116,6 +116,31 @@ describe('Initial Session State', () => {
   })
 
   test('discovers project files automatically when projectFiles is undefined', async () => {
+    mockFs.readdir = (async (dirPath: string) => {
+      if (dirPath === '/test-project') {
+        return ['src', '.git', 'knowledge.md', 'README.md', '.gitignore']
+      }
+      if (dirPath === '/test-project/src') {
+        return ['index.ts', 'utils.ts', 'generated.ts']
+      }
+      return []
+    }) as CodebuffFileSystem['readdir']
+    mockFs.stat = (async (filePath: string) =>
+      ({
+        isDirectory: () =>
+          filePath === '/test-project/src' || filePath === '/test-project/.git',
+        isFile: () =>
+          filePath !== '/test-project/src' && filePath !== '/test-project/.git',
+        size: filePath.endsWith('generated.ts') ? 1_000_001 : 100,
+      }) as MockStatResult & { size: number }) as CodebuffFileSystem['stat']
+
+    const readFilePaths: string[] = []
+    const originalReadFile = mockFs.readFile
+    mockFs.readFile = (async (filePath: string, encoding?: BufferEncoding) => {
+      readFilePaths.push(filePath)
+      return originalReadFile(filePath, encoding)
+    }) as CodebuffFileSystem['readFile']
+
     const sessionState = await initialSessionState({
       cwd: '/test-project',
       projectFiles: undefined,
@@ -126,6 +151,13 @@ describe('Initial Session State', () => {
     expect(sessionState.fileContext.fileTree).toBeDefined()
     expect(sessionState.mainAgentState.agentId).toBe('main-agent')
     expect(sessionState.mainAgentState.messageHistory).toEqual([])
+    expect(readFilePaths.some((p) => p.endsWith('src/index.ts'))).toBe(true)
+    expect(readFilePaths.some((p) => p.endsWith('src/utils.ts'))).toBe(true)
+    expect(readFilePaths.some((p) => p.endsWith('src/generated.ts'))).toBe(
+      false,
+    )
+    expect(readFilePaths.some((p) => p.endsWith('README.md'))).toBe(false)
+    expect(readFilePaths.some((p) => p.endsWith('knowledge.md'))).toBe(true)
   })
 
   test('derives knowledgeFiles from projectFiles when not provided', async () => {
diff --git a/sdk/src/run-state.ts b/sdk/src/run-state.ts
index f2ea5af7a..86f19b838 100644
--- a/sdk/src/run-state.ts
+++ b/sdk/src/run-state.ts
@@ -53,9 +53,7 @@ export function selectHighestPriorityKnowledgeFile(
 ): string | undefined {
   // Loop through priorities and find the first match directly
   for (const priorityName of KNOWLEDGE_FILE_NAMES_LOWERCASE) {
-    const match = candidates.find((f) =>
-      f.toLowerCase().endsWith(priorityName),
-    )
+    const match = candidates.find((f) => f.toLowerCase().endsWith(priorityName))
     if (match) return match
   }
   return undefined
@@ -136,26 +134,27 @@ function processCustomToolDefinitions(
 /**
  * Computes project file indexes (file tree and token scores)
  */
-async function computeProjectIndex(
-  cwd: string,
-  projectFiles: Record<string, string>,
-): Promise<{
+type ProjectIndexInput = {
+  cwd: string // project root handed to getFileTokenScores
+  fileTree: FileTreeNode[] // returned unchanged by computeProjectIndex
+  filePaths: string[] // files to score
+  readFile?: (filePath: string) => string | null | Promise<string | null>
+}
+
+const MAX_DISCOVERED_PROJECT_READ_BYTES = 1_000_000 // size cap for discovered-file reads
+
+async function computeProjectIndex(params: ProjectIndexInput): Promise<{
   fileTree: FileTreeNode[]
   fileTokenScores: Record<string, any>
   tokenCallers: Record<string, any>
 }> {
-  const filePaths = Object.keys(projectFiles).sort()
-  const fileTree = buildFileTree(filePaths)
+  const { cwd, fileTree, filePaths, readFile } = params
   let fileTokenScores = {}
   let tokenCallers = {}
 
   if (filePaths.length > 0) {
     try {
-      const tokenData = await getFileTokenScores(
-        cwd,
-        filePaths,
-        (filePath: string) => projectFiles[filePath] || null,
-      )
+      const tokenData = await getFileTokenScores(cwd, filePaths, readFile)
       fileTokenScores = tokenData.tokenScores
       tokenCallers = tokenData.tokenCallers
     } catch (error) {
@@ -167,6 +166,68 @@ async function computeProjectIndex(
   return { fileTree, fileTokenScores, tokenCallers }
 }
 
+function getProjectIndexInput(params: {
+  cwd: string
+  fs?: CodebuffFileSystem
+  logger?: Logger
+  projectFiles?: Record<string, string>
+  discoveredProject?: { fileTree: FileTreeNode[]; filePaths: string[] }
+}): ProjectIndexInput | undefined {
+  // Builds the indexing input; in-memory projectFiles win over discovered paths.
+  const { cwd, fs, logger, projectFiles, discoveredProject } = params
+  if (projectFiles) {
+    const filePaths = Object.keys(projectFiles).sort()
+    return {
+      cwd,
+      fileTree: buildFileTree(filePaths),
+      filePaths,
+      readFile: (filePath: string) => projectFiles[filePath] || null,
+    }
+  }
+
+  if (discoveredProject) {
+    if (!fs || !logger) return undefined // lazy disk reads need both
+    return {
+      cwd,
+      fileTree: discoveredProject.fileTree,
+      // Sort a copy: Array.prototype.sort would reorder the caller's array.
+      filePaths: [...discoveredProject.filePaths].sort(),
+      readFile: createDiscoveredProjectReader({ cwd, fs, logger }),
+    }
+  }
+
+  return undefined // neither explicit files nor a usable discovery result
+}
+
+function createDiscoveredProjectReader(params: {
+  cwd: string
+  fs: CodebuffFileSystem
+  logger: Logger
+}): (filePath: string) => Promise<string | null> {
+  // Lazy reader for cwd-relative paths; null for oversized or unreadable files.
+  const { cwd, fs, logger } = params
+  const readDiscoveredFile = async (filePath: string) => {
+    const fullPath = path.join(cwd, filePath)
+    try {
+      // Stat first so an oversized file is rejected without being read.
+      const size = getFileSize(await fs.stat(fullPath))
+      const tooLarge = size > MAX_DISCOVERED_PROJECT_READ_BYTES
+      return tooLarge ? null : await fs.readFile(fullPath, 'utf8')
+    } catch (error) {
+      logger.debug?.(
+        { filePath, error: getErrorObject(error) },
+        'Failed to read discovered project file for symbol scoring',
+      )
+      return null
+    }
+  }
+  return readDiscoveredFile
+}
+
+function getFileSize(stats: Awaited<ReturnType<CodebuffFileSystem['stat']>>) {
+  return typeof stats.size === 'number' ? stats.size : 0 // a missing or non-numeric size counts as 0
+}
+
 /**
  * Helper to convert ChildProcess to Promise with stdout/stderr
  */
@@ -261,43 +322,20 @@ async function getGitChanges(params: {
 }
 
 /**
- * Discovers project files using .gitignore patterns when projectFiles is undefined
+ * Discovers project paths using .gitignore patterns when projectFiles is undefined.
+ * This intentionally does not read every file into memory; large repositories can
+ * contain generated or binary files that are expensive to retain before parsing.
  */
-async function discoverProjectFiles(params: {
+async function discoverProjectPaths(params: {
   cwd: string
   fs: CodebuffFileSystem
-  logger: Logger
-}): Promise<Record<string, string>> {
-  const { cwd, fs, logger } = params
+}): Promise<{ fileTree: FileTreeNode[]; filePaths: string[] }> {
+  const { cwd, fs } = params
 
   const fileTree = await getProjectFileTree({ projectRoot: cwd, fs })
   const filePaths = getAllFilePaths(fileTree)
-  let error
-
-  // Create projectFiles with empty content - the token scorer will read from disk
-  const projectFilePromises = Object.fromEntries(
-    filePaths.map((filePath) => [
-      filePath,
-      fs.readFile(path.join(cwd, filePath), 'utf8').catch((err) => {
-        error = err
-        return '[ERROR_READING_FILE]'
-      }),
-    ]),
-  )
-  if (error) {
-    logger.warn(
-      { error: getErrorObject(error) },
-      'Failed to discover some project files',
-    )
-  }
 
-  const projectFilesResolved: Record<string, string> = {}
-  for (const [filePath, contentPromise] of Object.entries(
-    projectFilePromises,
-  )) {
-    projectFilesResolved[filePath] = await contentPromise
-  }
-  return projectFilesResolved
+  return { fileTree, filePaths }
 }
 
 /**
@@ -322,7 +360,10 @@ export async function loadUserKnowledgeFiles(params: {
   try {
     entries = await fs.readdir(homeDir)
   } catch (error) {
-    logger.debug?.({ homeDir, error: getErrorObject(error) }, 'Failed to read home directory')
+    logger.debug?.(
+      { homeDir, error: getErrorObject(error) },
+      'Failed to read home directory',
+    )
     return userKnowledgeFiles
   }
 
@@ -351,7 +392,10 @@ export async function loadUserKnowledgeFiles(params: {
         // Only use the first file found (highest priority)
         break
       } catch (error) {
-        logger.debug?.({ filePath, error: getErrorObject(error) }, 'Failed to read user knowledge file')
+        logger.debug?.(
+          { filePath, error: getErrorObject(error) },
+          'Failed to read user knowledge file',
+        )
       }
     }
   }
@@ -407,6 +451,32 @@ function deriveKnowledgeFiles(
   return knowledgeFiles
 }
 
+async function loadKnowledgeFilesFromPaths(params: {
+  cwd: string
+  filePaths: string[]
+  fs: CodebuffFileSystem
+  logger: Logger
+}): Promise<Record<string, string>> {
+  // Reads only the paths picked by selectKnowledgeFilePaths, one at a time.
+  // A file that cannot be read is logged at debug level and left out.
+  const { cwd, filePaths, fs, logger } = params
+  const contentsByPath: Record<string, string> = {}
+
+  for (const relativePath of selectKnowledgeFilePaths(filePaths)) {
+    try {
+      const fullPath = path.join(cwd, relativePath)
+      contentsByPath[relativePath] = await fs.readFile(fullPath, 'utf8')
+    } catch (error) {
+      logger.debug?.(
+        { filePath: relativePath, error: getErrorObject(error) },
+        'Failed to read project knowledge file',
+      )
+    }
+  }
+
+  return contentsByPath
+}
+
 export async function initialSessionState(
   params: InitialSessionStateOptions,
 ): Promise<SessionState> {
@@ -443,12 +513,27 @@ export async function initialSessionState(
     }
   }
 
+  let discoveredProject:
+    | { fileTree: FileTreeNode[]; filePaths: string[] }
+    | undefined
+
   // Auto-discover project files if not provided and cwd is available
   if (projectFiles === undefined && cwd) {
-    projectFiles = await discoverProjectFiles({ cwd, fs, logger })
+    discoveredProject = await discoverProjectPaths({ cwd, fs })
   }
   if (knowledgeFiles === undefined) {
-    knowledgeFiles = projectFiles ? deriveKnowledgeFiles(projectFiles) : {}
+    if (projectFiles) {
+      knowledgeFiles = deriveKnowledgeFiles(projectFiles)
+    } else if (cwd && discoveredProject) {
+      knowledgeFiles = await loadKnowledgeFilesFromPaths({
+        cwd,
+        filePaths: discoveredProject.filePaths,
+        fs,
+        logger,
+      })
+    } else {
+      knowledgeFiles = {}
+    }
   }
 
   let processedAgentTemplates: Record<string, any> = {}
@@ -461,13 +546,15 @@ export async function initialSessionState(
     customToolDefinitions,
   )
 
-  // Generate file tree and token scores from projectFiles if available
   let fileTree: FileTreeNode[] = []
   let fileTokenScores: Record<string, any> = {}
   let tokenCallers: Record<string, any> = {}
 
-  if (cwd && projectFiles) {
-    const result = await computeProjectIndex(cwd, projectFiles)
+  const projectIndex = cwd
+    ? getProjectIndexInput({ cwd, fs, logger, projectFiles, discoveredProject })
+    : undefined
+  if (projectIndex) {
+    const result = await computeProjectIndex(projectIndex)
     fileTree = result.fileTree
     fileTokenScores = result.fileTokenScores
     tokenCallers = result.tokenCallers
@@ -491,7 +578,11 @@ export async function initialSessionState(
   }
 
   // Load skills from project and home directories
-  const skills = await loadSkills({ cwd: cwd ?? process.cwd(), skillsPath: skillsDir, verbose: false })
+  const skills = await loadSkills({
+    cwd: cwd ?? process.cwd(),
+    skillsPath: skillsDir,
+    verbose: false,
+  })
 
   const initialState = getInitialSessionState({
     projectRoot: cwd ?? process.cwd(),
@@ -618,11 +709,17 @@ export async function applyOverridesToSessionState(
   // Apply projectFiles override (recomputes file tree and token scores)
   if (overrides.projectFiles !== undefined) {
     if (cwd) {
-      const { fileTree, fileTokenScores, tokenCallers } =
-        await computeProjectIndex(cwd, overrides.projectFiles)
-      sessionState.fileContext.fileTree = fileTree
-      sessionState.fileContext.fileTokenScores = fileTokenScores
-      sessionState.fileContext.tokenCallers = tokenCallers
+      const projectIndex = getProjectIndexInput({
+        cwd,
+        projectFiles: overrides.projectFiles,
+      })
+      if (projectIndex) {
+        const { fileTree, fileTokenScores, tokenCallers } =
+          await computeProjectIndex(projectIndex)
+        sessionState.fileContext.fileTree = fileTree
+        sessionState.fileContext.fileTokenScores = fileTokenScores
+        sessionState.fileContext.tokenCallers = tokenCallers
+      }
     } else {
       // If projectFiles are provided but no cwd, reset file context fields
       sessionState.fileContext.fileTree = []