Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/config/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ export const EMBEDDING_MODELS = {
/**
 * Default embedding model name for each provider key.
 *
 * Each provider appears exactly once. The literal previously listed "google"
 * twice; only the last entry ("gemini-embedding-001") took effect at runtime,
 * and duplicate keys are rejected by the TypeScript compiler, so the stale
 * entry was dropped.
 */
export const DEFAULT_PROVIDER_MODELS = {
  "github-copilot": "text-embedding-3-small",
  "openai": "text-embedding-3-small",
  "google": "gemini-embedding-001",
  "ollama": "nomic-embed-text",
} as const

/**
 * Provider keys in the order they are tried during auto-detection.
 * Earlier entries take precedence, so "github-copilot" is preferred over "google".
 */
export const AUTO_DETECT_PROVIDER_ORDER = ["github-copilot", "openai", "google", "ollama"] as const;
6 changes: 5 additions & 1 deletion src/config/schema.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Config schema without zod dependency to avoid version conflicts with OpenCode SDK

import { DEFAULT_INCLUDE, DEFAULT_EXCLUDE, EMBEDDING_MODELS, DEFAULT_PROVIDER_MODELS } from "./constants.js";
import { AUTO_DETECT_PROVIDER_ORDER, DEFAULT_INCLUDE, DEFAULT_EXCLUDE, EMBEDDING_MODELS, DEFAULT_PROVIDER_MODELS } from "./constants.js";
import { substituteEnvString } from "./env-substitution.js";

export type IndexScope = "project" | "global";
Expand Down Expand Up @@ -426,6 +426,10 @@ export type EmbeddingProvider = keyof typeof EMBEDDING_MODELS;

// All provider keys declared in EMBEDDING_MODELS. Object.keys returns string[],
// so the result is cast to the EmbeddingProvider key union.
export const availableProviders: EmbeddingProvider[] = Object.keys(EMBEDDING_MODELS) as EmbeddingProvider[]

/**
 * Providers considered during auto-detection, in AUTO_DETECT_PROVIDER_ORDER order.
 * Entries that are not keys of EMBEDDING_MODELS are filtered out.
 */
export const autoDetectProviders: EmbeddingProvider[] = AUTO_DETECT_PROVIDER_ORDER.filter(
  (candidate): candidate is EmbeddingProvider => {
    return candidate in EMBEDDING_MODELS;
  },
);

/**
 * Maps each provider key of EMBEDDING_MODELS to the union of model names
 * declared under that provider.
 */
export type ProviderModels = {
  [Provider in keyof typeof EMBEDDING_MODELS]: keyof (typeof EMBEDDING_MODELS)[Provider]
}
Expand Down
6 changes: 3 additions & 3 deletions src/embeddings/detector.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { type EmbeddingProvider, type CustomProviderConfig, type BaseModelInfo, getDefaultModelForProvider, isValidModel, availableProviders, EmbeddingModelName, EMBEDDING_MODELS } from "../config";
import { type EmbeddingProvider, type CustomProviderConfig, type BaseModelInfo, getDefaultModelForProvider, isValidModel, autoDetectProviders, EmbeddingModelName, EMBEDDING_MODELS } from "../config";
import { existsSync, readFileSync } from "fs";
import * as path from "path";
import * as os from "os";
Expand Down Expand Up @@ -91,7 +91,7 @@ export async function detectEmbeddingProvider<P extends EmbeddingProvider>(
}

export async function tryDetectProvider(): Promise<ConfiguredProviderInfo> {
for (const provider of availableProviders) {
for (const provider of autoDetectProviders) {
const credentials = await getProviderCredentials(provider);
if (credentials) {
return {
Expand All @@ -103,7 +103,7 @@ export async function tryDetectProvider(): Promise<ConfiguredProviderInfo> {
}

throw new Error(
`No embedding-capable provider found. Please authenticate with OpenCode using one of: ${availableProviders.join(", ")}.`
`No embedding-capable provider found. Please authenticate with OpenCode using one of: ${autoDetectProviders.join(", ")}.`
);
}

Expand Down
63 changes: 52 additions & 11 deletions src/indexer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ export interface StatusResult {
currentBranch: string;
baseBranch: string;
compatibility: IndexCompatibility | null;
failedBatchesCount: number;
failedBatchesPath?: string;
}

export interface IndexProgress {
Expand Down Expand Up @@ -1527,22 +1529,37 @@ export class Indexer {
/**
 * Persists the failed embedding batches, or clears the saved file when there are none.
 *
 * An empty list removes any existing file at `failedBatchesPath`; otherwise the
 * batches are written to that path as pretty-printed JSON.
 *
 * @param batches Failed batches to store; pass an empty array to clear the file.
 */
private saveFailedBatches(batches: FailedBatch[]): void {
  if (batches.length === 0) {
    if (existsSync(this.failedBatchesPath)) {
      try {
        // Delete synchronously (and only once) so the file is gone before this
        // method returns; a fire-and-forget async unlink could still be pending
        // when the file is next read or rewritten.
        unlinkSync(this.failedBatchesPath);
      } catch {
        // Ignore cleanup failures; stale diagnostics are best-effort only.
      }
    }
    return;
  }
  writeFileSync(this.failedBatchesPath, JSON.stringify(batches, null, 2));
}

private addFailedBatch(batch: PendingChunk[], error: string): void {
const existing = this.loadFailedBatches();
existing.push({
chunks: batch,
error,
attemptCount: 1,
lastAttempt: new Date().toISOString(),
});
this.saveFailedBatches(existing);
/**
 * Gathers chunks from previously saved failed batches that can be retried.
 *
 * A chunk qualifies only when its file path is present in `currentFileHashes`
 * and also listed in `unchangedFilePaths`. Chunks are de-duplicated by id; if
 * an id occurs more than once, the last occurrence is the one returned.
 *
 * @param currentFileHashes Hashes keyed by file path for the current run.
 * @param unchangedFilePaths Paths of files considered unchanged in the current run.
 * @returns The retryable chunks, one per chunk id.
 */
private collectRetryableFailedChunks(
  currentFileHashes: Map<string, string>,
  unchangedFilePaths: Set<string>
): PendingChunk[] {
  // A file must be both known to the current run and unchanged to qualify.
  const isRetryable = (filePath: string): boolean =>
    currentFileHashes.has(filePath) && unchangedFilePaths.has(filePath);

  const latestById = new Map<string, PendingChunk>();
  for (const { chunks } of this.loadFailedBatches()) {
    for (const failedChunk of chunks) {
      if (isRetryable(failedChunk.metadata.filePath)) {
        latestById.set(failedChunk.id, failedChunk);
      }
    }
  }

  return Array.from(latestById.values());
}

private getProviderRateLimits(provider: string): {
Expand Down Expand Up @@ -2087,6 +2104,7 @@ export class Indexer {
skippedFiles: [],
parseFailures: [],
};
const failedBatchesForCurrentRun: FailedBatch[] = [];

onProgress?.({
phase: "scanning",
Expand Down Expand Up @@ -2245,6 +2263,18 @@ export class Indexer {
}
}

const retryableFailedChunks = this.collectRetryableFailedChunks(currentFileHashes, unchangedFilePaths);
if (retryableFailedChunks.length > 0) {
const pendingChunkIds = new Set(pendingChunks.map((chunk) => chunk.id));
for (const chunk of retryableFailedChunks) {
if (!pendingChunkIds.has(chunk.id)) {
pendingChunks.push(chunk);
pendingChunkIds.add(chunk.id);
currentChunkIds.add(chunk.id);
}
}
}

if (chunkDataBatch.length > 0) {
database.upsertChunksBatch(chunkDataBatch);
}
Expand Down Expand Up @@ -2376,6 +2406,7 @@ export class Indexer {
database.addSymbolsToBranchBatch(this.currentBranch, Array.from(allSymbolIds));
this.fileHashCache = currentFileHashes;
this.saveFileHashCache();
this.saveFailedBatches([]);
stats.durationMs = Date.now() - startTime;
onProgress?.({
phase: "complete",
Expand All @@ -2397,6 +2428,7 @@ export class Indexer {
invertedIndex.save();
this.fileHashCache = currentFileHashes;
this.saveFileHashCache();
this.saveFailedBatches([]);
stats.durationMs = Date.now() - startTime;
onProgress?.({
phase: "complete",
Expand Down Expand Up @@ -2530,7 +2562,12 @@ export class Indexer {
});
} catch (error) {
stats.failedChunks += batch.length;
this.addFailedBatch(batch, getErrorMessage(error));
failedBatchesForCurrentRun.push({
chunks: batch,
error: getErrorMessage(error),
attemptCount: 1,
lastAttempt: new Date().toISOString(),
});
this.logger.recordEmbeddingError();
this.logger.embedding("error", `Failed to embed batch after retries`, {
batchSize: batch.length,
Expand All @@ -2541,6 +2578,7 @@ export class Indexer {
}

await queue.onIdle();
this.saveFailedBatches(failedBatchesForCurrentRun);

onProgress?.({
phase: "storing",
Expand Down Expand Up @@ -2962,6 +3000,7 @@ export class Indexer {

async getStatus(): Promise<StatusResult> {
const { store, configuredProviderInfo } = await this.ensureInitialized();
const failedBatchesCount = this.getFailedBatchesCount();

return {
indexed: store.count() > 0,
Expand All @@ -2972,6 +3011,8 @@ export class Indexer {
currentBranch: this.currentBranch,
baseBranch: this.baseBranch,
compatibility: this.indexCompatibility,
failedBatchesCount,
failedBatchesPath: failedBatchesCount > 0 ? this.failedBatchesPath : undefined,
};
}

Expand Down
86 changes: 2 additions & 84 deletions src/mcp-server.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";

import { Indexer, type IndexStats } from "./indexer/index.js";
import { Indexer } from "./indexer/index.js";
import type { ParsedCodebaseIndexConfig, LogLevel } from "./config/schema.js";
import { formatDefinitionLookup } from "./tools/utils.js";
import { formatDefinitionLookup, formatIndexStats, formatStatus } from "./tools/utils.js";
import { formatCostEstimate } from "./utils/cost.js";
import type { LogEntry } from "./utils/logger.js";

Expand All @@ -18,88 +18,6 @@ function truncateContent(content: string): string {
);
}

/**
 * Renders the result of an indexing run as newline-separated text.
 *
 * The first line summarises what happened (nothing to do, only removals, or
 * new chunks embedded). When chunks were embedded, removal/failure counts and
 * a token/duration line follow. In verbose mode, skipped files grouped by
 * reason and files without extractable chunks are appended.
 *
 * @param stats Counters and file lists collected by the indexing run.
 * @param verbose Whether to append the skipped-file and parse-failure details.
 * @returns The formatted summary.
 */
function formatIndexStats(stats: IndexStats, verbose: boolean = false): string {
  // Joins at most `limit` entries and marks truncation with a trailing "...".
  const preview = (entries: string[], limit: number): string => {
    const ellipsis = entries.length > limit ? "..." : "";
    return `${entries.slice(0, limit).join(", ")}${ellipsis}`;
  };

  const output: string[] = [];
  const nothingEmbedded = stats.indexedChunks === 0;

  if (nothingEmbedded && stats.removedChunks === 0) {
    output.push(`Indexed. ${stats.totalFiles} files processed, ${stats.existingChunks} code chunks already up to date.`);
  } else if (nothingEmbedded) {
    output.push(`Indexed. ${stats.totalFiles} files, removed ${stats.removedChunks} stale chunks, ${stats.existingChunks} chunks remain.`);
  } else {
    const skippedNote = stats.existingChunks > 0 ? ` ${stats.existingChunks} unchanged chunks skipped.` : "";
    output.push(`Indexed. ${stats.totalFiles} files processed, ${stats.indexedChunks} new chunks embedded.${skippedNote}`);

    if (stats.removedChunks > 0) {
      output.push(`Removed ${stats.removedChunks} stale chunks.`);
    }
    if (stats.failedChunks > 0) {
      output.push(`Failed: ${stats.failedChunks} chunks.`);
    }

    output.push(`Tokens: ${stats.tokensUsed.toLocaleString()}, Duration: ${(stats.durationMs / 1000).toFixed(1)}s`);
  }

  if (!verbose) {
    return output.join("\n");
  }

  if (stats.skippedFiles.length > 0) {
    output.push("", `Skipped files: ${stats.skippedFiles.length}`);

    // Display label paired with the skip reason it groups, in output order.
    const groups: ReadonlyArray<readonly [string, string]> = [
      ["Too large", "too_large"],
      ["Excluded", "excluded"],
      ["Gitignored", "gitignore"],
    ];
    for (const [label, reason] of groups) {
      const paths: string[] = stats.skippedFiles.filter(f => f.reason === reason).map(f => f.path);
      if (paths.length > 0) {
        output.push(` ${label} (${paths.length}): ${preview(paths, 5)}`);
      }
    }
  }

  if (stats.parseFailures.length > 0) {
    output.push("", `Files with no extractable chunks (${stats.parseFailures.length}): ${preview(stats.parseFailures, 10)}`);
  }

  return output.join("\n");
}

/**
 * Renders the index status as newline-separated text.
 *
 * Returns a single hint line when nothing is indexed. Otherwise lists the
 * chunk count, provider, model and index location, followed by the current
 * and base branch unless the current branch is "default".
 *
 * @param status Snapshot of the index state to describe.
 * @returns The formatted status message.
 */
function formatStatus(status: {
  indexed: boolean;
  vectorCount: number;
  provider: string;
  model: string;
  indexPath: string;
  currentBranch: string;
  baseBranch: string;
}): string {
  if (!status.indexed) {
    return "Codebase is not indexed. Run index_codebase to create an index.";
  }

  // Branch details are only shown when not on the "default" branch.
  const branchDetails =
    status.currentBranch !== "default"
      ? [` Current branch: ${status.currentBranch}`, ` Base branch: ${status.baseBranch}`]
      : [];

  return [
    `Index status:`,
    ` Indexed chunks: ${status.vectorCount.toLocaleString()}`,
    ` Provider: ${status.provider}`,
    ` Model: ${status.model}`,
    ` Location: ${status.indexPath}`,
    ...branchDetails,
  ].join("\n");
}

const CHUNK_TYPE_ENUM = [
"function", "class", "method", "interface", "type",
"enum", "struct", "impl", "trait", "module", "other",
Expand Down
29 changes: 29 additions & 0 deletions src/tools/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ function truncateContent(content: string): string {

export function formatIndexStats(stats: IndexStats, verbose: boolean = false): string {
const lines: string[] = [];

if (stats.failedChunks > 0) {
lines.push(`INDEXING WARNING: ${stats.failedChunks} chunks failed to embed.`);
if (stats.failedBatchesPath) {
lines.push(`Inspect failed batches at: ${stats.failedBatchesPath}`);
}
lines.push("");
}

if (stats.indexedChunks === 0 && stats.removedChunks === 0) {
lines.push(`${stats.totalFiles} files processed, ${stats.existingChunks} code chunks already up to date.`);
Expand Down Expand Up @@ -67,6 +75,19 @@ export function formatIndexStats(stats: IndexStats, verbose: boolean = false): s

export function formatStatus(status: StatusResult): string {
if (!status.indexed) {
if (status.failedBatchesCount > 0) {
const lines = [
"Codebase is not indexed. The last indexing run left failed embedding batches.",
"Fix the provider/model configuration, then rerun index_codebase normally to retry the saved failed batches. Use force=true only for a full rebuild or compatibility reset.",
];

if (status.failedBatchesPath) {
lines.push(`Failed batches: ${status.failedBatchesPath}`);
}

return lines.join("\n");
}

return "Codebase is not indexed. Run index_codebase to create an index.";
}

Expand All @@ -82,6 +103,14 @@ export function formatStatus(status: StatusResult): string {
lines.push(`Base branch: ${status.baseBranch}`);
}

if (status.failedBatchesCount > 0) {
lines.push("");
lines.push(`INDEXING WARNING: ${status.failedBatchesCount} failed embedding batch${status.failedBatchesCount === 1 ? " remains" : "es remain"}.`);
if (status.failedBatchesPath) {
lines.push(`Failed batches: ${status.failedBatchesPath}`);
}
}

if (status.compatibility && !status.compatibility.compatible) {
lines.push("");
lines.push(`COMPATIBILITY WARNING: ${status.compatibility.reason}`);
Expand Down
12 changes: 10 additions & 2 deletions tests/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
} from "../src/config/schema.js";
import {
EMBEDDING_MODELS,
AUTO_DETECT_PROVIDER_ORDER,
DEFAULT_PROVIDER_MODELS,
} from "../src/config/constants.js";

Expand Down Expand Up @@ -836,8 +837,8 @@ describe("config schema", () => {
// The google default must match DEFAULT_PROVIDER_MODELS.google
// ("gemini-embedding-001"). The stale "text-embedding-005" / 768 expectations
// were removed: asserting both pairs made this test impossible to pass.
// NOTE(review): 1536 is kept as asserted; confirm it against the
// gemini-embedding-001 entry in EMBEDDING_MODELS.
it("should return correct model for google", () => {
  const model = getDefaultModelForProvider("google");
  expect(model.provider).toBe("google");
  expect(model.model).toBe("gemini-embedding-001");
  expect(model.dimensions).toBe(1536);
});

it("should return correct model for ollama", () => {
Expand Down Expand Up @@ -937,5 +938,12 @@ describe("config schema", () => {
const defaultProviders = Object.keys(DEFAULT_PROVIDER_MODELS);
expect(defaultProviders.sort()).toEqual(providers.sort());
});

// Guards the auto-detection priority: GitHub Copilot comes first and Google
// is ranked somewhere after it.
it("should prefer GitHub Copilot before Google for auto-detection", () => {
  const [firstChoice] = AUTO_DETECT_PROVIDER_ORDER;
  expect(firstChoice).toBe("github-copilot");

  const googleRank = AUTO_DETECT_PROVIDER_ORDER.indexOf("google");
  const copilotRank = AUTO_DETECT_PROVIDER_ORDER.indexOf("github-copilot");
  expect(googleRank).toBeGreaterThan(copilotRank);
});
});
});
Loading
Loading