From 854e1b5d1b4dc748e1c522dcc6750b8fdcbb5255 Mon Sep 17 00:00:00 2001
From: enitrat
Date: Fri, 17 Oct 2025 12:29:16 +0800
Subject: [PATCH] fix: stalled ingesters in non-interactive mode

---
 ingesters/src/generateEmbeddings.ts              | 4 +++-
 ingesters/src/ingesters/AsciiDocIngester.ts      | 2 +-
 ingesters/src/ingesters/CairoBookIngester.ts     | 8 ++++++--
 ingesters/src/ingesters/CoreLibDocsIngester.ts   | 8 ++++++--
 ingesters/src/ingesters/StarknetBlogIngester.ts  | 8 ++++++--
 ingesters/src/utils/RecursiveMarkdownSplitter.ts | 2 +-
 6 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/ingesters/src/generateEmbeddings.ts b/ingesters/src/generateEmbeddings.ts
index f999cd6..d00d2b7 100644
--- a/ingesters/src/generateEmbeddings.ts
+++ b/ingesters/src/generateEmbeddings.ts
@@ -184,6 +184,7 @@ async function ingestSource(source: DocumentSource): Promise<void> {
  * Main function to run the ingestion process
  */
 async function main() {
+  let errorCode = 0;
   try {
     // Prompt user for target
     const target = await promptForTarget();
@@ -202,11 +203,12 @@ async function main() {
     logger.info('All specified ingestion processes completed successfully.');
   } catch (error) {
    logger.error('An error occurred during the ingestion process:', error);
+    errorCode = 1;
   } finally {
     // Clean up resources
     if (vectorStore) {
       await vectorStore.close();
-      process.exit(0);
+      process.exit(errorCode);
     }
   }
 }
diff --git a/ingesters/src/ingesters/AsciiDocIngester.ts b/ingesters/src/ingesters/AsciiDocIngester.ts
index 6a142c5..6870a59 100644
--- a/ingesters/src/ingesters/AsciiDocIngester.ts
+++ b/ingesters/src/ingesters/AsciiDocIngester.ts
@@ -237,7 +237,7 @@ export abstract class AsciiDocIngester extends BaseIngester {
     sections.forEach((section: ParsedSection, index: number) => {
       const hash: string = calculateHash(section.content);
       const sourceLink = `${this.config.baseUrl}/${page.name}${this.config.urlSuffix}${section.anchor ? '#' + section.anchor : ''}`;
-      console.log(
+      console.debug(
         `Section Title: ${section.title}, source: ${this.source}, sourceLink: ${sourceLink}`,
       );
       chunks.push(
diff --git a/ingesters/src/ingesters/CairoBookIngester.ts b/ingesters/src/ingesters/CairoBookIngester.ts
index 8fbdbb3..e2c8ec3 100644
--- a/ingesters/src/ingesters/CairoBookIngester.ts
+++ b/ingesters/src/ingesters/CairoBookIngester.ts
@@ -3,6 +3,7 @@ import { MarkdownIngester } from './MarkdownIngester';
 import { type BookChunk, DocumentSource } from '../types';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '../db/postgresVectorStore';
+import { type VectorStoreUpdateOptions } from '../utils/vectorStoreUtils';
 import { logger } from '../utils/logger';
 import * as fs from 'fs/promises';
 import * as path from 'path';
@@ -110,7 +111,10 @@ export class CairoBookIngester extends MarkdownIngester {
    * Core Library specific processing based on the pre-summarized markdown file
    * @param vectorStore
    */
-  public override async process(vectorStore: VectorStore): Promise<void> {
+  public override async process(
+    vectorStore: VectorStore,
+    options?: VectorStoreUpdateOptions,
+  ): Promise<void> {
     try {
       // 1. Read the pre-summarized documentation
       const text = await this.readSummaryFile();
@@ -123,7 +127,7 @@
       );
 
       // 3. Update the vector store with the chunks
-      await this.updateVectorStore(vectorStore, chunks);
+      await this.updateVectorStore(vectorStore, chunks, options);
 
       // 4. Clean up any temporary files (no temp files in this case)
       await this.cleanupDownloadedFiles();
diff --git a/ingesters/src/ingesters/CoreLibDocsIngester.ts b/ingesters/src/ingesters/CoreLibDocsIngester.ts
index f155767..18f093d 100644
--- a/ingesters/src/ingesters/CoreLibDocsIngester.ts
+++ b/ingesters/src/ingesters/CoreLibDocsIngester.ts
@@ -5,6 +5,7 @@ import { MarkdownIngester } from './MarkdownIngester';
 import { type BookChunk, DocumentSource } from '../types';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '../db/postgresVectorStore';
+import { type VectorStoreUpdateOptions } from '../utils/vectorStoreUtils';
 import { logger } from '../utils/logger';
 import { calculateHash } from '../utils/contentUtils';
 import {
@@ -114,7 +115,10 @@ export class CoreLibDocsIngester extends MarkdownIngester {
    * Core Library specific processing based on the pre-summarized markdown file
    * @param vectorStore
    */
-  public override async process(vectorStore: VectorStore): Promise<void> {
+  public override async process(
+    vectorStore: VectorStore,
+    options?: VectorStoreUpdateOptions,
+  ): Promise<void> {
     try {
       // 1. Read the pre-summarized documentation
       const text = await this.readCorelibSummaryFile();
@@ -127,7 +131,7 @@
       );
 
       // 3. Update the vector store with the chunks
-      await this.updateVectorStore(vectorStore, chunks);
+      await this.updateVectorStore(vectorStore, chunks, options);
 
       // 4. Clean up any temporary files (no temp files in this case)
       await this.cleanupDownloadedFiles();
diff --git a/ingesters/src/ingesters/StarknetBlogIngester.ts b/ingesters/src/ingesters/StarknetBlogIngester.ts
index 4d0a3f6..ff913b5 100644
--- a/ingesters/src/ingesters/StarknetBlogIngester.ts
+++ b/ingesters/src/ingesters/StarknetBlogIngester.ts
@@ -3,6 +3,7 @@ import { MarkdownIngester } from './MarkdownIngester';
 import { type BookChunk, DocumentSource } from '../types';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '../db/postgresVectorStore';
+import { type VectorStoreUpdateOptions } from '../utils/vectorStoreUtils';
 import { logger } from '../utils/logger';
 import * as fs from 'fs/promises';
 import * as path from 'path';
@@ -111,7 +112,10 @@ export class StarknetBlogIngester extends MarkdownIngester {
    * Starknet Blog specific processing based on the pre-summarized markdown file
    * @param vectorStore
    */
-  public override async process(vectorStore: VectorStore): Promise<void> {
+  public override async process(
+    vectorStore: VectorStore,
+    options?: VectorStoreUpdateOptions,
+  ): Promise<void> {
     try {
       // 1. Read the pre-summarized documentation
       const text = await this.readSummaryFile();
@@ -124,7 +128,7 @@
       );
 
       // 3. Update the vector store with the chunks
-      await this.updateVectorStore(vectorStore, chunks);
+      await this.updateVectorStore(vectorStore, chunks, options);
 
       // 4. Clean up any temporary files (no temp files in this case)
       await this.cleanupDownloadedFiles();
diff --git a/ingesters/src/utils/RecursiveMarkdownSplitter.ts b/ingesters/src/utils/RecursiveMarkdownSplitter.ts
index 882cec8..02fa592 100644
--- a/ingesters/src/utils/RecursiveMarkdownSplitter.ts
+++ b/ingesters/src/utils/RecursiveMarkdownSplitter.ts
@@ -950,7 +950,7 @@ export class RecursiveMarkdownSplitter {
       }
     }
 
-    console.log(`Chunk Title: ${title}, Source link: ${sourceLink}`);
+    console.debug(`Chunk Title: ${title}, Source link: ${sourceLink}`);
 
     chunks.push({
       content: rawChunk.content,