4 changes: 3 additions & 1 deletion ingesters/src/generateEmbeddings.ts
@@ -184,6 +184,7 @@ async function ingestSource(source: DocumentSource): Promise<void> {
  * Main function to run the ingestion process
  */
 async function main() {
+  let errorCode = 0;
   try {
     // Prompt user for target
     const target = await promptForTarget();
@@ -202,11 +203,12 @@ async function main() {
     logger.info('All specified ingestion processes completed successfully.');
   } catch (error) {
     logger.error('An error occurred during the ingestion process:', error);
+    errorCode = 1;
   } finally {
     // Clean up resources
     if (vectorStore) {
       await vectorStore.close();
-      process.exit(0);
+      process.exit(errorCode);
     }
   }
 }
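Together, the two hunks above make the ingestion entry point report failure through its exit code instead of always exiting 0. A minimal, self-contained sketch of that pattern, with the real prompting, ingestion, and vector-store logic replaced by hypothetical placeholders:

// Sketch only: placeholder functions stand in for the real ingestion and cleanup.
async function runIngestionSketch(): Promise<void> {
  // The real code prompts for a target and runs the selected ingesters here.
}

async function closeResourcesSketch(): Promise<void> {
  // The real code closes the vector store connection here.
}

async function mainSketch(): Promise<void> {
  let errorCode = 0;
  try {
    await runIngestionSketch();
    console.info('All specified ingestion processes completed successfully.');
  } catch (error) {
    console.error('An error occurred during the ingestion process:', error);
    // Record the failure; the finally block still runs before the process exits
    errorCode = 1;
  } finally {
    await closeResourcesSketch();
    // Exit 0 on success and 1 on failure so shells and CI can detect errors
    process.exit(errorCode);
  }
}

void mainSketch();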
2 changes: 1 addition & 1 deletion ingesters/src/ingesters/AsciiDocIngester.ts
@@ -237,7 +237,7 @@ export abstract class AsciiDocIngester extends BaseIngester {
     sections.forEach((section: ParsedSection, index: number) => {
       const hash: string = calculateHash(section.content);
       const sourceLink = `${this.config.baseUrl}/${page.name}${this.config.urlSuffix}${section.anchor ? '#' + section.anchor : ''}`;
-      console.log(
+      console.debug(
         `Section Title: ${section.title}, source: ${this.source}, sourceLink: ${sourceLink}`,
       );
       chunks.push(
8 changes: 6 additions & 2 deletions ingesters/src/ingesters/CairoBookIngester.ts
@@ -3,6 +3,7 @@ import { MarkdownIngester } from './MarkdownIngester';
 import { type BookChunk, DocumentSource } from '../types';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '../db/postgresVectorStore';
+import { type VectorStoreUpdateOptions } from '../utils/vectorStoreUtils';
 import { logger } from '../utils/logger';
 import * as fs from 'fs/promises';
 import * as path from 'path';
@@ -110,7 +111,10 @@ export class CairoBookIngester extends MarkdownIngester {
    * Core Library specific processing based on the pre-summarized markdown file
    * @param vectorStore
    */
-  public override async process(vectorStore: VectorStore): Promise<void> {
+  public override async process(
+    vectorStore: VectorStore,
+    options?: VectorStoreUpdateOptions,
+  ): Promise<void> {
     try {
       // 1. Read the pre-summarized documentation
       const text = await this.readSummaryFile();
@@ -123,7 +127,7 @@
       );

       // 3. Update the vector store with the chunks
-      await this.updateVectorStore(vectorStore, chunks);
+      await this.updateVectorStore(vectorStore, chunks, options);

       // 4. Clean up any temporary files (no temp files in this case)
       await this.cleanupDownloadedFiles();
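The same signature change is applied to CoreLibDocsIngester and StarknetBlogIngester below: process() gains an optional options parameter that is forwarded to updateVectorStore. The diff does not show the shape of VectorStoreUpdateOptions, so the sketch below assumes a single illustrative flag; the class and field names are hypothetical and only the threading of the optional argument mirrors the change in this PR.

// Hypothetical shapes standing in for the real VectorStore and ingester classes.
interface VectorStoreUpdateOptionsSketch {
  force?: boolean; // assumed field, not taken from the PR
}

interface VectorStoreSketch {
  addDocuments(chunks: string[]): Promise<void>;
}

abstract class BaseIngesterSketch {
  // Options are optional all the way down, so existing callers keep compiling
  protected async updateVectorStore(
    store: VectorStoreSketch,
    chunks: string[],
    options?: VectorStoreUpdateOptionsSketch,
  ): Promise<void> {
    if (options?.force) {
      // e.g. re-embed every chunk instead of only the changed ones
    }
    await store.addDocuments(chunks);
  }

  abstract process(
    store: VectorStoreSketch,
    options?: VectorStoreUpdateOptionsSketch,
  ): Promise<void>;
}

class ExampleIngesterSketch extends BaseIngesterSketch {
  override async process(
    store: VectorStoreSketch,
    options?: VectorStoreUpdateOptionsSketch,
  ): Promise<void> {
    const chunks = ['chunk one', 'chunk two']; // stand-in for real chunking
    // Forward the caller-supplied options instead of dropping them
    await this.updateVectorStore(store, chunks, options);
  }
}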
8 changes: 6 additions & 2 deletions ingesters/src/ingesters/CoreLibDocsIngester.ts
@@ -5,6 +5,7 @@ import { MarkdownIngester } from './MarkdownIngester';
 import { type BookChunk, DocumentSource } from '../types';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '../db/postgresVectorStore';
+import { type VectorStoreUpdateOptions } from '../utils/vectorStoreUtils';
 import { logger } from '../utils/logger';
 import { calculateHash } from '../utils/contentUtils';
 import {
@@ -114,7 +115,10 @@ export class CoreLibDocsIngester extends MarkdownIngester {
    * Core Library specific processing based on the pre-summarized markdown file
    * @param vectorStore
    */
-  public override async process(vectorStore: VectorStore): Promise<void> {
+  public override async process(
+    vectorStore: VectorStore,
+    options?: VectorStoreUpdateOptions,
+  ): Promise<void> {
     try {
       // 1. Read the pre-summarized documentation
       const text = await this.readCorelibSummaryFile();
@@ -127,7 +131,7 @@
       );

       // 3. Update the vector store with the chunks
-      await this.updateVectorStore(vectorStore, chunks);
+      await this.updateVectorStore(vectorStore, chunks, options);

       // 4. Clean up any temporary files (no temp files in this case)
       await this.cleanupDownloadedFiles();
8 changes: 6 additions & 2 deletions ingesters/src/ingesters/StarknetBlogIngester.ts
@@ -3,6 +3,7 @@ import { MarkdownIngester } from './MarkdownIngester';
 import { type BookChunk, DocumentSource } from '../types';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '../db/postgresVectorStore';
+import { type VectorStoreUpdateOptions } from '../utils/vectorStoreUtils';
 import { logger } from '../utils/logger';
 import * as fs from 'fs/promises';
 import * as path from 'path';
@@ -111,7 +112,10 @@ export class StarknetBlogIngester extends MarkdownIngester {
    * Starknet Blog specific processing based on the pre-summarized markdown file
    * @param vectorStore
    */
-  public override async process(vectorStore: VectorStore): Promise<void> {
+  public override async process(
+    vectorStore: VectorStore,
+    options?: VectorStoreUpdateOptions,
+  ): Promise<void> {
     try {
       // 1. Read the pre-summarized documentation
       const text = await this.readSummaryFile();
@@ -124,7 +128,7 @@
       );

       // 3. Update the vector store with the chunks
-      await this.updateVectorStore(vectorStore, chunks);
+      await this.updateVectorStore(vectorStore, chunks, options);

       // 4. Clean up any temporary files (no temp files in this case)
       await this.cleanupDownloadedFiles();
2 changes: 1 addition & 1 deletion ingesters/src/utils/RecursiveMarkdownSplitter.ts
@@ -950,7 +950,7 @@ export class RecursiveMarkdownSplitter {
       }
     }

-    console.log(`Chunk Title: ${title}, Source link: ${sourceLink}`);
+    console.debug(`Chunk Title: ${title}, Source link: ${sourceLink}`);

     chunks.push({
       content: rawChunk.content,