From 3ff11901302e58af2451bd9d4b6cfc30a7b76841 Mon Sep 17 00:00:00 2001 From: alvinouille Date: Fri, 18 Apr 2025 13:15:09 +0200 Subject: [PATCH 01/10] add postgresql container --- .cursor/rules/imports.mdc | 134 +++++++++++++++---------- .cursor/rules/navigation.mdc | 134 ++++++++++++------------- .cursor/rules/project_instructions.mdc | 74 +++++++------- .gitignore | 2 + docker-compose.yml | 31 +++++- package.json | 4 +- 6 files changed, 214 insertions(+), 165 deletions(-) diff --git a/.cursor/rules/imports.mdc b/.cursor/rules/imports.mdc index 5ca90f1d..4c4fe415 100644 --- a/.cursor/rules/imports.mdc +++ b/.cursor/rules/imports.mdc @@ -8,93 +8,121 @@ globs: *.ts,*.tsx,*.js,*.jsx ### Backend and Agent Libraries - `express`: Web server framework. - - Used in: `packages/backend/src/app.ts` - - Import: `import express from 'express';` + - Used in: `packages/backend/src/routes/cairocoder.ts` + - Import: `import express, { Router } from 'express';` - `cors`: CORS middleware for Express. - - Used in: `packages/backend/src/app.ts` + - Used in: `packages/backend/src/server.ts` - Import: `import cors from 'cors';` - `mongodb`: MongoDB client for database operations. - - Used in: `packages/agents/src/db/` + - Used in: `packages/agents/src/db/vectorStore.ts` - Import: `import { MongoClient } from 'mongodb';` +- `@langchain/core`: LangChain core libraries. + - Used in: `packages/agents/src/core/agentFactory.ts` + - Import: `import { BaseMessage } from '@langchain/core/messages';` + - Import: `import { Embeddings } from '@langchain/core/embeddings';` + - Import: `import { Document } from '@langchain/core/documents';` - `anthropic`: Anthropic Claude API client. - - Used in: `packages/agents/src/lib/` + - Used in: `packages/agents/src/models/` - Import: `import Anthropic from '@anthropic-ai/sdk';` - `openai`: OpenAI API client. - - Used in: `packages/agents/src/lib/` + - Used in: `packages/agents/src/models/` - Import: `import OpenAI from 'openai';` - `@google/generative-ai`: Google AI API client. - - Used in: `packages/agents/src/lib/` + - Used in: `packages/agents/src/models/` - Import: `import { GoogleGenerativeAI } from '@google/generative-ai';` +- `uuid`: For generating unique identifiers. + - Used in: `packages/backend/src/routes/cairocoder.ts` + - Import: `import { v4 as uuidv4 } from 'uuid';` +- `toml`: For parsing TOML configuration files. + - Used in: `packages/agents/src/config/settings.ts` + - Import: `import toml from '@iarna/toml';` -### Frontend Libraries -- `react`: UI library. - - Used in: `packages/ui/components/` - - Import: `import React from 'react';` -- `next`: React framework. - - Used in: `packages/ui/app/` - - Import: `import { useRouter } from 'next/router';` -- `tailwindcss`: CSS framework. - - Used in: `packages/ui/components/` - - Applied via class names. +### Data Ingestion Libraries +- `axios`: HTTP client for downloading documentation. + - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts` + - Import: `import axios from 'axios';` +- `adm-zip`: For handling ZIP archives. + - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts` + - Import: `import AdmZip from 'adm-zip';` +- `fs/promises`: Node.js file system promises API. + - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts` + - Import: `import * as fs from 'fs/promises';` ## Internal Modules ### Agent Modules -- `pipeline`: RAG pipeline components. 
- - Used in: `packages/agents/src/core/ragAgentFactory.ts` - - Import: `import { QueryProcessor, DocumentRetriever, CodeGenerator } from './pipeline';` +- `core/pipeline`: RAG pipeline components. + - Used in: `packages/agents/src/core/agentFactory.ts` + - Import: `import { RagPipeline } from './pipeline/ragPipeline';` - `config`: Configuration management. - - Used in: `packages/agents/src/` - - Import: `import { config } from './config';` + - Used in: `packages/agents/src/core/agentFactory.ts` + - Import: `import { getAgentConfig } from '../config/agent';` - `db`: Database interaction. - - Used in: `packages/agents/src/core/` - - Import: `import { VectorStore } from './db/vectorStore';` -- `models`: LLM and embedding models interfaces. - - Used in: `packages/agents/src/core/` - - Import: `import { LLMProviderFactory } from './models/llmProviderFactory';` - - Import: `import { EmbeddingProviderFactory } from './models/embeddingProviderFactory';` + - Used in: `packages/agents/src/core/agentFactory.ts` + - Import: `import { VectorStore } from '../db/vectorStore';` +- `types`: Type definitions. + - Used in: `packages/agents/src/core/agentFactory.ts` + - Import: `import { LLMConfig } from '../types';` +- `utils`: Utility functions. + - Used in: `packages/backend/src/app.ts` + - Import: `import { logger } from '@starknet-agent/agents/utils/index';` ### Backend Modules -- `routes`: API routes. - - Used in: `packages/backend/src/app.ts` - - Import: `import { generateRoutes } from './routes/generate';` - - Import: `import { modelsRoutes } from './routes/models';` -- `handlers`: Request handlers. - - Used in: `packages/backend/src/routes/` - - Import: `import { generateHandler } from '../handlers/generateHandler';` +- `routes`: API route definitions. + - Used in: `packages/backend/src/server.ts` + - Import: `import routes from '../routes';` +- `config`: Server configuration. + - Used in: `packages/backend/src/server.ts` + - Import: `import { initializeLLMConfig } from './config/llm';` + - Import: `import { getPort } from '@starknet-agent/agents/config/settings';` +- `cairocoder`: Main endpoint handler. + - Used in: `packages/backend/src/routes/index.ts` + - Import: `import cairocoderRouter from './cairocoder';` ### Ingester Modules -- `baseIngester`: Abstract base class for all ingesters. - - Used in: `packages/ingester/src/ingesters/` +- `BaseIngester`: Abstract base class for all ingesters. + - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts` - Import: `import { BaseIngester } from '../BaseIngester';` -- `ingesterFactory`: Factory for creating ingesters. - - Used in: `packages/ingester/src/scripts/` - - Import: `import { IngesterFactory } from '../IngesterFactory';` -- `utils`: Utility functions. - - Used in: `packages/ingester/src/` - - Import: `import { downloadFile, extractArchive } from './utils/fileUtils';` - - Import: `import { processContent, splitMarkdown } from './utils/contentUtils';` +- `IngesterFactory`: Factory for creating ingesters. + - Used in: `packages/ingester/src/generateEmbeddings.ts` + - Import: `import { IngesterFactory } from './IngesterFactory';` +- `utils`: Utility functions for ingestion. 
+ - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts` + - Import: `import { processDocFiles } from '../utils/fileUtils';` + - Import: `import { isInsideCodeBlock, calculateHash } from '../utils/contentUtils';` ## Common Import Patterns ### For Backend API Routes ```typescript -import express from 'express'; -import { generateHandler } from '../handlers/generateHandler'; -import { config } from '../config'; +import express, { Router } from 'express'; +import { AIMessage, HumanMessage, SystemMessage, BaseMessage } from '@langchain/core/messages'; +import { v4 as uuidv4 } from 'uuid'; +import { + getVectorDbConfig, + logger, + RagAgentFactory, + LLMConfig, + VectorStore, +} from '@starknet-agent/agents'; ``` ### For Agent Core ```typescript -import { VectorStore } from './db/vectorStore'; -import { LLMProviderFactory } from './models/llmProviderFactory'; -import { EmbeddingProviderFactory } from './models/embeddingProviderFactory'; +import { BaseMessage } from '@langchain/core/messages'; +import { Embeddings } from '@langchain/core/embeddings'; +import { getAgentConfig } from '../config/agent'; +import { RagPipeline } from './pipeline/ragPipeline'; +import { VectorStore } from '../db/vectorStore'; +import { LLMConfig } from '../types'; ``` ### For Ingesters ```typescript +import * as fs from 'fs/promises'; +import * as path from 'path'; +import { Document } from '@langchain/core/documents'; +import { BookChunk, DocumentSource } from '@starknet-agent/agents/types/index'; import { BaseIngester } from '../BaseIngester'; -import { BookPageDto, ParsedSection, BookChunk } from '../types'; -import { Document } from 'langchain/document'; -import { VectorStore } from '../../agents/src/db/vectorStore'; +import { BookConfig, BookPageDto, ParsedSection } from '../utils/types'; ``` diff --git a/.cursor/rules/navigation.mdc b/.cursor/rules/navigation.mdc index 6164309a..b73994ea 100644 --- a/.cursor/rules/navigation.mdc +++ b/.cursor/rules/navigation.mdc @@ -1,109 +1,105 @@ ---- -description: -globs: ---- -# Navigation Rules for Starknet Agent Codebase +# Navigation Rules for Cairo Coder Codebase ## Project Structure Overview -The Starknet Agent codebase is organized as a monorepo with multiple packages: +The Cairo Coder codebase is organized as a monorepo with multiple packages: -- `packages/agents`: Contains the core agent logic for RAG-based search and response generation -- `packages/backend`: Express-based server handling WebSocket connections and API endpoints -- `packages/ui`: Next.js frontend application -- `packages/ingester`: Tools for ingesting and processing data for the vector database +- `packages/agents`: Contains the core agent logic for RAG-based Cairo code generation +- `packages/backend`: Express-based server handling API endpoints for code generation +- `packages/ingester`: Tools for ingesting and processing Cairo documentation for the vector database - `packages/typescript-config`: Shared TypeScript configuration ## Key Directories and Files ### Agent Logic -- `packages/agents/src/pipeline`: Contains the RAG pipeline implementation +- `packages/agents/src/core/pipeline`: Contains the RAG pipeline implementation + - `ragPipeline.ts`: Orchestrates the entire RAG process + - `queryProcessor.ts`: Processes and reformulates user queries + - `documentRetriever.ts`: Retrieves relevant documents from vector database + - `answerGenerator.ts`: Generates Cairo code based on retrieved documents - `packages/agents/src/core`: Core agent functionality -- `packages/agents/src/config`: 
Configuration handling + - `agentFactory.ts`: Factory for creating RAG agents - `packages/agents/src/db`: Database interaction logic +- `packages/agents/src/config`: Configuration handling - `packages/agents/src/utils`: Utility functions -- `packages/agents/src/lib`: Shared libraries -- `packages/agents/src/ragAgentFactory.ts`: Factory for creating RAG agents -- `packages/agents/src/suggestionGeneratorAgent.ts`: Agent for generating search suggestions +- `packages/agents/src/types`: TypeScript type definitions ### Backend -- `packages/backend/src/websocket`: WebSocket server implementation -- `packages/backend/src/routes`: API route definitions +- `packages/backend/src/api`: API routes and handlers + - `cairocoder.ts`: Main handler for the Cairo code generation endpoint + - `routes.ts`: API route definitions + - `config.ts`: API configuration - `packages/backend/src/app.ts`: Express application setup - `packages/backend/src/server.ts`: Server initialization - `packages/backend/src/config`: Server configuration -- `packages/backend/src/utils`: Utility functions -### Frontend -- `packages/ui/app`: Next.js app directory with page components -- `packages/ui/components`: Reusable UI components -- `packages/ui/lib`: Frontend utilities and helpers -- `packages/ui/public`: Static assets - -### Data Ingestion -- `packages/ingester/scripts`: Scripts for data ingestion and embedding generation +### Ingestion System +- `packages/ingester/src/`: Source code for the ingester package + - `BaseIngester.ts`: Abstract base class for all ingesters + - `IngesterFactory.ts`: Factory for creating ingesters based on source + - `generateEmbeddings.ts`: Main script for generating embeddings + - `ingesters/`: Source-specific ingester implementations + - `CairoBookIngester.ts`: Ingester for Cairo Book + - `CairoByExampleIngester.ts`: Ingester for Cairo By Example + - `StarknetDocsIngester.ts`: Ingester for Starknet Docs (may be used for Cairo docs) + - `StarknetFoundryIngester.ts`: Ingester for Starknet Foundry + - `OpenZeppelinDocsIngester.ts`: Ingester for OpenZeppelin Docs + - `MarkdownIngester.ts`: Base ingester for Markdown format docs + - `AsciiDocIngester.ts`: Base ingester for AsciiDoc format docs + - `utils/`: Utility functions for ingestion + - `shared.ts`: Shared types and interfaces ## Navigation Patterns 1. **Following the RAG Pipeline Flow**: - - Start at `packages/agents/src/ragAgentFactory.ts` - - Explore the pipeline components in `packages/agents/src/pipeline` - - Understand how results are sent back via `packages/backend/src/websocket` + - Start at `packages/agents/src/core/agentFactory.ts` + - Explore the pipeline components in `packages/agents/src/core/pipeline/` + - See how code generation happens in `packages/agents/src/core/pipeline/answerGenerator.ts` 2. **Understanding API Endpoints**: - Start at `packages/backend/src/app.ts` - - Follow through to `packages/backend/src/routes` - - See how they connect to agent functionality - -3. **Exploring the UI Flow**: - - Start at `packages/ui/app/page.tsx` (main entry point) - - Look at the chat interface components - - Understand how the UI connects to the backend via WebSockets + - Follow through to `packages/backend/src/api/routes.ts` + - Examine the main handler in `packages/backend/src/api/cairocoder.ts` -4. **Configuration Flow**: - - Check `packages/agents/config.toml` for agent configuration - - See how configuration is loaded in `packages/agents/src/config.ts` +3. 
**Configuration Flow**: + - Look for configuration files in the root directory + - Check `packages/agents/src/config/` for agent configuration + - See how configuration is loaded in backend via `packages/backend/src/config/` ## Common Navigation Tasks -- **To understand the RAG process**: Follow the pipeline components in `packages/agents/src/pipeline` -- **To see how user queries are processed**: Start at the WebSocket handlers in `packages/backend/src/websocket` -- **To explore the UI components**: Look at the components in `packages/ui/components` -- **To understand data ingestion**: Check the scripts in `packages/ingester/scripts` - -## Ingester Package -- `packages/ingester/src/`: Source code for the ingester package - - `BaseIngester.ts`: Abstract base class for all ingesters - - `IngesterFactory.ts`: Factory for creating ingesters based on source - - `ingesters/`: Source-specific ingester implementations - - `CairoBookIngester.ts`: Ingester for Cairo Book - - `StarknetDocsIngester.ts`: Ingester for Starknet Docs - - `StarknetFoundryIngester.ts`: Ingester for Starknet Foundry - - `CairoByExampleIngester.ts`: Ingester for Cairo By Example - - `OpenZeppelinDocsIngester.ts`: Ingester for OpenZeppelin Docs - - `utils/`: Utility functions - - `fileUtils.ts`: File operations - - `contentUtils.ts`: Content processing - - `vectorStoreUtils.ts`: Vector store operations - - `types.ts`: Common types and interfaces - - `scripts/`: Scripts for running ingestion - - `generateEmbeddings.ts`: Main script for generating embeddings +- **To understand the RAG process**: Follow the pipeline components in `packages/agents/src/core/pipeline/` +- **To see how user queries are processed**: Start at the API handlers in `packages/backend/src/api/cairocoder.ts` +- **To understand data ingestion**: Check the ingester implementations in `packages/ingester/src/ingesters/` +- **To modify embeddings generation**: Look at `packages/ingester/src/generateEmbeddings.ts` ## Key Files for Common Tasks ### Adding a New Documentation Source 1. Create a new ingester in `packages/ingester/src/ingesters/` 2. Update `IngesterFactory.ts` to include the new source -3. Update the `DocumentSource` type in `packages/agents/src/types.ts` +3. Update the document source types in `packages/agents/src/types/` ### Modifying the RAG Pipeline -1. Update the relevant component in `packages/agents/src/pipeline/` -2. Adjust the configuration in `packages/agents/config.toml` if needed +1. Update the relevant component in `packages/agents/src/core/pipeline/` +2. Adjust the pipeline configuration in `packages/agents/src/core/agentFactory.ts` if needed -### Updating the UI -1. Modify the relevant component in `packages/ui/components/` -2. Update styles in `packages/ui/styles/` if needed +### Adding or Modifying API Endpoints +1. Update route definitions in `packages/backend/src/api/routes.ts` +2. Implement handlers in `packages/backend/src/api/cairocoder.ts` or add new handlers ### Running Ingestion -1. Use the script at `packages/ingester/src/scripts/generateEmbeddings.ts` -2. Or run `pnpm run generate-embeddings` from the project root +1. Use the script at `packages/ingester/src/generateEmbeddings.ts` +2. 
Or run `pnpm generate-embeddings` from the project root + +## Key Endpoints + +- `/generate`: Main endpoint for Cairo code generation + - Accepts POST requests with messages array in OpenAI format + - Returns generated Cairo code + +## Docker and Deployment Files + +- `docker-compose.yml`: Main Docker Compose configuration +- `backend.dockerfile`: Dockerfile for the backend service +- `ingest.dockerfile`: Dockerfile for running ingestion tasks diff --git a/.cursor/rules/project_instructions.mdc b/.cursor/rules/project_instructions.mdc index e05c8151..b76bb1d0 100644 --- a/.cursor/rules/project_instructions.mdc +++ b/.cursor/rules/project_instructions.mdc @@ -1,75 +1,73 @@ --- description: Project Instructions -globs: +globs: --- -# Cairo Coder Project Instructions +# Starknet Agent Project Instructions ## Overview -- Cairo Coder is an AI-powered code generation service specifically designed for generating Cairo smart contracts and programs. -- It uses Retrieval-Augmented Generation (RAG) to provide accurate, well-documented Cairo code based on natural language descriptions. -- The project is built with TypeScript, Node.js, Express, MongoDB Atlas (vector search), and the Cairo programming language. -- Based on Starknet Agent, which was forked from Perplexica, adapted specifically for Cairo code generation. +- Starknet Agent is an AI-powered search engine specifically designed for the Starknet Ecosystem. +- It uses Retrieval-Augmented Generation (RAG) to provide accurate, source-cited answers to questions about Starknet and Cairo. +- The project is built with TypeScript, Node.js, Express, MongoDB Atlas (vector search), and Next.js. +- Originally forked from Perplexica, adapted for the Starknet ecosystem. ## Architecture - Monorepo structure with multiple packages: - - `packages/agents/`: Core RAG pipeline (query processing, document retrieval, code generation) - - `packages/backend/`: Express server with REST API endpoints for code generation - - `packages/ingester/`: Data ingestion tools for Cairo documentation sources + - `packages/agents/`: Core RAG pipeline (query processing, document retrieval, answer generation) + - `packages/backend/`: Express server with WebSocket support for real-time streaming + - `packages/ui/`: Next.js frontend application with chat interface + - `packages/ingester/`: Data ingestion tools for documentation sources - `packages/typescript-config/`: Shared TypeScript configuration ## RAG Pipeline Flow -1. **Query Processing**: Analyzes and reformulates user requests to improve retrieval -2. **Document Retrieval**: Searches vector database for relevant Cairo documentation using cosine similarity -3. **Code Generation**: Uses LLMs to generate functional Cairo code based on the retrieved documentation -4. **JSON Response**: Delivers code in an OpenAI-compatible format +1. **Query Processing**: Analyzes and reformulates user queries to improve retrieval +2. **Document Retrieval**: Searches vector database for relevant documents using cosine similarity +3. **Answer Generation**: Uses LLMs to generate comprehensive responses with source citations +4. 
**Real-time Streaming**: Delivers responses to the UI as they're generated -## Cairo Documentation Sources -- **Cairo Book**: Comprehensive guide to the Cairo programming language -- **Cairo Foundry Documentation** -- **Cairo By Example**: Practical examples of Cairo programming patterns +## Focus Modes +- **Starknet Ecosystem**: Searches across all indexed resources +- **Cairo Book**: Focuses on the Cairo programming language book +- **Starknet Docs**: Targets official Starknet documentation +- **Starknet Foundry**: Searches Starknet Foundry documentation +- **Cairo By Example**: Provides examples from Cairo By Example resource +- **OpenZeppelin Docs**: Searches OpenZeppelin's Starknet documentation ## Ingestion System -- The ingester package handles downloading, processing, and storing Cairo documentation. +- The ingester package handles downloading, processing, and storing documentation. - Supported documentation sources: - Cairo Book - - Cairo Language Documentation - - Cairo Foundry + - Starknet Docs + - Starknet Foundry - Cairo By Example + - OpenZeppelin Docs - Modular architecture with a `BaseIngester` abstract class and source-specific implementations. - Follows the template method pattern for standardized ingestion process. -- Run ingestion with `pnpm generate-embeddings` from the project root. +- Run ingestion with `pnpm generate-embeddings` or `pnpm generate-embeddings:yes` from the project root. +- Weekly automated embedding generation via GitHub Actions. ## Development Workflow -- Use `pnpm dev` to start the development server +- Use `pnpm dev` or `turbo dev` to start the development server - MongoDB Atlas with vector search capabilities required for embeddings storage - Configuration is managed through TOML files (copy `sample.config.toml` to `config.toml`) - Docker is used for containerization and deployment - Add new documentation sources by extending the `BaseIngester` class and registering in `IngesterFactory` -## API Endpoint -- `/generate`: Main endpoint for generating Cairo code - - Takes input in OpenAI-compatible format (array of messages) - - Returns generated Cairo code with proper formatting and documentation - - Supports system messages for context and guidance - ## Commands -- **Build**: `pnpm build` +- **Build**: `pnpm build`, `pnpm --filter @starknet-agent/agents build` - **Dev**: `pnpm dev` (starts all services with auto-reload) -- **Test**: `pnpm test` -- **Single test**: `pnpm test -t "test name pattern"` -- **Type check**: `pnpm check-types` -- **Generate embeddings**: `pnpm generate-embeddings` -- **Clean**: `pnpm clean` (removes build files), `pnpm clean:all` (removes node_modules) +- **Test**: `pnpm --filter @starknet-agent/agents test` +- **Single test**: `pnpm --filter @starknet-agent/agents test -- -t "test name pattern"` +- **Type check**: `pnpm --filter @starknet-agent/backend check-types` ## Testing - Jest is used for all testing -- Run tests with `pnpm test` +- Run tests with `pnpm test` or `turbo test` - Test files are located in `__tests__/` directories - Mock external dependencies, especially LLM and database calls - Test each ingester implementation separately with mocked vector stores ## Deployment - Docker Compose is used for deployment -- Build and run with `docker-compose up --build` -- Environment variables should be set according to `config.toml` files -- Ingestion can be run with `pnpm generate-embeddings` +- Production configuration in `docker-compose.prod-hosted.yml` +- Environment variables should be set according to `.env.example` 
files
+- Ingestion can be run as a separate process using `ingest.dockerfile`

diff --git a/.gitignore b/.gitignore
index f5958240..8273d3d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,3 +41,5 @@ Thumbs.db

 packages/**/node_modules
 packages/**/dist
+
+/data
diff --git a/docker-compose.yml b/docker-compose.yml
index b2713dae..dbbd0a88 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,21 @@
+version: '3.8'
+
 services:
-  starknet-agent-backend:
+  postgres:
+    image: pgvector/pgvector:pg17
+    container_name: "postgresql"
+    shm_size: 1g
+    ports:
+      - 5432:5432
+    env_file:
+      - .env
+    volumes:
+      - ./data:/var/lib/postgresql/data
+    restart: unless-stopped
+    networks:
+      - cairo_coder_network
+
+  backend:
     build:
       context: .
       dockerfile: backend.dockerfile
@@ -7,8 +23,17 @@ services:
       - 3001:3001
     extra_hosts:
       - host.docker.internal:host-gateway
+    env_file:
+      - .env
+    depends_on:
+      postgres:
+        condition: service_started
     restart: unless-stopped
-    environment:
-      - NODE_ENV=production
+    networks:
+      - cairo_coder_network
+
+networks:
+  cairo_coder_network:
+
+volumes:
+  postgres_data:
diff --git a/package.json b/package.json
index 6afc5081..149463c4 100644
--- a/package.json
+++ b/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "starknet-agent",
+  "name": "cairo-coder",
   "private": true,
   "version": "0.0.0",
   "scripts": {
@@ -10,7 +10,7 @@
     "generate-embeddings": "turbo run generate-embeddings",
     "generate-embeddings:yes": "turbo run generate-embeddings:yes",
     "clean": "find packages -type d -name 'dist' -exec rm -rf {} +; find packages -type d -name '.turbo' -exec rm -rf {} +",
-    "clean:all": "npm run clean && find packages -type d -name 'node_modules' -exec rm -rf {} + && rm -rf node_modules"
+    "clean:all": "npm run clean && find packages -type d -name 'node_modules' -exec rm -rf {} + && rm -rf node_modules && pnpm clean"
   },
   "devDependencies": {
     "turbo": "latest",

From 291a2467dec791685acdf36e50d8306fb172f1fe Mon Sep 17 00:00:00 2001
From: alvinouille
Date: Fri, 18 Apr 2025 16:56:36 +0200
Subject: [PATCH 02/10] postgres for ingester ok

---
 packages/agents/package.json                  |   2 +
 packages/agents/src/config/agent.ts           |   2 +-
 packages/agents/src/config/settings.ts        |  28 +-
 packages/agents/src/core/agentFactory.ts      |   2 +-
 packages/agents/src/db/postgresVectorStore.ts | 472 ++++++++++++++++++
 packages/agents/src/db/vectorStore.ts         | 328 ++++++------
 packages/agents/src/types/index.ts            |  21 +-
 packages/agents/src/utils/index.ts            |   2 +-
 .../backend/src/config/provider/openai.ts     |   4 +-
 packages/backend/src/routes/cairocoder.ts     |   2 +-
 packages/backend/src/types/index.ts           |   2 +-
 packages/ingester/src/BaseIngester.ts         |   2 +-
 packages/ingester/src/generateEmbeddings.ts   |   6 +-
 packages/ingester/src/shared.ts               |   2 +-
 .../ingester/src/utils/vectorStoreUtils.ts    |   4 +-
 pnpm-lock.yaml                                | 179 +++++--
 16 files changed, 844 insertions(+), 214 deletions(-)
 create mode 100644 packages/agents/src/db/postgresVectorStore.ts

diff --git a/packages/agents/package.json b/packages/agents/package.json
index 7014a4e7..ebf21ab0 100644
--- a/packages/agents/package.json
+++ b/packages/agents/package.json
@@ -19,8 +19,10 @@
     "commander": "^11.1.0",
     "compute-cosine-similarity": "^1.1.0",
     "compute-dot": "^1.1.0",
+    "dotenv": "^16.4.7",
     "mongodb": "^6.13.1",
     "node-fetch": "^3.3.2",
+    "pg": "^8.14.1",
     "winston": "^3.17.0"
   },
   "exports": {
diff --git a/packages/agents/src/config/agent.ts b/packages/agents/src/config/agent.ts
index 2463bf70..77ca48d4 100644
--- a/packages/agents/src/config/agent.ts
+++ b/packages/agents/src/config/agent.ts
@@ -1,7 +1,7 @@
 import { basicContractTemplate } from './templates/contractTemplate';
 import { cairoCoderPrompts } from './prompts';
 import { basicTestTemplate } from './templates/testTemplate';
-import { VectorStore } from '../db/vectorStore';
+import { VectorStore } from '../db/postgresVectorStore';
 import { DocumentSource, RagSearchConfig } from '../types';
 
 export const getAgentConfig = (
diff --git a/packages/agents/src/config/settings.ts b/packages/agents/src/config/settings.ts
index d50e50f0..04779eb8 100644
--- a/packages/agents/src/config/settings.ts
+++ b/packages/agents/src/config/settings.ts
@@ -3,7 +3,7 @@ import fs from 'fs';
 import path from 'path';
 import toml from '@iarna/toml';
 
-import { Config, RecursivePartial } from '../types';
+import { Config, RecursivePartial, PostgresVectorStoreConfig, MongoVectorStoreConfig } from '../types';
 
 const configFileName = 'config.toml';
 
@@ -43,7 +43,31 @@
 export const getDeepseekApiKey = () => loadConfig().API_KEYS.DEEPSEEK;
 
 export const getGeminiApiKey = () => loadConfig().API_KEYS.GEMINI;
 
-export const getVectorDbConfig = () => loadConfig().VECTOR_DB;
+export const getVectorDbConfig = () => {
+  const config = loadConfig();
+  const dbType = config.VECTOR_DB.DB_TYPE || 'postgres';
+
+  if (dbType === 'postgres') {
+    return {
+      type: 'postgres',
+      COLLECTION_NAME: config.VECTOR_DB.COLLECTION_NAME || 'documents',
+    } as PostgresVectorStoreConfig;
+  } else {
+    // Fall back to MongoDB
+    return {
+      type: 'mongodb',
+      MONGODB_URI: config.VECTOR_DB.MONGODB_URI || '',
+      DB_NAME: config.VECTOR_DB.DB_NAME || '',
+      COLLECTION_NAME: config.VECTOR_DB.COLLECTION_NAME || 'chunks',
+    } as MongoVectorStoreConfig;
+  }
+};
+
+// Check if we're using PostgreSQL
+export const isPostgresDb = () => {
+  const config = loadConfig();
+  return config.VECTOR_DB.DB_TYPE === 'postgres';
+};
 
 export const updateConfig = (config: RecursivePartial<Config>) => {
   const currentConfig = loadConfig();
diff --git a/packages/agents/src/core/agentFactory.ts b/packages/agents/src/core/agentFactory.ts
index a40c0d5e..8df6d761 100644
--- a/packages/agents/src/core/agentFactory.ts
+++ b/packages/agents/src/core/agentFactory.ts
@@ -3,7 +3,7 @@ import { Embeddings } from '@langchain/core/embeddings';
 import { getAgentConfig } from '../config/agent';
 import EventEmitter from 'events';
 import { RagPipeline } from './pipeline/ragPipeline';
-import { VectorStore } from '../db/vectorStore';
+import { VectorStore } from '../db/postgresVectorStore';
 import { LLMConfig } from '../types';
 
 export class RagAgentFactory {
diff --git a/packages/agents/src/db/postgresVectorStore.ts b/packages/agents/src/db/postgresVectorStore.ts
new file mode 100644
index 00000000..fa605942
--- /dev/null
+++ b/packages/agents/src/db/postgresVectorStore.ts
@@ -0,0 +1,472 @@
+import { Embeddings } from '@langchain/core/embeddings';
+import { DocumentInterface } from '@langchain/core/documents';
+import { logger } from '../utils';
+import { VectorStoreConfig, DocumentSource } from '../types';
+import pg, { Pool, PoolClient } from 'pg';
+import { DatabaseError as PgError } from 'pg';
+import * as dotenv from 'dotenv';
+import * as path from 'path';
+import * as fs from 'fs';
+
+/**
+ * Custom error class for handling Postgres-specific errors
+ */
+class DatabaseError extends Error {
+  code?: string;
+  detail?: string;
+  table?: string;
+
+  public constructor(message: string, error?: PgError) {
+    if (error) {
+      super(`${message}: ${error.message}`);
+    } else {
+      super(message);
+    }
+
+    if (error) {
+      this.code = error.code;
+      this.detail = error.detail;
+      this.table = error.table;
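+      // These fields mirror the driver's PgError diagnostics: SQLSTATE code, detail text, and table name.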
+    }
+  }
+
+  /**
+   * Maps a {@see PgError} raised by a database query to a more descriptive
+   * {@see DatabaseError}.
+   *
+   * @returns { DatabaseError }
+   */
+  public static handlePgError(err: PgError): DatabaseError {
+    // See https://www.postgresql.org/docs/current/errcodes-appendix.html
+    switch (err.code) {
+      case '23505': // unique_violation
+        return new DatabaseError('Duplicate key violation', err);
+
+      case '23503': // foreign_key_violation
+        return new DatabaseError('Referenced record does not exist', err);
+
+      case '28P01': // invalid_password
+        return new DatabaseError('Database authentication failed', err);
+
+      case '57P01': // admin_shutdown
+      case '57P02': // crash_shutdown
+      case '57P03': // cannot_connect_now
+        return new DatabaseError('Database server unavailable', err);
+
+      case '42P01': // undefined_table
+        return new DatabaseError('Schema error: table not found', err);
+
+      case '42P07': // duplicate_table
+        return new DatabaseError('Table already exists', err);
+
+      case '42501': // insufficient_privilege
+        return new DatabaseError('Insufficient database privileges', err);
+
+      case '42601': // syntax_error
+        return new DatabaseError('Syntax error', err);
+
+      case '42703': // undefined_column
+        return new DatabaseError('Schema error: column not found', err);
+
+      default:
+        return new DatabaseError('Database operation failed', err);
+    }
+  }
+}
+
+/**
+ * A query and its associated values.
+ */
+class Query {
+  public readonly query: string;
+  public readonly values?: any[];
+
+  public constructor(query: string, values?: any[]) {
+    this.query = query;
+    this.values = values;
+  }
+}
+
+/**
+ * PostgresVectorStore class for managing document storage and similarity search with PostgreSQL
+ */
+export class VectorStore {
+  private static instance: VectorStore | null = null;
+  private pool: Pool;
+  private embeddings: Embeddings;
+  private tableName: string;
+
+  private constructor(pool: Pool, embeddings: Embeddings, tableName: string) {
+    this.pool = pool;
+    this.embeddings = embeddings;
+    this.tableName = tableName;
+
+    // Register vector type parser to handle vector data types
+    const vectorOid = 16428; // pgvector's OID; extension OIDs vary per database, adjust if needed
+    pg.types.setTypeParser(vectorOid, (val) => {
+      return val ?
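+        /* pgvector serializes vectors as text like '[0.1,0.2]', which JSON.parse turns into number[] */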
+        JSON.parse(val) : null;
+    });
+  }
+
+  static async getInstance(
+    config: VectorStoreConfig,
+    embeddings: Embeddings,
+  ): Promise<VectorStore> {
+    if (!VectorStore.instance) {
+      // Log connection parameters at debug level, but never the credentials.
+      logger.debug('Connecting to PostgreSQL', {
+        host: process.env.POSTGRES_HOST,
+        port: process.env.POSTGRES_PORT,
+        database: process.env.POSTGRES_ROOT_DB,
+      });
+      const pool = new Pool({
+        user: process.env.POSTGRES_USER,
+        host: process.env.POSTGRES_HOST,
+        database: process.env.POSTGRES_ROOT_DB,
+        password: process.env.POSTGRES_PASSWORD,
+        port: parseInt(process.env.POSTGRES_PORT || '5432'),
+        max: 10,
+        min: 5,
+      });
+      pool.on('error', (err) => {
+        logger.error('Postgres pool error:', err);
+      });
+
+      logger.info('Connected to PostgreSQL');
+
+      const tableName = 'documents';
+
+      // Create instance first, then initialize DB
+      VectorStore.instance = new VectorStore(pool, embeddings, tableName);
+      await VectorStore.instance.initializeDb();
+    }
+    return VectorStore.instance;
+  }
+
+  /**
+   * Initialize the database schema
+   */
+  private async initializeDb(): Promise<void> {
+    try {
+      const client = await this.pool.connect();
+      try {
+        // Enable vector extension
+        await client.query('CREATE EXTENSION IF NOT EXISTS vector;');
+
+        // Create documents table if it doesn't exist
+        await client.query(`
+          CREATE TABLE IF NOT EXISTS ${this.tableName} (
+            id SERIAL PRIMARY KEY,
+            content TEXT NOT NULL,
+            metadata JSONB NOT NULL,
+            embedding vector(1536) NOT NULL,
+            uniqueId VARCHAR(255),
+            contentHash VARCHAR(255),
+            source VARCHAR(50),
+            UNIQUE(uniqueId)
+          );
+        `);
+
+        // Create index on source for filtering
+        await client.query(`
+          CREATE INDEX IF NOT EXISTS idx_${this.tableName}_source ON ${this.tableName} (source);
+        `);
+
+        // Create vector index for similarity search
+        await client.query(`
+          CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding ON ${this.tableName} USING ivfflat (embedding vector_cosine_ops)
+          WITH (lists = 100);
+        `);
+
+        logger.info('PostgreSQL database initialized');
+      } finally {
+        client.release();
+      }
+    } catch (error) {
+      logger.error('Error initializing database:', error);
+      throw DatabaseError.handlePgError(error as PgError);
+    }
+  }
+
+  /**
+   * Perform similarity search
+   * @param query - The query string
+   * @param k - Number of results to return
+   * @param sources - Optional source filter
+   * @returns Promise<DocumentInterface[]>
+   */
+  async similaritySearch(
+    query: string,
+    k: number = 5,
+    sources?: DocumentSource | DocumentSource[],
+  ): Promise<DocumentInterface[]> {
+    try {
+      // Generate embedding for the query
+      const embedding = await this.embeddings.embedQuery(query);
+
+      // Build SQL query
+      let sql = `
+        SELECT
+          content,
+          metadata,
+          1 - (embedding <=> $1) as similarity
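+          -- <=> is pgvector's cosine-distance operator, so 1 - distance yields cosine similarity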
+        FROM ${this.tableName}
+        WHERE 1=1
+      `;
+
+      const values: any[] = [JSON.stringify(embedding)];
+      let paramIndex = 2;
+
+      // Add source filter if provided
+      if (sources) {
+        const sourcesArray = Array.isArray(sources) ? sources : [sources];
+        if (sourcesArray.length > 0) {
+          sql += ` AND source = ANY($${paramIndex})`;
+          values.push(sourcesArray);
+          paramIndex++;
+        }
+      }
+
+      // Add order by and limit
+      sql += `
+        ORDER BY similarity DESC
+        LIMIT $${paramIndex}
+      `;
+      values.push(k);
+
+      // Execute query
+      const client = await this.pool.connect();
+      try {
+        const result = await client.query(sql, values);
+
+        // Convert to DocumentInterface format
+        return result.rows.map((row) => ({
+          pageContent: row.content,
+          metadata: row.metadata,
+        }));
+      } finally {
+        client.release();
+      }
+    } catch (error) {
+      logger.error('Error in similarity search:', error);
+      throw DatabaseError.handlePgError(error as PgError);
+    }
+  }
+
+  /**
+   * Add documents to the vector store
+   * @param documents - Array of documents to add
+   * @param options - Optional settings; `options.ids` supplies explicit unique IDs for the documents
+   * @returns Promise<void>
+   */
+  async addDocuments(documents: DocumentInterface[], options?: { ids?: string[] }): Promise<void> {
+    logger.info(`Adding ${documents.length} documents to the vector store`);
+
+    if (documents.length === 0) return;
+
+    try {
+      // Generate embeddings for all documents
+      const texts = documents.map((doc) => doc.pageContent);
+      const embeddings = await this.embeddings.embedDocuments(texts);
+
+      const client = await this.pool.connect();
+      try {
+        await client.query('BEGIN');
+
+        // Prepare batch insert
+        const insertPromises = documents.map((doc, i) => {
+          const uniqueId = options?.ids?.[i] || doc.metadata.uniqueId || null;
+          const contentHash = doc.metadata.contentHash || null;
+          const source = doc.metadata.source || null;
+
+          const query = `
+            INSERT INTO ${this.tableName} (content, metadata, embedding, uniqueId, contentHash, source)
+            VALUES ($1, $2, $3, $4, $5, $6)
+            ON CONFLICT (uniqueId)
+            DO UPDATE SET
+              content = EXCLUDED.content,
+              metadata = EXCLUDED.metadata,
+              embedding = EXCLUDED.embedding,
+              contentHash = EXCLUDED.contentHash
+          `;
+
+          return client.query(query, [
+            doc.pageContent,
+            JSON.stringify(doc.metadata),
+            JSON.stringify(embeddings[i]),
+            uniqueId,
+            contentHash,
+            source
+          ]);
+        });
+
+        await Promise.all(insertPromises);
+        await client.query('COMMIT');
+
+        logger.info(`Successfully added ${documents.length} documents`);
+      } catch (error) {
+        await client.query('ROLLBACK');
+        throw error;
+      } finally {
+        client.release();
+      }
+    } catch (error) {
+      logger.error('Error adding documents:', error);
+      throw DatabaseError.handlePgError(error as PgError);
+    }
+  }
+
+  /**
+   * Find a specific book chunk by name
+   * @param name - Name of the book chunk
+   * @returns Promise<DocumentInterface | null>
+   */
+  async findBookChunk(name: string): Promise<DocumentInterface | null> {
+    try {
+      const client = await this.pool.connect();
+      try {
+        const result = await client.query(
+          `SELECT content, metadata, contentHash FROM ${this.tableName} WHERE uniqueId = $1`,
+          [name]
+        );
+
+        if (result.rows.length > 0) {
+          const row = result.rows[0];
+          return {
+            metadata: {
+              _id: name,
+              contentHash: row.contenthash, // unquoted identifiers come back lower-cased from Postgres
+              ...row.metadata // JSONB columns are already parsed by the driver
+            },
+            pageContent: row.content,
+          };
+        }
+        return null;
+      } finally {
+        client.release();
+      }
+    } catch (error) {
+      logger.error('Error finding book chunk:', error);
+      throw DatabaseError.handlePgError(error as PgError);
+    }
+  }
+
+  /**
+   * Remove book pages by their unique IDs
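+   * A row is deleted only when both its uniqueId and its source match.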
+   * @param uniqueIds - Array of unique IDs to remove
+   * @param source - Source filter
+   * @returns Promise<void>
+   */
+  async removeBookPages(
+    uniqueIds: string[],
+    source: DocumentSource,
+  ): Promise<void> {
+    if (uniqueIds.length === 0) return;
+
+    try {
+      const client = await this.pool.connect();
+      try {
+        const query = `
+          DELETE FROM ${this.tableName}
+          WHERE uniqueId = ANY($1)
+          AND source = $2
+        `;
+
+        await client.query(query, [uniqueIds, source]);
+        logger.info(`Removed ${uniqueIds.length} pages from source ${source}`);
+      } finally {
+        client.release();
+      }
+    } catch (error) {
+      logger.error('Error removing book pages:', error);
+      throw DatabaseError.handlePgError(error as PgError);
+    }
+  }
+
+  /**
+   * Get hashes of stored book pages
+   * @param source - Source filter
+   * @returns Promise<Array<{ uniqueId: string; contentHash: string }>>
+   */
+  async getStoredBookPagesHashes(
+    source: DocumentSource,
+  ): Promise<Array<{ uniqueId: string; contentHash: string }>> {
+    try {
+      const client = await this.pool.connect();
+      try {
+        const result = await client.query(
+          `SELECT uniqueId, contentHash FROM ${this.tableName} WHERE source = $1`,
+          [source]
+        );
+
+        return result.rows.map((row) => ({
+          uniqueId: row.uniqueid,
+          contentHash: row.contenthash,
+        }));
+      } finally {
+        client.release();
+      }
+    } catch (error) {
+      logger.error('Error getting stored book pages hashes:', error);
+      throw DatabaseError.handlePgError(error as PgError);
+    }
+  }
+
+  /**
+   * Close the connection to the database
+   * @returns Promise<void>
+   */
+  async close(): Promise<void> {
+    logger.info('Disconnecting from PostgreSQL');
+    if (this.pool) {
+      await this.pool.end();
+      VectorStore.instance = null; // Reset the singleton instance
+    }
+  }
+
+  /**
+   * Execute a query against the database
+   * @param q - The query to execute
+   * @returns Promise<T[]>
+   */
+  private async query<T>(q: Query): Promise<T[]> {
+    const client = await this.pool.connect();
+    try {
+      const result = await client.query(q.query, q.values);
+      return result.rows as T[];
+    } catch (error) {
+      throw DatabaseError.handlePgError(error as PgError);
+    } finally {
+      client.release();
+    }
+  }
+
+  /**
+   * Execute a transaction against the database
+   * @param queries - The queries to execute
+   * @returns Promise<any[]>
+   */
+  private async transaction(queries: Query[]): Promise<any[]> {
+    const client = await this.pool.connect();
+    let result;
+
+    try {
+      await client.query('BEGIN');
+
+      for (const q of queries) {
+        result = await client.query(q.query, q.values);
+      }
+
+      await client.query('COMMIT');
+      return result ?
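+        /* rows from the last executed query, or an empty array when none ran */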
result.rows : []; + } catch (error) { + await client.query('ROLLBACK'); + throw DatabaseError.handlePgError(error as PgError); + } finally { + client.release(); + } + } +} \ No newline at end of file diff --git a/packages/agents/src/db/vectorStore.ts b/packages/agents/src/db/vectorStore.ts index 06611743..ac85839d 100644 --- a/packages/agents/src/db/vectorStore.ts +++ b/packages/agents/src/db/vectorStore.ts @@ -1,164 +1,164 @@ -import { MongoDBAtlasVectorSearch } from '@langchain/mongodb'; -import { MongoClient, Collection, ObjectId, Filter } from 'mongodb'; -import { DocumentInterface } from '@langchain/core/documents'; -import { OpenAIEmbeddings } from '@langchain/openai'; -import { Embeddings } from '@langchain/core/embeddings'; -import { logger } from '../utils'; -import { VectorStoreConfig } from '../types'; -import { DocumentSource } from '../types'; - -/** - * VectorStore class for managing document storage and similarity search - */ -export class VectorStore { - private static instance: VectorStore | null = null; - private client: MongoClient; - collection: Collection; - private vectorSearch: MongoDBAtlasVectorSearch; - - private constructor( - client: MongoClient, - collection: Collection, - vectorSearch: MongoDBAtlasVectorSearch, - ) { - this.client = client; - this.collection = collection; - this.vectorSearch = vectorSearch; - } - - static async getInstance( - config: VectorStoreConfig, - embeddings: Embeddings, - ): Promise { - if (!VectorStore.instance) { - const client = new MongoClient(config.MONGODB_URI, { - maxPoolSize: 10, // Adjust this value based on your needs - minPoolSize: 5, - }); - await client.connect(); - logger.info('Connected to MongoDB'); - - const collection = client - .db(config.DB_NAME) - .collection(config.COLLECTION_NAME); - - const vectorSearch = new MongoDBAtlasVectorSearch(embeddings, { - collection, - indexName: 'default', - textKey: 'content', - embeddingKey: 'embedding', - }); - - VectorStore.instance = new VectorStore(client, collection, vectorSearch); - } - return VectorStore.instance; - } - - /** - * Perform similarity search - * @param query - The query string - * @param k - Number of results to return - * @param sources - Optional source filter - * @returns Promise - */ - async similaritySearch( - query: string, - k: number = 5, - sources: DocumentSource | DocumentSource[], - ): Promise { - if (!sources) { - return this.vectorSearch.similaritySearch(query, k); - } - - const sourcesArray = Array.isArray(sources) ? 
sources : [sources]; - const filter: Filter = { - preFilter: { source: { $in: sourcesArray } }, - }; - - return this.vectorSearch.similaritySearch(query, k, filter); - } - - /** - * Add documents to the vector store - * @param documents - Array of documents to add - * @param uniqueIds - Optional array of unique IDs for the documents - * @returns Promise - */ - async addDocuments(documents: any[], uniqueIds?: string[]): Promise { - logger.info(`Adding ${documents.length} documents to the vector store`); - await this.vectorSearch.addDocuments(documents, { ids: uniqueIds }); - } - - /** - * Find a specific book chunk by name - * @param name - Name of the book chunk - * @returns Promise - */ - async findBookChunk(name: string): Promise { - try { - const match = await this.collection.findOne({ - _id: name as unknown as ObjectId, - }); - if (match) { - return { - metadata: { _id: name, contentHash: match.contentHash }, - pageContent: match.text, - }; - } - return null; - } catch (error) { - logger.error('Error finding book chunk:', error); - throw error; - } - } - - /** - * Remove book pages by their unique IDs - * @param uniqueIds - Array of unique IDs to remove - * @param source - Optional source filter - * @returns Promise - */ - async removeBookPages( - uniqueIds: string[], - source: DocumentSource, - ): Promise { - const filter: Filter = { - uniqueId: { $in: uniqueIds }, - source: { $in: [source] }, - }; - - logger.info('Removing book pages with filter', filter); - await this.collection.deleteMany(filter); - } - - /** - * Get hashes of stored book pages - * @param source - Optional source filter - * @returns Promise> - */ - async getStoredBookPagesHashes( - source: DocumentSource, - ): Promise> { - const filter: Filter = { source: { $in: [source] } }; - const documents = await this.collection - .find(filter, { projection: { uniqueId: 1, contentHash: 1 } }) - .toArray(); - - return documents.map((doc) => ({ - uniqueId: doc.uniqueId, - contentHash: doc.contentHash, - })); - } - - /** - * Close the connection to the database - * @returns Promise - */ - async close(): Promise { - logger.info('Disconnecting from MongoDB'); - if (this.client) { - await this.client.close(true); // Force close all connections in the pool - VectorStore.instance = null; // Reset the singleton instance - } - } -} +// import { MongoDBAtlasVectorSearch } from '@langchain/mongodb'; +// import { MongoClient, Collection, ObjectId, Filter } from 'mongodb'; +// import { DocumentInterface } from '@langchain/core/documents'; +// import { OpenAIEmbeddings } from '@langchain/openai'; +// import { Embeddings } from '@langchain/core/embeddings'; +// import { logger } from '../utils'; +// import { VectorStoreConfig } from '../types'; +// import { DocumentSource } from '../types'; + +// /** +// * VectorStore class for managing document storage and similarity search +// */ +// export class VectorStore { +// private static instance: VectorStore | null = null; +// private client: MongoClient; +// collection: Collection; +// private vectorSearch: MongoDBAtlasVectorSearch; + +// private constructor( +// client: MongoClient, +// collection: Collection, +// vectorSearch: MongoDBAtlasVectorSearch, +// ) { +// this.client = client; +// this.collection = collection; +// this.vectorSearch = vectorSearch; +// } + +// static async getInstance( +// config: VectorStoreConfig, +// embeddings: Embeddings, +// ): Promise { +// if (!VectorStore.instance) { +// const client = new MongoClient(config.MONGODB_URI, { +// maxPoolSize: 10, // Adjust this 
value based on your needs +// minPoolSize: 5, +// }); +// await client.connect(); +// logger.info('Connected to MongoDB'); + +// const collection = client +// .db(config.DB_NAME) +// .collection(config.COLLECTION_NAME); + +// const vectorSearch = new MongoDBAtlasVectorSearch(embeddings, { +// collection, +// indexName: 'default', +// textKey: 'content', +// embeddingKey: 'embedding', +// }); + +// VectorStore.instance = new VectorStore(client, collection, vectorSearch); +// } +// return VectorStore.instance; +// } + +// /** +// * Perform similarity search +// * @param query - The query string +// * @param k - Number of results to return +// * @param sources - Optional source filter +// * @returns Promise +// */ +// async similaritySearch( +// query: string, +// k: number = 5, +// sources: DocumentSource | DocumentSource[], +// ): Promise { +// if (!sources) { +// return this.vectorSearch.similaritySearch(query, k); +// } + +// const sourcesArray = Array.isArray(sources) ? sources : [sources]; +// const filter: Filter = { +// preFilter: { source: { $in: sourcesArray } }, +// }; + +// return this.vectorSearch.similaritySearch(query, k, filter); +// } + +// /** +// * Add documents to the vector store +// * @param documents - Array of documents to add +// * @param uniqueIds - Optional array of unique IDs for the documents +// * @returns Promise +// */ +// async addDocuments(documents: any[], uniqueIds?: string[]): Promise { +// logger.info(`Adding ${documents.length} documents to the vector store`); +// await this.vectorSearch.addDocuments(documents, { ids: uniqueIds }); +// } + +// /** +// * Find a specific book chunk by name +// * @param name - Name of the book chunk +// * @returns Promise +// */ +// async findBookChunk(name: string): Promise { +// try { +// const match = await this.collection.findOne({ +// _id: name as unknown as ObjectId, +// }); +// if (match) { +// return { +// metadata: { _id: name, contentHash: match.contentHash }, +// pageContent: match.text, +// }; +// } +// return null; +// } catch (error) { +// logger.error('Error finding book chunk:', error); +// throw error; +// } +// } + +// /** +// * Remove book pages by their unique IDs +// * @param uniqueIds - Array of unique IDs to remove +// * @param source - Optional source filter +// * @returns Promise +// */ +// async removeBookPages( +// uniqueIds: string[], +// source: DocumentSource, +// ): Promise { +// const filter: Filter = { +// uniqueId: { $in: uniqueIds }, +// source: { $in: [source] }, +// }; + +// logger.info('Removing book pages with filter', filter); +// await this.collection.deleteMany(filter); +// } + +// /** +// * Get hashes of stored book pages +// * @param source - Optional source filter +// * @returns Promise> +// */ +// async getStoredBookPagesHashes( +// source: DocumentSource, +// ): Promise> { +// const filter: Filter = { source: { $in: [source] } }; +// const documents = await this.collection +// .find(filter, { projection: { uniqueId: 1, contentHash: 1 } }) +// .toArray(); + +// return documents.map((doc) => ({ +// uniqueId: doc.uniqueId, +// contentHash: doc.contentHash, +// })); +// } + +// /** +// * Close the connection to the database +// * @returns Promise +// */ +// async close(): Promise { +// logger.info('Disconnecting from MongoDB'); +// if (this.client) { +// await this.client.close(true); // Force close all connections in the pool +// VectorStore.instance = null; // Reset the singleton instance +// } +// } +// } diff --git a/packages/agents/src/types/index.ts 
b/packages/agents/src/types/index.ts index 67bbe12f..115a475e 100644 --- a/packages/agents/src/types/index.ts +++ b/packages/agents/src/types/index.ts @@ -1,6 +1,6 @@ import { BaseMessage } from '@langchain/core/messages'; import { Document } from '@langchain/core/documents'; -import { VectorStore } from '../db/vectorStore'; +import { VectorStore } from '../db/postgresVectorStore'; import { BaseChatModel } from '@langchain/core/language_models/chat_models'; export type AvailableAgents = 'cairoCoder'; @@ -10,18 +10,33 @@ export interface LLMConfig { fastLLM?: BaseChatModel; } -export interface VectorStoreConfig { +export interface MongoVectorStoreConfig { + type: 'mongodb'; MONGODB_URI: string; DB_NAME: string; COLLECTION_NAME: string; } +export interface PostgresVectorStoreConfig { + type: 'postgres'; + COLLECTION_NAME: string; +} + +export type VectorStoreConfig = MongoVectorStoreConfig | PostgresVectorStoreConfig; + +export interface VectorDBConfig { + DB_TYPE: 'mongodb' | 'postgres'; + MONGODB_URI?: string; + DB_NAME?: string; + COLLECTION_NAME: string; +} + export interface Config { GENERAL: { PORT: number; SIMILARITY_MEASURE: string; }; - VECTOR_DB: VectorStoreConfig; + VECTOR_DB: VectorDBConfig; API_KEYS: { OPENAI: string; GROQ: string; diff --git a/packages/agents/src/utils/index.ts b/packages/agents/src/utils/index.ts index 1c7a8c14..0eb74878 100644 --- a/packages/agents/src/utils/index.ts +++ b/packages/agents/src/utils/index.ts @@ -17,7 +17,7 @@ export const parseXMLContent = (xml: string, tag: string): string[] => { }; export const logger = winston.createLogger({ - level: process.env.LOG_LEVEL || 'info', + level: process.env.LOG_LEVEL || 'debug', transports: [ new winston.transports.Console({ format: winston.format.combine( diff --git a/packages/backend/src/config/provider/openai.ts b/packages/backend/src/config/provider/openai.ts index 0aab09e7..02fe7c4f 100644 --- a/packages/backend/src/config/provider/openai.ts +++ b/packages/backend/src/config/provider/openai.ts @@ -54,13 +54,13 @@ export const loadOpenAIEmbeddingsModels = async () => { openAIApiKey, modelName: 'text-embedding-3-small', batchSize: 512, - dimensions: 2048, + dimensions: 1536, }), 'Text embedding 3 large': new OpenAIEmbeddings({ openAIApiKey, modelName: 'text-embedding-3-large', batchSize: 512, - dimensions: 2048, + dimensions: 1536, }), }; diff --git a/packages/backend/src/routes/cairocoder.ts b/packages/backend/src/routes/cairocoder.ts index 86d230e5..3a94b051 100644 --- a/packages/backend/src/routes/cairocoder.ts +++ b/packages/backend/src/routes/cairocoder.ts @@ -9,9 +9,9 @@ import { logger, RagAgentFactory, LLMConfig, - VectorStore, } from '@starknet-agent/agents'; import { ChatCompletionRequest } from '../types'; +import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; const router: Router = express.Router(); diff --git a/packages/backend/src/types/index.ts b/packages/backend/src/types/index.ts index dd74accb..f76d7cce 100644 --- a/packages/backend/src/types/index.ts +++ b/packages/backend/src/types/index.ts @@ -2,7 +2,7 @@ import eventEmitter from 'events'; import { BaseMessage } from '@langchain/core/messages'; import { Embeddings } from '@langchain/core/embeddings'; import { LLMConfig } from '@starknet-agent/agents/types/index'; -import { VectorStore } from '@starknet-agent/agents/db/vectorStore'; +import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; import { BaseChatModel } from '@langchain/core/language_models/chat_models'; import { CorsOptions } from 
'cors'; import { Express } from 'express'; diff --git a/packages/ingester/src/BaseIngester.ts b/packages/ingester/src/BaseIngester.ts index 17141a1d..3814d698 100644 --- a/packages/ingester/src/BaseIngester.ts +++ b/packages/ingester/src/BaseIngester.ts @@ -1,5 +1,5 @@ import { Document } from '@langchain/core/documents'; -import { VectorStore } from '@starknet-agent/agents/db/vectorStore'; +import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; import { DocumentSource, BookChunk, diff --git a/packages/ingester/src/generateEmbeddings.ts b/packages/ingester/src/generateEmbeddings.ts index fe0dd0f7..4dcb4ad6 100644 --- a/packages/ingester/src/generateEmbeddings.ts +++ b/packages/ingester/src/generateEmbeddings.ts @@ -1,7 +1,7 @@ import dotenv from 'dotenv'; import { createInterface } from 'readline'; import { logger } from '@starknet-agent/agents/utils/index'; -import { VectorStore } from '@starknet-agent/agents/db/vectorStore'; +import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; import { getVectorDbConfig } from '@starknet-agent/agents/config/settings'; import { loadOpenAIEmbeddingsModels } from '@starknet-agent/backend/config/provider/openai'; import { DocumentSource } from '@starknet-agent/agents/types/index'; @@ -37,10 +37,12 @@ async function setupVectorStore(): Promise { try { // Get database configuration const dbConfig = getVectorDbConfig(); - + logger.debug('dbConfig', dbConfig); // Load embedding models const embeddingModels = await loadOpenAIEmbeddingsModels(); + logger.debug('embeddingModels', embeddingModels); const textEmbedding3Large = embeddingModels['Text embedding 3 large']; + logger.debug('textEmbedding3Large', textEmbedding3Large); if (!textEmbedding3Large) { throw new Error('Text embedding 3 large model not found'); diff --git a/packages/ingester/src/shared.ts b/packages/ingester/src/shared.ts index 6a445b1c..8074cf8e 100644 --- a/packages/ingester/src/shared.ts +++ b/packages/ingester/src/shared.ts @@ -3,7 +3,7 @@ import { Document } from '@langchain/core/documents'; import { logger } from '@starknet-agent/agents/utils/index'; import * as fs from 'fs/promises'; import * as path from 'path'; -import { VectorStore } from '@starknet-agent/agents/db/vectorStore'; +import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; import { BookChunk, DocumentSource } from '@starknet-agent/agents/types/index'; export const MAX_SECTION_SIZE = 20000; diff --git a/packages/ingester/src/utils/vectorStoreUtils.ts b/packages/ingester/src/utils/vectorStoreUtils.ts index 9be8c182..641351da 100644 --- a/packages/ingester/src/utils/vectorStoreUtils.ts +++ b/packages/ingester/src/utils/vectorStoreUtils.ts @@ -1,6 +1,6 @@ import { Document } from '@langchain/core/documents'; import { createInterface } from 'readline'; -import { VectorStore } from '@starknet-agent/agents/db/vectorStore'; +import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; import { BookChunk, DocumentSource, @@ -121,7 +121,7 @@ export async function updateVectorStore( if (chunksToUpdate.length > 0) { await vectorStore.addDocuments( chunksToUpdate, - chunksToUpdate.map((chunk) => chunk.metadata.uniqueId), + { ids: chunksToUpdate.map((chunk) => chunk.metadata.uniqueId) } ); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f4aadad5..3cf14011 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -10,7 +10,7 @@ importers: devDependencies: turbo: specifier: latest - version: 2.4.4 + version: 2.5.0 typescript: specifier: ^5.7.2 version: 5.7.3 @@ 
-25,7 +25,7 @@ importers: version: 0.2.18(openai@4.85.3(ws@8.18.1)(zod@3.24.2)) '@langchain/community': specifier: ^0.3.32 - version: 0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(playwright@1.50.1)(ws@8.18.1) + version: 0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(pg@8.14.1)(playwright@1.50.1)(ws@8.18.1) '@langchain/core': specifier: ^0.2.36 version: 0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)) @@ -50,12 +50,18 @@ importers: compute-dot: specifier: ^1.1.0 version: 1.1.0 + dotenv: + specifier: ^16.4.7 + version: 16.4.7 mongodb: specifier: ^6.13.1 version: 6.13.1 node-fetch: specifier: ^3.3.2 version: 3.3.2 + pg: + specifier: ^8.14.1 + version: 8.14.1 winston: specifier: ^3.17.0 version: 3.17.0 @@ -104,7 +110,7 @@ importers: version: 0.2.18(openai@4.85.4(ws@8.18.1)(zod@3.24.2)) '@langchain/community': specifier: ^0.3.32 - version: 0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(playwright@1.50.1)(ws@8.18.1) + version: 
0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(pg@8.14.1)(playwright@1.50.1)(ws@8.18.1) '@langchain/core': specifier: ^0.2.36 version: 0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)) @@ -2728,6 +2734,40 @@ packages: resolution: {integrity: sha512-ZI3LnwUv5nOGbQzD9c2iDG6toheuXSZP5esSHBjopsXH4dg19soufvpUGA3uohi5anFtGb2lhAVdHzH6R/Evvg==} engines: {node: '>=8'} + pg-cloudflare@1.1.1: + resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==} + + pg-connection-string@2.7.0: + resolution: {integrity: sha512-PI2W9mv53rXJQEOb8xNR8lH7Hr+EKa6oJa38zsK0S/ky2er16ios1wLKhZyxzD7jUReiWokc9WK5nxSnC7W1TA==} + + pg-int8@1.0.1: + resolution: {integrity: sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==} + engines: {node: '>=4.0.0'} + + pg-pool@3.8.0: + resolution: {integrity: sha512-VBw3jiVm6ZOdLBTIcXLNdSotb6Iy3uOCwDGFAksZCXmi10nyRvnP2v3jl4d+IsLYRyXf6o9hIm/ZtUzlByNUdw==} + peerDependencies: + pg: '>=8.0' + + pg-protocol@1.8.0: + resolution: {integrity: sha512-jvuYlEkL03NRvOoyoRktBK7+qU5kOvlAwvmrH8sr3wbLrOdVWsRxQfz8mMy9sZFsqJ1hEWNfdWKI4SAmoL+j7g==} + + pg-types@2.2.0: + resolution: {integrity: sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==} + engines: {node: '>=4'} + + pg@8.14.1: + resolution: {integrity: sha512-0TdbqfjwIun9Fm/r89oB7RFQ0bLgduAhiIqIXOsyKoiC/L54DbuAAzIEN/9Op0f1Po9X7iCPXGoa/Ah+2aI8Xw==} + engines: {node: '>= 8.0.0'} + peerDependencies: + pg-native: '>=3.0.1' + peerDependenciesMeta: + pg-native: + optional: true + + pgpass@1.0.5: + resolution: {integrity: sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==} + picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -2753,6 +2793,22 @@ packages: engines: {node: '>=18'} hasBin: true + postgres-array@2.0.0: + resolution: {integrity: sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==} + engines: {node: '>=4'} + + postgres-bytea@1.0.0: + resolution: {integrity: sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==} + engines: {node: '>=0.10.0'} + + postgres-date@1.0.7: + resolution: {integrity: sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==} + engines: {node: '>=0.10.0'} + + postgres-interval@1.2.0: + resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==} + engines: {node: '>=0.10.0'} + prettier@3.5.2: resolution: {integrity: sha512-lc6npv5PH7hVqozBR7lkBNOGXV9vMwROAPlumdBkX0wTbbzPu/U1hk5yL8p2pt4Xoc+2mkT8t/sow2YrV/M5qg==} engines: {node: '>=14'} @@ -2984,6 +3040,10 @@ packages: sparse-bitfield@3.0.3: resolution: {integrity: 
sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==} + split2@4.2.0: + resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} + engines: {node: '>= 10.x'} + sprintf-js@1.0.3: resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} @@ -3146,38 +3206,38 @@ packages: '@swc/wasm': optional: true - turbo-darwin-64@2.4.4: - resolution: {integrity: sha512-5kPvRkLAfmWI0MH96D+/THnDMGXlFNmjeqNRj5grLKiry+M9pKj3pRuScddAXPdlxjO5Ptz06UNaOQrrYGTx1g==} + turbo-darwin-64@2.5.0: + resolution: {integrity: sha512-fP1hhI9zY8hv0idym3hAaXdPi80TLovmGmgZFocVAykFtOxF+GlfIgM/l4iLAV9ObIO4SUXPVWHeBZQQ+Hpjag==} cpu: [x64] os: [darwin] - turbo-darwin-arm64@2.4.4: - resolution: {integrity: sha512-/gtHPqbGQXDFhrmy+Q/MFW2HUTUlThJ97WLLSe4bxkDrKHecDYhAjbZ4rN3MM93RV9STQb3Tqy4pZBtsd4DfCw==} + turbo-darwin-arm64@2.5.0: + resolution: {integrity: sha512-p9sYq7kXH7qeJwIQE86cOWv/xNqvow846l6c/qWc26Ib1ci5W7V0sI5thsrP3eH+VA0d+SHalTKg5SQXgNQBWA==} cpu: [arm64] os: [darwin] - turbo-linux-64@2.4.4: - resolution: {integrity: sha512-SR0gri4k0bda56hw5u9VgDXLKb1Q+jrw4lM7WAhnNdXvVoep4d6LmnzgMHQQR12Wxl3KyWPbkz9d1whL6NTm2Q==} + turbo-linux-64@2.5.0: + resolution: {integrity: sha512-1iEln2GWiF3iPPPS1HQJT6ZCFXynJPd89gs9SkggH2EJsj3eRUSVMmMC8y6d7bBbhBFsiGGazwFIYrI12zs6uQ==} cpu: [x64] os: [linux] - turbo-linux-arm64@2.4.4: - resolution: {integrity: sha512-COXXwzRd3vslQIfJhXUklgEqlwq35uFUZ7hnN+AUyXx7hUOLIiD5NblL+ETrHnhY4TzWszrbwUMfe2BYWtaPQg==} + turbo-linux-arm64@2.5.0: + resolution: {integrity: sha512-bKBcbvuQHmsX116KcxHJuAcppiiBOfivOObh2O5aXNER6mce7YDDQJy00xQQNp1DhEfcSV2uOsvb3O3nN2cbcA==} cpu: [arm64] os: [linux] - turbo-windows-64@2.4.4: - resolution: {integrity: sha512-PV9rYNouGz4Ff3fd6sIfQy5L7HT9a4fcZoEv8PKRavU9O75G7PoDtm8scpHU10QnK0QQNLbE9qNxOAeRvF0fJg==} + turbo-windows-64@2.5.0: + resolution: {integrity: sha512-9BCo8oQ7BO7J0K913Czbc3tw8QwLqn2nTe4E47k6aVYkM12ASTScweXPTuaPFP5iYXAT6z5Dsniw704Ixa5eGg==} cpu: [x64] os: [win32] - turbo-windows-arm64@2.4.4: - resolution: {integrity: sha512-403sqp9t5sx6YGEC32IfZTVWkRAixOQomGYB8kEc6ZD+//LirSxzeCHCnM8EmSXw7l57U1G+Fb0kxgTcKPU/Lg==} + turbo-windows-arm64@2.5.0: + resolution: {integrity: sha512-OUHCV+ueXa3UzfZ4co/ueIHgeq9B2K48pZwIxKSm5VaLVuv8M13MhM7unukW09g++dpdrrE1w4IOVgxKZ0/exg==} cpu: [arm64] os: [win32] - turbo@2.4.4: - resolution: {integrity: sha512-N9FDOVaY3yz0YCOhYIgOGYad7+m2ptvinXygw27WPLQvcZDl3+0Sa77KGVlLSiuPDChOUEnTKE9VJwLSi9BPGQ==} + turbo@2.5.0: + resolution: {integrity: sha512-PvSRruOsitjy6qdqwIIyolv99+fEn57gP6gn4zhsHTEcCYgXPhv6BAxzAjleS8XKpo+Y582vTTA9nuqYDmbRuA==} hasBin: true type-detect@4.0.8: @@ -3337,6 +3397,10 @@ packages: utf-8-validate: optional: true + xtend@4.0.2: + resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==} + engines: {node: '>=0.4'} + y18n@5.0.8: resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} engines: {node: '>=10'} @@ -3914,7 +3978,7 @@ snapshots: - encoding - openai - 
'@langchain/community@0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(playwright@1.50.1)(ws@8.18.1)': + '@langchain/community@0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(pg@8.14.1)(playwright@1.50.1)(ws@8.18.1)': dependencies: '@browserbasehq/stagehand': 1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.3(ws@8.18.1)(zod@3.24.2))(zod@3.24.2) '@ibm-cloud/watsonx-ai': 1.5.0(@langchain/core@0.2.36(openai@4.85.3(ws@8.18.1)(zod@3.24.2))) @@ -3938,6 +4002,7 @@ snapshots: ignore: 5.3.2 jsonwebtoken: 9.0.2 mongodb: 6.13.1 + pg: 8.14.1 playwright: 1.50.1 ws: 8.18.1 transitivePeerDependencies: @@ -3959,7 +4024,7 @@ snapshots: - peggy - youtube-transcript - '@langchain/community@0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(playwright@1.50.1)(ws@8.18.1)': + '@langchain/community@0.3.32(@browserbasehq/sdk@2.3.0)(@browserbasehq/stagehand@1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(zod@3.24.2))(@ibm-cloud/watsonx-ai@1.5.0(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2))))(@langchain/anthropic@0.2.18(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(@langchain/google-genai@0.1.8(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)))(zod@3.24.2))(axios@1.7.9)(fast-xml-parser@4.5.3)(handlebars@4.7.8)(html-to-text@9.0.5)(ibm-cloud-sdk-core@5.1.3)(ignore@5.3.2)(jsonwebtoken@9.0.2)(mongodb@6.13.1)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(pg@8.14.1)(playwright@1.50.1)(ws@8.18.1)': dependencies: '@browserbasehq/stagehand': 
1.13.0(@playwright/test@1.50.1)(deepmerge@4.3.1)(dotenv@16.4.7)(openai@4.85.4(ws@8.18.1)(zod@3.24.2))(zod@3.24.2) '@ibm-cloud/watsonx-ai': 1.5.0(@langchain/core@0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2))) @@ -3983,6 +4048,7 @@ snapshots: ignore: 5.3.2 jsonwebtoken: 9.0.2 mongodb: 6.13.1 + pg: 8.14.1 playwright: 1.50.1 ws: 8.18.1 transitivePeerDependencies: @@ -6018,6 +6084,41 @@ snapshots: peek-readable@4.1.0: {} + pg-cloudflare@1.1.1: + optional: true + + pg-connection-string@2.7.0: {} + + pg-int8@1.0.1: {} + + pg-pool@3.8.0(pg@8.14.1): + dependencies: + pg: 8.14.1 + + pg-protocol@1.8.0: {} + + pg-types@2.2.0: + dependencies: + pg-int8: 1.0.1 + postgres-array: 2.0.0 + postgres-bytea: 1.0.0 + postgres-date: 1.0.7 + postgres-interval: 1.2.0 + + pg@8.14.1: + dependencies: + pg-connection-string: 2.7.0 + pg-pool: 3.8.0(pg@8.14.1) + pg-protocol: 1.8.0 + pg-types: 2.2.0 + pgpass: 1.0.5 + optionalDependencies: + pg-cloudflare: 1.1.1 + + pgpass@1.0.5: + dependencies: + split2: 4.2.0 + picocolors@1.1.1: {} picomatch@2.3.1: {} @@ -6036,6 +6137,16 @@ snapshots: optionalDependencies: fsevents: 2.3.2 + postgres-array@2.0.0: {} + + postgres-bytea@1.0.0: {} + + postgres-date@1.0.7: {} + + postgres-interval@1.2.0: + dependencies: + xtend: 4.0.2 + prettier@3.5.2: {} pretty-format@29.7.0: @@ -6307,6 +6418,8 @@ snapshots: dependencies: memory-pager: 1.5.0 + split2@4.2.0: {} + sprintf-js@1.0.3: {} stack-trace@0.0.10: {} @@ -6465,32 +6578,32 @@ snapshots: v8-compile-cache-lib: 3.0.1 yn: 3.1.1 - turbo-darwin-64@2.4.4: + turbo-darwin-64@2.5.0: optional: true - turbo-darwin-arm64@2.4.4: + turbo-darwin-arm64@2.5.0: optional: true - turbo-linux-64@2.4.4: + turbo-linux-64@2.5.0: optional: true - turbo-linux-arm64@2.4.4: + turbo-linux-arm64@2.5.0: optional: true - turbo-windows-64@2.4.4: + turbo-windows-64@2.5.0: optional: true - turbo-windows-arm64@2.4.4: + turbo-windows-arm64@2.5.0: optional: true - turbo@2.4.4: + turbo@2.5.0: optionalDependencies: - turbo-darwin-64: 2.4.4 - turbo-darwin-arm64: 2.4.4 - turbo-linux-64: 2.4.4 - turbo-linux-arm64: 2.4.4 - turbo-windows-64: 2.4.4 - turbo-windows-arm64: 2.4.4 + turbo-darwin-64: 2.5.0 + turbo-darwin-arm64: 2.5.0 + turbo-linux-64: 2.5.0 + turbo-linux-arm64: 2.5.0 + turbo-windows-64: 2.5.0 + turbo-windows-arm64: 2.5.0 type-detect@4.0.8: {} @@ -6623,6 +6736,8 @@ snapshots: ws@8.18.1: {} + xtend@4.0.2: {} + y18n@5.0.8: {} yallist@3.1.1: {} From 33c625ed8a76e1d1d48c54d50352acee2a227a77 Mon Sep 17 00:00:00 2001 From: alvinouille Date: Mon, 21 Apr 2025 15:53:38 +0200 Subject: [PATCH 03/10] core: backend's endpoint working with dockerized postgres --- docker-compose.yml | 17 +++++++++-- ingest.dockerfile | 28 ++++++++++------- packages/agents/src/config/settings.ts | 30 +++++-------------- packages/agents/src/db/postgresVectorStore.ts | 23 +++++++------- packages/agents/src/types/index.ts | 27 +++++------------ packages/backend/src/routes/cairocoder.ts | 2 +- packages/ingester/package.json | 3 +- packages/ingester/src/generateEmbeddings.ts | 1 + pnpm-lock.yaml | 3 ++ 9 files changed, 66 insertions(+), 68 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index dbbd0a88..10916ab2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ version: '3.8' services: postgres: image: pgvector/pgvector:pg17 - container_name: "posgresql" + container_name: "postgresql" shm_size: 1g ports: - 5432:5432 @@ -23,14 +23,25 @@ services: - 3001:3001 extra_hosts: - host.docker.internal:host-gateway - env_file: - - .env depends_on: postgres: condition: 
service_started restart: unless-stopped networks: - cairo_coder_network + + # ingester: + # build: + # context: . + # dockerfile: ingest.dockerfile + # env_file: + # - .env + # depends_on: + # postgres: + # condition: service_started + # networks: + # - cairo_coder_network + networks: cairo_coder_network: diff --git a/ingest.dockerfile b/ingest.dockerfile index 019f6f0e..b2795f0b 100644 --- a/ingest.dockerfile +++ b/ingest.dockerfile @@ -1,17 +1,25 @@ -FROM node:18 +FROM node:20 WORKDIR /app -# Copy package.json and package-lock.json (if you have one) -COPY package.json package-lock.json* ./ +# Copy root workspace files +COPY pnpm-workspace.yaml ./ +COPY package.json ./ +COPY pnpm-lock.yaml ./ +COPY turbo.json ./ -# Copy the rest of your application code -COPY . . +# Copy backend & agents packages +COPY packages/backend ./packages/backend +COPY packages/ingester ./packages/ingester +COPY packages/agents ./packages/agents -# Install dependencies -RUN yarn install -# Compile TypeScript to JavaScript -RUN yarn build +# Copy shared TypeScript config +COPY packages/typescript-config ./packages/typescript-config + +RUN npm install -g pnpm@9.10.0 +RUN pnpm install --frozen-lockfile +RUN npm install -g turbo # Set the command to run your script -CMD ["node", "dist/scripts/generateEmbeddings.js"] +# Ensure this path is correct relative to the WORKDIR after build +CMD ["turbo", "run", "generate-embeddings:yes"] \ No newline at end of file diff --git a/packages/agents/src/config/settings.ts b/packages/agents/src/config/settings.ts index 04779eb8..20870513 100644 --- a/packages/agents/src/config/settings.ts +++ b/packages/agents/src/config/settings.ts @@ -3,7 +3,7 @@ import fs from 'fs'; import path from 'path'; import toml from '@iarna/toml'; -import { Config, RecursivePartial, PostgresVectorStoreConfig, MongoVectorStoreConfig } from '../types'; +import { Config, RecursivePartial, VectorStoreConfig } from '../types'; const configFileName = 'config.toml'; @@ -45,28 +45,14 @@ export const getGeminiApiKey = () => loadConfig().API_KEYS.GEMINI; export const getVectorDbConfig = () => { const config = loadConfig(); - const dbType = 'postgres'; - - if (dbType === 'postgres') { - return { - type: 'postgres', - COLLECTION_NAME: config.VECTOR_DB.COLLECTION_NAME || 'documents', - } as PostgresVectorStoreConfig; - } else { - // Default to MongoDB - return { - type: 'mongodb', - MONGODB_URI: config.VECTOR_DB.MONGODB_URI || '', - DB_NAME: config.VECTOR_DB.DB_NAME || '', - COLLECTION_NAME: config.VECTOR_DB.COLLECTION_NAME || 'chunks', - } as MongoVectorStoreConfig; - } -}; -// Check if we're using PostgreSQL -export const isPostgresDb = () => { - const config = loadConfig(); - return config.VECTOR_DB.DB_TYPE === 'postgres'; + return { + POSTGRES_USER: config.VECTOR_DB.POSTGRES_USER || '', + POSTGRES_PASSWORD: config.VECTOR_DB.POSTGRES_PASSWORD || '', + POSTGRES_ROOT_DB: config.VECTOR_DB.POSTGRES_ROOT_DB || '', + POSTGRES_HOST: config.VECTOR_DB.POSTGRES_HOST || '', + POSTGRES_PORT: config.VECTOR_DB.POSTGRES_PORT || '', + } as VectorStoreConfig; }; export const updateConfig = (config: RecursivePartial) => { diff --git a/packages/agents/src/db/postgresVectorStore.ts b/packages/agents/src/db/postgresVectorStore.ts index fa605942..668ce47e 100644 --- a/packages/agents/src/db/postgresVectorStore.ts +++ b/packages/agents/src/db/postgresVectorStore.ts @@ -114,18 +114,19 @@ export class VectorStore { embeddings: Embeddings, ): Promise { if (!VectorStore.instance) { - logger.debug('config', config); - 
logger.debug('process.env.POSTGRES_USER', process.env.POSTGRES_USER); - logger.debug('process.env.POSTGRES_HOST', process.env.POSTGRES_HOST); - logger.debug('process.env.POSTGRES_ROOT_DB', process.env.POSTGRES_ROOT_DB); - logger.debug('process.env.POSTGRES_PASSWORD', process.env.POSTGRES_PASSWORD); - logger.debug('process.env.POSTGRES_PORT', process.env.POSTGRES_PORT); + logger.debug('config DB :', config); + logger.debug('config.POSTGRES_USER : ', config.POSTGRES_USER); + logger.debug('config.POSTGRES_HOST : ', config.POSTGRES_HOST); + logger.debug('config.POSTGRES_ROOT_DB : ', config.POSTGRES_ROOT_DB); + logger.debug('config.POSTGRES_PASSWORD : ', config.POSTGRES_PASSWORD); + logger.debug('config.POSTGRES_PORT : ', config.POSTGRES_PORT); + const pool = new Pool({ - user: process.env.POSTGRES_USER, - host: process.env.POSTGRES_HOST, - database: process.env.POSTGRES_ROOT_DB, - password: process.env.POSTGRES_PASSWORD, - port: parseInt(process.env.POSTGRES_PORT || '5432'), + user: config.POSTGRES_USER, + host: config.POSTGRES_HOST, + database: config.POSTGRES_ROOT_DB, + password: config.POSTGRES_PASSWORD, + port: parseInt(config.POSTGRES_PORT || '5432'), max: 10, min: 5, }); diff --git a/packages/agents/src/types/index.ts b/packages/agents/src/types/index.ts index 115a475e..cbfb429e 100644 --- a/packages/agents/src/types/index.ts +++ b/packages/agents/src/types/index.ts @@ -10,25 +10,12 @@ export interface LLMConfig { fastLLM?: BaseChatModel; } -export interface MongoVectorStoreConfig { - type: 'mongodb'; - MONGODB_URI: string; - DB_NAME: string; - COLLECTION_NAME: string; -} - -export interface PostgresVectorStoreConfig { - type: 'postgres'; - COLLECTION_NAME: string; -} - -export type VectorStoreConfig = MongoVectorStoreConfig | PostgresVectorStoreConfig; - -export interface VectorDBConfig { - DB_TYPE: 'mongodb' | 'postgres'; - MONGODB_URI?: string; - DB_NAME?: string; - COLLECTION_NAME: string; +export interface VectorStoreConfig { + POSTGRES_USER: string; + POSTGRES_PASSWORD: string; + POSTGRES_ROOT_DB: string; + POSTGRES_HOST: string; + POSTGRES_PORT: string; } export interface Config { @@ -36,7 +23,7 @@ export interface Config { PORT: number; SIMILARITY_MEASURE: string; }; - VECTOR_DB: VectorDBConfig; + VECTOR_DB: VectorStoreConfig; API_KEYS: { OPENAI: string; GROQ: string; diff --git a/packages/backend/src/routes/cairocoder.ts b/packages/backend/src/routes/cairocoder.ts index 3a94b051..86edd0cc 100644 --- a/packages/backend/src/routes/cairocoder.ts +++ b/packages/backend/src/routes/cairocoder.ts @@ -173,7 +173,7 @@ router.post('/', async (req, res) => { } }); } catch (error) { - logger.error('Error in /v1/chat/completions:', error); + logger.error('Error in /generate:', error); // Map common errors to OpenAI error format if (error instanceof Error) { diff --git a/packages/ingester/package.json b/packages/ingester/package.json index 415ff643..fc485979 100644 --- a/packages/ingester/package.json +++ b/packages/ingester/package.json @@ -8,10 +8,11 @@ "generate-embeddings:yes": "node ./dist/src/generateEmbeddings.js -y" }, "dependencies": { - "@starknet-agent/agents": "workspace:*", "@antora/lunr-extension": "1.0.0-alpha.10", "@asciidoctor/tabs": "1.0.0-beta.6", + "@iarna/toml": "^2.2.5", "@langchain/core": "^0.2.36", + "@starknet-agent/agents": "workspace:*", "@starknet-agent/backend": "workspace:*", "adm-zip": "^0.5.16", "asciidoctor": "^3.0.4", diff --git a/packages/ingester/src/generateEmbeddings.ts b/packages/ingester/src/generateEmbeddings.ts index 4dcb4ad6..f6f9e0bf 100644 --- 
a/packages/ingester/src/generateEmbeddings.ts +++ b/packages/ingester/src/generateEmbeddings.ts @@ -37,6 +37,7 @@ async function setupVectorStore(): Promise { try { // Get database configuration const dbConfig = getVectorDbConfig(); + logger.debug('dbConfig', dbConfig); // Load embedding models const embeddingModels = await loadOpenAIEmbeddingsModels(); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3cf14011..77913679 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -193,6 +193,9 @@ importers: '@asciidoctor/tabs': specifier: 1.0.0-beta.6 version: 1.0.0-beta.6 + '@iarna/toml': + specifier: ^2.2.5 + version: 2.2.5 '@langchain/core': specifier: ^0.2.36 version: 0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2)) From 32a7319c8d54e2f9c661c87083a58d0550feeb1c Mon Sep 17 00:00:00 2001 From: alvinouille Date: Mon, 21 Apr 2025 16:58:28 +0200 Subject: [PATCH 04/10] core: ingester dockerized --- docker-compose.yml | 21 ++- ingest.dockerfile | 7 + packages/agents/src/db/postgresVectorStore.ts | 20 +-- packages/agents/src/db/vectorStore.ts | 164 ------------------ packages/ingester/src/generateEmbeddings.ts | 9 - 5 files changed, 22 insertions(+), 199 deletions(-) delete mode 100644 packages/agents/src/db/vectorStore.ts diff --git a/docker-compose.yml b/docker-compose.yml index 10916ab2..5abcc817 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,17 +30,16 @@ services: networks: - cairo_coder_network - # ingester: - # build: - # context: . - # dockerfile: ingest.dockerfile - # env_file: - # - .env - # depends_on: - # postgres: - # condition: service_started - # networks: - # - cairo_coder_network + ingester: + build: + context: . + dockerfile: ingest.dockerfile + profiles: ["ingester"] + depends_on: + postgres: + condition: service_started + networks: + - cairo_coder_network networks: cairo_coder_network: diff --git a/ingest.dockerfile b/ingest.dockerfile index b2795f0b..d90b2dee 100644 --- a/ingest.dockerfile +++ b/ingest.dockerfile @@ -20,6 +20,13 @@ RUN npm install -g pnpm@9.10.0 RUN pnpm install --frozen-lockfile RUN npm install -g turbo +# Install Antora +RUN npm install -g @antora/cli @antora/site-generator + +# Install mdbook +RUN curl -L https://github.com/rust-lang/mdBook/releases/download/v0.4.36/mdbook-v0.4.36-x86_64-unknown-linux-gnu.tar.gz | tar xz && \ + mv mdbook /usr/local/bin/ + # Set the command to run your script # Ensure this path is correct relative to the WORKDIR after build CMD ["turbo", "run", "generate-embeddings:yes"] \ No newline at end of file diff --git a/packages/agents/src/db/postgresVectorStore.ts b/packages/agents/src/db/postgresVectorStore.ts index 668ce47e..a14ba5c8 100644 --- a/packages/agents/src/db/postgresVectorStore.ts +++ b/packages/agents/src/db/postgresVectorStore.ts @@ -114,23 +114,16 @@ export class VectorStore { embeddings: Embeddings, ): Promise { if (!VectorStore.instance) { - logger.debug('config DB :', config); - logger.debug('config.POSTGRES_USER : ', config.POSTGRES_USER); - logger.debug('config.POSTGRES_HOST : ', config.POSTGRES_HOST); - logger.debug('config.POSTGRES_ROOT_DB : ', config.POSTGRES_ROOT_DB); - logger.debug('config.POSTGRES_PASSWORD : ', config.POSTGRES_PASSWORD); - logger.debug('config.POSTGRES_PORT : ', config.POSTGRES_PORT); - const pool = new Pool({ user: config.POSTGRES_USER, host: config.POSTGRES_HOST, database: config.POSTGRES_ROOT_DB, password: config.POSTGRES_PASSWORD, - port: parseInt(config.POSTGRES_PORT || '5432'), + port: parseInt(config.POSTGRES_PORT), max: 10, min: 5, }); - logger.debug('pool', pool); + 
pool.on('error', (err) => { logger.error('Postgres pool error:', err); }); @@ -141,7 +134,6 @@ export class VectorStore { // Create instance first, then initialize DB VectorStore.instance = new VectorStore(pool, embeddings, tableName); - logger.debug('VectorStore.instance', VectorStore.instance); await VectorStore.instance.initializeDb(); } return VectorStore.instance; @@ -156,7 +148,6 @@ export class VectorStore { try { // Enable vector extension await client.query('CREATE EXTENSION IF NOT EXISTS vector;'); - logger.debug('CREATE EXTENSION IF NOT EXISTS vector;'); // Create documents table if it doesn't exist await client.query(` CREATE TABLE IF NOT EXISTS ${this.tableName} ( @@ -170,18 +161,17 @@ export class VectorStore { UNIQUE(uniqueId) ); `); - logger.debug('CREATE TABLE IF NOT EXISTS ${this.tableName} (id SERIAL PRIMARY KEY, content TEXT NOT NULL, metadata JSONB NOT NULL, embedding vector(1536) NOT NULL, uniqueId VARCHAR(255), contentHash VARCHAR(255), source VARCHAR(50), UNIQUE(uniqueId));'); + // Create index on source for filtering await client.query(` - CREATE INDEX IF NOT EXISTS idx_${this.tableName}_source ON ${this.tableName} (source); + CREATE INDEX IF NOT EXISTS idxlogger.debug('CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding ON ${this.tableName} USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);');_${this.tableName}_source ON ${this.tableName} (source); `); - logger.debug('CREATE INDEX IF NOT EXISTS idx_${this.tableName}_source ON ${this.tableName} (source);'); + // Create vector index for similarity search await client.query(` CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding ON ${this.tableName} USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); `); - logger.debug('CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding ON ${this.tableName} USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);'); logger.info('PostgreSQL database initialized'); } finally { client.release(); diff --git a/packages/agents/src/db/vectorStore.ts b/packages/agents/src/db/vectorStore.ts deleted file mode 100644 index ac85839d..00000000 --- a/packages/agents/src/db/vectorStore.ts +++ /dev/null @@ -1,164 +0,0 @@ -// import { MongoDBAtlasVectorSearch } from '@langchain/mongodb'; -// import { MongoClient, Collection, ObjectId, Filter } from 'mongodb'; -// import { DocumentInterface } from '@langchain/core/documents'; -// import { OpenAIEmbeddings } from '@langchain/openai'; -// import { Embeddings } from '@langchain/core/embeddings'; -// import { logger } from '../utils'; -// import { VectorStoreConfig } from '../types'; -// import { DocumentSource } from '../types'; - -// /** -// * VectorStore class for managing document storage and similarity search -// */ -// export class VectorStore { -// private static instance: VectorStore | null = null; -// private client: MongoClient; -// collection: Collection; -// private vectorSearch: MongoDBAtlasVectorSearch; - -// private constructor( -// client: MongoClient, -// collection: Collection, -// vectorSearch: MongoDBAtlasVectorSearch, -// ) { -// this.client = client; -// this.collection = collection; -// this.vectorSearch = vectorSearch; -// } - -// static async getInstance( -// config: VectorStoreConfig, -// embeddings: Embeddings, -// ): Promise { -// if (!VectorStore.instance) { -// const client = new MongoClient(config.MONGODB_URI, { -// maxPoolSize: 10, // Adjust this value based on your needs -// minPoolSize: 5, -// }); -// await client.connect(); -// logger.info('Connected to MongoDB'); - -// 
const collection = client -// .db(config.DB_NAME) -// .collection(config.COLLECTION_NAME); - -// const vectorSearch = new MongoDBAtlasVectorSearch(embeddings, { -// collection, -// indexName: 'default', -// textKey: 'content', -// embeddingKey: 'embedding', -// }); - -// VectorStore.instance = new VectorStore(client, collection, vectorSearch); -// } -// return VectorStore.instance; -// } - -// /** -// * Perform similarity search -// * @param query - The query string -// * @param k - Number of results to return -// * @param sources - Optional source filter -// * @returns Promise -// */ -// async similaritySearch( -// query: string, -// k: number = 5, -// sources: DocumentSource | DocumentSource[], -// ): Promise { -// if (!sources) { -// return this.vectorSearch.similaritySearch(query, k); -// } - -// const sourcesArray = Array.isArray(sources) ? sources : [sources]; -// const filter: Filter = { -// preFilter: { source: { $in: sourcesArray } }, -// }; - -// return this.vectorSearch.similaritySearch(query, k, filter); -// } - -// /** -// * Add documents to the vector store -// * @param documents - Array of documents to add -// * @param uniqueIds - Optional array of unique IDs for the documents -// * @returns Promise -// */ -// async addDocuments(documents: any[], uniqueIds?: string[]): Promise { -// logger.info(`Adding ${documents.length} documents to the vector store`); -// await this.vectorSearch.addDocuments(documents, { ids: uniqueIds }); -// } - -// /** -// * Find a specific book chunk by name -// * @param name - Name of the book chunk -// * @returns Promise -// */ -// async findBookChunk(name: string): Promise { -// try { -// const match = await this.collection.findOne({ -// _id: name as unknown as ObjectId, -// }); -// if (match) { -// return { -// metadata: { _id: name, contentHash: match.contentHash }, -// pageContent: match.text, -// }; -// } -// return null; -// } catch (error) { -// logger.error('Error finding book chunk:', error); -// throw error; -// } -// } - -// /** -// * Remove book pages by their unique IDs -// * @param uniqueIds - Array of unique IDs to remove -// * @param source - Optional source filter -// * @returns Promise -// */ -// async removeBookPages( -// uniqueIds: string[], -// source: DocumentSource, -// ): Promise { -// const filter: Filter = { -// uniqueId: { $in: uniqueIds }, -// source: { $in: [source] }, -// }; - -// logger.info('Removing book pages with filter', filter); -// await this.collection.deleteMany(filter); -// } - -// /** -// * Get hashes of stored book pages -// * @param source - Optional source filter -// * @returns Promise> -// */ -// async getStoredBookPagesHashes( -// source: DocumentSource, -// ): Promise> { -// const filter: Filter = { source: { $in: [source] } }; -// const documents = await this.collection -// .find(filter, { projection: { uniqueId: 1, contentHash: 1 } }) -// .toArray(); - -// return documents.map((doc) => ({ -// uniqueId: doc.uniqueId, -// contentHash: doc.contentHash, -// })); -// } - -// /** -// * Close the connection to the database -// * @returns Promise -// */ -// async close(): Promise { -// logger.info('Disconnecting from MongoDB'); -// if (this.client) { -// await this.client.close(true); // Force close all connections in the pool -// VectorStore.instance = null; // Reset the singleton instance -// } -// } -// } diff --git a/packages/ingester/src/generateEmbeddings.ts b/packages/ingester/src/generateEmbeddings.ts index f6f9e0bf..efea57b4 100644 --- a/packages/ingester/src/generateEmbeddings.ts +++ 
b/packages/ingester/src/generateEmbeddings.ts @@ -1,4 +1,3 @@ -import dotenv from 'dotenv'; import { createInterface } from 'readline'; import { logger } from '@starknet-agent/agents/utils/index'; import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore'; @@ -7,10 +6,6 @@ import { loadOpenAIEmbeddingsModels } from '@starknet-agent/backend/config/provi import { DocumentSource } from '@starknet-agent/agents/types/index'; import { IngesterFactory } from './IngesterFactory'; -/** - * Initialize environment variables - */ -dotenv.config(); /** * Global vector store instance @@ -38,12 +33,8 @@ async function setupVectorStore(): Promise { // Get database configuration const dbConfig = getVectorDbConfig(); - logger.debug('dbConfig', dbConfig); - // Load embedding models const embeddingModels = await loadOpenAIEmbeddingsModels(); - logger.debug('embeddingModels', embeddingModels); const textEmbedding3Large = embeddingModels['Text embedding 3 large']; - logger.debug('textEmbedding3Large', textEmbedding3Large); if (!textEmbedding3Large) { throw new Error('Text embedding 3 large model not found'); From cc029b307b6c92732c2748cc254a087aaf9b8141 Mon Sep 17 00:00:00 2001 From: alvinouille Date: Mon, 21 Apr 2025 17:38:56 +0200 Subject: [PATCH 05/10] remove vectorOid --- packages/agents/src/db/postgresVectorStore.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/agents/src/db/postgresVectorStore.ts b/packages/agents/src/db/postgresVectorStore.ts index a14ba5c8..a449af3c 100644 --- a/packages/agents/src/db/postgresVectorStore.ts +++ b/packages/agents/src/db/postgresVectorStore.ts @@ -101,12 +101,6 @@ export class VectorStore { this.pool = pool; this.embeddings = embeddings; this.tableName = tableName; - - // Register vector type parser to handle vector data types - const vectorOid = 16428; // pgvector's OID, may need to be adjusted - pg.types.setTypeParser(vectorOid, (val) => { - return val ? 
JSON.parse(val) : null; - }); } static async getInstance( From 1a2dcdabb9b62d4e680fd858ae162feb1c39a82e Mon Sep 17 00:00:00 2001 From: alvinouille Date: Wed, 23 Apr 2025 18:02:08 +0200 Subject: [PATCH 06/10] fix: little error in postgresVectorStore --- docker-compose.yml | 4 +-- packages/agents/sample.config.toml | 30 ------------------- packages/agents/src/db/postgresVectorStore.ts | 5 +--- packages/agents/src/index.ts | 2 +- 4 files changed, 4 insertions(+), 37 deletions(-) delete mode 100644 packages/agents/sample.config.toml diff --git a/docker-compose.yml b/docker-compose.yml index 5abcc817..31a84ba0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,10 +5,10 @@ services: image: pgvector/pgvector:pg17 container_name: "postgresql" shm_size: 1g + env_file: + - .env ports: - 5432:5432 - env_file: - - .env volumes: - ./data:/var/lib/postgresql/data restart: unless-stopped diff --git a/packages/agents/sample.config.toml b/packages/agents/sample.config.toml deleted file mode 100644 index f86c06b6..00000000 --- a/packages/agents/sample.config.toml +++ /dev/null @@ -1,30 +0,0 @@ -[API_KEYS] -OPENAI = "" -GROQ = "" -ANTHROPIC = "" -DEEPSEEK = "" -GEMINI = "" - - -[VECTOR_DB] -MONGODB_URI = "" -DB_NAME = "" -COLLECTION_NAME = "" - -[GENERAL] -PORT = 3_001 -SIMILARITY_MEASURE = "cosine" - -[HOSTED_MODE] -# deepseek | anthropic | openai | gemini -DEFAULT_CHAT_PROVIDER = "gemini" -# Claude 3.5 Sonnet | DeepSeek Chat | Gemini Flash -DEFAULT_CHAT_MODEL = "Gemini Flash" -DEFAULT_FAST_CHAT_PROVIDER = "gemini" -DEFAULT_FAST_CHAT_MODEL = "Gemini Flash" -DEFAULT_EMBEDDING_PROVIDER = "openai" -DEFAULT_EMBEDDING_MODEL = "Text embedding 3 large" - -[VERSIONS] -STARKNET_FOUNDRY = "0.37.0" -SCARB = "2.9.2" \ No newline at end of file diff --git a/packages/agents/src/db/postgresVectorStore.ts b/packages/agents/src/db/postgresVectorStore.ts index a449af3c..beef5d38 100644 --- a/packages/agents/src/db/postgresVectorStore.ts +++ b/packages/agents/src/db/postgresVectorStore.ts @@ -4,9 +4,6 @@ import { logger } from '../utils'; import { VectorStoreConfig, DocumentSource } from '../types'; import pg, { Pool, PoolClient } from 'pg'; import { DatabaseError as PgError } from 'pg'; -import * as dotenv from 'dotenv'; -import * as path from 'path'; -import * as fs from 'fs'; /** @@ -158,7 +155,7 @@ export class VectorStore { // Create index on source for filtering await client.query(` - CREATE INDEX IF NOT EXISTS idxlogger.debug('CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding ON ${this.tableName} USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);');_${this.tableName}_source ON ${this.tableName} (source); + CREATE INDEX IF NOT EXISTS idx_${this.tableName}_source ON ${this.tableName} (source); `); // Create vector index for similarity search diff --git a/packages/agents/src/index.ts b/packages/agents/src/index.ts index 8fa80d54..99ba3979 100644 --- a/packages/agents/src/index.ts +++ b/packages/agents/src/index.ts @@ -2,5 +2,5 @@ export * from './types'; export * from './config/settings'; export * from './config/agent'; export * from './core/agentFactory'; -export * from './db/vectorStore'; +export * from './db/postgresVectorStore'; export * from './utils'; \ No newline at end of file From 13c6f6e873e8d597912ac0a76df775659db1fe2e Mon Sep 17 00:00:00 2001 From: alvinouille Date: Wed, 23 Apr 2025 18:30:03 +0200 Subject: [PATCH 07/10] fix: improve readme with new changes --- README.md | 84 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 36 
deletions(-) diff --git a/README.md b/README.md index ed52f620..4160a491 100644 --- a/README.md +++ b/README.md @@ -60,41 +60,12 @@ There are mainly 2 ways of installing Cairo Coder - With Docker, Without Docker. pnpm install ``` -5. Setup your database on [MongoDB Atlas](https://www.mongodb.com/products/platform/atlas-vector-search). - - - Create a new cluster. - - Create a new database, e.g. `cairo-coder`. - - Create a new collection inside the database that will store the embeddings. e.g. `chunks`. - - Create a vectorSearch index named **default** on the collection (tab `Atlas Search`). Example index configuration: - ```json - { - "fields": [ - { - "numDimensions": 2048, - "path": "embedding", - "similarity": "cosine", - "type": "vector" - }, - { - "path": "source", - "type": "filter" - } - ] - } - ``` - -6. Inside the packages/agents package, copy the `sample.config.toml` file to a `config.toml`. For development setups, you need only fill in the following fields: + +5. Inside the packages/agents package, copy the `sample.config.toml` file to a `config.toml`. For development setups, you need only fill in the following fields: - `OPENAI`: Your OpenAI API key. **You only need to fill this if you wish to use OpenAI's models**. - `ANTHROPIC`: Your Anthropic API key. **You only need to fill this if you wish to use Anthropic models**. - `SIMILARITY_MEASURE`: The similarity measure to use (This is filled by default; you can leave it as is if you are unsure about it.) - - Databases: `VECTOR_DB` is the database for Cairo documentation. You will need to fill this with your own database URL. Example: - ```toml - [VECTOR_DB] - MONGODB_URI = "mongodb+srv://mongo:..." - DB_NAME = "cairo-coder" - COLLECTION_NAME = "chunks" - ``` - Models: The `[HOSTED_MODE]` table defines the underlying LLM model used. We recommend using: ```toml @@ -105,21 +76,62 @@ There are mainly 2 ways of installing Cairo Coder - With Docker, Without Docker. DEFAULT_EMBEDDING_MODEL = "Text embedding 3 large" ``` -7. Generate the embeddings for the databases. You can do this by running `pnpm generate-embeddings`. If you followed the example above, you will need to run the script with option `6 (Everything)` to generate embeddings for all the documentation sources. +6. **Configure PostgreSQL Database** - ```bash - pnpm generate-embeddings + Cairo Coder uses PostgreSQL with pgvector for storing and retrieving vector embeddings. You need to configure both the database initialization and the application connection settings: + + **a. Database Container Initialization** (`.env` file): + + Create a `.env` file in the root directory with the following PostgreSQL configuration: + + ``` + POSTGRES_USER="YOUR_POSTGRES_USER" + POSTGRES_PASSWORD="YOUR_POSTGRES_PASSWORD" + POSTGRES_ROOT_DB="YOUR_POSTGRES_ROOT_DB" + POSTGRES_HOST="localhost" + POSTGRES_PORT="5432" + ``` + + This file is used by Docker to initialize the PostgreSQL container when it first starts. + The `POSTGRES_HOST` is set to "localhost" because the address is resolved from the database container's own perspective. + + **b. Application Connection Settings** (`config.toml` file): + + In the `packages/agents/config.toml` file, configure the database connection section: + + ```toml + [VECTOR_DB] + POSTGRES_USER="YOUR_POSTGRES_USER" + POSTGRES_PASSWORD="YOUR_POSTGRES_PASSWORD" + POSTGRES_ROOT_DB="YOUR_POSTGRES_ROOT_DB" + POSTGRES_HOST="postgres" + POSTGRES_PORT="5432" ``` + + This configuration is used by the backend and ingester services to connect to the database.
+ Note that `POSTGRES_HOST` is set to "postgres", which is the service name in docker-compose.yml. + + **Important:** Make sure to use the same password in both files. The first file initializes the + database, while the second is used by your application to connect to it. -8. Run the application using one of the following methods: +7. Run the application using one of the following methods: ```bash docker-compose up --build ``` -9. The API will be available at http://localhost:3000/generate. +8. The API will be available at http://localhost:3001/generate. + +## Running the Ingester + +After you have the main application running, you might need to run the ingester to process and embed documentation from various sources. The ingester is configured as a separate profile in the docker-compose file and can be executed as follows: + + ```bash + docker-compose --profile ingester up ingester + ``` +Once the ingester completes its task, the vector database will be populated with embeddings from all the supported documentation sources, making them available for RAG-based code generation requests to the API. ## API Usage From c5eb8152c3f470c59a6d96317c3fa5cc3de31673 Mon Sep 17 00:00:00 2001 From: alvinouille Date: Wed, 23 Apr 2025 18:59:45 +0200 Subject: [PATCH 08/10] fix: package name to cairo-coder --- .cursor/rules/imports.mdc | 8 ++++---- .cursor/rules/project_instructions.mdc | 8 ++++---- packages/agents/package.json | 4 ++-- packages/agents/tsconfig.json | 2 +- packages/backend/__tests__/server.test.ts | 2 +- packages/backend/package.json | 6 +++--- packages/backend/src/app.ts | 2 +- packages/backend/src/config/http.ts | 2 +- packages/backend/src/config/llm.ts | 4 ++-- packages/backend/src/config/provider/anthropic.ts | 4 ++-- packages/backend/src/config/provider/deepseek.ts | 4 ++-- packages/backend/src/config/provider/gemini.ts | 4 ++-- packages/backend/src/config/provider/groq.ts | 4 ++-- packages/backend/src/config/provider/index.ts | 2 +- packages/backend/src/config/provider/openai.ts | 4 ++-- packages/backend/src/routes/cairocoder.ts | 4 ++-- packages/backend/src/server.ts | 4 ++-- packages/backend/src/types/index.ts | 4 ++-- packages/backend/tsconfig.json | 2 +- packages/ingester/__tests__/AsciiDocIngester.test.ts | 2 +- packages/ingester/__tests__/IngesterFactory.test.ts | 2 +- packages/ingester/__tests__/MarkdownIngester.test.ts | 2 +- packages/ingester/package.json | 8 ++++---- packages/ingester/src/BaseIngester.ts | 6 +++--- packages/ingester/src/IngesterFactory.ts | 2 +- packages/ingester/src/generateEmbeddings.ts | 10 +++++----- packages/ingester/src/ingesters/AsciiDocIngester.ts | 4 ++-- packages/ingester/src/ingesters/CairoBookIngester.ts | 2 +- .../ingester/src/ingesters/CairoByExampleIngester.ts | 2 +- packages/ingester/src/ingesters/MarkdownIngester.ts | 4 ++-- .../src/ingesters/OpenZeppelinDocsIngester.ts | 4 ++-- .../ingester/src/ingesters/StarknetDocsIngester.ts | 2 +- .../src/ingesters/StarknetFoundryIngester.ts | 4 ++-- packages/ingester/src/shared.ts | 6 +++--- packages/ingester/src/utils/fileUtils.ts | 2 +- packages/ingester/src/utils/vectorStoreUtils.ts | 6 +++--- packages/ingester/tsconfig.json | 2 +- packages/typescript-config/package.json | 2 +- pnpm-lock.yaml | 12 ++++++------ 39 files changed, 79 insertions(+), 79 deletions(-) diff --git a/.cursor/rules/imports.mdc b/.cursor/rules/imports.mdc index 4c4fe415..58bee1a0 100644 --- a/.cursor/rules/imports.mdc +++ b/.cursor/rules/imports.mdc @@ -65,7 +65,7 @@ globs: *.ts,*.tsx,*.js,*.jsx - Import: `import { LLMConfig }
from '../types';` - `utils`: Utility functions. - Used in: `packages/backend/src/app.ts` - - Import: `import { logger } from '@starknet-agent/agents/utils/index';` + - Import: `import { logger } from '@cairo-coder/agents/utils/index';` ### Backend Modules - `routes`: API route definitions. @@ -74,7 +74,7 @@ globs: *.ts,*.tsx,*.js,*.jsx - `config`: Server configuration. - Used in: `packages/backend/src/server.ts` - Import: `import { initializeLLMConfig } from './config/llm';` - - Import: `import { getPort } from '@starknet-agent/agents/config/settings';` + - Import: `import { getPort } from '@cairo-coder/agents/config/settings';` - `cairocoder`: Main endpoint handler. - Used in: `packages/backend/src/routes/index.ts` - Import: `import cairocoderRouter from './cairocoder';` @@ -104,7 +104,7 @@ import { RagAgentFactory, LLMConfig, VectorStore, -} from '@starknet-agent/agents'; +} from '@cairo-coder/agents'; ``` ### For Agent Core @@ -122,7 +122,7 @@ import { LLMConfig } from '../types'; import * as fs from 'fs/promises'; import * as path from 'path'; import { Document } from '@langchain/core/documents'; -import { BookChunk, DocumentSource } from '@starknet-agent/agents/types/index'; +import { BookChunk, DocumentSource } from '@cairo-coder/agents/types/index'; import { BaseIngester } from '../BaseIngester'; import { BookConfig, BookPageDto, ParsedSection } from '../utils/types'; ``` diff --git a/.cursor/rules/project_instructions.mdc b/.cursor/rules/project_instructions.mdc index b76bb1d0..edb02ece 100644 --- a/.cursor/rules/project_instructions.mdc +++ b/.cursor/rules/project_instructions.mdc @@ -53,11 +53,11 @@ globs: - Add new documentation sources by extending the `BaseIngester` class and registering in `IngesterFactory` ## Commands -- **Build**: `pnpm build`, `pnpm --filter @starknet-agent/agents build` +- **Build**: `pnpm build`, `pnpm --filter @cairo-coder/agents build` - **Dev**: `pnpm dev` (starts all services with auto-reload) -- **Test**: `pnpm --filter @starknet-agent/agents test` -- **Single test**: `pnpm --filter @starknet-agent/agents test -- -t "test name pattern"` -- **Type check**: `pnpm --filter @starknet-agent/backend check-types` +- **Test**: `pnpm --filter @cairo-coder/agents test` +- **Single test**: `pnpm --filter @cairo-coder/agents test -- -t "test name pattern"` +- **Type check**: `pnpm --filter @cairo-coder/backend check-types` ## Testing - Jest is used for all testing diff --git a/packages/agents/package.json b/packages/agents/package.json index ebf21ab0..156ec76f 100644 --- a/packages/agents/package.json +++ b/packages/agents/package.json @@ -1,5 +1,5 @@ { - "name": "@starknet-agent/agents", + "name": "@cairo-coder/agents", "version": "1.0.0", "private": true, "scripts": { @@ -36,7 +36,7 @@ } }, "devDependencies": { - "@starknet-agent/typescript-config": "workspace:*", + "@cairo-coder/typescript-config": "workspace:*", "@types/jest": "^29.5.14", "@types/supertest": "^6.0.2", "jest": "^29.7.0", diff --git a/packages/agents/tsconfig.json b/packages/agents/tsconfig.json index e0095499..09e367f7 100644 --- a/packages/agents/tsconfig.json +++ b/packages/agents/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "@starknet-agent/typescript-config/base.json", + "extends": "@cairo-coder/typescript-config/base.json", "compilerOptions": { "outDir": "dist", "rootDir": "src", diff --git a/packages/backend/__tests__/server.test.ts b/packages/backend/__tests__/server.test.ts index c876ff14..6c3dbd96 100644 --- a/packages/backend/__tests__/server.test.ts +++ 
b/packages/backend/__tests__/server.test.ts
@@ -14,7 +14,7 @@ describe('Server', () => {
   }));
 
   // Mock config to avoid the getStarknetFoundryVersion issue
-  jest.mock('@starknet-agent/agents/config/settings', () => ({
+  jest.mock('@cairo-coder/agents/config/settings', () => ({
     getPort: jest.fn().mockReturnValue(3001),
     getStarknetFoundryVersion: jest.fn().mockReturnValue('0.1.0'),
     getScarbVersion: jest.fn().mockReturnValue('0.1.0'),
diff --git a/packages/backend/package.json b/packages/backend/package.json
index 07252092..8d7ebaaf 100644
--- a/packages/backend/package.json
+++ b/packages/backend/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "@starknet-agent/backend",
+  "name": "@cairo-coder/backend",
   "version": "1.0.0",
   "private": true,
   "scripts": {
@@ -18,7 +18,7 @@
     "@langchain/core": "^0.2.36",
     "@langchain/google-genai": "^0.1.8",
     "@langchain/openai": "^0.0.25",
-    "@starknet-agent/agents": "workspace:*",
+    "@cairo-coder/agents": "workspace:*",
     "@types/node": "^20",
     "cors": "^2.8.5",
     "express": "^4.21.2",
@@ -34,7 +34,7 @@
     }
   },
   "devDependencies": {
-    "@starknet-agent/typescript-config": "workspace:*",
+    "@cairo-coder/typescript-config": "workspace:*",
     "@types/cors": "^2.8.17",
     "@types/express": "^4.17.21",
     "@types/jest": "^29.5.14",
diff --git a/packages/backend/src/app.ts b/packages/backend/src/app.ts
index ce3f0d79..41492e3e 100644
--- a/packages/backend/src/app.ts
+++ b/packages/backend/src/app.ts
@@ -1,5 +1,5 @@
 import { createApplication } from './server';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 // Error handling for uncaught exceptions
 process.on('uncaughtException', (err, origin) => {
diff --git a/packages/backend/src/config/http.ts b/packages/backend/src/config/http.ts
index 51584199..677407ab 100644
--- a/packages/backend/src/config/http.ts
+++ b/packages/backend/src/config/http.ts
@@ -1,6 +1,6 @@
 import express from 'express';
 import routes from '../routes';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { Container } from './context';
 
 export function initializeHTTP(app: express.Application, container: Container) {
diff --git a/packages/backend/src/config/llm.ts b/packages/backend/src/config/llm.ts
index f6a91f43..4f6a820f 100644
--- a/packages/backend/src/config/llm.ts
+++ b/packages/backend/src/config/llm.ts
@@ -2,8 +2,8 @@ import {
   getAvailableEmbeddingModelProviders,
   getAvailableChatModelProviders,
 } from './provider';
-import { getHostedModeConfig } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getHostedModeConfig } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { ModelConfig } from '../types';
 
 let modelConfig: ModelConfig | null = null;
diff --git a/packages/backend/src/config/provider/anthropic.ts b/packages/backend/src/config/provider/anthropic.ts
index cac61f3d..07e28129 100644
--- a/packages/backend/src/config/provider/anthropic.ts
+++ b/packages/backend/src/config/provider/anthropic.ts
@@ -1,6 +1,6 @@
 import { ChatAnthropic } from '@langchain/anthropic';
-import { getAnthropicApiKey } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getAnthropicApiKey } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 export const loadAnthropicChatModels = async () => {
   const anthropicApiKey = getAnthropicApiKey();
diff --git a/packages/backend/src/config/provider/deepseek.ts b/packages/backend/src/config/provider/deepseek.ts
index aee50fb1..8adc7270 100644
--- a/packages/backend/src/config/provider/deepseek.ts
+++ b/packages/backend/src/config/provider/deepseek.ts
@@ -1,5 +1,5 @@
-import { getDeepseekApiKey } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getDeepseekApiKey } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { ChatOpenAI, OpenAI } from '@langchain/openai';
 
 export const loadDeepseekChatModels = async () => {
diff --git a/packages/backend/src/config/provider/gemini.ts b/packages/backend/src/config/provider/gemini.ts
index 25c4bb53..b4284ebd 100644
--- a/packages/backend/src/config/provider/gemini.ts
+++ b/packages/backend/src/config/provider/gemini.ts
@@ -1,5 +1,5 @@
-import { getGeminiApiKey } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getGeminiApiKey } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
 
 export const loadGeminiChatModels = async () => {
diff --git a/packages/backend/src/config/provider/groq.ts b/packages/backend/src/config/provider/groq.ts
index 59a24951..f46827da 100644
--- a/packages/backend/src/config/provider/groq.ts
+++ b/packages/backend/src/config/provider/groq.ts
@@ -1,6 +1,6 @@
 import { ChatOpenAI } from '@langchain/openai';
-import { getGroqApiKey } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getGroqApiKey } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 export const loadGroqChatModels = async () => {
   const groqApiKey = getGroqApiKey();
diff --git a/packages/backend/src/config/provider/index.ts b/packages/backend/src/config/provider/index.ts
index 8a70d5a6..a454b9e8 100644
--- a/packages/backend/src/config/provider/index.ts
+++ b/packages/backend/src/config/provider/index.ts
@@ -2,7 +2,7 @@ import { loadGroqChatModels } from './groq';
 import { loadOpenAIChatModels, loadOpenAIEmbeddingsModels } from './openai';
 import { loadAnthropicChatModels } from './anthropic';
 import { loadDeepseekChatModels } from './deepseek';
-import { getHostedModeConfig } from '@starknet-agent/agents/config/settings';
+import { getHostedModeConfig } from '@cairo-coder/agents/config/settings';
 import { loadGeminiChatModels } from './gemini';
 
 const chatModelProviders = {
diff --git a/packages/backend/src/config/provider/openai.ts b/packages/backend/src/config/provider/openai.ts
index 02fe7c4f..de6a49a1 100644
--- a/packages/backend/src/config/provider/openai.ts
+++ b/packages/backend/src/config/provider/openai.ts
@@ -1,6 +1,6 @@
 import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
-import { getOpenaiApiKey } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getOpenaiApiKey } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 export const loadOpenAIChatModels = async () => {
   const openAIApiKey = getOpenaiApiKey();
diff --git a/packages/backend/src/routes/cairocoder.ts b/packages/backend/src/routes/cairocoder.ts
index 86edd0cc..ba0da816 100644
--- a/packages/backend/src/routes/cairocoder.ts
+++ b/packages/backend/src/routes/cairocoder.ts
@@ -9,9 +9,9 @@ import {
   logger,
   RagAgentFactory,
   LLMConfig,
- } from '@starknet-agent/agents';
+ } from '@cairo-coder/agents';
 import { ChatCompletionRequest } from '../types';
-import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore';
+import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
 
 const router: Router = express.Router();
 
diff --git a/packages/backend/src/server.ts b/packages/backend/src/server.ts
index bb7b6b59..daff2b66 100644
--- a/packages/backend/src/server.ts
+++ b/packages/backend/src/server.ts
@@ -2,8 +2,8 @@ import express from 'express';
 import http from 'http';
 import cors from 'cors';
 import { initializeLLMConfig } from './config/llm';
-import { getPort } from '@starknet-agent/agents/config/settings';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { getPort } from '@cairo-coder/agents/config/settings';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { initializeHTTP } from './config/http';
 import { Container } from './config/context';
 import { validateConfig } from './config/validateConfig';
diff --git a/packages/backend/src/types/index.ts b/packages/backend/src/types/index.ts
index f76d7cce..98431770 100644
--- a/packages/backend/src/types/index.ts
+++ b/packages/backend/src/types/index.ts
@@ -1,8 +1,8 @@
 import eventEmitter from 'events';
 import { BaseMessage } from '@langchain/core/messages';
 import { Embeddings } from '@langchain/core/embeddings';
-import { LLMConfig } from '@starknet-agent/agents/types/index';
-import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore';
+import { LLMConfig } from '@cairo-coder/agents/types/index';
+import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { CorsOptions } from 'cors';
 import { Express } from 'express';
diff --git a/packages/backend/tsconfig.json b/packages/backend/tsconfig.json
index 998cd0f2..71370409 100644
--- a/packages/backend/tsconfig.json
+++ b/packages/backend/tsconfig.json
@@ -1,5 +1,5 @@
 {
-  "extends": "@starknet-agent/typescript-config/base.json",
+  "extends": "@cairo-coder/typescript-config/base.json",
   "compilerOptions": {
     "outDir": "dist",
     "sourceMap": true,
diff --git a/packages/ingester/__tests__/AsciiDocIngester.test.ts b/packages/ingester/__tests__/AsciiDocIngester.test.ts
index 1feea99f..cd1cb2eb 100644
--- a/packages/ingester/__tests__/AsciiDocIngester.test.ts
+++ b/packages/ingester/__tests__/AsciiDocIngester.test.ts
@@ -1,5 +1,5 @@
 import { Document } from '@langchain/core/documents';
-import { BookChunk, DocumentSource } from '@starknet-agent/agents/types/index';
+import { BookChunk, DocumentSource } from '@cairo-coder/agents/types/index';
 import {
   AsciiDocIngester,
   AsciiDocIngesterConfig,
diff --git a/packages/ingester/__tests__/IngesterFactory.test.ts b/packages/ingester/__tests__/IngesterFactory.test.ts
index dc278df5..d647453c 100644
--- a/packages/ingester/__tests__/IngesterFactory.test.ts
+++ b/packages/ingester/__tests__/IngesterFactory.test.ts
@@ -5,7 +5,7 @@ import { StarknetFoundryIngester } from '../src/ingesters/StarknetFoundryIngester';
 import { CairoByExampleIngester } from '../src/ingesters/CairoByExampleIngester';
 import { OpenZeppelinDocsIngester } from '../src/ingesters/OpenZeppelinDocsIngester';
 import { BaseIngester } from '../src/BaseIngester';
-import { DocumentSource } from '@starknet-agent/agents/types/index';
+import { DocumentSource } from '@cairo-coder/agents/types/index';
 
 // Mock the ingesters
 jest.mock('../src/ingesters/CairoBookIngester');
diff --git a/packages/ingester/__tests__/MarkdownIngester.test.ts b/packages/ingester/__tests__/MarkdownIngester.test.ts
index 36a36268..1b8ed1a0 100644
--- a/packages/ingester/__tests__/MarkdownIngester.test.ts
+++ b/packages/ingester/__tests__/MarkdownIngester.test.ts
@@ -1,7 +1,7 @@
 import { BookPageDto, isInsideCodeBlock } from '../src/shared';
 import { Document } from '@langchain/core/documents';
 import { MarkdownIngester } from '../src/ingesters/MarkdownIngester';
-import { DocumentSource } from '@starknet-agent/agents/types/index';
+import { DocumentSource } from '@cairo-coder/agents/types/index';
 
 // Create a concrete implementation of the abstract MarkdownIngester for testing
 class TestMarkdownIngester extends MarkdownIngester {
diff --git a/packages/ingester/package.json b/packages/ingester/package.json
index fc485979..af408c28 100644
--- a/packages/ingester/package.json
+++ b/packages/ingester/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "@starknet-agent/ingester",
+  "name": "@cairo-coder/ingester",
   "version": "1.0.0",
   "scripts": {
     "test": "jest",
@@ -12,8 +12,8 @@
     "@asciidoctor/tabs": "1.0.0-beta.6",
     "@iarna/toml": "^2.2.5",
     "@langchain/core": "^0.2.36",
-    "@starknet-agent/agents": "workspace:*",
-    "@starknet-agent/backend": "workspace:*",
+    "@cairo-coder/agents": "workspace:*",
+    "@cairo-coder/backend": "workspace:*",
     "adm-zip": "^0.5.16",
     "asciidoctor": "^3.0.4",
     "axios": "^1.7.9",
@@ -23,7 +23,7 @@
     "winston": "^3.17.0"
   },
   "devDependencies": {
-    "@starknet-agent/typescript-config": "workspace:*",
+    "@cairo-coder/typescript-config": "workspace:*",
     "@types/jest": "^29.5.14",
     "jest": "^29.7.0",
     "nodemon": "^3.1.9",
diff --git a/packages/ingester/src/BaseIngester.ts b/packages/ingester/src/BaseIngester.ts
index 3814d698..5e8c4e69 100644
--- a/packages/ingester/src/BaseIngester.ts
+++ b/packages/ingester/src/BaseIngester.ts
@@ -1,12 +1,12 @@
 import { Document } from '@langchain/core/documents';
-import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore';
+import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
 import {
   DocumentSource,
   BookChunk,
-} from '@starknet-agent/agents/types/index';
+} from '@cairo-coder/agents/types/index';
 import { BookConfig, BookPageDto, ParsedSection } from './utils/types';
 import { updateVectorStore as updateVectorStoreUtil } from './utils/vectorStoreUtils';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 /**
  * Abstract base class for all document ingesters
diff --git a/packages/ingester/src/IngesterFactory.ts b/packages/ingester/src/IngesterFactory.ts
index 62eb16b4..48b36645 100644
--- a/packages/ingester/src/IngesterFactory.ts
+++ b/packages/ingester/src/IngesterFactory.ts
@@ -1,4 +1,4 @@
-import { DocumentSource } from '@starknet-agent/agents/types/index';
+import { DocumentSource } from '@cairo-coder/agents/types/index';
 import { BaseIngester } from './BaseIngester';
 import { BookConfig } from './utils/types';
 
diff --git a/packages/ingester/src/generateEmbeddings.ts b/packages/ingester/src/generateEmbeddings.ts
index efea57b4..c69c246f 100644
--- a/packages/ingester/src/generateEmbeddings.ts
+++ b/packages/ingester/src/generateEmbeddings.ts
@@ -1,9 +1,9 @@
 import { createInterface } from 'readline';
-import { logger } from '@starknet-agent/agents/utils/index';
-import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore';
-import { getVectorDbConfig } from '@starknet-agent/agents/config/settings';
-import { loadOpenAIEmbeddingsModels } from '@starknet-agent/backend/config/provider/openai';
-import { DocumentSource } from '@starknet-agent/agents/types/index';
+import { logger } from '@cairo-coder/agents/utils/index';
+import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
+import { getVectorDbConfig } from '@cairo-coder/agents/config/settings';
+import { loadOpenAIEmbeddingsModels } from '@cairo-coder/backend/config/provider/openai';
+import { DocumentSource } from '@cairo-coder/agents/types/index';
 import { IngesterFactory } from './IngesterFactory';
 
diff --git a/packages/ingester/src/ingesters/AsciiDocIngester.ts b/packages/ingester/src/ingesters/AsciiDocIngester.ts
index 93ea9df4..2c0a6a7f 100644
--- a/packages/ingester/src/ingesters/AsciiDocIngester.ts
+++ b/packages/ingester/src/ingesters/AsciiDocIngester.ts
@@ -5,7 +5,7 @@ import { Document } from '@langchain/core/documents';
 import {
   BookChunk,
   DocumentSource,
-} from '@starknet-agent/agents/types/index';
+} from '@cairo-coder/agents/types/index';
 import { BaseIngester } from '../BaseIngester';
 import {
   BookConfig,
@@ -19,7 +19,7 @@ import {
   createAnchor,
 } from '../utils/contentUtils';
 import { processDocFiles } from '../utils/fileUtils';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { execSync } from 'child_process';
 import * as fsSync from 'fs';
diff --git a/packages/ingester/src/ingesters/CairoBookIngester.ts b/packages/ingester/src/ingesters/CairoBookIngester.ts
index ef9d9602..c1183eda 100644
--- a/packages/ingester/src/ingesters/CairoBookIngester.ts
+++ b/packages/ingester/src/ingesters/CairoBookIngester.ts
@@ -1,7 +1,7 @@
 import * as path from 'path';
 import { BookConfig } from '../utils/types';
 import { MarkdownIngester } from './MarkdownIngester';
-import { DocumentSource } from '@starknet-agent/agents/types/index';
+import { DocumentSource } from '@cairo-coder/agents/types/index';
 
 /**
  * Ingester for the Cairo Book documentation
diff --git a/packages/ingester/src/ingesters/CairoByExampleIngester.ts b/packages/ingester/src/ingesters/CairoByExampleIngester.ts
index 8c0358f0..865c46cd 100644
--- a/packages/ingester/src/ingesters/CairoByExampleIngester.ts
+++ b/packages/ingester/src/ingesters/CairoByExampleIngester.ts
@@ -1,7 +1,7 @@
 import * as path from 'path';
 import { BookConfig } from '../utils/types';
 import { MarkdownIngester } from './MarkdownIngester';
-import { DocumentSource } from '@starknet-agent/agents/types/index';
+import { DocumentSource } from '@cairo-coder/agents/types/index';
 
 /**
  * Ingester for the Cairo By Example documentation
diff --git a/packages/ingester/src/ingesters/MarkdownIngester.ts b/packages/ingester/src/ingesters/MarkdownIngester.ts
index 021276cf..4416914d 100644
--- a/packages/ingester/src/ingesters/MarkdownIngester.ts
+++ b/packages/ingester/src/ingesters/MarkdownIngester.ts
@@ -6,7 +6,7 @@ import { Document } from '@langchain/core/documents';
 import {
   BookChunk,
   DocumentSource,
-} from '@starknet-agent/agents/types/index';
+} from '@cairo-coder/agents/types/index';
 import { BaseIngester } from '../BaseIngester';
 import { BookConfig, BookPageDto, ParsedSection } from '../utils/types';
 import { processDocFiles } from '../utils/fileUtils';
@@ -16,7 +16,7 @@ import {
   createAnchor,
   addSectionWithSizeLimit,
 } from '../utils/contentUtils';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 /**
  * Abstract ingester for Markdown-based documentation
diff --git a/packages/ingester/src/ingesters/OpenZeppelinDocsIngester.ts b/packages/ingester/src/ingesters/OpenZeppelinDocsIngester.ts
index 769acaa9..a27b1289 100644
--- a/packages/ingester/src/ingesters/OpenZeppelinDocsIngester.ts
+++ b/packages/ingester/src/ingesters/OpenZeppelinDocsIngester.ts
@@ -5,9 +5,9 @@ import { Document } from '@langchain/core/documents';
 import {
   BookChunk,
   DocumentSource,
-} from '@starknet-agent/agents/types/index';
+} from '@cairo-coder/agents/types/index';
 import { BookConfig, BookPageDto } from '../utils/types';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { AsciiDocIngesterConfig } from './AsciiDocIngester';
 import { AsciiDocIngester } from './AsciiDocIngester';
diff --git a/packages/ingester/src/ingesters/StarknetDocsIngester.ts b/packages/ingester/src/ingesters/StarknetDocsIngester.ts
index c82259bc..990e82c5 100644
--- a/packages/ingester/src/ingesters/StarknetDocsIngester.ts
+++ b/packages/ingester/src/ingesters/StarknetDocsIngester.ts
@@ -4,7 +4,7 @@ import { BookConfig, BookPageDto, ParsedSection } from '../utils/types';
 import { AsciiDocIngester, AsciiDocIngesterConfig } from './AsciiDocIngester';
 import { processDocFiles } from '../utils/fileUtils';
 import * as fs from 'fs';
-import { BookChunk, DocumentSource } from '@starknet-agent/agents/types/index';
+import { BookChunk, DocumentSource } from '@cairo-coder/agents/types/index';
 
 /**
  * Ingester for the Starknet documentation
diff --git a/packages/ingester/src/ingesters/StarknetFoundryIngester.ts b/packages/ingester/src/ingesters/StarknetFoundryIngester.ts
index a87ad7d7..f977c8de 100644
--- a/packages/ingester/src/ingesters/StarknetFoundryIngester.ts
+++ b/packages/ingester/src/ingesters/StarknetFoundryIngester.ts
@@ -6,10 +6,10 @@ import { Document } from '@langchain/core/documents';
 import {
   BookChunk,
   DocumentSource,
-} from '@starknet-agent/agents/types/index';
+} from '@cairo-coder/agents/types/index';
 import { BookConfig, BookPageDto, ParsedSection } from '../utils/types';
 import { processDocFiles } from '../utils/fileUtils';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { exec as execCallback } from 'child_process';
 import { promisify } from 'util';
 import { MarkdownIngester } from './MarkdownIngester';
diff --git a/packages/ingester/src/shared.ts b/packages/ingester/src/shared.ts
index 8074cf8e..c31227c4 100644
--- a/packages/ingester/src/shared.ts
+++ b/packages/ingester/src/shared.ts
@@ -1,10 +1,10 @@
 import { createHash } from 'crypto';
 import { Document } from '@langchain/core/documents';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 import * as fs from 'fs/promises';
 import * as path from 'path';
-import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore';
-import { BookChunk, DocumentSource } from '@starknet-agent/agents/types/index';
+import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
+import { BookChunk, DocumentSource } from '@cairo-coder/agents/types/index';
 
 export const MAX_SECTION_SIZE = 20000;
diff --git a/packages/ingester/src/utils/fileUtils.ts b/packages/ingester/src/utils/fileUtils.ts
index 653d5625..31996c98 100644
--- a/packages/ingester/src/utils/fileUtils.ts
+++ b/packages/ingester/src/utils/fileUtils.ts
@@ -1,7 +1,7 @@
 import * as fs from 'fs/promises';
 import * as path from 'path';
 import { BookConfig, BookPageDto } from './types';
-import { logger } from '@starknet-agent/agents/utils/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 
 /**
  * Process documentation files from a directory
diff --git a/packages/ingester/src/utils/vectorStoreUtils.ts b/packages/ingester/src/utils/vectorStoreUtils.ts
index 641351da..00b3ff1c 100644
--- a/packages/ingester/src/utils/vectorStoreUtils.ts
+++ b/packages/ingester/src/utils/vectorStoreUtils.ts
@@ -1,11 +1,11 @@
 import { Document } from '@langchain/core/documents';
 import { createInterface } from 'readline';
-import { VectorStore } from '@starknet-agent/agents/db/postgresVectorStore';
+import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
 import {
   BookChunk,
   DocumentSource,
-} from '@starknet-agent/agents/types/index';
-import { logger } from '@starknet-agent/agents/utils/index';
+} from '@cairo-coder/agents/types/index';
+import { logger } from '@cairo-coder/agents/utils/index';
 import { YES_MODE } from '../generateEmbeddings';
 
 /**
diff --git a/packages/ingester/tsconfig.json b/packages/ingester/tsconfig.json
index 50d8e5fe..d5b19fd9 100644
--- a/packages/ingester/tsconfig.json
+++ b/packages/ingester/tsconfig.json
@@ -1,5 +1,5 @@
 {
-  "extends": "@starknet-agent/typescript-config/base.json",
+  "extends": "@cairo-coder/typescript-config/base.json",
   "compilerOptions": {
     "outDir": "dist",
     "sourceMap": true,
diff --git a/packages/typescript-config/package.json b/packages/typescript-config/package.json
index f8d9d5f1..768290c7 100644
--- a/packages/typescript-config/package.json
+++ b/packages/typescript-config/package.json
@@ -1,3 +1,3 @@
 {
-  "name": "@starknet-agent/typescript-config"
+  "name": "@cairo-coder/typescript-config"
 }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 77913679..16117a5b 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -66,7 +66,7 @@ importers:
       specifier: ^3.17.0
       version: 3.17.0
     devDependencies:
-      '@starknet-agent/typescript-config':
+      '@cairo-coder/typescript-config':
         specifier: workspace:*
         version: link:../typescript-config
       '@types/jest':
@@ -120,7 +120,7 @@ importers:
       '@langchain/openai':
         specifier: ^0.0.25
         version: 0.0.25(ws@8.18.1)
-      '@starknet-agent/agents':
+      '@cairo-coder/agents':
         specifier: workspace:*
         version: link:../agents
       '@types/node':
@@ -145,7 +145,7 @@ importers:
       specifier: ^8.18.1
      version: 8.18.1
     devDependencies:
-      '@starknet-agent/typescript-config':
+      '@cairo-coder/typescript-config':
         specifier: workspace:*
         version: link:../typescript-config
       '@types/cors':
@@ -199,10 +199,10 @@ importers:
       '@langchain/core':
         specifier: ^0.2.36
         version: 0.2.36(openai@4.85.4(ws@8.18.1)(zod@3.24.2))
-      '@starknet-agent/agents':
+      '@cairo-coder/agents':
         specifier: workspace:*
         version: link:../agents
-      '@starknet-agent/backend':
+      '@cairo-coder/backend':
         specifier: workspace:*
         version: link:../backend
       adm-zip:
@@ -227,7 +227,7 @@ importers:
       specifier: ^3.17.0
       version: 3.17.0
     devDependencies:
-      '@starknet-agent/typescript-config':
+      '@cairo-coder/typescript-config':
         specifier: workspace:*
         version: link:../typescript-config
       '@types/jest':

From 5611e4f29f8c50dd155478e1f47863e1031abf16 Mon Sep 17 00:00:00 2001
From: alvinouille
Date: Wed, 23 Apr 2025 21:06:27 +0200
Subject: [PATCH 09/10] fix: endpoint and readme

---
 .cursor/rules/imports.mdc                 | 2 +-
 .cursor/rules/navigation.mdc              | 6 +++---
 README.md                                 | 4 ++--
 docker-compose.yml                        | 8 ++++----
 packages/backend/src/routes/cairocoder.ts | 2 +-
 packages/backend/src/routes/index.ts      | 2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.cursor/rules/imports.mdc b/.cursor/rules/imports.mdc
index 58bee1a0..6b2e3e33 100644
--- a/.cursor/rules/imports.mdc
+++ b/.cursor/rules/imports.mdc
@@ -84,7 +84,7 @@ globs: *.ts,*.tsx,*.js,*.jsx
   - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts`
   - Import: `import { BaseIngester } from '../BaseIngester';`
 - `IngesterFactory`: Factory for creating ingesters.
-  - Used in: `packages/ingester/src/generateEmbeddings.ts`
+  - Used in: `packages/ingester/src/generateEmbeddings.ts`
   - Import: `import { IngesterFactory } from './IngesterFactory';`
 - `utils`: Utility functions for ingestion.
   - Used in: `packages/ingester/src/ingesters/MarkdownIngester.ts`
diff --git a/.cursor/rules/navigation.mdc b/.cursor/rules/navigation.mdc
index b73994ea..7b9f3adc 100644
--- a/.cursor/rules/navigation.mdc
+++ b/.cursor/rules/navigation.mdc
@@ -71,7 +71,7 @@ The Cairo Coder codebase is organized as a monorepo with multiple packages:
 - **To understand the RAG process**: Follow the pipeline components in `packages/agents/src/core/pipeline/`
 - **To see how user queries are processed**: Start at the API handlers in `packages/backend/src/api/cairocoder.ts`
 - **To understand data ingestion**: Check the ingester implementations in `packages/ingester/src/ingesters/`
-- **To modify embeddings generation**: Look at `packages/ingester/src/generateEmbeddings.ts`
+- **To modify embeddings generation**: Look at `packages/ingester/src/generateEmbeddings.ts`
 
 ## Key Files for Common Tasks
@@ -89,12 +89,12 @@ The Cairo Coder codebase is organized as a monorepo with multiple packages:
 2. Implement handlers in `packages/backend/src/api/cairocoder.ts` or add new handlers
 
 ### Running Ingestion
-1. Use the script at `packages/ingester/src/generateEmbeddings.ts`
+1. Use the script at `packages/ingester/src/generateEmbeddings.ts`
 2. Or run `pnpm generate-embeddings` from the project root
 
 ## Key Endpoints
 
-- `/generate`: Main endpoint for Cairo code generation
+- `/chat/completions`: Main endpoint for Cairo code generation
   - Accepts POST requests with messages array in OpenAI format
   - Returns generated Cairo code
 
diff --git a/README.md b/README.md
index 4160a491..9f3fba30 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ There are mainly 2 ways of installing Cairo Coder - With Docker, Without Docker.
    docker-compose up --build
    ```
 
-8. The API will be available at http://localhost:3000/generate.
+8. The API will be available at http://localhost:3000/chat/completions.
 
 ## Running the Ingester
 
@@ -140,7 +140,7 @@ Cairo Coder provides a simple REST API compatible with the OpenAI format for eas
 ### Endpoint
 
 ```
-POST /generate
+POST /chat/completions
 ```
 
 ### Request Format
diff --git a/docker-compose.yml b/docker-compose.yml
index 31a84ba0..5db2513c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,7 +13,7 @@ services:
       - ./data:/var/lib/postgresql/data
     restart: unless-stopped
     networks:
-      - cairo_coder_network
+      - cairo_coder
 
   backend:
     build:
@@ -28,7 +28,7 @@ services:
       condition: service_started
     restart: unless-stopped
     networks:
-      - cairo_coder_network
+      - cairo_coder
 
   ingester:
     build:
@@ -39,10 +39,10 @@ services:
       postgres:
         condition: service_started
     networks:
-      - cairo_coder_network
+      - cairo_coder
 
 networks:
-  cairo_coder_network:
+  cairo_coder:
 
 volumes:
   postgres_data:
diff --git a/packages/backend/src/routes/cairocoder.ts b/packages/backend/src/routes/cairocoder.ts
index ba0da816..8a05d597 100644
--- a/packages/backend/src/routes/cairocoder.ts
+++ b/packages/backend/src/routes/cairocoder.ts
@@ -173,7 +173,7 @@ router.post('/', async (req, res) => {
       }
     });
   } catch (error) {
-    logger.error('Error in /generate:', error);
+    logger.error('Error in /chat/completions:', error);
 
     // Map common errors to OpenAI error format
     if (error instanceof Error) {
diff --git a/packages/backend/src/routes/index.ts b/packages/backend/src/routes/index.ts
index c647f34d..0bdcc02b 100644
--- a/packages/backend/src/routes/index.ts
+++ b/packages/backend/src/routes/index.ts
@@ -3,6 +3,6 @@ import cairocoderRouter from './cairocoder';
 
 const router: Router = express.Router();
 
-router.use('/generate', cairocoderRouter);
+router.use('/chat/completions', cairocoderRouter);
 
 export default router;

From 2e517bdb4fbcf39c7565ceba18ad362039d43ec3 Mon Sep 17 00:00:00 2001
From: alvinouille
Date: Thu, 24 Apr 2025 10:17:52 +0200
Subject: [PATCH 10/10] fix: project instructions

---
 .cursor/rules/project_instructions.mdc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.cursor/rules/project_instructions.mdc b/.cursor/rules/project_instructions.mdc
index edb02ece..58e60960 100644
--- a/.cursor/rules/project_instructions.mdc
+++ b/.cursor/rules/project_instructions.mdc
@@ -1,6 +1,6 @@
 ---
 description: Project Instructions
-globs: 
+globs:
 ---
 
 # Starknet Agent Project Instructions
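
As a smoke test for the renamed route, a minimal client call could look like the sketch below. It assumes the standard OpenAI chat-completions body and response shape; only the `/chat/completions` route and the OpenAI-style `messages` array are confirmed by the patches above, and the `generateCairoCode` helper name is hypothetical.

```ts
// Minimal sketch of a request against the renamed endpoint, assuming the
// OpenAI-compatible format the README describes. The response layout
// (`choices[0].message.content`) is an assumption based on the OpenAI
// chat-completions schema, not something these patches show.
async function generateCairoCode(prompt: string): Promise<string> {
  const response = await fetch('http://localhost:3000/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: prompt }],
    }),
  });

  if (!response.ok) {
    // Errors come back in OpenAI's error format (see the logger.error hunk).
    throw new Error(`Request failed: ${response.status} ${await response.text()}`);
  }

  const data = await response.json();
  return data.choices[0].message.content;
}

// Example usage once `docker-compose up --build` has the backend running:
// generateCairoCode('Write a Cairo contract that stores a counter').then(console.log);
```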