diff --git a/python/src/cairo_coder/dspy/document_retriever.py b/python/src/cairo_coder/dspy/document_retriever.py index 2c8025a..0595dad 100644 --- a/python/src/cairo_coder/dspy/document_retriever.py +++ b/python/src/cairo_coder/dspy/document_retriever.py @@ -534,7 +534,6 @@ def __init__( vector_db: SourceFilteredPgVectorRM | None = None, max_source_count: int = 5, similarity_threshold: float = 0.4, - embedding_model: str = "gemini-embedding-001", ): """ Initialize the DocumentRetrieverProgram. @@ -544,12 +543,8 @@ def __init__( vector_db: Optional pre-initialized vector database instance max_source_count: Maximum number of documents to retrieve similarity_threshold: Minimum similarity score for document inclusion - embedding_model: Gemini embedding model to use for reranking """ super().__init__() - # TODO: These should not be literal constants like this. - # TODO: if the vector_db is setup upon startup, then this should not be done here. - self.embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072, batch_size=512) self.vector_store_config = vector_store_config if vector_db is None: @@ -558,18 +553,15 @@ def __init__( self.vector_db = SourceFilteredPgVectorRM( db_url=db_url, pg_table_name=pg_table_name, - embedding_func=self.embedder, content_field="content", fields=["id", "content", "metadata"], k=max_source_count, - embedding_model='gemini-embedding-001', include_similarity=True, ) else: self.vector_db = vector_db self.max_source_count = max_source_count self.similarity_threshold = similarity_threshold - self.embedding_model = embedding_model async def aforward( self, processed_query: ProcessedQuery, sources: list[DocumentSource] | None = None @@ -591,7 +583,6 @@ async def aforward( # Step 1: Fetch documents from vector store documents = await self._afetch_documents(processed_query, sources) - # TODO: No source found means no answer can be given! if not documents: return [] @@ -621,7 +612,6 @@ def forward( sync_retriever = SourceFilteredPgVectorRM( db_url=db_url, pg_table_name=pg_table_name, - embedding_func=self.embedder, content_field="content", fields=["id", "content", "metadata"], k=self.max_source_count, @@ -665,7 +655,6 @@ async def _afetch_documents( search_queries = processed_query.search_queries if not search_queries or len(search_queries) == 0: - # TODO: revert search_queries = [processed_query.original] @@ -740,7 +729,6 @@ def create_document_retriever( vector_db: SourceFilteredPgVectorRM | None = None, max_source_count: int = 5, similarity_threshold: float = 0.4, - embedding_model: str = "text-embedding-3-large", ) -> DocumentRetrieverProgram: """ Factory function to create a DocumentRetrieverProgram instance. @@ -750,7 +738,6 @@ def create_document_retriever( vector_db: Optional pre-initialized vector database instance max_source_count: Maximum number of documents to retrieve similarity_threshold: Minimum similarity score for document inclusion - embedding_model: OpenAI embedding model to use for reranking Returns: Configured DocumentRetrieverProgram instance @@ -760,5 +747,4 @@ def create_document_retriever( vector_db=vector_db, max_source_count=max_source_count, similarity_threshold=similarity_threshold, - embedding_model=embedding_model, ) diff --git a/python/src/cairo_coder/dspy/generation_program.py b/python/src/cairo_coder/dspy/generation_program.py index 42888ce..16d83ff 100644 --- a/python/src/cairo_coder/dspy/generation_program.py +++ b/python/src/cairo_coder/dspy/generation_program.py @@ -20,7 +20,6 @@ logger = structlog.get_logger(__name__) -# TODO: Find a way to properly "erase" common mistakes like PrintTrait imports. class CairoCodeGeneration(Signature): """ Analyze a Cairo programming query and use the context to generate a high-quality Cairo code solution and explanations. diff --git a/python/src/cairo_coder/dspy/pgvector_rm.py b/python/src/cairo_coder/dspy/pgvector_rm.py index 747c4f8..9813b06 100644 --- a/python/src/cairo_coder/dspy/pgvector_rm.py +++ b/python/src/cairo_coder/dspy/pgvector_rm.py @@ -1,4 +1,3 @@ -import warnings from collections.abc import Callable from typing import Optional @@ -12,13 +11,6 @@ raise ImportError( "The 'pgvector' extra is required to use PgVectorRM. Install it with `pip install dspy-ai[pgvector]`. Also, try `pip install pgvector psycopg2`.", ) from e -try: - import openai -except ImportError: - warnings.warn( - "`openai` is not installed. Install it with `pip install openai` to use OpenAI embedding models.", - stacklevel=2, category=ImportWarning, - ) class PgVectorRM(dspy.Retrieve): @@ -33,37 +25,40 @@ class PgVectorRM(dspy.Retrieve): Args: db_url (str): A PostgreSQL database URL in psycopg2's DSN format pg_table_name (Optional[str]): name of the table containing passages - openai_client (openai.OpenAI): OpenAI client to use for computing query embeddings. Either openai_client or embedding_func must be provided. - embedding_func (Callable): A function to use for computing query embeddings. Either openai_client or embedding_func must be provided. + embedding_func (Callable): A function to use for computing query embeddings. If not provided, uses dspy.settings.embedder. content_field (str = "text"): Field containing the passage text. Defaults to "text" k (Optional[int]): Default number of top passages to retrieve. Defaults to 20 embedding_field (str = "embedding"): Field containing passage embeddings. Defaults to "embedding" fields (List[str] = ['text']): Fields to retrieve from the table. Defaults to "text" - embedding_model (str = "text-embedding-ada-002"): Field containing the OpenAI embedding model to use. Defaults to "text-embedding-ada-002" Examples: Below is a code snippet that shows how to use PgVector as the default retriever ```python import dspy - import openai - import psycopg2 - openai.api_key = os.environ.get("OPENAI_API_KEY", None) - openai_client = openai.OpenAI() + # Configure embedder at startup + embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072) + dspy.configure(embedder=embedder) - llm = dspy.OpenAI(model="gpt-3.5-turbo") + llm = dspy.LM("gemini/gemini-flash-latest") + dspy.configure(lm=llm) - DATABASE_URL should be in the format postgresql://user:password@host/database - db_url=os.getenv("DATABASE_URL") + # DATABASE_URL should be in the format postgresql://user:password@host/database + db_url = os.getenv("DATABASE_URL") - retriever_model = PgVectorRM(conn, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20) - dspy.settings.configure(lm=llm, rm=retriever_model) + # embedding_func will default to dspy.settings.embedder + retriever_model = PgVectorRM(db_url, "paragraphs", fields=["text", "document_id"], k=20) + dspy.configure(rm=retriever_model) ``` - Below is a code snippet that shows how to use PgVector in the forward() function of a module + Below is a code snippet that shows how to use PgVector with a custom embedding function ```python - self.retrieve = PgVectorRM(db_url, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20) + def my_embedder(text: str) -> list[float]: + # Your custom embedding logic + return embeddings + + self.retrieve = PgVectorRM(db_url, "paragraphs", embedding_func=my_embedder, fields=["text", "document_id"], k=20) ``` """ @@ -71,23 +66,26 @@ def __init__( self, db_url: str, pg_table_name: str, - openai_client: Optional[openai.OpenAI] = None, embedding_func: Optional[Callable] = None, k: int = 20, embedding_field: str = "embedding", fields: Optional[list[str]] = None, content_field: str = "text", - embedding_model: str = "text-embedding-ada-002", include_similarity: bool = False, ): """ k = 20 is the number of paragraphs to retrieve """ - assert ( - openai_client or embedding_func - ), "Either openai_client or embedding_func must be provided." - self.openai_client = openai_client - self.embedding_func = embedding_func + # Use provided embedding_func or fall back to dspy.settings.embedder + if embedding_func is None: + if dspy.settings.embedder is None: + raise ValueError( + "No embedding_func provided and no embedder configured in dspy.settings. " + "Either pass embedding_func or configure with: dspy.configure(embedder=...)" + ) + self.embedding_func = dspy.settings.embedder + else: + self.embedding_func = embedding_func self.conn = psycopg2.connect(db_url) register_vector(self.conn) @@ -95,7 +93,6 @@ def __init__( self.fields = fields or ["text"] self.content_field = content_field self.embedding_field = embedding_field - self.embedding_model = embedding_model self.include_similarity = include_similarity super().__init__(k=k) @@ -144,14 +141,5 @@ def forward(self, query: str, k: int = None): return retrieved_docs def _get_embeddings(self, query: str) -> list[float]: - if self.openai_client is not None: - return ( - self.openai_client.embeddings.create( - model=self.embedding_model, - input=query, - encoding_format="float", - ) - .data[0] - .embedding - ) + """Get embeddings for a query using the configured embedding function.""" return self.embedding_func(query) diff --git a/python/src/cairo_coder/dspy/query_processor.py b/python/src/cairo_coder/dspy/query_processor.py index f49b47c..4b83822 100644 --- a/python/src/cairo_coder/dspy/query_processor.py +++ b/python/src/cairo_coder/dspy/query_processor.py @@ -187,7 +187,6 @@ def _validate_resources(self, resources: list[str]) -> list[DocumentSource]: continue # Return valid resources or default fallback - # TODO: Upon failure, this should return an error message to the user. return valid_resources if valid_resources else list(DocumentSource) def _is_contract_query(self, query: str) -> bool: diff --git a/python/src/cairo_coder/optimizers/generation_optimizer_cairo-coder.py b/python/src/cairo_coder/optimizers/generation_optimizer_cairo-coder.py index 3619618..63302e3 100644 --- a/python/src/cairo_coder/optimizers/generation_optimizer_cairo-coder.py +++ b/python/src/cairo_coder/optimizers/generation_optimizer_cairo-coder.py @@ -31,23 +31,21 @@ def _(): # mlflow.set_experiment("DSPy") # mlflow.dspy.autolog() - ## Setup VectorDB for document retrieval + ## Setup embedder and LM in dspy.configure embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072, batch_size=512) + lm = dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False) + dspy.configure(lm=lm, adapter=XMLAdapter(), embedder=embedder) + + ## Setup VectorDB for document retrieval - will use dspy.settings.embedder vector_store_config = get_vector_store_config() vector_db = SourceFilteredPgVectorRM( db_url=vector_store_config.dsn, pg_table_name=vector_store_config.table_name, - embedding_func=embedder, content_field="content", fields=["id", "content", "metadata"], k=5, # Default k, will be overridden by retriever - embedding_model="gemini-embedding-001", include_similarity=True, ) - - # Programs to be optimized: QueryProcessing --> OptimizedQuery --> Document retrieval - lm = dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False) - dspy.configure(lm=lm, adapter=XMLAdapter()) return XMLAdapter, dspy, os, vector_db, vector_store_config diff --git a/python/src/cairo_coder/optimizers/generation_optimizer_starknet-agent.py b/python/src/cairo_coder/optimizers/generation_optimizer_starknet-agent.py index 2337f16..19e75ad 100644 --- a/python/src/cairo_coder/optimizers/generation_optimizer_starknet-agent.py +++ b/python/src/cairo_coder/optimizers/generation_optimizer_starknet-agent.py @@ -31,23 +31,21 @@ def _(): # mlflow.set_experiment("DSPy") # mlflow.dspy.autolog() - ## Setup VectorDB for document retrieval + ## Setup embedder and LM in dspy.configure embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072, batch_size=512) + lm = dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False) + dspy.configure(lm=lm, adapter=XMLAdapter(), embedder=embedder) + + ## Setup VectorDB for document retrieval - will use dspy.settings.embedder vector_store_config = get_vector_store_config() vector_db = SourceFilteredPgVectorRM( db_url=vector_store_config.dsn, pg_table_name=vector_store_config.table_name, - embedding_func=embedder, content_field="content", fields=["id", "content", "metadata"], k=5, # Default k, will be overridden by retriever - embedding_model="gemini-embedding-001", include_similarity=True, ) - - # Programs to be optimized: QueryProcessing --> OptimizedQuery --> Document retrieval - lm = dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False) - dspy.configure(lm=lm, adapter=XMLAdapter()) return XMLAdapter, dspy, os, vector_db, vector_store_config diff --git a/python/src/cairo_coder/optimizers/retrieval_optimizer.py b/python/src/cairo_coder/optimizers/retrieval_optimizer.py index 176eb71..1ca7d14 100644 --- a/python/src/cairo_coder/optimizers/retrieval_optimizer.py +++ b/python/src/cairo_coder/optimizers/retrieval_optimizer.py @@ -31,23 +31,21 @@ def _(): # mlflow.set_experiment("DSPy") # mlflow.dspy.autolog() - ## Setup VectorDB for document retrieval + ## Setup embedder and LM in dspy.configure embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072, batch_size=512) + lm = dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=15000, cache=False) + dspy.configure(lm=lm, adapter=XMLAdapter(), embedder=embedder) + + ## Setup VectorDB for document retrieval - will use dspy.settings.embedder vector_store_config = get_vector_store_config() vector_db = SourceFilteredPgVectorRM( db_url=vector_store_config.dsn, pg_table_name=vector_store_config.table_name, - embedding_func=embedder, content_field="content", fields=["id", "content", "metadata"], k=5, # Default k, will be overridden by retriever - embedding_model="text-embedding-3-large", include_similarity=True, ) - - # Programs to be optimized: QueryProcessing --> OptimizedQuery --> Document retrieval - lm = dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=15000, cache=False) - dspy.configure(lm=lm, adapter=XMLAdapter()) return XMLAdapter, dspy, os, vector_db, vector_store_config diff --git a/python/src/cairo_coder/server/app.py b/python/src/cairo_coder/server/app.py index 35bb31e..514be3c 100644 --- a/python/src/cairo_coder/server/app.py +++ b/python/src/cairo_coder/server/app.py @@ -182,11 +182,14 @@ def __init__( # Setup routes self._setup_routes() - # TODO: This is the place where we should select the proper LLM configuration. - # TODO: For now we just Hard-code DSPY - GEMINI - dspy.configure(lm=dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False), adapter=XMLAdapter()) - dspy.configure(callbacks=[AgentLoggingCallback()]) - dspy.configure(track_usage=True) + embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072, batch_size=512) + dspy.configure( + lm=dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False), + adapter=XMLAdapter(), + embedder=embedder, + callbacks=[AgentLoggingCallback()], + track_usage=True, + ) def _setup_routes(self): """Setup FastAPI routes matching TypeScript backend.""" @@ -641,17 +644,13 @@ async def lifespan(app: FastAPI): config = ConfigManager.load_config() vector_store_config = config.vector_store - # TODO: These should not be literal constants like this. - embedder = dspy.Embedder("gemini/gemini-embedding-001", dimensions=3072, batch_size=512) - + # embedding_func will default to dspy.settings.embedder (configured in __init__) _vector_db = SourceFilteredPgVectorRM( db_url=vector_store_config.dsn, pg_table_name=vector_store_config.table_name, - embedding_func=embedder, content_field="content", fields=["id", "content", "metadata"], k=5, # Default k, will be overridden by retriever - embedding_model='gemini-embedding-001', include_similarity=True, ) @@ -686,9 +685,6 @@ def main(): parser.add_argument("--workers", type=int, default=5, help="Number of workers to run") args = parser.parse_args() - # TODO: configure DSPy with the proper LM. - # TODO: Find a proper pattern for it? - # TODO: multi-model management? uvicorn.run( "cairo_coder.server.app:create_app_factory", host="0.0.0.0",