Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add kg agent search to pipeline rebased 2 #483

Merged
merged 15 commits into from
Jun 19, 2024
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html linguist-documentation
6 changes: 3 additions & 3 deletions docs/pages/cookbooks/knowledge-graph.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ from r2r import (
GenerationConfig,
Pipeline,
R2RAppBuilder,
KGAgentPipe,
KGAgentSearchPipe,
Relation,
run_pipeline,
)
Expand Down Expand Up @@ -445,13 +445,13 @@ Results:
Finally, we are in a position to automatically answer difficult-to-manage queries with a knowledge agent. The snippet below injects our custom schema into a generic few-shot prompt and uses gpt-4o to create a relevant query.

```python filename="r2r/examples/scripts/advanced_kg_cookbook.py"
kg_agent_pipe = KGAgentPipe(
kg_agent_search_pipe = KGAgentSearchPipe(
r2r_app.providers.kg, r2r_app.providers.llm, r2r_app.providers.prompt
)

# Define the pipeline
kg_pipe = Pipeline()
kg_pipe.add_pipe(kg_agent_pipe)
kg_pipe.add_pipe(kg_agent_search_pipe)

kg.update_agent_prompt(prompt_provider, entity_types, relations)

Expand Down
4 changes: 2 additions & 2 deletions docs/pages/deep-dive/ingestion.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The **R2RVectorStoragePipe** stores the generated embeddings in a vector databas
### Knowledge Graph Pipes
When the knowledge graph provider settings are non-null, the pipeline includes pipes for generating and storing knowledge graph data.

- **KGAgentPipe**: Generates Cypher queries to interact with a Neo4j knowledge graph.
- **KGAgentSearchPipe**: Generates Cypher queries to interact with a Neo4j knowledge graph.
- **KGStoragePipe**: Stores the generated knowledge graph data in the specified knowledge graph database.


Expand Down Expand Up @@ -72,7 +72,7 @@ custom_ingestion_pipeline = CustomIngestionPipeline()
pipelines = R2RPipelineFactory(config, pipes).create_pipelines(
ingestion_pipeline = custom_ingestion_pipeline
)
r2r = R2RApp(config, providers, pipelines)
r2r = R2RApp(config=config, providers=providers, pipes=pipes, pipelines=pipelines)
```

### Conclusion
Expand Down
2 changes: 1 addition & 1 deletion docs/public/swagger.json

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions r2r/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@
"VectorEntry",
"VectorType",
"Vector",
"SearchRequest",
"SearchResult",
"VectorSearchRequest",
"VectorSearchResult",
"AsyncPipe",
"PipeType",
"AsyncState",
Expand Down Expand Up @@ -98,7 +98,7 @@
"R2RPromptProvider",
"WebSearchPipe",
"R2RAppBuilder",
"KGAgentPipe",
"KGAgentSearchPipe",
# Prebuilts
"MultiSearchPipe",
"R2RPipeFactoryWithMultiSearch",
Expand Down
38 changes: 26 additions & 12 deletions r2r/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,21 @@
extract_triples,
)
from .abstractions.llama_abstractions import VectorStoreQuery
from .abstractions.llm import LLMChatCompletion, LLMChatCompletionChunk
from .abstractions.llm import (
GenerationConfig,
LLMChatCompletion,
LLMChatCompletionChunk,
)
from .abstractions.prompt import Prompt
from .abstractions.search import SearchRequest, SearchResult
from .abstractions.search import (
AggregateSearchResult,
KGSearchRequest,
KGSearchResult,
KGSearchSettings,
VectorSearchRequest,
VectorSearchResult,
VectorSearchSettings,
)
from .abstractions.user import UserStats
from .abstractions.vector import Vector, VectorEntry, VectorType
from .logging.kv_logger import (
Expand Down Expand Up @@ -51,19 +63,16 @@
TextParser,
XLSXParser,
)
from .pipeline.base_pipeline import (
EvalPipeline,
IngestionPipeline,
Pipeline,
RAGPipeline,
SearchPipeline,
)
from .pipeline.base_pipeline import EvalPipeline, Pipeline
from .pipeline.ingestion_pipeline import IngestionPipeline
from .pipeline.rag_pipeline import RAGPipeline
from .pipeline.search_pipeline import SearchPipeline
from .pipes.base_pipe import AsyncPipe, AsyncState, PipeType
from .pipes.loggable_pipe import LoggableAsyncPipe
from .providers.embedding_provider import EmbeddingConfig, EmbeddingProvider
from .providers.eval_provider import EvalConfig, EvalProvider
from .providers.kg_provider import KGConfig, KGProvider
from .providers.llm_provider import GenerationConfig, LLMConfig, LLMProvider
from .providers.llm_provider import LLMConfig, LLMProvider
from .providers.prompt_provider import PromptConfig, PromptProvider
from .providers.vector_db_provider import VectorDBConfig, VectorDBProvider
from .utils import (
Expand Down Expand Up @@ -99,8 +108,13 @@
"VectorEntry",
"VectorType",
"Vector",
"SearchRequest",
"SearchResult",
"VectorSearchRequest",
"VectorSearchResult",
"VectorSearchSettings",
"KGSearchRequest",
"KGSearchResult",
"KGSearchSettings",
"AggregateSearchResult",
"AsyncPipe",
"PipeType",
"AsyncState",
Expand Down
21 changes: 21 additions & 0 deletions r2r/core/abstractions/llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
"""Abstractions for the LLM model."""

from typing import Optional

from openai.types.chat import ChatCompletion, ChatCompletionChunk
from pydantic import BaseModel

LLMChatCompletion = ChatCompletion
LLMChatCompletionChunk = ChatCompletionChunk


class GenerationConfig(BaseModel):
    """Configuration for LLM text generation (model choice, sampling, streaming)."""

    temperature: float = 0.1  # sampling temperature; lower = more deterministic
    top_p: float = 1.0  # nucleus-sampling probability mass cutoff
    top_k: int = 100  # top-k sampling cutoff
    max_tokens_to_sample: int = 1_024  # cap on generated tokens
    model: str = "gpt-4o"  # default model identifier
    stream: bool = False  # stream completion chunks incrementally when True
    functions: Optional[list[dict]] = None  # OpenAI-style function/tool specs, if any
    skip_special_tokens: bool = False
    stop_token: Optional[str] = None  # stop generation when this token is produced
    num_beams: int = 1  # beam-search width (1 = no beam search)
    do_sample: bool = True  # sample vs. greedy decoding
    # Additional args to pass to the generation config
    generate_with_chat: bool = False
    add_generation_kwargs: Optional[dict] = {}  # extra backend kwargs; pydantic copies this default per instance
    api_base: Optional[str] = None  # override for the API endpoint base URL
58 changes: 52 additions & 6 deletions r2r/core/abstractions/search.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,81 @@
"""Abstractions for search functionality."""

import uuid
from typing import Any, Optional
from typing import Any, Dict, List, Optional

from pydantic import BaseModel
from pydantic import BaseModel, Field

from .llm import GenerationConfig

class SearchRequest(BaseModel):

class VectorSearchRequest(BaseModel):
"""Request for a search operation."""

query: str
limit: int
filters: Optional[dict[str, Any]] = None


class SearchResult(BaseModel):
class VectorSearchResult(BaseModel):
"""Result of a search operation."""

id: uuid.UUID
score: float
metadata: dict[str, Any]

def __str__(self) -> str:
return f"SearchResult(id={self.id}, score={self.score}, metadata={self.metadata})"
return f"VectorSearchResult(id={self.id}, score={self.score}, metadata={self.metadata})"

def __repr__(self) -> str:
return f"SearchResult(id={self.id}, score={self.score}, metadata={self.metadata})"
return f"VectorSearchResult(id={self.id}, score={self.score}, metadata={self.metadata})"

def dict(self) -> dict:
return {
"id": self.id,
"score": self.score,
"metadata": self.metadata,
}


class KGSearchRequest(BaseModel):
    """Request for a knowledge graph search operation."""

    query: str  # query text to run against the knowledge graph


# Results of a KG query: a list of result sets, each a list of record dicts.
# NOTE(review): shape inferred from the alias alone — confirm against the KG pipe.
KGSearchResult = List[List[Dict[str, Any]]]


class AggregateSearchResult(BaseModel):
    """Result of an aggregate search operation.

    Bundles the optional vector-search hits and the optional knowledge-graph
    search results into a single object for downstream consumers.
    """

    vector_search_results: Optional[List[VectorSearchResult]]
    kg_search_results: Optional[KGSearchResult] = None

    def __str__(self) -> str:
        return f"AggregateSearchResult(vector_search_results={self.vector_search_results}, kg_search_results={self.kg_search_results})"

    def __repr__(self) -> str:
        return f"AggregateSearchResult(vector_search_results={self.vector_search_results}, kg_search_results={self.kg_search_results})"

    def dict(self) -> dict:
        """Serialize to a plain dict, tolerating absent vector results.

        Fix: vector_search_results is Optional — iterating it unguarded
        raises TypeError when it is None.
        """
        return {
            "vector_search_results": (
                [result.dict() for result in self.vector_search_results]
                if self.vector_search_results is not None
                else None
            ),
            "kg_search_results": self.kg_search_results,
        }


class VectorSearchSettings(BaseModel):
    """Settings controlling the vector-search stage of a pipeline."""

    use_vector_search: bool = True  # disable to skip vector search entirely
    search_filters: Optional[dict[str, Any]] = Field(default_factory=dict)  # metadata filters applied to candidates
    search_limit: int = 10  # maximum number of results returned
    do_hybrid_search: bool = False  # combine vector search with another retrieval mode — confirm semantics with the search pipe


class KGSearchSettings(BaseModel):
    """Settings controlling the knowledge-graph search stage of a pipeline."""

    use_kg: bool = False  # KG search is opt-in
    # Generation settings for the KG agent that writes the graph query.
    agent_generation_config: Optional[GenerationConfig] = Field(
        default_factory=GenerationConfig
    )
Loading
Loading