Feature/dev merge rebased (#329)
* Update local_rag.mdx

* Update llms.mdx (#322)

* update the default embd. (#310)

* Feature/add agent provider (#317)

* update pipeline (#315)

* Update CONTRIBUTING.md

* Delete CONTRIBUTOR.md

* Adding agent provider

* Feature/modify get all uniq values (#325)

* refine

* update

* format

* fix

* dev merge
emrgnt-cmplxty committed Apr 23, 2024
1 parent ccfaf7c commit 9ea380a
Showing 19 changed files with 121 additions and 96 deletions.
7 changes: 4 additions & 3 deletions .gitignore
@@ -11,7 +11,8 @@ env/
 node_modules
 
 
-**/*.sqlite
-**/*.sqlite3
+**/*.sqlite*
+**/*.sqlite3*
 
-qdrant_storage/
+qdrant_storage/
+r2r/examples/data/*
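
Since the trailing wildcard broadens the match, the new patterns also ignore SQLite sidecar files, e.g. db.sqlite-wal or db.sqlite3-journal (illustrative names), while the added r2r/examples/data/* entry keeps example data out of version control.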
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
@@ -59,8 +59,8 @@ eval = ["parea-ai"]
 ionic = ["ionic-api-sdk"]
 reducto = ["boto3"]
 exa = ["exa-py"]
-sentence_transformers = ["sentence-transformers", "tokenizers"]
-local_llm = ["llama-cpp-python", "sentence-transformers", "tokenizers"]
+sentence-transformers-only = ["sentence-transformers"]
+local-llm = ["llama-cpp-python", "sentence-transformers"]
 all = ["tiktoken", "datasets", "qdrant_client", "psycopg2-binary", "sentry-sdk", "parea-ai", "boto3", "exa-py", "llama-cpp-python", "ionic-api-sdk"]
 
 [tool.poetry.group.dev.dependencies]
@@ -78,4 +78,4 @@ exclude = 'playground/.*|deprecated/.*|dump/.*|docs/source|vecs/*'
 
 [[tool.mypy.overrides]]
 module = "yaml"
-ignore_missing_imports = true
+ignore_missing_imports = true
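
The optional-dependency groups were renamed (underscores to hyphens) and no longer include tokenizers, so install commands need updating, e.g. (assuming the package is published to PyPI as r2r):

pip install "r2r[local-llm]"                  # llama-cpp-python + sentence-transformers
pip install "r2r[sentence-transformers-only]" # sentence-transformers alone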
6 changes: 5 additions & 1 deletion r2r/core/__init__.py
@@ -1,5 +1,9 @@
 from .abstractions.document import DocumentPage
-from .abstractions.output import RAGPipelineOutput
+from .abstractions.output import (
+    LLMChatCompletion,
+    LLMChatCompletionChunk,
+    RAGPipelineOutput,
+)
 from .abstractions.vector import VectorEntry, VectorSearchResult
 from .agent.base import Agent
 from .pipelines.embedding import EmbeddingPipeline
7 changes: 5 additions & 2 deletions r2r/core/abstractions/output.py
@@ -1,14 +1,17 @@
 from typing import Optional
 
-from openai.types.chat import ChatCompletion
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
+
+LLMChatCompletion = ChatCompletion
+LLMChatCompletionChunk = ChatCompletionChunk
 
 
 class RAGPipelineOutput:
     def __init__(
         self,
         search_results: list,
         context: Optional[str] = None,
-        completion: Optional[ChatCompletion] = None,
+        completion: Optional[LLMChatCompletion] = None,
     ):
         self.search_results = search_results
         self.context = context
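
With the __init__.py re-export above, downstream code can pull the completion types from r2r.core instead of importing openai's types directly. A minimal sketch (field values illustrative):

from r2r.core import (
    LLMChatCompletion,       # alias for openai.types.chat.ChatCompletion
    LLMChatCompletionChunk,  # alias for openai.types.chat.ChatCompletionChunk
    RAGPipelineOutput,
)

# A pipeline result before generation has run: search hits and prompt
# context are present, the completion slot is still empty.
output = RAGPipelineOutput(
    search_results=[],  # VectorSearchResult objects in practice
    context="retrieved passages, concatenated for the prompt",
    completion=None,
)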
4 changes: 3 additions & 1 deletion r2r/core/agent/base.py
@@ -4,8 +4,9 @@
 from abc import ABC, abstractmethod
 from typing import Sequence
 
+from ..abstractions.output import LLMChatCompletion
 from ..abstractions.tool import Tool
-from ..providers.llm import LLMChatCompletion, LLMProvider
+from ..providers.llm import LLMProvider
 from ..providers.prompt import PromptProvider
 
 logger = logging.getLogger(__name__)
@@ -22,6 +23,7 @@ class Agent(ABC):
     def __init__(
         self, prompt_provider: PromptProvider, llm_provider: LLMProvider
     ) -> None:
+        self.llm_provider = llm_provider
         self.prompt_provider = prompt_provider
 
         self._initialized = False
7 changes: 3 additions & 4 deletions r2r/core/pipelines/rag.py
@@ -8,9 +8,8 @@
 from abc import abstractmethod
 from typing import Any, Generator, Optional, Union
 
-from openai.types.chat import ChatCompletion
-
-from ..abstractions.output import RAGPipelineOutput
+from ..abstractions.output import RAGPipelineOutput, LLMChatCompletion
 from ..providers.embedding import EmbeddingProvider
 from ..providers.llm import GenerationConfig, LLMProvider
 from ..providers.prompt import PromptProvider
@@ -120,7 +119,7 @@ def generate_completion(
         prompt: str,
         generation_config: GenerationConfig,
         conversation: list[dict] = None,
-    ) -> Union[Generator[str, None, None], ChatCompletion]:
+    ) -> Union[Generator[str, None, None], LLMChatCompletion]:
         """
         Generates a completion based on the prompt.
         """
@@ -173,7 +172,7 @@ def run(
         generation_config: Optional[GenerationConfig] = None,
         *args,
         **kwargs,
-    ) -> Union[RAGPipelineOutput, ChatCompletion]:
+    ) -> Union[RAGPipelineOutput, LLMChatCompletion]:
         """
         Runs the completion pipeline for non-streaming execution.
         """
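
A sketch of what the retyped signatures mean for a caller (pipeline construction elided; pipeline, prompt, and generation_config are assumed to exist):

from r2r.core import LLMChatCompletion

result = pipeline.generate_completion(prompt, generation_config)
if isinstance(result, LLMChatCompletion):
    # non-streaming: a complete OpenAI-style chat completion
    print(result.choices[0].message.content)
else:
    # streaming: a generator yielding string fragments
    for token in result:
        print(token, end="")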
10 changes: 3 additions & 7 deletions r2r/core/providers/llm.py
@@ -4,13 +4,9 @@
 from dataclasses import dataclass, field
 from typing import List, Optional
 
-from openai.types.chat import ChatCompletion, ChatCompletionChunk
-
+from ..abstractions.output import LLMChatCompletion, LLMChatCompletionChunk
 from .base import Provider, ProviderConfig
 
-LLMChatCompletion = ChatCompletion
-LLMChatCompletionChunk = ChatCompletionChunk
-
 
 @dataclass
 class GenerationConfig(ABC):
@@ -69,7 +65,7 @@ def get_completion(
         messages: list[dict],
         generation_config: GenerationConfig,
         **kwargs,
-    ) -> ChatCompletion:
+    ) -> LLMChatCompletion:
         """Abstract method to get a chat completion from the provider."""
         pass
@@ -79,6 +75,6 @@ def get_completion_stream(
         messages: list[dict],
         generation_config: GenerationConfig,
         **kwargs,
-    ) -> ChatCompletionChunk:
+    ) -> LLMChatCompletionChunk:
         """Abstract method to get a completion stream from the provider."""
         pass
6 changes: 3 additions & 3 deletions r2r/embeddings/setence_transformer/base.py
@@ -1,14 +1,14 @@
 import logging
 
-from r2r.core import EmbeddingProvider, VectorSearchResult
+from r2r.core import EmbeddingProvider, EmbeddingConfig, VectorSearchResult
 
 logger = logging.getLogger(__name__)
 
 
 class SentenceTransformerEmbeddingProvider(EmbeddingProvider):
     def __init__(
         self,
-        config: dict,
+        config: EmbeddingConfig,
     ):
         super().__init__(config)
         logger.info(
@@ -45,7 +45,7 @@ def __init__(
             config, EmbeddingProvider.PipelineStage.RERANK
         )
 
-    def _init_model(self, config: dict, stage: str):
+    def _init_model(self, config: EmbeddingConfig, stage: str):
         stage_name = stage.name.lower()
         model = config.get(f"{stage_name}_model", None)
         dimension = config.get(f"{stage_name}_dimension", None)
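
The stage-keyed lookups in _init_model map directly onto the embedding blocks of the example configs in this commit. A standalone sketch of the same lookup, with a plain dict standing in for EmbeddingConfig (which the code above accesses dict-style via .get):

stage_name = "search"  # or "rerank", per EmbeddingProvider.PipelineStage
config = {
    "search_model": "mixedbread-ai/mxbai-embed-large-v1",
    "search_dimension": 512,
}
model = config.get(f"{stage_name}_model", None)          # model identifier
dimension = config.get(f"{stage_name}_dimension", None)  # vector size, 512 here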
2 changes: 1 addition & 1 deletion r2r/examples/clients/run_qna_client.py
@@ -9,7 +9,7 @@
 
 
 class QnAClient:
-    def __init__(self, base_url="http://localhost:8001", user_id=None):
+    def __init__(self, base_url="http://localhost:8000", user_id=None):
         self.client = R2RClient(base_url)
         if not user_id:
             self.user_id = generate_id_from_label("user_id")
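
For anyone running the example against a local server, the client default now matches the server's port; a hypothetical invocation (import path assumed from the file location):

from r2r.examples.clients.run_qna_client import QnAClient

client = QnAClient()                                  # defaults to http://localhost:8000
legacy = QnAClient(base_url="http://localhost:8001")  # explicit override if needed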
20 changes: 10 additions & 10 deletions r2r/examples/configs/local_llama_cpp.json
@@ -1,27 +1,27 @@
 {
   "language_model": {
-    "provider": "llama-cpp"
+    "provider": "litellm"
   },
   "vector_database": {
-    "provider": "qdrant",
+    "provider": "local",
     "collection_name": "demo_vecs"
   },
   "ingestion":{
-    "provider": "local",
-    "text_splitter": {
-      "type": "recursive_character",
-      "chunk_size": 512,
-      "chunk_overlap": 20
-    }
+    "provider": "llama-cpp"
   },
   "embedding": {
     "provider": "sentence-transformers",
     "search_model": "mixedbread-ai/mxbai-embed-large-v1",
     "search_dimension": 512,
-    "batch_size": 32
+    "batch_size": 32,
+    "text_splitter": {
+      "type": "recursive_character",
+      "chunk_size": 512,
+      "chunk_overlap": 20
+    }
   },
   "evals": {
-    "provider": "none",
+    "provider": "parea",
     "frequency": 0.0
   },
   "app": {
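
For readers tracking the schema change, the text_splitter block now sits under "embedding" rather than "ingestion". A small sketch using plain json (R2R's own config loader may differ):

import json

with open("r2r/examples/configs/local_llama_cpp.json") as f:
    cfg = json.load(f)

splitter = cfg["embedding"]["text_splitter"]  # moved out of cfg["ingestion"]
assert splitter["chunk_size"] == 512
assert splitter["chunk_overlap"] == 20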
16 changes: 8 additions & 8 deletions r2r/examples/configs/local_ollama.json
Original file line number Diff line number Diff line change
@@ -7,21 +7,21 @@
     "collection_name": "demo_vecs"
   },
   "ingestion":{
-    "provider": "local",
-    "text_splitter": {
-      "type": "recursive_character",
-      "chunk_size": 512,
-      "chunk_overlap": 20
-    }
+    "provider": "local"
   },
   "embedding": {
     "provider": "sentence-transformers",
     "search_model": "mixedbread-ai/mxbai-embed-large-v1",
     "search_dimension": 512,
-    "batch_size": 32
+    "batch_size": 32,
+    "text_splitter": {
+      "type": "recursive_character",
+      "chunk_size": 512,
+      "chunk_overlap": 20
+    }
   },
   "evals": {
-    "provider": "none",
+    "provider": "parea",
     "frequency": 0.0
   },
   "app": {
16 changes: 8 additions & 8 deletions r2r/examples/configs/local_ollama_qdrant.json
Original file line number Diff line number Diff line change
@@ -7,21 +7,21 @@
     "collection_name": "demo_vecs"
   },
   "ingestion":{
-    "provider": "local",
-    "text_splitter": {
-      "type": "recursive_character",
-      "chunk_size": 512,
-      "chunk_overlap": 20
-    }
+    "provider": "local"
   },
   "embedding": {
     "provider": "sentence-transformers",
     "search_model": "mixedbread-ai/mxbai-embed-large-v1",
     "search_dimension": 512,
-    "batch_size": 32
+    "batch_size": 32,
+    "text_splitter": {
+      "type": "recursive_character",
+      "chunk_size": 512,
+      "chunk_overlap": 20
+    }
  },
   "evals": {
-    "provider": "none",
+    "provider": "parea",
     "frequency": 0.0
   },
   "app": {
34 changes: 17 additions & 17 deletions r2r/examples/configs/local_ollama_with_rerank.json
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
 {
-  "language_model": {
-    "provider": "litellm"
-  },
-  "vector_database": {
-    "provider": "local",
-    "collection_name": "demo_vecs"
-  },
-  "ingestion":{
-    "provider": "local",
-    "text_splitter": {
-      "type": "recursive_character",
-      "chunk_size": 512,
-      "chunk_overlap": 20
-    }
-  },
   "embedding": {
     "provider": "sentence-transformers",
     "search_model": "mixedbread-ai/mxbai-embed-large-v1",
     "search_dimension": 512,
     "rerank_model": "jinaai/jina-reranker-v1-turbo-en",
     "rerank_dimension": 384,
     "rerank_transformer_type": "CrossEncoder",
-    "batch_size": 32
+    "batch_size": 32,
+    "text_splitter": {
+      "type": "recursive_character",
+      "chunk_size": 512,
+      "chunk_overlap": 20
+    }
   },
   "evals": {
     "provider": "none",
     "frequency": 0.0
   },
+  "language_model": {
+    "provider": "llama-cpp"
+  },
+  "vector_database": {
+    "provider": "qdrant",
+    "collection_name": "demo_vecs"
+  },
+  "ingestion":{
+    "provider": "local"
+  },
   "app": {
     "max_logs": 100,
     "max_file_size_in_mb": 100
@@ -36,4 +36,4 @@
     "collection_name": "demo_logs",
     "level": "INFO"
   }
-}
+}