From fdb847524ccd0469f0b652a4a65e08f32a6b938b Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 25 Mar 2024 18:26:30 +0000 Subject: [PATCH 01/15] fix: update the basic example client to communicate with server on the correct port --- r2r/examples/basic/run_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r2r/examples/basic/run_client.py b/r2r/examples/basic/run_client.py index 9f1aefea..4a4bcd0f 100644 --- a/r2r/examples/basic/run_client.py +++ b/r2r/examples/basic/run_client.py @@ -5,7 +5,7 @@ from r2r.core.utils import generate_id_from_label # Initialize the client with the base URL of your API -base_url = "http://localhost:8010" +base_url = "http://localhost:8000" client = R2RClient(base_url) print("Upserting entry to remote db...") From da8f46c33c02dc03c20053ee7a2ca1def65836ec Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 25 Mar 2024 21:42:25 -0400 Subject: [PATCH 02/15] chore: add lanceDB as optional dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 94c863a6..9889d841 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ sentry-sdk = {version = "^1.40.4", optional = true} deepeval = {version ="^0.20.88", optional = true} parea-ai = {version = "^0.2.86", optional = true} ionic-api-sdk = {version = "0.9.3", optional = true} +lancedb = {version = "^0.6.5", optional = true} [tool.poetry.extras] parsing = ["bs4", "pypdf"] From 1fdf426f2eeafde59e3f64552b46ddf790e4cf59 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 25 Mar 2024 21:45:28 -0400 Subject: [PATCH 03/15] chore: add lanceDB as optional dependency part 2 --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9889d841..25dffcca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,8 @@ local_vectordb = ["numpy", "scikit-learn"] monitoring = ["sentry-sdk"] eval = ["deepeval", "parea-ai"] ionic = 
["ionic-api-sdk"] -all = ["bs4", "pypdf", "tiktoken", "datasets", "qdrant_client", "psycopg2-binary", "numpy", "scikit-learn", "sentry-sdk", "protobuf", "deepeval", "parea-ai", "ionic"] +lancedb = ["lancedb"] +all = ["bs4", "pypdf", "tiktoken", "datasets", "qdrant_client", "psycopg2-binary", "numpy", "scikit-learn", "sentry-sdk", "protobuf", "deepeval", "parea-ai", "ionic", "lancedb"] [tool.poetry.group.dev.dependencies] black = "^23.3.0" From 0d085e0a0710d44f3bc64956649924da5d74603d Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Tue, 26 Mar 2024 08:25:12 -0400 Subject: [PATCH 04/15] feat: Add support for LanceDB as Vector DB provider --- r2r/core/providers/vector_db.py | 2 +- r2r/vector_dbs/lancedb/base.py | 54 +++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 r2r/vector_dbs/lancedb/base.py diff --git a/r2r/core/providers/vector_db.py b/r2r/core/providers/vector_db.py index 59f69e16..4fa7a310 100644 --- a/r2r/core/providers/vector_db.py +++ b/r2r/core/providers/vector_db.py @@ -64,7 +64,7 @@ def to_dict(self) -> dict: class VectorDBProvider(ABC): - supported_providers = ["local", "pgvector", "qdrant"] + supported_providers = ["local", "pgvector", "qdrant", "lancedb"] def __init__(self, provider: str): if provider not in VectorDBProvider.supported_providers: diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py new file mode 100644 index 00000000..5ba815e4 --- /dev/null +++ b/r2r/vector_dbs/lancedb/base.py @@ -0,0 +1,54 @@ +import logging +import os +from typing import Optional, Union + +from r2r.core import VectorDBProvider, VectorEntry, VectorSearchResult + +logger = logging.getLogger(__name__) + + +class LanceDB(VectorDBProvider): + def __init__( + self, provider: str = "lancedb", db_path: Optional[str] = None + ) -> None: + logger.info("Initializing `LanceDB` to store and retrieve embeddings.") + + super().__init__(provider) + if provider != "lancedb": + raise ValueError( + 
"LanceDB must be initialized with provider `lancedb`." + ) + try: + import lancedb + except ImportError: + raise ValueError( + f"Error, `lancedb` is not installed. Please install it using `pip install lancedb`." + ) + self.db_path = db_path + try: + self.client = lancedb.connect(db_path=self.db_path) + except Exception as e: + raise ValueError( + f"Error {e} occurred while attempting to connect to the lancedb provider." + ) + self.collection_name: Optional[str] = None + + def initialize_collection( + self, collection_name: str, dimension: int + ) -> None: + self.collection_name = collection_name + try: + import pyarrow # TODO ADD Dependency + except ImportError: + raise ValueError( + f"Error, `pyarrow` is not installed. Please install it using `pip install pyarrow`." + ) + try: + result = self.client.create_table( + name=f"{collection_name}", on_bad_vectors="error", schema=[] + ) + except Exception: + # TODO - Handle more appropriately - create collection fails when it already exists + # https://lancedb.github.io/lancedb/python/python/#lancedb.db.DBConnection.create_table + print(Exception) + pass From 53a3dd3c7ec3ac85c2d58bf7f2f1c338c4c3c089 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Tue, 26 Mar 2024 13:25:18 -0400 Subject: [PATCH 05/15] chore: added pyarrow as the dependency --- pyproject.toml | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 25dffcca..acc845db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,19 +30,20 @@ uvicorn = "^0.27.0.post1" vecs = "^0.4.0" # Optional dependencies -bs4 = {version = "^0.0.2", optional = true} -pypdf = {version = "^4.0.2", optional = true} -tiktoken = {version = "^0.5.2", optional = true} -datasets = {version = "^2.16.1", optional = true} -qdrant_client = {version = "^1.7.0", optional = true} -psycopg2-binary = {version = "^2.9.9", optional = true} -numpy = {version = "^1.26.4", optional = true} -scikit-learn = {version = 
"^1.4.1.post1", optional = true} -sentry-sdk = {version = "^1.40.4", optional = true} -deepeval = {version ="^0.20.88", optional = true} -parea-ai = {version = "^0.2.86", optional = true} -ionic-api-sdk = {version = "0.9.3", optional = true} -lancedb = {version = "^0.6.5", optional = true} +bs4 = { version = "^0.0.2", optional = true } +pypdf = { version = "^4.0.2", optional = true } +tiktoken = { version = "^0.5.2", optional = true } +datasets = { version = "^2.16.1", optional = true } +qdrant_client = { version = "^1.7.0", optional = true } +psycopg2-binary = { version = "^2.9.9", optional = true } +numpy = { version = "^1.26.4", optional = true } +scikit-learn = { version = "^1.4.1.post1", optional = true } +sentry-sdk = { version = "^1.40.4", optional = true } +deepeval = { version = "^0.20.88", optional = true } +parea-ai = { version = "^0.2.86", optional = true } +ionic-api-sdk = { version = "0.9.3", optional = true } +lancedb = { version = "^0.6.5", optional = true } +pyarrow = { version = "^15.0.2", optional = true } [tool.poetry.extras] parsing = ["bs4", "pypdf"] From 081acc5a57de3a513635ddc84602c337341695d1 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Tue, 26 Mar 2024 15:02:23 -0400 Subject: [PATCH 06/15] chore: added implementation for lancedb initialize_collection --- r2r/vector_dbs/lancedb/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py index 5ba815e4..ba1016ec 100644 --- a/r2r/vector_dbs/lancedb/base.py +++ b/r2r/vector_dbs/lancedb/base.py @@ -1,13 +1,15 @@ import logging import os -from typing import Optional, Union +from typing import Optional, Any, Union from r2r.core import VectorDBProvider, VectorEntry, VectorSearchResult +from lancedb.pydantic import LanceModel, Vector logger = logging.getLogger(__name__) class LanceDB(VectorDBProvider): + # TODO enable LanceDB provider to support lanceDB Cloud def __init__( self, provider: str = "lancedb", 
db_path: Optional[str] = None ) -> None: From 7f63a6fb60063c0698a25c8f934f9e884babd6ba Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Tue, 26 Mar 2024 16:34:24 -0400 Subject: [PATCH 07/15] chore: updated the database schema for lancedb --- r2r/vector_dbs/lancedb/base.py | 55 +++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py index ba1016ec..e1a1abda 100644 --- a/r2r/vector_dbs/lancedb/base.py +++ b/r2r/vector_dbs/lancedb/base.py @@ -1,9 +1,7 @@ import logging -import os -from typing import Optional, Any, Union +from typing import Optional -from r2r.core import VectorDBProvider, VectorEntry, VectorSearchResult -from lancedb.pydantic import LanceModel, Vector +from r2r.core import VectorDBProvider, VectorEntry logger = logging.getLogger(__name__) @@ -16,19 +14,23 @@ def __init__( logger.info("Initializing `LanceDB` to store and retrieve embeddings.") super().__init__(provider) + if provider != "lancedb": raise ValueError( "LanceDB must be initialized with provider `lancedb`." ) + try: import lancedb except ImportError: raise ValueError( f"Error, `lancedb` is not installed. Please install it using `pip install lancedb`." ) + self.db_path = db_path + try: - self.client = lancedb.connect(db_path=self.db_path) + self.client = lancedb.connect(uri=self.db_path) except Exception as e: raise ValueError( f"Error {e} occurred while attempting to connect to the lancedb provider." @@ -39,18 +41,49 @@ def initialize_collection( self, collection_name: str, dimension: int ) -> None: self.collection_name = collection_name + try: - import pyarrow # TODO ADD Dependency + import pyarrow except ImportError: raise ValueError( f"Error, `pyarrow` is not installed. Please install it using `pip install pyarrow`." 
) + + table_schema = pyarrow.schema( + [ + pyarrow.field("id", pyarrow.string()), + pyarrow.field( + "vector", pyarrow.list_(pyarrow.float32(), dimension) + ), + # TODO Handle storing metadata + ] + ) + try: - result = self.client.create_table( - name=f"{collection_name}", on_bad_vectors="error", schema=[] + self.client.create_table( + name=f"{collection_name}", + on_bad_vectors="error", + schema=table_schema, ) - except Exception: + except Exception as e: # TODO - Handle more appropriately - create collection fails when it already exists - # https://lancedb.github.io/lancedb/python/python/#lancedb.db.DBConnection.create_table - print(Exception) pass + + def copy(self, entry: VectorEntry, commit=True) -> None: + raise NotImplementedError( + "LanceDB does not support the `copy` method." + ) + + def upsert(self, entry: VectorEntry, commit=True) -> None: + if self.collection_name is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `upsert`." + ) + self.client.open_table(self.collection_name).add( + { + "vector": entry.vector, + "id": entry.id, + # TODO ADD metadata storage + }, + mode="overwrite", + ) From e378820d945211013db8f03427d77e979b000ae8 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Tue, 26 Mar 2024 16:36:26 -0400 Subject: [PATCH 08/15] chore: update dependency for pyarrow --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index acc845db..5193b30b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,8 @@ monitoring = ["sentry-sdk"] eval = ["deepeval", "parea-ai"] ionic = ["ionic-api-sdk"] lancedb = ["lancedb"] -all = ["bs4", "pypdf", "tiktoken", "datasets", "qdrant_client", "psycopg2-binary", "numpy", "scikit-learn", "sentry-sdk", "protobuf", "deepeval", "parea-ai", "ionic", "lancedb"] +pyarrow = ["pyarrow"] +all = ["bs4", "pypdf", "tiktoken", "datasets", "qdrant_client", "psycopg2-binary", "numpy", "scikit-learn", "sentry-sdk", 
"protobuf", "deepeval", "parea-ai", "ionic", "lancedb", "pyarrow"] [tool.poetry.group.dev.dependencies] black = "^23.3.0" From a76bf1600f96fde014cd4f89f03ad3c74ce4e2f3 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Thu, 4 Apr 2024 15:19:02 -0400 Subject: [PATCH 09/15] chore: support lancedb selection from config.json --- r2r/main/factory.py | 30 +++++++++++++++++------------- r2r/vector_dbs/__init__.py | 3 ++- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/r2r/main/factory.py b/r2r/main/factory.py index a38e1cc6..5b8a3052 100644 --- a/r2r/main/factory.py +++ b/r2r/main/factory.py @@ -34,6 +34,10 @@ def get_vector_db(database_config: dict[str, Any]): from r2r.vector_dbs import LocalVectorDB return LocalVectorDB() + elif database_config["provider"] == "lancedb": + from r2r.vector_dbs import LanceDB + + return LanceDB() @staticmethod def get_embeddings_provider(embedding_config: dict[str, Any]): @@ -70,28 +74,28 @@ def get_text_splitter(text_splitter_config: dict[str, Any]): @staticmethod def create_pipeline( - config: R2RConfig, - db=None, - embeddings_provider=None, - llm=None, - text_splitter=None, - ingestion_pipeline_impl=BasicIngestionPipeline, - embedding_pipeline_impl=BasicEmbeddingPipeline, - rag_pipeline_impl=BasicRAGPipeline, - eval_pipeline_impl=BasicEvalPipeline, - app_fn=create_app, + config: R2RConfig, + db=None, + embeddings_provider=None, + llm=None, + text_splitter=None, + ingestion_pipeline_impl=BasicIngestionPipeline, + embedding_pipeline_impl=BasicEmbeddingPipeline, + rag_pipeline_impl=BasicRAGPipeline, + eval_pipeline_impl=BasicEvalPipeline, + app_fn=create_app, ): logging.basicConfig(level=config.logging_database["level"]) embeddings_provider = ( - embeddings_provider - or E2EPipelineFactory.get_embeddings_provider(config.embedding) + embeddings_provider + or E2EPipelineFactory.get_embeddings_provider(config.embedding) ) embedding_model = config.embedding["model"] embedding_dimension = config.embedding["dimension"] 
embedding_batch_size = config.embedding["batch_size"] - db = db or E2EPipelineFactory.get_vector_db(config.vector_database) + db = E2EPipelineFactory.get_vector_db(config.vector_database) collection_name = config.vector_database["collection_name"] db.initialize_collection(collection_name, embedding_dimension) diff --git a/r2r/vector_dbs/__init__.py b/r2r/vector_dbs/__init__.py index 419a0431..6f7ad4ac 100644 --- a/r2r/vector_dbs/__init__.py +++ b/r2r/vector_dbs/__init__.py @@ -1,5 +1,6 @@ from .local.base import LocalVectorDB from .pg_vector.base import PGVectorDB from .qdrant.base import QdrantDB +from .lancedb.base import LanceDB -__all__ = ["LocalVectorDB", "PGVectorDB", "QdrantDB"] +__all__ = ["LocalVectorDB", "PGVectorDB", "QdrantDB", "LanceDB"] From 5c0f8f0cde495d8b653b049dba4cd686b22f507e Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Thu, 4 Apr 2024 15:23:33 -0400 Subject: [PATCH 10/15] Update factory.py --- r2r/main/factory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r2r/main/factory.py b/r2r/main/factory.py index c06d9a4b..fc02c4ac 100644 --- a/r2r/main/factory.py +++ b/r2r/main/factory.py @@ -103,14 +103,14 @@ def create_pipeline( logging.basicConfig(level=config.logging_database["level"]) embeddings_provider = ( - embeddings_provider - or E2EPipelineFactory.get_embeddings_provider(config.embedding) + embeddings_provider + or E2EPipelineFactory.get_embeddings_provider(config.embedding) ) embedding_model = config.embedding["model"] embedding_dimension = config.embedding["dimension"] embedding_batch_size = config.embedding["batch_size"] - db = E2EPipelineFactory.get_vector_db(config.vector_database) + db = db or E2EPipelineFactory.get_vector_db(config.vector_database) collection_name = config.vector_database["collection_name"] db.initialize_collection(collection_name, embedding_dimension) From 9c276cfb47e42659ac2c610fc22c351e1a2c50a5 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Thu, 4 Apr 2024 16:30:04 -0400 
Subject: [PATCH 11/15] chore: add skeleton code for lancedb provider support --- r2r/vector_dbs/lancedb/base.py | 40 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py index e1a1abda..9d476895 100644 --- a/r2r/vector_dbs/lancedb/base.py +++ b/r2r/vector_dbs/lancedb/base.py @@ -1,7 +1,7 @@ import logging -from typing import Optional +from typing import Optional, Union -from r2r.core import VectorDBProvider, VectorEntry +from r2r.core import VectorDBProvider, VectorEntry, VectorSearchResult logger = logging.getLogger(__name__) @@ -9,7 +9,7 @@ class LanceDB(VectorDBProvider): # TODO enable LanceDB provider to support lanceDB Cloud def __init__( - self, provider: str = "lancedb", db_path: Optional[str] = None + self, provider: str = "lancedb", db_path: Optional[str] = None ) -> None: logger.info("Initializing `LanceDB` to store and retrieve embeddings.") @@ -28,7 +28,6 @@ def __init__( ) self.db_path = db_path - try: self.client = lancedb.connect(uri=self.db_path) except Exception as e: @@ -38,7 +37,7 @@ def __init__( self.collection_name: Optional[str] = None def initialize_collection( - self, collection_name: str, dimension: int + self, collection_name: str, dimension: int ) -> None: self.collection_name = collection_name @@ -87,3 +86,34 @@ def upsert(self, entry: VectorEntry, commit=True) -> None: }, mode="overwrite", ) + + def upsert_entries( + self, entries: list[VectorEntry], commit: bool = True + ) -> None: + pass + + def search( + self, + query_vector: list[float], + filters: dict[str, Union[bool, int, str]] = {}, + limit: int = 10, + *args, + **kwargs, + ) -> list[VectorSearchResult]: + pass + + def create_index(self, index_type, column_name, index_options): + pass + + def close(self): + pass + + def filtered_deletion( + self, key: str, value: Union[bool, int, str] + ) -> None: + pass + + def get_all_unique_values( + self, collection_name: str, 
metadata_field: str, filters: dict = {} + ) -> list: + pass From b371e69c0cbb4707361712804890448da1cf1408 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 8 Apr 2024 12:01:07 -0400 Subject: [PATCH 12/15] chore: update .env.example for lancedb --- .env.example | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.env.example b/.env.example index 90bec0aa..a611afb2 100644 --- a/.env.example +++ b/.env.example @@ -16,6 +16,9 @@ LOCAL_DB_PATH=local.sqlite ## QDRANT_PORT=your_qdrant_port ## QDRANT_API_KEY=your_qdrant_api_key +# ## lancedb +## LANCEDB_URI=your_lancedb_uri + # LLM Providers ## openai From 92fb033769772511f97ac1261342ba489beb59fa Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 8 Apr 2024 12:02:13 -0400 Subject: [PATCH 13/15] chore: update lancedb implementation to set db uri from env --- r2r/vector_dbs/lancedb/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py index 9d476895..c96636cf 100644 --- a/r2r/vector_dbs/lancedb/base.py +++ b/r2r/vector_dbs/lancedb/base.py @@ -1,4 +1,5 @@ import logging +import os from typing import Optional, Union from r2r.core import VectorDBProvider, VectorEntry, VectorSearchResult @@ -29,7 +30,7 @@ def __init__( self.db_path = db_path try: - self.client = lancedb.connect(uri=self.db_path) + self.client = lancedb.connect(uri=self.db_path or os.environ.get("LANCEDB_URI")) except Exception as e: raise ValueError( f"Error {e} occurred while attempting to connect to the lancedb provider." 
From a8b0fe6aee6cf895aee93ec8a399ffdb8281af18 Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 8 Apr 2024 12:02:47 -0400 Subject: [PATCH 14/15] feat: update lancedb implementation to support upsert_entries and search --- r2r/vector_dbs/lancedb/base.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py index c96636cf..7d2b63a4 100644 --- a/r2r/vector_dbs/lancedb/base.py +++ b/r2r/vector_dbs/lancedb/base.py @@ -91,7 +91,20 @@ def upsert(self, entry: VectorEntry, commit=True) -> None: def upsert_entries( self, entries: list[VectorEntry], commit: bool = True ) -> None: - pass + if self.collection_name is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `upsert_entries`." + ) + + self.client.open_table(self.collection_name).add( + [{"vector": entry.vector, + "id": entry.id + # TODO ADD metadata storage + } + for entry in entries], + mode="overwrite", + ) + def search( self, @@ -101,7 +114,22 @@ def search( *args, **kwargs, ) -> list[VectorSearchResult]: - pass + if self.collection_name is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `search`." 
+ ) + + results = self.client.open_table(self.collection_name).search( + query=query_vector, + # TODO implement metadata filter + ).limit(limit).to_list() + + return [ + VectorSearchResult( + str(idx), result.get("_distance"), {} # TODO Handle metadata + ) + for idx, result in enumerate(results) + ] def create_index(self, index_type, column_name, index_options): pass From 024c98145b4ac52df87343498bb7797baf694ddb Mon Sep 17 00:00:00 2001 From: Sudhanshu Pandey Date: Mon, 8 Apr 2024 12:44:17 -0400 Subject: [PATCH 15/15] feat: update lancedb implementation to support lancedb cloud --- .env.example | 4 +++- r2r/vector_dbs/lancedb/base.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index a611afb2..4b829165 100644 --- a/.env.example +++ b/.env.example @@ -17,7 +17,9 @@ LOCAL_DB_PATH=local.sqlite ## QDRANT_API_KEY=your_qdrant_api_key # ## lancedb -## LANCEDB_URI=your_lancedb_uri +## LANCEDB_URI=your_lancedb_uri_local_or_cloud +## LANCEDB_API_KEY=your_lancedb_cloud_api_key +## LANCEDB_REGION=your_lancedb_cloud_region # LLM Providers diff --git a/r2r/vector_dbs/lancedb/base.py b/r2r/vector_dbs/lancedb/base.py index 7d2b63a4..8585d1a5 100644 --- a/r2r/vector_dbs/lancedb/base.py +++ b/r2r/vector_dbs/lancedb/base.py @@ -8,7 +8,6 @@ class LanceDB(VectorDBProvider): - # TODO enable LanceDB provider to support lanceDB Cloud def __init__( self, provider: str = "lancedb", db_path: Optional[str] = None ) -> None: @@ -30,7 +29,7 @@ def __init__( self.db_path = db_path try: - self.client = lancedb.connect(uri=self.db_path or os.environ.get("LANCEDB_URI")) + self.client = lancedb.connect(uri=self.db_path or os.environ.get("LANCEDB_URI"), api_key=os.environ.get("LANCEDB_API_KEY") or None, region=os.environ.get("LANCEDB_REGION") or None) except Exception as e: raise ValueError( f"Error {e} occurred while attempting to connect to the lancedb provider."