From 07a893f25580bf80a61a5a5211a32f2fd506e7c4 Mon Sep 17 00:00:00 2001 From: ThomasRochefortB Date: Mon, 23 Dec 2024 15:06:22 -0500 Subject: [PATCH 1/5] Converting `add_texts_and_embeddings` to async --- paperqa/llms.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/paperqa/llms.py b/paperqa/llms.py index 92083601a..ca1b97b22 100644 --- a/paperqa/llms.py +++ b/paperqa/llms.py @@ -72,7 +72,8 @@ def __len__(self) -> int: return len(self.texts_hashes) @abstractmethod - def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: + async def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: + """Add texts and their embeddings to the store.""" self.texts_hashes.update(hash(t) for t in texts) @abstractmethod @@ -198,8 +199,8 @@ def clear(self) -> None: self._embeddings_matrix = None self._texts_filter = None - def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: - super().add_texts_and_embeddings(texts) + async def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: + await super().add_texts_and_embeddings(texts) self.texts.extend(texts) self._embeddings_matrix = np.array([t.embedding for t in self.texts]) @@ -328,8 +329,8 @@ def clear(self) -> None: ) self._point_ids = None - def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: - super().add_texts_and_embeddings(texts) + async def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: + await super().add_texts_and_embeddings(texts) texts_list = list(texts) From a53d28857ee5007d0c7062c4535b3a34ced6ce8d Mon Sep 17 00:00:00 2001 From: ThomasRochefortB Date: Mon, 30 Dec 2024 14:33:24 -0500 Subject: [PATCH 2/5] Added await --- paperqa/docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paperqa/docs.py b/paperqa/docs.py index 7c13098dd..545920708 100644 --- a/paperqa/docs.py +++ b/paperqa/docs.py @@ -513,7 +513,7 @@ async def _build_texts_index(self, embedding_model: EmbeddingModel) -> None: strict=True, ): t.embedding = t_embedding - self.texts_index.add_texts_and_embeddings(texts) + await self.texts_index.add_texts_and_embeddings(texts) async def retrieve_texts( self, From 80d877bfd4ee6b020813b73cf33e92ab620bd46c Mon Sep 17 00:00:00 2001 From: ThomasRochefortB Date: Mon, 30 Dec 2024 15:09:41 -0500 Subject: [PATCH 3/5] Fixed the mypy type error --- paperqa/llms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paperqa/llms.py b/paperqa/llms.py index ca1b97b22..40155f5bf 100644 --- a/paperqa/llms.py +++ b/paperqa/llms.py @@ -335,6 +335,7 @@ async def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: texts_list = list(texts) if texts_list and not self.client.collection_exists(self.collection_name): + assert texts_list[0].embedding is not None # This helps mypy understand the type params = models.VectorParams( size=len(texts_list[0].embedding), distance=models.Distance.COSINE ) From 8ad6849b255d4fadc9b991e64a3cbf8aabc082d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:20:15 +0000 Subject: [PATCH 4/5] [pre-commit.ci lite] apply automatic fixes --- paperqa/llms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paperqa/llms.py b/paperqa/llms.py index 40155f5bf..9cfe693ec 100644 --- a/paperqa/llms.py +++ b/paperqa/llms.py @@ -335,7 +335,9 @@ async def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: texts_list = list(texts) if texts_list and not self.client.collection_exists(self.collection_name): - assert texts_list[0].embedding is not None # This helps mypy understand the type + assert ( + texts_list[0].embedding is not None + ) # This helps mypy understand the type params = models.VectorParams( size=len(texts_list[0].embedding), distance=models.Distance.COSINE ) From 4a4ee575c036149f8aa2497d26bc510292c292c3 Mon Sep 17 00:00:00 2001 From: James Braza Date: Mon, 30 Dec 2024 17:58:59 -0500 Subject: [PATCH 5/5] Revert "Fixed the mypy type error" This reverts commit 80d877bfd4ee6b020813b73cf33e92ab620bd46c. --- paperqa/llms.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/paperqa/llms.py b/paperqa/llms.py index 9cfe693ec..ca1b97b22 100644 --- a/paperqa/llms.py +++ b/paperqa/llms.py @@ -335,9 +335,6 @@ async def add_texts_and_embeddings(self, texts: Iterable[Embeddable]) -> None: texts_list = list(texts) if texts_list and not self.client.collection_exists(self.collection_name): - assert ( - texts_list[0].embedding is not None - ) # This helps mypy understand the type params = models.VectorParams( size=len(texts_list[0].embedding), distance=models.Distance.COSINE )