From 950b8ca1a3e986047e2b2cae725ced68a20e8873 Mon Sep 17 00:00:00 2001 From: Oleg Kostromin Date: Tue, 15 Aug 2023 16:50:16 +0200 Subject: [PATCH] Make annoy an optional dependency --- README.md | 6 ++++++ pyproject.toml | 4 ++-- skllm/memory/_annoy.py | 10 +++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7b08ffe..f9123e5 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,12 @@ Note: as the model is not being re-trained, but uses the training data during in ### Dynamic Few-Shot Text Classification +*To use this feature, you need to install the `annoy` library:* + +```bash +pip install scikit-llm[annoy] +``` + `DynamicFewShotGPTClassifier` dynamically selects N samples per class to include in the prompt. This allows the few-shot classifier to scale to datasets that are too large for the standard context window of LLMs. *How does it work?* diff --git a/pyproject.toml b/pyproject.toml index 033cfbd..eeb262c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,11 +8,10 @@ dependencies = [ "pandas>=1.5.0", "openai>=0.27.0", "tqdm>=4.60.0", - "annoy>=1.17.2", "google-cloud-aiplatform>=1.27.0" ] name = "scikit-llm" -version = "0.3.3" +version = "0.3.4" authors = [ { name="Oleg Kostromin", email="kostromin97@gmail.com" }, { name="Iryna Kondrashchenko", email="iryna230520@gmail.com" }, @@ -29,6 +28,7 @@ classifiers = [ [project.optional-dependencies] gpt4all = ["gpt4all>=1.0.0"] +annoy = ["annoy>=1.17.2"] [tool.ruff] select = [ diff --git a/skllm/memory/_annoy.py b/skllm/memory/_annoy.py index 0d63e07..c6820d4 100644 --- a/skllm/memory/_annoy.py +++ b/skllm/memory/_annoy.py @@ -2,7 +2,11 @@ import tempfile from typing import Any, List -from annoy import AnnoyIndex +try: + from annoy import AnnoyIndex +except (ImportError, ModuleNotFoundError): + AnnoyIndex = None + from numpy import ndarray from skllm.memory.base import _BaseMemoryIndex @@ -20,6 +24,10 @@ class AnnoyMemoryIndex(_BaseMemoryIndex): """ def __init__(self, 
dim: int, metric: str = "euclidean", **kwargs: Any) -> None: + if AnnoyIndex is None: + raise ImportError( + "Annoy is not installed. Please install annoy by running `pip install scikit-llm[annoy]`." + ) self._index = AnnoyIndex(dim, metric) self.metric = metric self.dim = dim