Skip to content

Commit

Permalink
Add Annoy to the list of supported ANN frameworks (#556)
Browse files Browse the repository at this point in the history
  • Loading branch information
tqtg committed Dec 3, 2023
1 parent c4f32da commit b25ceb4
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 34 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ One important aspect of deploying recommender model is efficient retrieval via A

| Supported framework | Cornac wrapper | Examples |
| :---: | :---: | :---: |
| [spotify/annoy](https://github.com/spotify/annoy) | [AnnoyANN](cornac/models/ann/recom_ann_annoy.py) | [ann_all.ipynb](examples/ann_all.ipynb) |
| [meta/faiss](https://github.com/facebookresearch/faiss) | [FaissANN](cornac/models/ann/recom_ann_faiss.py) | [ann_all.ipynb](examples/ann_all.ipynb) |
| [nmslib/hnswlib](https://github.com/nmslib/hnswlib) | [HNSWLibANN](cornac/models/ann/recom_ann_hnswlib.py) | [ann_hnswlib.ipynb](tutorials/ann_hnswlib.ipynb), [ann_all.ipynb](examples/ann_all.ipynb) |
| [google/scann](https://github.com/google-research/google-research/tree/master/scann) | [ScaNNANN](cornac/models/ann/recom_ann_scann.py) | [ann_all.ipynb](examples/ann_all.ipynb) |
Expand Down
1 change: 1 addition & 0 deletions cornac/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .recommender import NextBasketRecommender

from .amr import AMR
from .ann import AnnoyANN
from .ann import FaissANN
from .ann import HNSWLibANN
from .ann import ScaNNANN
Expand Down
1 change: 1 addition & 0 deletions cornac/models/ann/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .recom_ann_annoy import AnnoyANN
from .recom_ann_faiss import FaissANN
from .recom_ann_hnswlib import HNSWLibANN
from .recom_ann_scann import ScaNNANN
134 changes: 134 additions & 0 deletions cornac/models/ann/recom_ann_annoy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Copyright 2023 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================


import numpy as np

from ..recommender import MEASURE_L2, MEASURE_DOT, MEASURE_COSINE
from .recom_ann_base import BaseANN


# Maps the recommender's similarity-measure constants to the metric name
# string that is passed as the second argument to `AnnoyIndex(...)`.
# A measure that is not a key of this dict is rejected by
# `AnnoyANN.build_index`.
SUPPORTED_MEASURES = {
MEASURE_L2: "euclidean",
MEASURE_DOT: "dot",
MEASURE_COSINE: "angular",
}


class AnnoyANN(BaseANN):
    """Approximate Nearest Neighbor Search with Annoy (https://github.com/spotify/annoy).

    Parameters
    ----------
    model: object: :obj:`cornac.models.Recommender`, required
        Trained recommender model which to get user/item vectors from.

    n_trees: int, default: 100
        The number of trees used to build index. It affects the build time and the
        index size. A larger value will give more accurate results, but larger indexes.

    search_k: int, default: 50
        Parameter controls the search performance and runtime. A larger value will
        give more accurate results, but will take longer time to return.

    num_threads: int, optional, default: -1
        Default number of threads used for building index. If num_threads = -1,
        all cores will be used.

    seed: int, optional, default: None
        Random seed for reproducibility. When None, no seed is set on the
        Annoy index.

    name: str, required
        Name of the recommender model.

    verbose: boolean, optional, default: False
        When True, running logs are displayed.
    """

    def __init__(
        self,
        model,
        n_trees=100,
        search_k=50,
        num_threads=-1,
        seed=None,
        name="AnnoyANN",
        verbose=False,
    ):
        super().__init__(model=model, name=name, verbose=verbose)

        self.model = model
        self.n_trees = n_trees
        self.search_k = search_k
        self.num_threads = num_threads
        self.seed = seed

        # The index is created lazily by `build_index` (or restored by `load`).
        self.index = None
        self.ignored_attrs.extend(
            [
                "index",  # will be saved separately
                "item_vectors",  # redundant after index is built
            ]
        )

    def build_index(self):
        """Building index from the base recommender model.

        Creates an `AnnoyIndex` sized to the item-vector dimensionality, adds
        every row of `self.item_vectors` under its row position as the item id,
        and builds `self.n_trees` trees using `self.num_threads` jobs.

        Raises
        ------
        AssertionError
            If `self.measure` is not one of `SUPPORTED_MEASURES`.
        """
        # Imported here so that `annoy` is only required when this wrapper is used.
        from annoy import AnnoyIndex

        assert self.measure in SUPPORTED_MEASURES

        self.index = AnnoyIndex(
            self.item_vectors.shape[1], SUPPORTED_MEASURES[self.measure]
        )

        # `set_seed` requires an integer; the constructor default is None, so
        # only seed the index when the caller actually supplied a seed.
        if self.seed is not None:
            self.index.set_seed(self.seed)

        for i, v in enumerate(self.item_vectors):
            self.index.add_item(i, v)

        self.index.build(self.n_trees, n_jobs=self.num_threads)

    def knn_query(self, query, k):
        """Implementing ANN search for a given query.

        Parameters
        ----------
        query: iterable of vectors
            Each element is searched independently with
            `AnnoyIndex.get_nns_by_vector`.

        k: int
            Number of neighbors requested per query vector.

        Returns
        -------
        neighbors, distances: numpy.array and numpy.array
            Array of k-nearest neighbors and corresponding distances for the given query.
        """
        # Each result is an (ids, distances) pair because include_distances=True.
        result = [
            self.index.get_nns_by_vector(
                q, k, search_k=self.search_k, include_distances=True
            )
            for q in query
        ]
        neighbors = np.array([r[0] for r in result], dtype="int")
        distances = np.array([r[1] for r in result], dtype="float32")
        return neighbors, distances

    def save(self, save_dir=None):
        """Save the model via the parent class and the Annoy index next to it.

        The index is written to `<saved_path>.index`, where `saved_path` is the
        value returned by the parent `save`.

        Returns
        -------
        saved_path
            The value returned by the parent `save(save_dir)`.
        """
        saved_path = super().save(save_dir)
        self.index.save(saved_path + ".index")
        return saved_path

    @staticmethod
    def load(model_path, trainable=False):
        """Load a saved model with `BaseANN.load` and re-attach its Annoy index.

        The index file is read from `<ann.load_from>.index`.

        Returns
        -------
        ann
            The object returned by `BaseANN.load`, with `index` restored.
        """
        from annoy import AnnoyIndex

        ann = BaseANN.load(model_path, trainable)
        # `item_vectors` is listed in `ignored_attrs`, so the dimensionality is
        # taken from `user_vectors` instead (presumably the same width as the
        # item vectors the index was built from -- TODO confirm).
        ann.index = AnnoyIndex(
            ann.user_vectors.shape[1], SUPPORTED_MEASURES[ann.measure]
        )
        ann.index.load(ann.load_from + ".index")
        return ann

0 comments on commit b25ceb4

Please sign in to comment.