From fc7ea207efa2a78899053219a8ab4d24967435bb Mon Sep 17 00:00:00 2001 From: Vadim Vetrov Date: Fri, 1 Nov 2024 21:04:39 +0300 Subject: [PATCH] GPU support for ImplicitRanker Implement use_gpu argument to ImplicitRanker.rank method. Adapt tests to it. --- CHANGELOG.md | 1 + rectools/models/rank.py | 87 +++++++++++++++++++++++++++++++++------ tests/models/test_rank.py | 63 +++++++++++++++++----------- 3 files changed, 114 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 546a7706..40c42e29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Optional `epochs` argument to `ImplicitALSWrapperModel.fit` method ([#203](https://github.com/MobileTeleSystems/RecTools/pull/203)) - `save` and `load` methods to all of the models ([#206](https://github.com/MobileTeleSystems/RecTools/pull/206)) - Model configs example ([#207](https://github.com/MobileTeleSystems/RecTools/pull/207)) +- `use_gpu` argument to `ImplicitRanker.rank` method ([#201](https://github.com/MobileTeleSystems/RecTools/pull/201)) ## [0.8.0] - 28.08.2024 diff --git a/rectools/models/rank.py b/rectools/models/rank.py index a8ce6549..fabc389d 100644 --- a/rectools/models/rank.py +++ b/rectools/models/rank.py @@ -15,11 +15,14 @@ """Implicit ranker model.""" import typing as tp +import warnings from enum import Enum import implicit.cpu +import implicit.gpu import numpy as np from implicit.cpu.matrix_factorization_base import _filter_items_from_sparse_matrix as filter_items_from_sparse_matrix +from implicit.gpu import HAS_CUDA from scipy import sparse from rectools import InternalIds @@ -74,8 +77,14 @@ def __init__( self.subjects_dots = self._calc_dots(self.subjects_factors) def _get_neginf_score(self) -> float: - # Adding 1 to avoid float calculation errors (we're comparing `scores <= neginf_score`) - return float(-np.finfo(np.float32).max + 1) + # neginf_score computed according to implicit gpu FLT_FILTER_DISTANCE + # https://github.com/benfred/implicit/blob/main/implicit/gpu/knn.cu#L36 + # we're comparing `scores <= neginf_score` + return float( + np.asarray( + np.asarray(-np.finfo(np.float32).max, dtype=np.float32).view(np.uint32) - 1, dtype=np.uint32 + ).view(np.float32) + ) @staticmethod def _calc_dots(factors: np.ndarray) -> np.ndarray: @@ -132,13 +141,50 @@ def _process_implicit_scores( return all_target_ids, np.concatenate(all_reco_ids), np.concatenate(all_scores) - def rank( + def _rank_on_gpu( + self, + object_factors: np.ndarray, + subject_factors: tp.Union[np.ndarray, sparse.csr_matrix], + k: int, + object_norms: tp.Optional[np.ndarray], + filter_query_items: tp.Optional[tp.Union[sparse.csr_matrix, sparse.csr_array]], + ) -> tp.Tuple[np.ndarray, np.ndarray]: # pragma: no cover + object_factors = implicit.gpu.Matrix(object_factors.astype(np.float32)) + + if isinstance(subject_factors, sparse.spmatrix): + warnings.warn("Sparse subject factors converted to Dense matrix") + subject_factors = subject_factors.todense() + + subject_factors = implicit.gpu.Matrix(subject_factors.astype(np.float32)) + + if object_norms is not None: + if len(np.shape(object_norms)) == 1: + object_norms = np.expand_dims(object_norms, axis=0) + object_norms = implicit.gpu.Matrix(object_norms) + + if filter_query_items is not None: + filter_query_items = implicit.gpu.COOMatrix(filter_query_items.tocoo()) + + ids, scores = implicit.gpu.KnnQuery().topk( # pylint: disable=c-extension-no-member + items=object_factors, + m=subject_factors, + k=k, + item_norms=object_norms, + query_filter=filter_query_items, + item_filter=None, + ) + + scores = scores.astype(np.float64) + return ids, scores + + def rank( # pylint: disable=too-many-branches self, subject_ids: InternalIds, k: int, filter_pairs_csr: tp.Optional[sparse.csr_matrix] = None, sorted_object_whitelist: tp.Optional[InternalIdsArray] = None, num_threads: int = 0, + use_gpu: bool = False, ) -> tp.Tuple[InternalIds, InternalIds, Scores]: """Rank objects to proceed inference using implicit library topk cpu method. @@ -156,7 +202,9 @@ def rank( If given, only these items will be used for recommendations. Otherwise all items from dataset will be used. num_threads : int, default 0 - Will be used as `num_threads` parameter for `implicit.cpu.topk.topk`. + Will be used as `num_threads` parameter for `implicit.cpu.topk.topk`. Omitted if use_gpu is True + use_gpu : bool, default False + If True `implicit.gpu.KnnQuery().topk` will be used instead of classic cpu version. Returns ------- @@ -191,15 +239,28 @@ def rank( real_k = min(k, object_factors.shape[0]) - ids, scores = implicit.cpu.topk.topk( # pylint: disable=c-extension-no-member - items=object_factors, - query=subject_factors, - k=real_k, - item_norms=object_norms, # query norms for COSINE distance are applied afterwards - filter_query_items=filter_query_items, # queries x objects csr matrix for getting neginf scores - filter_items=None, # rectools doesn't support blacklist for now - num_threads=num_threads, - ) + if use_gpu and not HAS_CUDA: + warnings.warn("Forced rank() on CPU") + use_gpu = False + + if use_gpu: # pragma: no cover + ids, scores = self._rank_on_gpu( + object_factors=object_factors, + subject_factors=subject_factors, + k=real_k, + object_norms=object_norms, + filter_query_items=filter_query_items, + ) + else: + ids, scores = implicit.cpu.topk.topk( # pylint: disable=c-extension-no-member + items=object_factors, + query=subject_factors, + k=real_k, + item_norms=object_norms, # query norms for COSINE distance are applied afterwards + filter_query_items=filter_query_items, # queries x objects csr matrix for getting neginf scores + filter_items=None, # rectools doesn't support blacklist for now + num_threads=num_threads, + ) if sorted_object_whitelist is not None: ids = sorted_object_whitelist[ids] diff --git a/tests/models/test_rank.py b/tests/models/test_rank.py index 9bc4da37..56f86af7 100644 --- a/tests/models/test_rank.py +++ b/tests/models/test_rank.py @@ -29,15 +29,15 @@ class TestImplicitRanker: # pylint: disable=protected-access @pytest.fixture def subject_factors(self) -> np.ndarray: - return np.array([[-4, 0, 3], [0, 0, 0]]) + return np.array([[-4, 0, 3], [0, 1, 2]]) @pytest.fixture def object_factors(self) -> np.ndarray: return np.array( [ [-4, 0, 3], - [0, 0, 0], - [1, 1, 1], + [0, 2, 4], + [1, 10, 100], ] ) @@ -60,7 +60,7 @@ def test_neginf_score(self, subject_factors: np.ndarray, object_factors: np.ndar k=1, filter_items=np.array([0]), )[1][0][0] - assert neginf == implicit_ranker._get_neginf_score() + assert neginf <= implicit_ranker._get_neginf_score() <= -1e38 @pytest.mark.parametrize( "dense", @@ -94,12 +94,18 @@ def test_mask_for_correct_scores( @pytest.mark.parametrize( "distance, expected_recs, expected_scores, dense", ( - (Distance.DOT, [0, 1, 2, 2, 1, 0], [25, 0, -1, 0, 0, 0], True), - (Distance.COSINE, [0, 1, 2, 2, 1, 0], [1, 0, -1 / (5 * 3**0.5), 0, 0, 0], True), - (Distance.EUCLIDEAN, [0, 1, 2, 1, 2, 0], [0, 5, 30**0.5, 0, 3**0.5, 5], True), - (Distance.DOT, [0, 1, 2, 2, 1, 0], [25, 0, -1, 0, 0, 0], False), + (Distance.DOT, [2, 0, 1, 2, 1, 0], [296, 25, 12, 210, 10, 6], True), + (Distance.COSINE, [0, 2, 1, 1, 2, 0], [1, 0.5890328, 0.5366563, 1, 0.9344414, 0.5366563], True), + ( + Distance.EUCLIDEAN, + [0, 1, 2, 1, 0, 2], + [0, 4.58257569, 97.64220399, 2.23606798, 4.24264069, 98.41747812], + True, + ), + (Distance.DOT, [2, 0, 1, 2, 1, 0], [296, 25, 12, 210, 10, 6], False), ), ) + @pytest.mark.parametrize("use_gpu", (False, True)) def test_rank( self, distance: Distance, @@ -108,24 +114,26 @@ def test_rank( subject_factors: np.ndarray, object_factors: np.ndarray, dense: bool, + use_gpu: bool, ) -> None: if not dense: subject_factors = sparse.csr_matrix(subject_factors) ranker = ImplicitRanker(distance, subject_factors, object_factors) - _, actual_recs, actual_scores = ranker.rank(subject_ids=[0, 1], k=3) + _, actual_recs, actual_scores = ranker.rank(subject_ids=[0, 1], k=3, use_gpu=use_gpu) np.testing.assert_equal(actual_recs, expected_recs) np.testing.assert_almost_equal(actual_scores, expected_scores) @pytest.mark.parametrize( "distance, expected_recs, expected_scores, dense", ( - (Distance.DOT, [0, 2, 2, 1, 0], [25, -1, 0, 0, 0], True), - (Distance.COSINE, [0, 2, 2, 1, 0], [1, -1 / (5 * 3**0.5), 0, 0, 0], True), - (Distance.EUCLIDEAN, [0, 2, 1, 2, 0], [0, 30**0.5, 0, 3**0.5, 5], True), - (Distance.DOT, [0, 2, 2, 1, 0], [25, -1, 0, 0, 0], False), + (Distance.DOT, [2, 0, 2, 1, 0], [296, 25, 210, 10, 6], True), + (Distance.COSINE, [0, 2, 1, 2, 0], [1, 0.5890328, 1, 0.9344414, 0.5366563], True), + (Distance.EUCLIDEAN, [0, 2, 1, 0, 2], [0, 97.64220399, 2.23606798, 4.24264069, 98.41747812], True), + (Distance.DOT, [2, 0, 2, 1, 0], [296, 25, 210, 10, 6], False), ), ) + @pytest.mark.parametrize("use_gpu", (False, True)) def test_rank_with_filtering_viewed_items( self, distance: Distance, @@ -134,6 +142,7 @@ def test_rank_with_filtering_viewed_items( subject_factors: np.ndarray, object_factors: np.ndarray, dense: bool, + use_gpu: bool, ) -> None: if not dense: subject_factors = sparse.csr_matrix(subject_factors) @@ -145,19 +154,20 @@ def test_rank_with_filtering_viewed_items( ] ) ranker = ImplicitRanker(distance, subject_factors, object_factors) - _, actual_recs, actual_scores = ranker.rank(subject_ids=[0, 1], k=3, filter_pairs_csr=ui_csr) + _, actual_recs, actual_scores = ranker.rank(subject_ids=[0, 1], k=3, filter_pairs_csr=ui_csr, use_gpu=use_gpu) np.testing.assert_equal(actual_recs, expected_recs) np.testing.assert_almost_equal(actual_scores, expected_scores) @pytest.mark.parametrize( "distance, expected_recs, expected_scores, dense", ( - (Distance.DOT, [0, 2, 2, 0], [25, -1, 0, 0], True), - (Distance.COSINE, [0, 2, 2, 0], [1, -1 / (5 * 3**0.5), 0, 0], True), - (Distance.EUCLIDEAN, [0, 2, 2, 0], [0, 30**0.5, 3**0.5, 5], True), - (Distance.DOT, [0, 2, 2, 0], [25, -1, 0, 0], False), + (Distance.DOT, [2, 0, 2, 0], [296, 25, 210, 6], True), + (Distance.COSINE, [0, 2, 2, 0], [1, 0.5890328, 0.9344414, 0.5366563], True), + (Distance.EUCLIDEAN, [0, 2, 0, 2], [0, 97.64220399, 4.24264069, 98.41747812], True), + (Distance.DOT, [2, 0, 2, 0], [296, 25, 210, 6], False), ), ) + @pytest.mark.parametrize("use_gpu", (False, True)) def test_rank_with_objects_whitelist( self, distance: Distance, @@ -166,25 +176,29 @@ def test_rank_with_objects_whitelist( subject_factors: np.ndarray, object_factors: np.ndarray, dense: bool, + use_gpu: bool, ) -> None: if not dense: subject_factors = sparse.csr_matrix(subject_factors) ranker = ImplicitRanker(distance, subject_factors, object_factors) - _, actual_recs, actual_scores = ranker.rank(subject_ids=[0, 1], k=3, sorted_object_whitelist=np.array([0, 2])) + _, actual_recs, actual_scores = ranker.rank( + subject_ids=[0, 1], k=3, sorted_object_whitelist=np.array([0, 2]), use_gpu=use_gpu + ) np.testing.assert_equal(actual_recs, expected_recs) np.testing.assert_almost_equal(actual_scores, expected_scores) @pytest.mark.parametrize( "distance, expected_recs, expected_scores, dense", ( - (Distance.DOT, [2, 2, 0], [-1, 0, 0], True), - (Distance.COSINE, [2, 2, 0], [-1 / (5 * 3**0.5), 0, 0], True), - (Distance.EUCLIDEAN, [2, 2, 0], [30**0.5, 3**0.5, 5], True), - (Distance.DOT, [2, 2, 0], [-1, 0, 0], False), + (Distance.DOT, [2, 2, 0], [296, 210, 6], True), + (Distance.COSINE, [2, 2, 0], [0.5890328, 0.9344414, 0.5366563], True), + (Distance.EUCLIDEAN, [2, 0, 2], [97.64220399, 4.24264069, 98.41747812], True), + (Distance.DOT, [2, 2, 0], [296, 210, 6], False), ), ) + @pytest.mark.parametrize("use_gpu", (False, True)) def test_rank_with_objects_whitelist_and_filtering_viewed_items( self, distance: Distance, @@ -193,6 +207,7 @@ def test_rank_with_objects_whitelist_and_filtering_viewed_items( subject_factors: np.ndarray, object_factors: np.ndarray, dense: bool, + use_gpu: bool, ) -> None: if not dense: subject_factors = sparse.csr_matrix(subject_factors) @@ -205,7 +220,7 @@ def test_rank_with_objects_whitelist_and_filtering_viewed_items( ) ranker = ImplicitRanker(distance, subject_factors, object_factors) _, actual_recs, actual_scores = ranker.rank( - subject_ids=[0, 1], k=3, sorted_object_whitelist=np.array([0, 2]), filter_pairs_csr=ui_csr + subject_ids=[0, 1], k=3, sorted_object_whitelist=np.array([0, 2]), filter_pairs_csr=ui_csr, use_gpu=use_gpu ) np.testing.assert_equal(actual_recs, expected_recs) np.testing.assert_almost_equal(actual_scores, expected_scores)