Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Used the latest version of `lightfm`, which can be installed using `poetry>=1.5.0` ([#141](https://github.com/MobileTeleSystems/RecTools/pull/141))
- Added a restriction on the `pytorch` version for MacOSX + x86_64 so that it can be installed on such platforms ([#142](https://github.com/MobileTeleSystems/RecTools/pull/142))
- `PopularInCategoryModel`: fitting multiple times, `cross_validate` compatibility, and behaviour with empty category interactions ([#163](https://github.com/MobileTeleSystems/RecTools/pull/163))


## [0.6.0] - 13.05.2024
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
<a href="https://rectools.readthedocs.io/en/stable/">Documentation</a> |
<a href="https://github.com/MobileTeleSystems/RecTools/tree/main/examples">Examples</a> |
<a href="https://github.com/MobileTeleSystems/RecTools/tree/main/examples/tutorials">Tutorials</a> |
<a href="https://github.com/MobileTeleSystems/RecTools/blob/main/CONTRIBUTING.rst">Contribution Guide</a> |
<a href="https://github.com/MobileTeleSystems/RecTools/releases">Release Notes</a>
<a href="https://github.com/MobileTeleSystems/RecTools/blob/main/CONTRIBUTING.rst">Contributing</a> |
<a href="https://github.com/MobileTeleSystems/RecTools/releases">Releases</a> |
<a href="https://github.com/orgs/MobileTeleSystems/projects/1">Developers Board</a>
</p>

RecTools is an easy-to-use Python library which makes the process of building recommendation systems easier,
Expand Down
19 changes: 15 additions & 4 deletions rectools/models/popular_in_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,19 @@ def _check_category_feature(self, dataset: Dataset) -> None:

def _calc_category_scores(self, dataset: Dataset, interactions: pd.DataFrame) -> None:
scores_dict = {}
empty_columns = []
for column_num in self.category_columns:
item_idx = dataset.item_features.values.getcol(column_num).nonzero()[0] # type: ignore
self.category_interactions[column_num] = interactions[interactions[Columns.Item].isin(item_idx)].copy()
category_interactions = interactions[interactions[Columns.Item].isin(item_idx)]
# Category interactions might be empty
if self.category_interactions[column_num].shape[0] == 0:
self.category_columns.remove(column_num)
if category_interactions.shape[0] == 0:
empty_columns.append(column_num)
else:
self.category_interactions[column_num] = category_interactions.copy()
col, func = self._get_groupby_col_and_agg_func(self.popularity)
scores_dict[column_num] = self.category_interactions[column_num][col].apply(func)

self.category_columns = [col for col in self.category_columns if col not in empty_columns]
self.category_scores = pd.Series(scores_dict).sort_values(ascending=False)

def _define_categories_for_analysis(self) -> None:
Expand All @@ -177,7 +181,7 @@ def _define_categories_for_analysis(self) -> None:
self.n_effective_categories = self.n_categories
relevant_categories = self.category_scores.head(self.n_categories).index
self.category_scores = self.category_scores.loc[relevant_categories]
self.category_columns = relevant_categories
self.category_columns = relevant_categories.to_list()
else:
self.n_effective_categories = len(self.category_columns)
warnings.warn(
Expand All @@ -188,6 +192,13 @@ def _define_categories_for_analysis(self) -> None:
self.n_effective_categories = len(self.category_columns)

def _fit(self, dataset: Dataset) -> None: # type: ignore

self.category_columns = []
self.category_interactions = {}
self.models = {}
self.category_scores = pd.Series()
self.n_effective_categories = 0

self._check_category_feature(dataset)
interactions = self._filter_interactions(dataset.interactions.df)
self._calc_category_scores(dataset, interactions)
Expand Down
8 changes: 6 additions & 2 deletions tests/model_selection/test_cross_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from rectools.metrics.base import MetricAtK
from rectools.model_selection import LastNSplitter, cross_validate
from rectools.model_selection.cross_validate import _gen_2x_internal_ids_dataset
from rectools.models import ImplicitALSWrapperModel, PopularModel, RandomModel
from rectools.models import ImplicitALSWrapperModel, PopularInCategoryModel, PopularModel, RandomModel
from rectools.models.base import ModelBase
from tests.testing_utils import assert_sparse_matrix_equal

Expand Down Expand Up @@ -146,6 +146,7 @@ def setup_method(self) -> None:
[14, "f2", 1],
[11, "f1", "y"],
[11, "f2", 2],
[12, "f1", "y"],
],
columns=["id", "feature", "value"],
)
Expand Down Expand Up @@ -247,6 +248,7 @@ def test_happy_path_with_features(self, prefer_warm_inference_over_cold: bool) -

models: tp.Dict[str, ModelBase] = {
"als": ImplicitALSWrapperModel(AlternatingLeastSquares(factors=2, iterations=2, random_state=42)),
"pop_in_cat": PopularInCategoryModel(category_feature="f1", n_categories=2),
}

actual = cross_validate(
Expand Down Expand Up @@ -282,7 +284,9 @@ def test_happy_path_with_features(self, prefer_warm_inference_over_cold: bool) -
],
"metrics": [
{"model": "als", "i_split": 0, "precision@2": 0.5, "recall@1": 0.0},
{"model": "als", "i_split": 1, "precision@2": 0.375, "recall@1": 0.25},
{"model": "pop_in_cat", "i_split": 0, "precision@2": 0.5, "recall@1": 0.5},
{"model": "als", "i_split": 1, "precision@2": 0.375, "recall@1": 0.0},
{"model": "pop_in_cat", "i_split": 1, "precision@2": 0.375, "recall@1": 0.25},
],
}

Expand Down
24 changes: 19 additions & 5 deletions tests/models/test_popular_in_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,11 +422,25 @@ def test_i2i(
actual,
)

def test_second_fit_refits_model(self, dataset: Dataset) -> None:
@pytest.mark.parametrize("popularity", ("mean_weight", "n_users", "n_interactions"))
@pytest.mark.parametrize("category_feature", ("f1", "f2"))
@pytest.mark.parametrize("mixing_strategy", ("group", "rotate"))
@pytest.mark.parametrize("ratio_strategy", ("equal", "proportional"))
@pytest.mark.parametrize("n_categories", (2, None))
def test_second_fit_refits_model(
self,
dataset: Dataset,
popularity: str,
category_feature: str,
mixing_strategy: str,
ratio_strategy: str,
n_categories: tp.Optional[int],
) -> None:
model = PopularInCategoryModel(
category_feature="f2",
popularity="mean_weight",
mixing_strategy="group",
ratio_strategy="proportional",
category_feature=category_feature,
popularity=popularity,
mixing_strategy=mixing_strategy,
ratio_strategy=ratio_strategy,
n_categories=n_categories,
)
assert_second_fit_refits_model(model, dataset)