# Example of building custom model with ModelBase class

- Building custom model
- Visual recommendations checking

In [40]:
import numpy as np
import pandas as pd
from rectools.models.base import ModelBase, ModelConfig, Scores, ScoresArray
from rectools import Columns
from rectools.dataset import Dataset
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import typing as tp
import typing_extensions as tpe
from rectools.models.base import InternalIdsArray
from rectools.types import *
from tqdm import tqdm
from rectools.utils import fast_isin_for_sorted_test_elements
from rectools.models.utils import get_viewed_item_ids
from rectools.visuals.visual_app import ItemToItemVisualApp, VisualApp
import random

## Load data

In [2]:
%%time
!wget -q https://files.grouplens.org/datasets/movielens/ml-1m.zip -O ml-1m.zip
!unzip -o ml-1m.zip
!rm ml-1m.zip

Archive:  ml-1m.zip
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         
CPU times: user 27.2 ms, sys: 23 ms, total: 50.2 ms
Wall time: 3.21 s


In [3]:
%%time
ratings = pd.read_csv(
    "ml-1m/ratings.dat", 
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],
)
print(ratings.shape)
ratings.head()

(1000209, 4)
CPU times: user 2.04 s, sys: 93.3 ms, total: 2.13 s
Wall time: 2.13 s


Unnamed: 0,user_id,item_id,weight,datetime
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [4]:
%%time
movies = pd.read_csv(
    "ml-1m/movies.dat", 
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.Item, "title", "genres"],
    encoding_errors="ignore",
)
print(movies.shape)
movies.head()

(3883, 3)
CPU times: user 4.71 ms, sys: 792 μs, total: 5.5 ms
Wall time: 4.99 ms


Unnamed: 0,item_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


## Build model

### Write a model config inherited from `ModelConfig`

In [5]:
class MixedKnnRandomModelConfig(ModelConfig):
    """Config for `KNN` model."""

    # KNN algorithm hyperparams
    metric: tp.Optional[str] = None
    algorithm: tp.Optional[str] = None
    n_neighbors: tp.Optional[int] = None
    n_jobs: tp.Optional[int] = None
    random_state: tp.Optional[int] = None

### Define a `_RandomSampler` and `_RandomGen` class for random recommendations

In [6]:
class _RandomGen:
    def __init__(self, random_state: tp.Optional[int] = None) -> None:
        self.python_gen = random.Random(random_state)  # nosec
        self.np_gen = np.random.default_rng(random_state)

In [7]:
class _RandomSampler:
    def __init__(self, values: np.ndarray, random_gen: _RandomGen) -> None:
        self.python_gen = random_gen.python_gen
        self.np_gen = random_gen.np_gen
        self.values = values
        self.values_list = list(values)  # for random.sample

    def sample(self, n: int) -> np.ndarray:
        if n < 25:  # Empiric value, for optimization
            sampled = np.asarray(self.python_gen.sample(self.values_list, n))
        else:
            sampled = self.np_gen.choice(self.values, n, replace=False)
        return sampled


### Write a model logic in class inherited from `ModelBase`

In [28]:
class MixedKnnRandomModel(ModelBase[MixedKnnRandomModelConfig]):
    # There is a sample recsys model inherited from ModelBase, model is mixed KNN wrapper (i2i) and random model (u2i)
    # You can mix other models as well.
    # Define is able to make cold and warm recommendations
    # Set config class to defined above
    recommends_for_warm = False
    recommends_for_cold = False
    config_class = MixedKnnRandomModelConfig

    # Set all hyperparams in __init__
    def __init__(self,
                 metric: tp.Optional[str] = None,
                 algorithm: tp.Optional[str] = None,
                 n_neighbors: tp.Optional[int] = None,
                 n_jobs: tp.Optional[int] = None,
                 random_state: tp.Optional[int] = None,
                 verbose: int = 0):
        super().__init__(verbose=verbose)
        self.metric = metric
        self.algorithm = algorithm
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs
        self.knn_model = NearestNeighbors(metric = self.metric,
                       algorithm = self.algorithm,
                       n_neighbors = self.n_neighbors,
                       n_jobs = self.n_jobs)
        self.random_state = random_state
        self.random_gen = _RandomGen(random_state)
        self.all_item_ids: np.ndarray
        self.ui_csr: csr_matrix

    # Method used to save hyperparams in config
    def _get_config(self) -> MixedKnnRandomModelConfig:
        return MixedKnnRandomModelConfig(metric=self.metric, algorithm=self.algorithm, n_neighbors=self.n_neighbors, random_state=self.random_state, verbose=self.verbose)

    # Method used to load model params from config
    @classmethod
    def _from_config(cls, config: MixedKnnRandomModelConfig) -> tpe.Self:
        return cls(metric=config.metric, algorithm=config.algorithm, n_neighbors=config.n_neighbors, random_state=config.random_state, verbose=config.verbose)

    # Method used to fit model, there is a sklearn KNN wrapper, so we need to fit KNN model with dataset csr matrix
    def _fit(self, dataset: Dataset) -> None:  # type: ignore
        self.all_item_ids = dataset.item_id_map.internal_ids
        self.ui_csr = dataset.get_user_item_matrix(include_weights=False, dtype=np.float64)
        self.knn_model.fit(self.ui_csr)

    # Method used to make item-item recommendations, not for direct invokation, used in recommend_to_items method of base class
    # Params:
    # target_ids - InternalIdsArray of item ids for which predictions need to be made
    # dataset - instance of Dataset class
    # k - maximum count of top rated elements presented in recommendations
    # sorted_item_ids_to_recommend - optional InternalIdsArray of item ids from which predictions are made
    # Returns:
    # Equaly sized arrays of target ids, predictions ids, scores
    # in this method you need to ensure, that your realization handles all parameters correctly i.e. 
    # it can limit k predictions and limit the set of allowed items.
    def _recommend_i2i(self,
                       target_ids: InternalIdsArray,
                       dataset: Dataset,
                       k: int,
                       sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray]) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        sorted_item_ids_to_recommend = dataset.get_user_item_matrix(include_weights=False,
                                                                    dtype=np.float64)[sorted_item_ids_to_recommend] if sorted_item_ids_to_recommend is not None else self.all_item_ids

        all_target_ids = []
        all_reco_ids: tp.List[np.ndarray] = []
        all_scores: tp.List[np.ndarray] = []
        for target_id in tqdm(target_ids, disable=self.verbose == 0):
            reco_scores, reco_ids = self.knn_model.kneighbors(self.ui_csr[target_id], n_neighbors = k + 1)
            all_target_ids.extend([target_id] * len(reco_ids.tolist()[0]))
            all_reco_ids.extend(reco_ids.tolist())
            all_scores.extend(reco_scores.tolist())

        all_target_ids = np.array(all_target_ids) 
        all_reco_ids_arr = np.concatenate(all_reco_ids)
        all_reco_scores_array = np.concatenate(all_scores)
        valid_indices = all_reco_ids_arr < len(sorted_item_ids_to_recommend)

        all_reco_ids_arr = all_reco_ids_arr[valid_indices]
        all_target_ids = all_target_ids[valid_indices]
        all_reco_scores_array = all_reco_scores_array[valid_indices]
        
        if sorted_item_ids_to_recommend is not None:
            items_indeces = np.isin(all_reco_ids_arr, sorted_item_ids_to_recommend)
            all_reco_ids_arr = all_reco_ids_arr[items_indeces]
            all_target_ids = all_target_ids[items_indeces]
            all_reco_scores_array = all_reco_scores_array[items_indeces]

        return all_target_ids, all_reco_ids_arr, all_reco_scores_array
    
    # Method used to make user-item recommendations, not for direct invokation, used in recommend method of base class
    # Params:
    # target_ids - InternalIdsArray of user ids for which predictions need to be made
    # dataset - instance of Dataset class
    # k - maximum count of top rated elements presented in recommendations
    # sorted_item_ids_to_recommend - optional InternalIdsArray of item ids from which predictions are made
    # Returns:
    # Equaly sized arrays of target ids, predictions ids, scores
    # in this method you need to ensure, that your realization handles all parameters correctly i.e. 
    # it can limit k predictions and limit the set of allowed items.
    def _recommend_u2i(
        self,
        user_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        filter_viewed: bool,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        if filter_viewed:
            user_items = dataset.get_user_item_matrix(include_weights=False)

        item_ids = sorted_item_ids_to_recommend if sorted_item_ids_to_recommend is not None else self.all_item_ids
        sampler = _RandomSampler(item_ids, self.random_gen)

        all_user_ids = []
        all_reco_ids: tp.List[InternalId] = []
        all_scores: tp.List[float] = []
        for user_id in tqdm(user_ids, disable=self.verbose == 0):
            if filter_viewed:
                viewed_ids = get_viewed_item_ids(user_items, user_id)  # sorted
                n_reco = k + viewed_ids.size
            else:
                n_reco = k

            n_reco = min(n_reco, item_ids.size)
            reco_ids = sampler.sample(n_reco)

            if filter_viewed:
                reco_ids = reco_ids[fast_isin_for_sorted_test_elements(reco_ids, viewed_ids, invert=True)][:k]

            reco_scores = np.arange(reco_ids.size, 0, -1)

            all_user_ids.extend([user_id] * len(reco_ids))
            all_reco_ids.extend(reco_ids.tolist())
            all_scores.extend(reco_scores.tolist())

        return all_user_ids, all_reco_ids, all_scores

In [29]:
model = MixedKnnRandomModel(metric="cosine", algorithm="brute", n_neighbors=20, n_jobs=-1, random_state=20)

In [30]:
dataset = Dataset.construct(ratings)

In [31]:
model.fit(dataset)

<__main__.MixedKnnRandomModel at 0x7b349d4f63e0>

## Use model to recommend similar items

In [36]:
reco = model.recommend_to_items([1,7,6,2,3,5], dataset, 10)
reco[Columns.Model] = "KnnCustomModel"

In [37]:
reco.head()

Unnamed: 0,target_item_id,item_id,score,rank,model
0,1,1895,0.749783,1,KnnCustomModel
1,1,1989,0.750385,2,KnnCustomModel
2,1,458,0.759902,3,KnnCustomModel
3,1,1906,0.759902,4,KnnCustomModel
4,1,877,0.76827,5,KnnCustomModel


In [38]:
selected_items = {"item_one": 3}
formatters = {"item_id": lambda x: f"<b>{x}</b>"}

In [42]:
app = ItemToItemVisualApp.construct(
    reco=reco,
    item_data=movies,
    selected_items=selected_items,
    formatters=formatters,
    auto_display=True
)



# Use model to recommend movies for a specific users

In [44]:
reco = model.recommend([1,7,6,2,3,5], dataset, 10, filter_viewed=True)
reco[Columns.Model] = "KnnCustomModel"

In [51]:
reco.head()

Unnamed: 0,user_id,item_id,score,rank,model
0,1,2138,10.0,1,KnnCustomModel
1,1,60,9.0,2,KnnCustomModel
2,1,572,8.0,3,KnnCustomModel
3,1,3155,7.0,4,KnnCustomModel
4,1,1760,6.0,5,KnnCustomModel


In [52]:
selected_users = {"user_one": 3}
app = VisualApp.construct(
    reco=reco,
    interactions=ratings,
    item_data=movies,
    selected_users=selected_users,
    formatters=formatters 
)



# Conclusion
You can create custom models with any requirements by inheriting from BaseModel and implementing necessary methods.