# Example of building a custom model with the `ModelBase` class

- Building a custom model
- Visually checking the recommendations

In [275]:
import numpy as np
import pandas as pd
from rectools.models.base import ModelBase, ModelConfig
from rectools import Columns
from rectools.dataset import Dataset
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import typing as tp
import typing_extensions as tpe
from rectools.models.base import InternalIdsArray
from rectools.types import *
from tqdm import tqdm
from rectools.models.base import Scores
from rectools.visuals.visual_app import ItemToItemVisualApp

## Load data

In [2]:
%%time
# Fetch the MovieLens 1M archive (-q: quiet, -O: output file name),
# unpack it into ./ml-1m (-o: overwrite existing files without prompting),
# then remove the archive since only the extracted files are used below.
!wget -q https://files.grouplens.org/datasets/movielens/ml-1m.zip -O ml-1m.zip
!unzip -o ml-1m.zip
!rm ml-1m.zip

Archive:  ml-1m.zip
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         
CPU times: user 46 ms, sys: 22.8 ms, total: 68.8 ms
Wall time: 5.5 s


In [2]:
%%time
# Load the MovieLens ratings. The file has no header row, so the column names
# are supplied explicitly using the RecTools `Columns` constants.
ratings = pd.read_csv(
    "ml-1m/ratings.dat", 
    sep="::",
    engine="python",  # Multi-character separators are not supported by the default C parser
    header=None,
    names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],
)
# Quick sanity check: table size and the first rows.
# NOTE(review): the datetime column is left as raw integers (Unix timestamps,
# judging by the values shown in the output) - confirm this is what is wanted.
print(ratings.shape)
ratings.head()

(1000209, 4)
CPU times: user 1.99 s, sys: 99.9 ms, total: 2.09 s
Wall time: 2.09 s


Unnamed: 0,user_id,item_id,weight,datetime
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [3]:
%%time
# Load the movie metadata (item id, title, genres). It is only used later as
# `item_data` for the visual app, not for fitting the model.
movies = pd.read_csv(
    "ml-1m/movies.dat", 
    sep="::",
    engine="python",  # Multi-character separators are not supported by the default C parser
    header=None,
    names=[Columns.Item, "title", "genres"],
    # Bytes that cannot be decoded are silently dropped instead of raising.
    # NOTE(review): presumably the file is not UTF-8 encoded; passing an explicit
    # `encoding` would keep accented characters in titles - confirm the encoding.
    encoding_errors="ignore",
)
# Quick sanity check: table size and the first rows.
print(movies.shape)
movies.head()

(3883, 3)
CPU times: user 5.53 ms, sys: 400 μs, total: 5.93 ms
Wall time: 5.36 ms


Unnamed: 0,item_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


## Build model

### Write a model config inherited from `ModelConfig`

In [4]:
class KnnModelConfig(ModelConfig):
    """Config for `KNN` model.

    Holds the hyperparameters that `KnnModel` forwards to sklearn's
    `NearestNeighbors`. Every field is optional and defaults to ``None``.
    """

    # KNN algorithm hyperparams
    # Name of the distance metric (the notebook uses "cosine").
    metric: tp.Optional[str] = None
    # Name of the neighbour search algorithm (the notebook uses "brute").
    algorithm: tp.Optional[str] = None
    # Default number of neighbours for the underlying estimator.
    n_neighbors: tp.Optional[int] = None
    # Number of parallel jobs for the neighbour search (the notebook uses -1).
    n_jobs: tp.Optional[int] = None

### Write the model logic in a class inherited from `ModelBase`

In [266]:
class KnnModel(ModelBase[KnnModelConfig]):
    """Sample item-to-item recommender built on sklearn's ``NearestNeighbors``.

    Every item is represented by its vector of user interactions (one row of
    the transposed user-item matrix); items similar to a target item are the
    nearest neighbours of the target's vector.

    Parameters
    ----------
    metric : str, optional
        Distance metric passed to ``NearestNeighbors``.
    algorithm : str, optional
        Neighbour search algorithm passed to ``NearestNeighbors``.
    n_neighbors : int, optional
        Default number of neighbours passed to ``NearestNeighbors``.
    n_jobs : int, optional
        Number of parallel jobs passed to ``NearestNeighbors``.
    verbose : int, default 0
        If non-zero, a progress bar is shown while recommending.

    Hyperparameters left as ``None`` are not passed to ``NearestNeighbors`` at
    all, so the estimator's own defaults are used for them.
    """

    # The model declares that it can recommend neither for warm nor for cold targets.
    recommends_for_warm = False
    recommends_for_cold = False
    # Config class used to save / restore the hyperparameters.
    config_class = KnnModelConfig

    def __init__(
        self,
        metric: tp.Optional[str] = None,
        algorithm: tp.Optional[str] = None,
        n_neighbors: tp.Optional[int] = None,
        n_jobs: tp.Optional[int] = None,
        verbose: int = 0,
    ):
        super().__init__(verbose=verbose)
        self.metric = metric
        self.algorithm = algorithm
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs

        # Forward only explicitly set hyperparameters: handing ``None`` to the
        # estimator for e.g. ``metric`` or ``algorithm`` would override its
        # defaults with a value it does not accept.
        knn_params = {
            "metric": metric,
            "algorithm": algorithm,
            "n_neighbors": n_neighbors,
            "n_jobs": n_jobs,
        }
        self.knn_model = NearestNeighbors(
            **{name: value for name, value in knn_params.items() if value is not None}
        )

        # Populated by `_fit`.
        self.all_item_ids: np.ndarray
        self.ui_csr: csr_matrix  # users x items
        self.iu_csr: csr_matrix  # items x users, the matrix the KNN model is fitted on

    def _get_config(self) -> KnnModelConfig:
        """Return the current hyperparameters as a config object."""
        return KnnModelConfig(
            metric=self.metric,
            algorithm=self.algorithm,
            n_neighbors=self.n_neighbors,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
        )

    @classmethod
    def _from_config(cls, config: KnnModelConfig) -> tpe.Self:
        """Create a model with the hyperparameters stored in `config`."""
        return cls(
            metric=config.metric,
            algorithm=config.algorithm,
            n_neighbors=config.n_neighbors,
            n_jobs=config.n_jobs,
            verbose=config.verbose,
        )

    def _fit(self, dataset: Dataset) -> None:  # type: ignore
        """Fit the wrapped KNN model on item vectors taken from `dataset`.

        The user-item matrix is built without interaction weights and then
        transposed, so that every fitted sample is an item and the neighbour
        indices returned later are internal item ids.
        """
        self.all_item_ids = dataset.item_id_map.internal_ids
        self.ui_csr = dataset.get_user_item_matrix(include_weights=False, dtype=np.float64)
        self.iu_csr = self.ui_csr.T.tocsr()
        self.knn_model.fit(self.iu_csr)

    def _recommend_i2i(
        self,
        target_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        """Find the nearest items for every target item.

        Not meant to be called directly; the original notes say it is used by
        the base class `recommend_to_items` method.

        Parameters
        ----------
        target_ids : InternalIdsArray
            Internal ids of the items to find neighbours for.
        dataset : Dataset
            Unused here: neighbours are searched in the matrix stored by `_fit`.
        k : int
            Maximum number of recommended items per target.
        sorted_item_ids_to_recommend : InternalIdsArray, optional
            Internal ids of the items allowed in recommendations.
            If ``None``, every fitted item is allowed.

        Returns
        -------
        tuple of three equally sized arrays
            Target ids, recommended item ids and scores. For each target the
            items go from the nearest to the farthest.

        Notes
        -----
        Scores are the raw distances returned by ``kneighbors``, so a smaller
        value means a more similar item.
        The target item is not removed from its own neighbours here.
        NOTE(review): presumably `recommend_to_items` filters the target itself
        out when asked to - confirm against the base class.
        """
        n_items = self.iu_csr.shape[0]
        if len(target_ids) == 0 or n_items == 0:
            # `np.concatenate` fails on an empty list, so return early.
            return (
                np.array([], dtype=int),
                np.array([], dtype=int),
                np.array([], dtype=np.float64),
            )

        if sorted_item_ids_to_recommend is None:
            # `kneighbors` raises when asked for more neighbours than fitted samples.
            n_neighbors = min(k, n_items)
        else:
            # It is unknown in advance how many of the nearest items are allowed,
            # so rank all items and keep the first `k` allowed ones.
            n_neighbors = n_items

        target_chunks: tp.List[np.ndarray] = []
        reco_chunks: tp.List[np.ndarray] = []
        score_chunks: tp.List[np.ndarray] = []
        for target_id in tqdm(target_ids, disable=self.verbose == 0):
            distances, neighbor_ids = self.knn_model.kneighbors(
                self.iu_csr[target_id], n_neighbors=n_neighbors
            )
            # A single query row was passed, so take the only row of each result.
            distances, neighbor_ids = distances[0], neighbor_ids[0]

            if sorted_item_ids_to_recommend is not None:
                allowed = np.isin(neighbor_ids, sorted_item_ids_to_recommend)
                neighbor_ids = neighbor_ids[allowed][:k]
                distances = distances[allowed][:k]

            target_chunks.append(np.full(len(neighbor_ids), target_id))
            reco_chunks.append(neighbor_ids)
            score_chunks.append(distances)

        return (
            np.concatenate(target_chunks),
            np.concatenate(reco_chunks),
            np.concatenate(score_chunks),
        )

In [267]:
# Brute-force cosine-distance KNN; n_jobs=-1 is forwarded to sklearn's NearestNeighbors.
model = KnnModel(metric="cosine", algorithm="brute", n_neighbors=20, n_jobs=-1)

In [268]:
# Build a RecTools dataset from the ratings table only (no features are passed).
dataset = Dataset.construct(ratings)

In [269]:
# Fit the model; the cell output below shows that `fit` returns the model instance.
model.fit(dataset)

<__main__.KnnModel at 0x7db3e9fa4bb0>

## Use model to recommend similar items

In [283]:
# Request 10 similar items for each of six target item ids.
reco = model.recommend_to_items([1,7,6,2,3,5], dataset, 10)
# Tag the recommendations with a model name.
# NOTE(review): presumably the visual app expects this column - confirm.
reco[Columns.Model] = "KnnCustomModel"

In [290]:
# Target item to show in the app: a display name mapped to an item id
# (id 3 is one of the targets recommendations were requested for above).
selected_items = {"item_one": 3}
# Render values of the "item_id" column in bold by wrapping them in an HTML <b> tag.
formatters = {"item_id": lambda x: f"<b>{x}</b>"}

In [291]:
# Build the item-to-item visual app from the recommendations and the movie
# metadata, so that recommended items can be inspected by title and genres.
# NOTE(review): `auto_display=True` presumably renders the widget right away - confirm.
app = ItemToItemVisualApp.construct(
    reco=reco,
    item_data=movies,
    selected_items=selected_items,
    formatters=formatters,
    auto_display=True
)

