# Example of building custom model with ModelBase class

- Building custom model
- Visual recommendations checking

In [1]:
import numpy as np
import pandas as pd
from rectools.models.base import ModelBase, ModelConfig
from rectools import Columns
from rectools.dataset import Dataset
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import typing as tp
import typing_extensions as tpe
from rectools.models.base import InternalIdsArray
from rectools.types import *
from tqdm import tqdm
from rectools.models.base import Scores


## Load data

In [3]:
%%time
!wget -q https://files.grouplens.org/datasets/movielens/ml-1m.zip -O ml-1m.zip
!unzip -o ml-1m.zip
!rm ml-1m.zip

Archive:  ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         
CPU times: user 48.4 ms, sys: 23.4 ms, total: 71.9 ms
Wall time: 5.12 s


In [37]:
%%time
ratings = pd.read_csv(
    "ml-1m/ratings.dat", 
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],
)
print(ratings.shape)
ratings.head()

(1000209, 4)
CPU times: user 2.95 s, sys: 102 ms, total: 3.05 s
Wall time: 3.03 s


Unnamed: 0,user_id,item_id,weight,datetime
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [38]:
%%time
movies = pd.read_csv(
    "ml-1m/movies.dat", 
    sep="::",
    engine="python",  # Because of 2-chars separators
    header=None,
    names=[Columns.Item, "title", "genres"],
    encoding_errors="ignore",
)
print(movies.shape)
movies.head()

(3883, 3)
CPU times: user 11.4 ms, sys: 81 μs, total: 11.4 ms
Wall time: 10.8 ms


Unnamed: 0,item_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


## Build model

### Write a model config inherited from `ModelConfig`

In [39]:
class KnnModelConfig(ModelConfig):
    """Config for `KNN` model."""

    # KNN algorithm hyperparams
    metric: tp.Optional[str] = None
    algorithm: tp.Optional[str] = None
    n_neighbors: tp.Optional[int] = None
    n_jobs: tp.Optional[int] = None

### Write a model logic in class inherited from `ModelBase`

In [40]:


class KnnModel(ModelBase[KnnModelConfig]):
    
    recommends_for_warm = True
    recommends_for_cold = False
    config_class = KnnModelConfig

    def __init__(self,
                 metric: tp.Optional[str] = None,
                 algorithm: tp.Optional[str] = None,
                 n_neighbors: tp.Optional[int] = None,
                 n_jobs: tp.Optional[int] = None,
                 verbose: int = 0):
        super().__init__(verbose=verbose)
        self.metric = metric
        self.algorithm = algorithm
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs
        self.knn_model = NearestNeighbors(metric = self.metric,
                       algorithm = self.algorithm,
                       n_neighbors = self.n_neighbors,
                       n_jobs = self.n_jobs)
        self.all_item_ids: np.ndarray
        self.ui_csr: csr_matrix

    def _get_config(self) -> KnnModelConfig:
        return KnnModelConfig(metric=self.metric, algorithm=self.algorithm, n_neighbors=self.n_neighbors, verbose=self.verbose)

    @classmethod
    def _from_config(cls, config: KnnModelConfig) -> tpe.Self:
        return cls(metric=config.metric, algorithm=config.algorithm, n_neighbors=config.n_neighbors, verbose=config.verbose)

    def _fit(self, dataset: Dataset) -> None:  # type: ignore
        self.all_item_ids = dataset.item_id_map.internal_ids
        self.ui_csr = dataset.get_user_item_matrix(include_weights=False, dtype=np.float64)
        self.knn_model.fit(self.ui_csr)

    def _recommend_i2i(
        self,
        target_ids: InternalIdsArray,
        dataset: Dataset,
        k: int,
        sorted_item_ids_to_recommend: tp.Optional[InternalIdsArray],
    ) -> tp.Tuple[InternalIds, InternalIds, Scores]:
        sorted_item_ids_to_recommend = dataset.get_user_item_matrix(include_weights=False,
                                                                    dtype=np.float64)[sorted_item_ids_to_recommend] if sorted_item_ids_to_recommend is not None else self.all_item_ids

        all_target_ids = []
        all_reco_ids: tp.List[np.ndarray] = []
        all_scores: tp.List[np.ndarray] = []
        for target_id in tqdm(target_ids, disable=self.verbose == 0):
            reco_scores, reco_ids = self.knn_model.kneighbors(self.ui_csr[target_id], n_neighbors = k + 1)
            all_target_ids.extend([target_id] * len(reco_ids))
            all_reco_ids.append(reco_ids)
            all_scores.append(reco_scores)

        all_reco_ids_arr = np.concatenate(all_reco_ids)

        if sorted_item_ids_to_recommend is not None:
            all_reco_ids_arr = sorted_item_ids_to_recommend[all_reco_ids_arr]

        return all_target_ids, all_reco_ids_arr, np.concatenate(all_scores)

In [41]:
model = KnnModel(metric="cosine", algorithm="brute", n_neighbors=20, n_jobs=-1)

In [42]:
dataset = Dataset.construct(ratings)

In [43]:
model.fit(dataset)

<__main__.KnnModel at 0x797862117af0>

In [46]:
model._recommend_i2i(InternalIdsArray([1]), dataset, 10, None)

IndexError: Index dimension must be 1 or 2