# Базовый солюшн

In [131]:
import numpy as np
import pandas as pd
import polars as pl
from abc import ABC, abstractmethod
from typing import Dict, List
from collections import defaultdict

In [132]:
# Load the train/test interaction tables right away.
train = pl.read_parquet("../../data/train.pq")
test = pl.read_parquet("../../data/test.pq")

# Unique item ids per split; "cold" items occur in test but never in train.
train_items = set(train["item_id"].unique())
test_items = set(test["item_id"].unique())
cold_items = test_items.difference(train_items)

### Метрики оценивания моделей

In [133]:
class Validator(ABC):
    """Base class for recommendation validators; provides shared top-k metrics.

    Subclasses implement :meth:`evaluate`. All ranking metrics treat the ground
    truth as a *set* of relevant items, so repeated interactions with the same
    item do not change the result.
    """

    def __init__(self, train: "pl.DataFrame", test: "pl.DataFrame", cold_items: set = None):
        """Store the splits and the collection of cold item ids.

        Args:
            train: training interactions; must expose ``train["item_id"].unique()``.
            test: held-out interactions.
            cold_items: ids of items absent from train (``None`` means no cold items).
        """
        self.train = train
        self.test = test
        # Always keep a set so membership checks in subclasses are O(1).
        self.cold_items = set(cold_items) if cold_items else set()

    @abstractmethod
    def evaluate(self, predictions: Dict[int, List[int]]) -> Dict[str, float]:
        """
        predictions: dict user_id -> list of recommended item_ids
        """
        pass

    def recall_at_k(self, y_true: List[int], y_pred: List[int], k: int = 10) -> float:
        """Share of distinct relevant items found in the first ``k`` predictions."""
        relevant = set(y_true)
        if not relevant:
            return 0.0
        return len(relevant & set(y_pred[:k])) / len(relevant)

    def precision_at_k(self, y_true: List[int], y_pred: List[int], k: int = 10) -> float:
        """Number of distinct hits in the first ``k`` predictions divided by ``k``."""
        relevant = set(y_true)
        # k <= 0 would divide by zero; an empty cut-off has no hits by definition.
        if not relevant or k <= 0:
            return 0.0
        return len(relevant & set(y_pred[:k])) / k

    def hitrate_at_k(self, y_true: List[int], y_pred: List[int], k: int = 10) -> float:
        """Return 1.0 if any of the first ``k`` predictions is relevant, else 0.0."""
        return 1.0 if set(y_true) & set(y_pred[:k]) else 0.0

    def ndcg_at_k(self, y_true: List[int], y_pred: List[int], k: int = 10) -> float:
        """Binary-relevance NDCG over the first ``k`` predictions.

        Each relevant item is credited once (at its first position) and the
        ideal DCG is sized by the number of distinct relevant items, which
        keeps the value within [0, 1] even when inputs contain duplicates.
        """
        relevant = set(y_true)
        if not relevant:
            return 0.0
        dcg = 0.0
        credited = set()
        for rank, item in enumerate(y_pred[:k]):
            if item in relevant and item not in credited:
                credited.add(item)
                dcg += 1 / np.log2(rank + 2)
        idcg = sum(1 / np.log2(i + 2) for i in range(min(len(relevant), k)))
        return dcg / idcg if idcg > 0 else 0.0

    def mrr_at_k(self, y_true: List[int], y_pred: List[int], k: int = 10) -> float:
        """Reciprocal rank of the first relevant item within the top ``k`` (0.0 if none)."""
        relevant = set(y_true)
        for rank, item in enumerate(y_pred[:k], start=1):
            if item in relevant:
                return 1 / rank
        return 0.0

    def coverage(self, predictions: Dict[int, List[int]]) -> float:
        """Number of distinct recommended items divided by the train catalogue size.

        Recommended items that are not in train still count in the numerator,
        so the value is not capped at 1. Returns 0.0 for an empty catalogue.
        """
        catalogue = set(self.train["item_id"].unique())
        if not catalogue:
            return 0.0
        recommended = {item for recs in predictions.values() for item in recs}
        return len(recommended) / len(catalogue)

    @staticmethod
    def print_metrics(metrics: Dict[str, float]):
        """Print the metrics as an aligned ``name: value`` table."""
        print("\n=== Evaluation Results ===")
        for key, value in metrics.items():
            print(f"{key:<15}: {value:.4f}")
        print("==========================\n")

Стоит разделить валидацию на две версии, для сравнения. Моя гипотеза заключается в том, что совместная валидация warm и cold может быть не совсем честной. Например, если в тесте 90% warm и 10% cold, то Recall@10 в среднем будет определяться warm-айтемами. Модель может полностью «забыть» про cold items, но в отчёте всё равно будут хорошие цифры. Это вводит в заблуждение: кажется, что модель универсальная, хотя на самом деле cold-start не решён.

#### Блок совместной валидации (baseline)

In [134]:
class JointValidator(Validator):
    """Validator that scores warm and cold test items together."""

    def __init__(self, train: pl.DataFrame, test: pl.DataFrame, cold_items: set = None):
        """Group the test interactions into a ``user_id -> [item_id, ...]`` mapping."""
        super().__init__(train, test, cold_items)
        grouped = test.group_by("user_id").agg(pl.col("item_id")).to_dict(as_series=False)
        self.user2items = dict(zip(grouped["user_id"], grouped["item_id"]))

    def evaluate(self, predictions: Dict[int, List[int]]) -> Dict[str, float]:
        """Average the top-10 metrics over every user in ``predictions``.

        Users without test interactions get an empty ground truth and therefore
        contribute zeros to every average. The metrics are printed and also
        returned, as the signature promises.

        Args:
            predictions: dict user_id -> list of recommended item_ids.

        Returns:
            Mapping of metric name to its mean value, plus ``"Coverage"``.
        """
        recalls, precisions, hits, ndcgs, mrrs = [], [], [], [], []
        for user_id, y_pred in predictions.items():
            y_true = self.user2items.get(user_id, [])
            recalls.append(self.recall_at_k(y_true, y_pred))
            precisions.append(self.precision_at_k(y_true, y_pred))
            hits.append(self.hitrate_at_k(y_true, y_pred))
            ndcgs.append(self.ndcg_at_k(y_true, y_pred))
            mrrs.append(self.mrr_at_k(y_true, y_pred))
        # Guard the means: np.mean([]) yields NaN (with a warning) for empty predictions.
        results = {
            "Recall@10": np.mean(recalls) if recalls else 0.0,
            "Precision@10": np.mean(precisions) if precisions else 0.0,
            "HitRate@10": np.mean(hits) if hits else 0.0,
            "NDCG@10": np.mean(ndcgs) if ndcgs else 0.0,
            "MRR@10": np.mean(mrrs) if mrrs else 0.0,
            "Coverage": self.coverage(predictions),
        }
        self.print_metrics(results)
        return results

#### Разделенная валидация (cold vs warm)

In [135]:
class SplitValidator(Validator):
    """Validator that reports the top-10 metrics separately for cold and warm items."""

    def __init__(self, train: pl.DataFrame, test: pl.DataFrame, cold_items: set = None):
        """Group the test interactions into a ``user_id -> [item_id, ...]`` mapping."""
        super().__init__(train, test, cold_items)
        grouped = test.group_by("user_id").agg(pl.col("item_id")).to_dict(as_series=False)
        self.user2items = dict(zip(grouped["user_id"], grouped["item_id"]))

    def evaluate(self, predictions: Dict[int, List[int]]) -> Dict[str, float]:
        """Compute the metrics twice: against cold-only and warm-only ground truth.

        For each subset a user's ground truth is filtered to that subset, and
        users left with no relevant items are skipped. A subset with no
        eligible users reports 0.0. The metrics are printed and also returned.

        Args:
            predictions: dict user_id -> list of recommended item_ids.

        Returns:
            Mapping with ``<metric>@10_cold`` / ``<metric>@10_warm`` keys and
            ``"Coverage"``.
        """
        results = {}
        for subset in ("cold", "warm"):
            want_cold = subset == "cold"
            recalls, precisions, hits, ndcgs, mrrs = [], [], [], [], []
            for user_id, y_pred in predictions.items():
                # Keep only the ground-truth items that belong to the current subset.
                y_true = [
                    item
                    for item in self.user2items.get(user_id, [])
                    if (item in self.cold_items) == want_cold
                ]
                if not y_true:
                    continue
                recalls.append(self.recall_at_k(y_true, y_pred))
                precisions.append(self.precision_at_k(y_true, y_pred))
                hits.append(self.hitrate_at_k(y_true, y_pred))
                ndcgs.append(self.ndcg_at_k(y_true, y_pred))
                mrrs.append(self.mrr_at_k(y_true, y_pred))
            results[f"Recall@10_{subset}"] = np.mean(recalls) if recalls else 0.0
            results[f"Precision@10_{subset}"] = np.mean(precisions) if precisions else 0.0
            results[f"HitRate@10_{subset}"] = np.mean(hits) if hits else 0.0
            results[f"NDCG@10_{subset}"] = np.mean(ndcgs) if ndcgs else 0.0
            results[f"MRR@10_{subset}"] = np.mean(mrrs) if mrrs else 0.0
        results["Coverage"] = self.coverage(predictions)
        self.print_metrics(results)
        return results

### Базовые методы 

In [136]:
class ItemPopRecSys:
    """Non-personalised baseline: recommends the globally most popular train items."""

    def __init__(self, train: "pl.DataFrame"):
        """Keep the training interactions; the ranking is built in :meth:`fit`."""
        self.train = train
        self.popular_items = None

    def fit(self):
        """Rank items by the number of interactions in train, most popular first."""
        # Break popularity ties by item_id so the ranking is reproducible
        # (group_by does not guarantee any particular output order).
        popularity = (
            self.train.group_by("item_id")
            .len()
            .sort(["len", "item_id"], descending=[True, False])
        )
        self.popular_items = popularity["item_id"].to_list()

    def recommend(self, user_ids: List[int], k: int = 10) -> Dict[int, List[int]]:
        """Return the same top-``k`` popular items for every requested user.

        Raises:
            ValueError: if :meth:`fit` has not been called.
        """
        if self.popular_items is None:
            raise ValueError("Model is not fitted. Call fit() before recommend().")
        # Slice per user so callers never share (and accidentally mutate) one list.
        return {u: self.popular_items[:k] for u in user_ids}

##### Проблематика ItemPopRecSys и cold-start

* `ItemPopRecSys` рекомендует только те книги, которые встречались в train.
* Cold items (которые есть только в test) он никогда не предложит.
* __Это ожидаемая особенность__: чисто коллаборативные методы не справляются с cold-start без дополнительных признаков айтемов.

In [137]:
class UserKNNRecSys:
    """User-based neighbourhood recommender using item-overlap counts as similarity."""

    def __init__(self, train: "pl.DataFrame", k_neighbors: int = 20):
        """
        Args:
            train: training interactions with ``user_id`` and ``item_id`` columns.
            k_neighbors: how many most-similar users vote for candidates;
                ``None`` lets every overlapping user vote.
        """
        self.train = train
        self.k_neighbors = k_neighbors
        self.user2items = None

    def fit(self):
        """Build the ``user_id -> set(item_id)`` mapping from train."""
        grouped = (
            self.train.group_by("user_id")
            .agg(pl.col("item_id").unique())
            .to_dict(as_series=False)
        )
        self.user2items = {
            u: set(items) for u, items in zip(grouped["user_id"], grouped["item_id"])
        }

    def recommend(self, user_ids: List[int], k: int = 10) -> Dict[int, List[int]]:
        """Recommend up to ``k`` unseen items per user.

        The similarity between two users is the number of items they share.
        Each of the ``k_neighbors`` most similar users adds that similarity to
        the score of every item the target user has not interacted with.
        Users unknown to the model get an empty list.

        Raises:
            ValueError: if :meth:`fit` has not been called.
        """
        if self.user2items is None:
            raise ValueError("Model is not fitted. Call fit() before recommend().")

        # Inverted index, built once per call, so each request only visits
        # users that share at least one item instead of scanning everyone.
        item2users = defaultdict(set)
        for user, items in self.user2items.items():
            for item in items:
                item2users[item].add(user)

        preds = {}
        for u in user_ids:
            own_items = self.user2items.get(u)
            if own_items is None:
                preds[u] = []
                continue

            # overlap[v] == len(own_items & user2items[v]) for every v != u.
            overlap = defaultdict(int)
            for item in own_items:
                for v in item2users[item]:
                    if v != u:
                        overlap[v] += 1

            neighbors = sorted(overlap, key=overlap.get, reverse=True)
            if self.k_neighbors is not None:
                neighbors = neighbors[: self.k_neighbors]

            scores = defaultdict(int)
            for v in neighbors:
                weight = overlap[v]
                for item in self.user2items[v]:
                    if item not in own_items:
                        scores[item] += weight

            preds[u] = sorted(scores, key=scores.get, reverse=True)[:k]
        return preds

In [138]:
class ItemKNNRecSys:
    """Item-based neighbourhood recommender using co-occurrence counts as similarity."""

    def __init__(self, train: "pl.DataFrame", k_neighbors: int = 20):
        """
        Args:
            train: training interactions with ``user_id`` and ``item_id`` columns.
            k_neighbors: how many most-similar items each history item
                contributes; ``None`` keeps every co-occurring item.
        """
        self.train = train
        self.k_neighbors = k_neighbors
        self.user2items = None
        self.item2users = None
        # item -> [(neighbour item, co-occurrence count), ...]; filled lazily.
        self._neighbor_cache = {}

    def fit(self):
        """Build both ``user -> items`` and ``item -> users`` mappings from train."""
        grouped = (
            self.train.group_by("user_id")
            .agg(pl.col("item_id").unique())
            .to_dict(as_series=False)
        )
        self.user2items = {
            u: set(items) for u, items in zip(grouped["user_id"], grouped["item_id"])
        }
        # recommend() needs the inverse index as well.
        item2users = defaultdict(set)
        for user, items in self.user2items.items():
            for item in items:
                item2users[item].add(user)
        self.item2users = dict(item2users)
        self._neighbor_cache = {}

    def _similar_items(self, item):
        """Return the top ``k_neighbors`` items co-consumed with ``item`` and their counts."""
        cached = self._neighbor_cache.get(item)
        if cached is not None:
            return cached
        # Count, for every other item, the users who interacted with both.
        co_counts = defaultdict(int)
        for user in self.item2users.get(item, ()):
            for other in self.user2items.get(user, ()):
                if other != item:
                    co_counts[other] += 1
        ranked = sorted(co_counts.items(), key=lambda pair: pair[1], reverse=True)
        if self.k_neighbors is not None:
            ranked = ranked[: self.k_neighbors]
        self._neighbor_cache[item] = ranked
        return ranked

    def recommend(self, user_ids: List[int], k: int = 10) -> Dict[int, List[int]]:
        """Recommend up to ``k`` unseen items per user.

        Every item in the user's history votes for its most similar items with
        the co-occurrence count as weight; items already in the history are
        never recommended. Users unknown to the model get an empty list.

        Raises:
            ValueError: if :meth:`fit` has not been called.
        """
        if self.user2items is None or self.item2users is None:
            raise ValueError("Model is not fitted. Call fit() before recommend().")

        preds = {}
        for u in user_ids:
            history = self.user2items.get(u)
            if history is None:
                preds[u] = []
                continue
            scores = defaultdict(int)
            for it in history:
                for other_item, common in self._similar_items(it):
                    if other_item not in history:
                        scores[other_item] += common
            preds[u] = sorted(scores, key=scores.get, reverse=True)[:k]
        return preds

In [139]:
class MostRecentItemsRecSys:
    """Non-personalised baseline: recommends the items with the latest timestamps."""

    def __init__(self, items_metadata: "pl.DataFrame", time_col: str = "timestamp"):
        """
        Args:
            items_metadata: frame with an ``item_id`` column and a time column.
            time_col: name of the column to sort by, newest first.
        """
        self.items_metadata = items_metadata
        self.time_col = time_col
        self.sorted_items = None

    def fit(self):
        """Order item ids from newest to oldest according to ``time_col``.

        Raises:
            ValueError: if ``time_col`` is not a column of ``items_metadata``.
        """
        if self.time_col not in self.items_metadata.columns:
            raise ValueError(
                f"Column '{self.time_col}' not found. "
                f"Available columns: {self.items_metadata.columns}"
            )
        ordered = (
            self.items_metadata
            .sort(self.time_col, descending=True)["item_id"]
            .to_list()
        )
        # The frame may hold several rows per item (e.g. an interaction log);
        # keep only the first, i.e. most recent, occurrence of each id so the
        # top-k never repeats an item.
        self.sorted_items = list(dict.fromkeys(ordered))

    def recommend(self, user_ids: List[int], k: int = 10) -> Dict[int, List[int]]:
        """Return the same ``k`` most recent items for every requested user.

        Raises:
            ValueError: if :meth:`fit` has not been called.
        """
        if self.sorted_items is None:
            raise ValueError("Model is not fitted. Call fit() before recommend().")
        return {u: self.sorted_items[:k] for u in user_ids}

In [140]:
class RandomColdRecSys:
    """Baseline that hands every user a random sample of cold items."""

    def __init__(self, cold_items: List[int], seed: int = 42):
        """Remember the candidate pool and create a seeded random generator."""
        self.rng = np.random.default_rng(seed)
        self.cold_items = cold_items

    def fit(self):
        """Nothing to learn; present so all models share the same interface."""

    def recommend(self, user_ids: List[int], k: int = 10) -> Dict[int, List[int]]:
        """Draw, per user, up to ``k`` distinct cold items without replacement."""
        preds = {}
        for u in user_ids:
            # Never ask for more items than the pool contains.
            sample_size = min(k, len(self.cold_items))
            drawn = self.rng.choice(self.cold_items, size=sample_size, replace=False)
            preds[u] = drawn.tolist()
        return preds

In [141]:
class HybridPopColdRecSys:
    """Hybrid baseline: first half popular train items, the rest random cold items."""

    def __init__(self, train: "pl.DataFrame", cold_items: List[int], seed: int = 42):
        """
        Args:
            train: training interactions with an ``item_id`` column.
            cold_items: ids of items that appear only in test.
            seed: seed of the instance-local random generator, so results are
                reproducible and independent of the global NumPy random state.
        """
        self.train = train
        # Materialise as a list so sets and other iterables can be sampled too.
        self.cold_items = list(cold_items)
        self.popular_items = None
        self.rng = np.random.default_rng(seed)

    def fit(self):
        """Rank items by the number of interactions in train, most popular first."""
        # Ties are broken by item_id to keep the ranking reproducible.
        popularity = (
            self.train.group_by("item_id")
            .len()
            .sort(["len", "item_id"], descending=[True, False])
        )
        self.popular_items = popularity["item_id"].to_list()

    def recommend(self, user_ids: List[int], k: int = 10) -> Dict[int, List[int]]:
        """Return up to ``k`` items per user: ``k // 2`` popular ones, then cold ones.

        The cold part is sampled without replacement; if there are too few cold
        items the list is shorter than ``k``.

        Raises:
            ValueError: if :meth:`fit` has not been called.
        """
        if self.popular_items is None:
            raise ValueError("Model is not fitted. Call fit() before recommend().")
        preds = {}
        for u in user_ids:
            # The slice is a fresh list, so extending it never touches popular_items.
            recs = self.popular_items[: k // 2]
            n_cold = min(k - len(recs), len(self.cold_items))
            if n_cold > 0:
                # .tolist() converts NumPy scalars to plain Python values.
                recs += self.rng.choice(self.cold_items, size=n_cold, replace=False).tolist()
            preds[u] = recs[:k]
        return preds

#### Вспомогательные функции для удобного представления результатов

In [142]:
def _shorten_list(lst, max_len=10):
    """Trim a long list to ``max_len`` items for compact printing; ``None`` becomes ``[]``."""
    if lst is None:
        return []
    if len(lst) <= max_len:
        # Short enough: hand back the very same object, untouched.
        return lst
    return lst[:max_len]

def show_predictions(models: dict, data: pl.DataFrame, n=5, verbose=True, is_val=False):
    """Sample ``n`` rows of ``data`` and attach each model's recommendations.

    models: dict[str, dict[user_id -> list[item_id]]]
    data: frame with ``user_id`` and ``item_id`` columns
    is_val: when true, the ``item_id`` column is renamed to ``true_items``

    Returns the sampled frame with one list column per model (each list is
    shortened by ``_shorten_list``); prints its shape and contents when
    ``verbose`` is true.
    """
    # Base columns.
    sampled = data.sample(n).select(["user_id", "item_id"])
    df = sampled.rename({"item_id": "true_items"}) if is_val else sampled

    # One prediction column per model; users without predictions get [].
    for name, preds in models.items():
        prediction_col = pl.col("user_id").map_elements(
            lambda u: _shorten_list(preds.get(u, [])),
            return_dtype=pl.List(pl.Int64),
        ).alias(name)
        df = df.with_columns(prediction_col)

    if verbose:
        print(df.shape)
        print(df)

    return df


def val_predictions(models: dict, val: pl.DataFrame, validator: Validator, k: int = 10, verbose: bool = True):
    """Build a comparison table of ranking metrics for several models.

    models: dict[str, dict[user_id -> list[item_id]]]
    val: validation DataFrame with ``user_id`` and ``item_id`` columns
    validator: Validator instance that provides the metric methods
    k: cut-off used for every metric; it is also used in the column names,
       so the table is labelled with the cut-off actually applied
    verbose: print the resulting table

    Every user present in ``val`` is scored; a user missing from a model's
    predictions is scored against an empty recommendation list.
    Returns a polars DataFrame with one row per model.
    """
    results = []
    grouped = val.group_by("user_id").agg(pl.col("item_id")).to_dict(as_series=False)
    user2items = dict(zip(grouped["user_id"], grouped["item_id"]))

    for model_name, preds in models.items():
        recalls, precisions, hits, ndcgs, mrrs = [], [], [], [], []
        for u, y_true in user2items.items():
            y_pred = preds.get(u, [])
            recalls.append(validator.recall_at_k(y_true, y_pred, k))
            precisions.append(validator.precision_at_k(y_true, y_pred, k))
            hits.append(validator.hitrate_at_k(y_true, y_pred, k))
            ndcgs.append(validator.ndcg_at_k(y_true, y_pred, k))
            mrrs.append(validator.mrr_at_k(y_true, y_pred, k))
        # Guard the means: np.mean([]) yields NaN when ``val`` has no users.
        metrics = {
            "model": model_name,
            f"Recall@{k}": np.mean(recalls) if recalls else 0.0,
            f"Precision@{k}": np.mean(precisions) if precisions else 0.0,
            f"HitRate@{k}": np.mean(hits) if hits else 0.0,
            f"NDCG@{k}": np.mean(ndcgs) if ndcgs else 0.0,
            f"MRR@{k}": np.mean(mrrs) if mrrs else 0.0,
            "Coverage": validator.coverage(preds),
        }
        results.append(metrics)

    df = pl.DataFrame(results)
    if verbose:
        print(df)
    return df


In [143]:
# Quick summary of the split sizes.
print(f"Train users: {train['user_id'].n_unique()}, items: {len(train_items)}")
print(f"Test users: {test['user_id'].n_unique()}, items: {len(test_items)}")
print(f"Cold items: {len(cold_items)}")
user_ids = test["user_id"].unique().to_list()

# Popularity baseline.
pop_model = ItemPopRecSys(train)
pop_model.fit()
pred_pop = pop_model.recommend(user_ids, k=10)

# Recency baseline, ordered by the "date_added" column of train.
recent_model = MostRecentItemsRecSys(train, time_col="date_added")
recent_model.fit()
pred_recent = recent_model.recommend(user_ids, k=10)

# Random cold-item baseline.
cold_model = RandomColdRecSys(list(cold_items))
pred_cold = cold_model.recommend(user_ids, k=10)

# Gather every model's predictions under a display name.
models = dict(
    Popularity=pred_pop,
    Recent=pred_recent,
    RandomCold=pred_cold,
)


print(f"Train users: {train['user_id'].n_unique()}, items: {train['item_id'].n_unique()}")
train_df = show_predictions(models, train, n=5, verbose=True)


Train users: 349719, items: 31300
Test users: 185828, items: 27367
Cold items: 1775
Train users: 349719, items: 31300
(5, 5)
shape: (5, 5)
┌──────────────────────────┬─────────┬─────────────────┬─────────────────┬─────────────────────────┐
│ user_id                  ┆ item_id ┆ Popularity      ┆ Recent          ┆ RandomCold              │
│ ---                      ┆ ---     ┆ ---             ┆ ---             ┆ ---                     │
│ str                      ┆ i64     ┆ list[i64]       ┆ list[i64]       ┆ list[i64]               │
╞══════════════════════════╪═════════╪═════════════════╪═════════════════╪═════════════════════════╡
│ 1e27aefa36f61909617bb6f7 ┆ 1166    ┆ [4058, 15514, … ┆ [14181, 6623, … ┆ [31421, 20937, … 22482] │
│ 6722b5…                  ┆         ┆ 13159]          ┆ 12550]          ┆                         │
│ b9f1c1a0fff38a3f097e0a26 ┆ 18871   ┆ [4058, 15514, … ┆ [14181, 6623, … ┆ [24331, 1395, … 26625]  │
│ 47c12d…                  ┆         ┆ 13159]        

In [144]:
# Score all collected predictions against the full test split (warm and cold together).
validator = JointValidator(train=train, test=test, cold_items=cold_items)
metrics_df = val_predictions(models=models, val=test, validator=validator, k=10, verbose=True)

shape: (3, 7)
┌────────────┬───────────┬──────────────┬────────────┬──────────┬──────────┬──────────┐
│ model      ┆ Recall@10 ┆ Precision@10 ┆ HitRate@10 ┆ NDCG@10  ┆ MRR@10   ┆ Coverage │
│ ---        ┆ ---       ┆ ---          ┆ ---        ┆ ---      ┆ ---      ┆ ---      │
│ str        ┆ f64       ┆ f64          ┆ f64        ┆ f64      ┆ f64      ┆ f64      │
╞════════════╪═══════════╪══════════════╪════════════╪══════════╪══════════╪══════════╡
│ Popularity ┆ 0.032568  ┆ 0.025603     ┆ 0.163899   ┆ 0.033254 ┆ 0.053643 ┆ 0.000319 │
│ Recent     ┆ 0.007675  ┆ 0.010843     ┆ 0.092957   ┆ 0.011604 ┆ 0.024268 ┆ 0.000319 │
│ RandomCold ┆ 0.000249  ┆ 0.000563     ┆ 0.005295   ┆ 0.000616 ┆ 0.001635 ┆ 0.056709 │
└────────────┴───────────┴──────────────┴────────────┴──────────┴──────────┴──────────┘
