In [2]:
import pandas as pd
import polars as pl
import numpy as np
import typing as t
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
from tqdm.notebook import tqdm
from IPython.display import HTML
from dataclasses import dataclass
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize


def dataframe_to_html(df: pl.DataFrame, columns: list) -> str:
    """
    Convert a Polars DataFrame to an HTML table with specified columns.

    Header names and cell values are HTML-escaped before being embedded, so
    text containing ``<``, ``>``, ``&`` or quotes (book titles, descriptions,
    URLs) cannot break the table markup or inject tags/attributes.

    Args:
        df: Polars DataFrame containing the data. Only
            ``df.iter_rows(named=True)`` is used.
        columns: List of column names to include in the HTML table. The
            column called ``"image_url"`` is rendered as an ``<img>`` tag.

    Returns:
        str: HTML string representing the table, one tag per line.
    """
    # Local import keeps this helper self-contained within its cell.
    from html import escape

    html = ["<table border='1'>", "<tr>"]
    html.extend(f"<th>{escape(str(col))}</th>" for col in columns)
    html.append("</tr>")

    for row in df.iter_rows(named=True):
        html.append("<tr>")
        for col in columns:
            # quote=True also escapes ' and ", which matters inside src='...'.
            cell = escape(str(row[col]), quote=True)
            if col == "image_url":
                html.append(f"<td><img src='{cell}' width='100' /></td>")
            else:
                html.append(f"<td>{cell}</td>")
        html.append("</tr>")

    html.append("</table>")
    return "\n".join(html)


In [6]:
# NOTE: machine-specific absolute path; later cells build file names by
# concatenating onto this string, so it must keep its trailing slash.
data_folder = "/home/gleb_galagan/tbank_recsys/sirius_recsys/sirius-2025-recsys/data/"
train = pl.read_parquet(f"{data_folder}train.pq")
print("Train:", train.shape, sep="\n")
display(train.head(5))

Train:
(11971437, 5)


user_id,item_id,is_read,rating,date_added
str,i64,bool,i64,datetime[μs]
"""90998a29029ecb1fbeedaff2b71752…",8473,False,0,2014-05-15 12:51:13
"""90998a29029ecb1fbeedaff2b71752…",20060,False,0,2014-05-15 12:51:14
"""90998a29029ecb1fbeedaff2b71752…",8354,False,0,2014-05-15 12:51:17
"""90998a29029ecb1fbeedaff2b71752…",11575,False,0,2014-05-15 12:51:19
"""90998a29029ecb1fbeedaff2b71752…",11861,False,0,2014-05-15 12:51:20


In [7]:
# Item catalogue: one row per book (see the preview below for the columns).
books = pl.read_parquet(f"{data_folder}books.pq")
print("Books:", books.shape, sep="\n")
display(books.head(5))

Books:
(34322, 8)


item_id,series,tags,title,description,url,image_url,authors
i64,list[str],list[str],str,str,str,str,list[struct[2]]
0,[null],"[""e-book"", ""young-adult"", … ""y-a""]","""Hallie Hath No Fury . . .""","""There are two sides to every s…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""1879494"",""""}]"
1,"[""149079""]","[""primary"", ""melissa-j--morgan"", … ""fiction""]","""Hide and Shriek: Super Special…","""The girls go on an overnight a…","""https://www.goodreads.com/book…","""https://s.gr-assets.com/assets…","[{""21740"",""""}]"
2,[null],"[""friendship"", ""middle-reader"", … ""my-library""]","""Dear Mom, You're Ruining My Li…","""Samantha Slayton worries about…","""https://www.goodreads.com/book…","""https://s.gr-assets.com/assets…","[{""18946"",""""}]"
3,"[""151088""]","[""summer-2017"", ""bullying"", … ""re-read""]","""Bratfest at Tiffany's (Clique …","""Massie Block: The Briarwood bo…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""4605"",""""}]"
4,"[""812067""]","[""rosemary-vernon"", ""young-adult"", … ""to-read""]","""Questions of Love (Sweet Dream…","""When Sammi Edwards is chosen t…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""792676"",""""}]"


In [8]:
# Catalogue size; also captured as the default of evaluate_recommender below.
n_books = books.height
print(f"N books: {n_books}")

# Catalogue size minus the number of distinct item_ids that occur in train.
n_train_items = train.select(pl.col("item_id").n_unique()).item()
n_cold_books = n_books - n_train_items
print(f"N 'cold' books: {n_cold_books}")

N books: 34322
N 'cold' books: 3022


In [9]:
# Test interactions come one (user, item) pair per row; collapse them into a
# single list of item_ids per user, keeping the users in file order.
test_exploded = pl.read_parquet(f"{data_folder}test.pq")
test = (
    test_exploded
    .group_by("user_id", maintain_order=True)
    .agg(pl.col("item_id"))
)
print("Test:", test.shape, sep="\n")
display(test.head(5))

Test:
(185828, 2)


user_id,item_id
str,list[i64]
"""00000377eea48021d3002730d56aca…",[13252]
"""00009ab2ed8cbfceda5a59da409663…",[2328]
"""00009e46d18f223a82b22da38586b6…","[28636, 30197]"
"""0001085188e302fc6b2568de45a5f5…","[2159, 2969, … 33630]"
"""00014c578111090720e20f5705eba0…","[45, 3513, … 33273]"


In [None]:
# Scratch check: first row of the user_id column. As the output below shows,
# this is a 1x1 DataFrame, not the bare string id.
test.select('user_id')[0]

'shape: (1, 1)\n┌─────────────────────────────────┐\n│ user_id                         │\n│ ---                             │\n│ str                             │\n╞═════════════════════════════════╡\n│ 00000377eea48021d3002730d56aca… │\n└─────────────────────────────────┘'

In [10]:
# ACHTUNG! DO NOT TOUCH

@dataclass
class AtKMetric(ABC):
    """Abstract base class for ranking metrics parameterised by a cut-off ``k``.

    Subclasses provide a ``name`` and implement ``__call__``, which returns a
    Polars expression that can be passed to ``DataFrame.with_columns``.
    """

    # Cut-off rank; appears in ``full_name`` and is read by the subclasses.
    k: int

    @property
    @abstractmethod
    def name(self) -> str:
        """Short metric identifier without the ``@k`` suffix."""
        raise NotImplementedError

    @property
    def full_name(self) -> str:
        """Metric identifier with the cut-off appended, e.g. ``"<name>@<k>"``."""
        return f"{self.name}@{self.k}"

    @abstractmethod
    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        """Build the per-row metric expression.

        Args:
            preds_col: Name of the column holding the predicted items.
            ground_truth_col: Name of the column holding the relevant items.

        Returns:
            A Polars expression computing the metric.
        """
        raise NotImplementedError


class NDCG(AtKMetric):
    """Normalised discounted cumulative gain at ``k`` with binary relevance."""

    @property
    def name(self) -> str:
        """Return the metric identifier ``"ndcg"``."""
        return "ndcg"

    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        """Build a Float64 expression computing NDCG@k for every row.

        Args:
            preds_col: Column with the ranked list of predicted items.
            ground_truth_col: Column with the list of relevant items.

        Returns:
            Expression aliased to ``self.full_name``. A row scores 0.0 when
            either list is empty or when no prediction in the top ``k`` is
            relevant.
        """
        def _dcg(scores: np.ndarray) -> float:
            # Handle empty arrays
            if len(scores) == 0:
                return 0.0
            # Use vectorized operations for better performance
            # Positions are 1-based, so the discount for rank p is log2(p + 1).
            positions = np.arange(1, len(scores) + 1, dtype=np.float64)
            return np.sum((np.power(2, scores) - 1) / np.log2(positions + 1))

        def ndcg(predicted: list[t.Any], gt_items: list[t.Any]) -> float:
            # Handle empty predictions or ground truth
            if not predicted or not gt_items:
                return 0.0

            # Take only top-k predictions
            predicted = predicted[:self.k]

            # Create relevance scores (1 for relevant items, 0 otherwise)
            relevance = np.array([1 if x in gt_items else 0 for x in predicted], dtype=np.float64)

            rank_dcg = _dcg(relevance)
            if rank_dcg == 0.0:
                return 0.0

            # Ideal DCG: sort ground truth by relevance (all 1s) and take top-k
            ideal_relevance = np.ones(min(len(gt_items), self.k), dtype=np.float64)
            ideal_dcg = _dcg(ideal_relevance)

            if ideal_dcg == 0.0:
                return 0.0

            return rank_dcg / ideal_dcg

        # Pack both columns into a struct so the Python function sees them together.
        return pl.struct([preds_col, ground_truth_col]).map_elements(
            lambda x: ndcg(x[preds_col], x[ground_truth_col]),
            return_dtype=pl.Float64
        ).alias(self.full_name)


class Recall(AtKMetric):
    """Recall at ``k``: share of the relevant items found in the top ``k`` predictions."""

    @property
    def name(self) -> str:
        """Return the metric identifier ``"recall"``."""
        return "recall"

    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        """Build a Float64 expression computing Recall@k for every row.

        Args:
            preds_col: Column with the ranked list of predicted items.
            ground_truth_col: Column with the list of relevant items.

        Returns:
            Expression aliased to ``self.full_name``. A row with an empty
            ground-truth list scores 0.0.
        """
        def recall(predicted: list[t.Any], gt_items: list[t.Any]) -> float:
            # Handle empty ground truth
            if not gt_items:
                return 0.0

            # Take only top-k predictions
            predicted = predicted[:self.k]

            # Calculate intersection
            # The numerator counts distinct items; the denominator is the raw
            # length of the ground-truth list.
            intersection = len(set(gt_items).intersection(set(predicted)))
            return intersection / len(gt_items)

        return pl.struct([preds_col, ground_truth_col]).map_elements(
            lambda x: recall(x[preds_col], x[ground_truth_col]),
            return_dtype=pl.Float64
        ).alias(self.full_name)


def coverage(df: pl.DataFrame, n_items_in_catalog: int, preds_col: str = "preds") -> float:
    """Return the number of distinct recommended values divided by the catalogue size.

    Args:
        df: DataFrame whose ``preds_col`` holds one list of recommendations per row.
        n_items_in_catalog: Total number of items in the catalogue.
        preds_col: Name of the column with the recommendation lists.

    Returns:
        ``n_unique(exploded preds) / n_items_in_catalog``, or 0.0 when the
        catalogue size is not positive.
    """
    if n_items_in_catalog <= 0:
        return 0.0

    # Flatten all recommendation lists and count distinct values.
    unique_recommended = df.select(
        pl.col(preds_col).explode().n_unique()
    ).item()
    return unique_recommended / n_items_in_catalog


def evaluate_recommender(
    df: pl.DataFrame,
    model_preds_col: str,
    ground_truth_col: str = "item_id",
    n_items_in_catalog: int = n_books,
    k: int = 10,
) -> dict:
    """Compute NDCG@k, Recall@k and (optionally) coverage for one model.

    Args:
        df: DataFrame with one row per user, containing the prediction and
            ground-truth list columns.
        model_preds_col: Name of the column with the model's recommendations.
        ground_truth_col: Name of the column with the relevant items.
        n_items_in_catalog: Catalogue size for coverage. The default is the
            value the global ``n_books`` had when this function was defined;
            pass ``None`` to skip coverage.
        k: Cut-off used for both ranking metrics.

    Returns:
        Dict mapping metric names (``"ndcg@k"``, ``"recall@k"``, ``"coverage"``)
        to their values rounded to three decimals. The ranking metrics are
        averaged over the rows of ``df``.
    """
    metrics = [
        NDCG(k=k),
        Recall(k=k),
    ]
    result = {}

    # Create a copy to avoid modifying the original dataframe
    result_df = df.clone()

    for metric in metrics:
        # Add the per-row metric column, then reduce it to its mean.
        result_df = result_df.with_columns(
            metric(preds_col=model_preds_col, ground_truth_col=ground_truth_col)
        )
        result[metric.full_name] = round(result_df.select(pl.col(metric.full_name).mean()).item(), 3)

    if n_items_in_catalog is not None:
        result["coverage"] = round(coverage(result_df, n_items_in_catalog, preds_col=model_preds_col), 3)

    return result

## Models

In [None]:
class ItemkNN:
    """Item-to-item nearest-neighbour recommender.

    For every user the highest-rated training item is taken as an anchor, and
    the catalogue items whose embeddings have the largest dot product with the
    anchor's embedding are recommended, best match first.
    """

    def __init__(self):
        self.trained: bool = False
        self.train_most_liked_item: pl.DataFrame | None = None
        self.items_embeddings: np.ndarray | None = None
        self.items_item_ids: np.ndarray | None = None
        # Remembered from ``fit`` so that ``predict`` reads the same column.
        self._item_id_col: str = "item_id"

    def fit(
        self,
        df: pl.DataFrame,
        items_df: pl.DataFrame,
        item_id_col: str = "item_id"
    ) -> None:
        """Store the item embeddings and each user's anchor item.

        Args:
            df: Interactions with ``user_id``, ``rating`` and ``item_id_col``.
            items_df: Catalogue with ``item_id_col`` and an ``embedding`` column.
            item_id_col: Name of the item id column in both frames.
        """
        self._item_id_col = item_id_col
        self.items_embeddings = np.array(items_df["embedding"].to_list())
        self.items_item_ids = items_df[item_id_col].to_numpy()

        # find the most liked item for each user
        self.train_most_liked_item = (
            df.lazy()
            .sort("rating", descending=True)
            .group_by("user_id")
            .agg(pl.col(item_id_col).first().alias(item_id_col))
            .join(
                items_df.lazy().select([item_id_col, "embedding"]),
                on=item_id_col,
                how="left"
            )
            .collect()
        )
        self.trained = True

    def _get_batch_predictions(
        self,
        batch_user_embeddings: np.ndarray,
        batch_current_items: np.ndarray,
        batch_user_mask: np.ndarray,
        topn: int
    ) -> list[np.ndarray]:
        """Return up to ``topn`` item ids per user, ordered by descending score.

        Args:
            batch_user_embeddings: One anchor embedding per user (row).
            batch_current_items: Anchor item id per user; never recommended back.
            batch_user_mask: True where the user has a usable embedding.
            topn: Maximum number of recommendations per user.

        Returns:
            One list of item ids per user; empty for users without an embedding.
        """
        batch_predictions = []
        n_items = len(self.items_item_ids)
        # One extra candidate because the anchor item is filtered out below.
        n_candidates = min(topn + 1, n_items)

        # compute similarity scores for all items in batch
        batch_scores = batch_user_embeddings @ self.items_embeddings.T

        for user_scores, current_item, has_embedding in zip(
            batch_scores, batch_current_items, batch_user_mask
        ):
            if not has_embedding:
                batch_predictions.append([])
                continue

            if n_candidates < n_items:
                # argpartition only guarantees membership of the top set, not
                # its order, so the candidates are sorted explicitly below.
                candidates = np.argpartition(-user_scores, n_candidates - 1)[:n_candidates]
            else:
                candidates = np.arange(n_items)
            candidates = candidates[np.argsort(-user_scores[candidates], kind="stable")]

            # filter out current item and take topn
            recommended_items = [
                item_id
                for item_id in self.items_item_ids[candidates]
                if item_id != current_item
            ][:topn]
            batch_predictions.append(recommended_items)

        return batch_predictions

    def predict(
        self,
        df: pl.DataFrame,
        topn: int = 10,
        batch_size: int = 5_000
    ) -> list[np.ndarray]:
        """Recommend ``topn`` items for every row of ``df``.

        Args:
            df: DataFrame with a ``user_id`` column.
            topn: Number of recommendations per user.
            batch_size: Number of users scored per matrix multiplication.

        Returns:
            One list of item ids per row of ``df``, in the same order. Users
            that were not seen in ``fit`` (or whose anchor item has no
            embedding) get an empty list.
        """
        assert self.trained

        user_data = (
            df.select(["user_id"])
            .join(self.train_most_liked_item, on="user_id", how="left")
        )

        user_mask = user_data["embedding"].is_not_null().to_numpy()
        current_items = user_data[self._item_id_col].to_numpy()
        n_users = len(user_data)

        # Users without an embedding come back as None from the left join.
        # Give them a zero row so the array stays rectangular; they are
        # skipped via ``user_mask`` when recommendations are built.
        user_embeddings = np.zeros(
            (n_users, self.items_embeddings.shape[1]), dtype=np.float64
        )
        known = [emb for emb in user_data["embedding"].to_list() if emb is not None]
        if known:
            user_embeddings[user_mask] = np.asarray(known, dtype=np.float64)

        predictions = []

        for batch_start in tqdm(range(0, n_users, batch_size)):
            batch_end = min(batch_start + batch_size, n_users)

            batch_predictions = self._get_batch_predictions(
                user_embeddings[batch_start:batch_end],
                current_items[batch_start:batch_end],
                user_mask[batch_start:batch_end],
                topn
            )
            predictions.extend(batch_predictions)

        return predictions


# Fit the baseline on the training interactions and score it on the test users.
item_knn = ItemkNN()
item_knn.fit(train, books)
test = test.with_columns(
    pl.Series(item_knn.predict(test)).alias("item_knn_recs")
)
print(test.head())
evaluate_recommender(df=test, model_preds_col="item_knn_recs")

In [None]:
# class  LinUCB:
#     def __init__(self, alpha=1.0, d = 5, K_arms=10):
#         self.trained = False
#         self.alpha = alpha
#         self.K_arms = K_arms
#         self.linucb_arms = [self.linucb_disjoint_arm(i, d, alpha) for i in range(K_arms)]
        
# def linucb_disjoint_arm(self, arm_index, d, alpha, K_arms = 10):
    

#     @abstractmethod
#     def fit(self, df: pl.DataFrame, **kwargs) -> None:
#         # реализация может быть любой, никаких ограничений
#         # не забудьте про self.trained = True
#         self.trained = True

#     @abstractmethod
#     def predict(self, df: pl.DataFrame, topn: int = 10, **kwargs) -> list[np.ndarray]:
#         # реализация может быть любой, НО
#         # должен возвращаться список массивов из item_id, которые есть в `books`, чтобы корректно работал подсчет метрик
#         pass

In [None]:
# import numpy as np

# class linucb_disjoint_arm:
#     def __init__(self, arm_index, d, alpha, K_arms = 10):
#         self.arm_index = arm_index
#         self.alpha = alpha
#         self.A = np.identity(d)
#         self.b = np.zeros([d, 1])

#     def calc_UCB(self, x_array):
#         A_inv = np.linalg.inv(self.A)
#         theta = np.dot(A_inv, self.b)
#         x = x_array.reshape([-1, 1])
#         p = np.dot(theta.T, x) + self.alpha * np.sqrt(np.dot(x.T, np.dot(A_inv, x)))
#         return p

#     def reward_update(self, reward, x_array):
#         x = x_array.reshape([-1, 1])
#         self.A += np.dot(x, x.T)
#         self.b += reward * x

# class linucb_policy:
#     def __init__(self, K_arms, d, alpha):
#         self.K_arms = K_arms
#         self.linucb_arms = [linucb_disjoint_arm(i, d, alpha) for i in range(K_arms)]

#     def select_arm(self, x_array):
#         highest_ucb = -1
#         candidate_arms = []

#         for i in range(self.K_arms):
#             ucb = self.linucb_arms[i].calc_UCB(x_array)
#             if ucb > highest_ucb:
#                 highest_ucb = ucb
#                 candidate_arms = [i]
#             elif ucb == highest_ucb:
#                 candidate_arms.append(i)

#         return np.random.choice(candidate_arms)

# # пример использования
# d = 5  # размерность контекста
# K_arms = 10  # количество рук
# alpha = 1.0  # параметр баланса исследования и использования
# policy = linucb_policy(K_arms, d, alpha)

# # допустим, у нас есть контекстный вектор для текущего пользователя
# x_context = np.random.rand(d)

# # выбор руки
# chosen_arm = policy.select_arm(x_context)
# print("Выбранная рука:", chosen_arm)


Выбранная рука: 5


## Bandits

In [None]:

import numpy as np
import polars as pl
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any
from tqdm import tqdm

class BaseRecommender(ABC):
    """Common interface shared by the recommenders in this notebook."""

    def __init__(self):
        self.trained = False

    @abstractmethod
    def fit(self, df: pl.DataFrame, **kwargs) -> None:
        """Train the model; implementations must set ``self.trained = True``."""
        self.trained = True

    @abstractmethod
    def predict(self, df: pl.DataFrame, topn: int = 10, **kwargs) -> list[np.ndarray]:
        """Return one collection of recommended item ids per row of ``df``."""
        pass

class LinUCBRecommender(BaseRecommender):
    """Disjoint LinUCB contextual bandit with one arm per training item.

    The class is self-contained: the per-arm ridge-regression state lives in
    the nested ``_Policy`` helper, so no externally defined ``linucb_policy``
    is required.

    Every context vector has the same length, ``context_dim``: the user
    statistics followed by the item embedding. The item part is all zeros when
    no item is given or the item has no embedding, and it is omitted entirely
    when no item embeddings are known.

    NOTE: the policy stores a ``context_dim x context_dim`` matrix per arm, so
    memory grows as ``n_arms * context_dim ** 2``.
    """

    # Number of user statistics built in ``fit``; used as the user-vector
    # length when no user embedding is available yet.
    _N_USER_FEATURES = 4

    class _Policy:
        """Vectorised disjoint LinUCB state (one linear model per arm)."""

        def __init__(self, n_arms: int, d: int, alpha: float):
            self.n_arms = n_arms
            self.alpha = alpha
            # A_a^{-1} for every arm; A_a starts as the identity matrix.
            self.A_inv = np.tile(np.identity(d), (n_arms, 1, 1))
            self.b = np.zeros((n_arms, d))
            self._theta = np.zeros((n_arms, d))
            self._theta_stale = False

        def reward_update(self, arm: int, reward: float, x: np.ndarray) -> None:
            """Apply ``A += x x^T`` and ``b += reward * x`` to one arm."""
            x = np.asarray(x, dtype=np.float64).ravel()
            arm_A_inv = self.A_inv[arm]  # view: updated in place below
            A_inv_x = arm_A_inv @ x
            # Sherman-Morrison rank-one update keeps the inverse current
            # without re-inverting A (A is symmetric, so A^-1 x x^T A^-1 is
            # the outer product of A^-1 x with itself).
            arm_A_inv -= np.outer(A_inv_x, A_inv_x) / (1.0 + x @ A_inv_x)
            self.b[arm] += reward * x
            self._theta_stale = True

        def ucb_scores(self, x: np.ndarray) -> np.ndarray:
            """Return ``theta_a . x + alpha * sqrt(x^T A_a^-1 x)`` for all arms."""
            x = np.asarray(x, dtype=np.float64).ravel()
            if self._theta_stale:
                # theta_a = A_a^-1 b_a, recomputed lazily after updates.
                self._theta = np.einsum("aij,aj->ai", self.A_inv, self.b)
                self._theta_stale = False
            A_inv_x = self.A_inv @ x
            # Clip tiny negative values caused by floating-point round-off.
            variance = np.maximum(A_inv_x @ x, 0.0)
            return self._theta @ x + self.alpha * np.sqrt(variance)

        def get_top_arms(self, x: np.ndarray, topn: int) -> np.ndarray:
            """Return the indices of the ``topn`` best arms, best first."""
            k = min(topn, self.n_arms)
            if k <= 0:
                return np.empty(0, dtype=np.int64)
            scores = self.ucb_scores(x)
            if k < self.n_arms:
                top = np.argpartition(-scores, k - 1)[:k]
            else:
                top = np.arange(self.n_arms)
            return top[np.argsort(-scores[top], kind="stable")]

    def __init__(self, alpha: float = 1.0, context_dim: int = None):
        super().__init__()
        self.alpha = alpha
        self.context_dim = context_dim
        self.policy: Optional["LinUCBRecommender._Policy"] = None
        self.item_to_arm: Dict[Any, int] = {}
        self.arm_to_item: Dict[int, Any] = {}
        self.user_features: Optional[pl.DataFrame] = None
        self.item_features: Optional[pl.DataFrame] = None
        self.user_embeddings: Optional[Dict[Any, np.ndarray]] = {}
        self.item_embeddings: Optional[Dict[Any, np.ndarray]] = {}

    def _user_dim(self) -> int:
        """Length of the user part of the context vector."""
        if self.user_embeddings:
            return len(next(iter(self.user_embeddings.values())))
        return self._N_USER_FEATURES

    def _item_dim(self) -> int:
        """Length of the item part of the context vector (0 without embeddings)."""
        if self.item_embeddings:
            return len(next(iter(self.item_embeddings.values())))
        return 0

    def _create_context_vector(self, user_id: Any, item_id: Any = None) -> np.ndarray:
        """Create context vector from user and item features.

        The result always has length ``_user_dim() + _item_dim()``; unknown
        users, unknown items and a missing ``item_id`` are encoded as zeros.
        """
        user_vec = self.user_embeddings.get(user_id)
        if user_vec is None:
            user_vec = np.zeros(self._user_dim())

        item_dim = self._item_dim()
        if item_dim == 0:
            return np.asarray(user_vec, dtype=np.float64)

        item_vec = self.item_embeddings.get(item_id) if item_id is not None else None
        if item_vec is None:
            item_vec = np.zeros(item_dim)
        return np.concatenate([user_vec, item_vec]).astype(np.float64)

    def fit(self, df: pl.DataFrame,
            items_df: pl.DataFrame = None,
            user_id_col: str = "user_id",
            item_id_col: str = "item_id",
            rating_col: str = "rating",
            **kwargs) -> None:
        """
        Fit the LinUCB model

        Args:
            df: Training DataFrame with user_id, item_id, rating
            items_df: Optional DataFrame with item features/embeddings
            user_id_col: Column name for user ID
            item_id_col: Column name for item ID
            rating_col: Column name for rating
        """
        # Start from a clean state so that calling fit twice does not mix runs.
        self.trained = False
        self.user_embeddings = {}
        self.item_embeddings = {}

        # Get unique items to create arms
        unique_items = df[item_id_col].unique().sort()
        self.item_to_arm = {item_id: idx for idx, item_id in enumerate(unique_items)}
        self.arm_to_item = {idx: item_id for item_id, idx in self.item_to_arm.items()}

        # Extract embeddings if available
        if items_df is not None and "embedding" in items_df.columns:
            items_with_embeddings = items_df.filter(pl.col("embedding").is_not_null())
            for row in items_with_embeddings.iter_rows(named=True):
                self.item_embeddings[row[item_id_col]] = np.array(row["embedding"])

        # Create user embeddings based on their interaction patterns
        # This is a simple approach - you might want to use more sophisticated user features
        user_stats = (
            df.group_by(user_id_col)
            .agg([
                pl.col(rating_col).mean().alias("avg_rating"),
                pl.col(rating_col).std().alias("rating_std"),
                pl.col(rating_col).count().alias("interaction_count"),
                pl.col(item_id_col).n_unique().alias("unique_items")
            ])
        )

        for row in user_stats.iter_rows(named=True):
            # Create simple user embedding from statistics
            self.user_embeddings[row[user_id_col]] = np.array([
                row["avg_rating"] or 0,
                row["rating_std"] or 0,
                np.log(row["interaction_count"] + 1),
                np.log(row["unique_items"] + 1)
            ])

        # Determine context dimension
        self.context_dim = self._user_dim() + self._item_dim()

        # Initialize LinUCB policy
        n_arms = len(unique_items)
        self.policy = self._Policy(n_arms, self.context_dim, self.alpha)

        # Train on historical data
        print(f"Training LinUCB with {n_arms} items and context dimension {self.context_dim}")

        # Convert ratings to binary rewards (1 if rating >= threshold, 0 otherwise)
        rating_threshold = df[rating_col].median()

        # Iterate over plain tuples of just the three needed columns.
        interactions = df.select([user_id_col, item_id_col, rating_col])
        for user_id, item_id, rating in tqdm(
            interactions.iter_rows(), total=len(interactions), desc="Training LinUCB"
        ):
            arm_idx = self.item_to_arm.get(item_id)
            if arm_idx is None:
                continue
            context = self._create_context_vector(user_id, item_id)
            # A missing rating is treated as "no reward".
            reward = 1.0 if rating is not None and rating >= rating_threshold else 0.0
            self.policy.reward_update(arm_idx, reward, context)

        self.trained = True
        print("LinUCB training completed!")

    def predict(self, df: pl.DataFrame, topn: int = 10,
                user_id_col: str = "user_id", **kwargs) -> list[list[int]]:
        """
        Generate recommendations for users

        Args:
            df: DataFrame with user_id column
            topn: Number of recommendations per user
            user_id_col: Column name for user ID

        Returns:
            List of recommendations (item_ids) for each user
        """
        assert self.trained, "Model must be trained before making predictions"

        predictions = []

        for row in tqdm(df.iter_rows(named=True), desc="Generating recommendations"):
            # Create context for this user (item part left as zeros)
            context = self._create_context_vector(row[user_id_col])

            # Get top N recommendations
            top_arms = self.policy.get_top_arms(context, topn)

            # Convert arm indices back to item IDs
            predictions.append([self.arm_to_item[int(arm_idx)] for arm_idx in top_arms])

        return predictions

    def update_reward(self, user_id: Any, item_id: Any, reward: float) -> None:
        """
        Update the model with new reward feedback

        Args:
            user_id: User ID
            item_id: Item ID that was recommended
            reward: Reward received (e.g., 1 for click, 0 for no click)
        """
        if not self.trained:
            return

        arm_idx = self.item_to_arm.get(item_id)
        if arm_idx is not None:
            context = self._create_context_vector(user_id, item_id)
            self.policy.reward_update(arm_idx, reward, context)


In [None]:
# Initialize
recommender = LinUCBRecommender(alpha=1.0)

# Train on historical data
recommender.fit(train, books)

# Generate recommendations
test_with_recs = test.with_columns(
    linucb_recs=pl.Series(recommender.predict(test, topn=10))
)

# Update with new feedback (online learning)
# `test["user_id"][0]` is the plain id of the first test user; a
# `select(...)[0]` slice would be a 1x1 DataFrame, which cannot be used as a
# dictionary key inside the recommender.
first_user_id = test["user_id"][0]
recommender.update_reward(user_id=first_user_id, item_id=456, reward=1.0)  # User liked the item

## First level model

In [None]:
class HybridBookRecommender:
    """Two-model recommender: LightFM for warm books, a bandit for cold books.

    ``LightFMModel`` and ``EmbeddingBasedBandit`` are not defined in this
    notebook and must be provided before the class is instantiated.
    NOTE(review): ``classify_books`` uses the pandas API (``groupby``,
    ``isin``), so it expects pandas frames rather than the polars frames
    loaded above — confirm against the intended caller.
    """

    def __init__(self):
        self.lightfm = LightFMModel()  # for warm books
        self.cold_bandit = EmbeddingBasedBandit()
        self.min_interactions_threshold = 5
        # Filled in by ``fit``.
        self.cold_books = None
        self.warm_books = None
        self._cold_item_ids = set()

    def fit(self, books_df, interactions_df):
        """Split the catalogue into cold and warm books and remember the split.

        Must be called before ``recommend`` or ``update``.
        """
        self.cold_books, self.warm_books = self.classify_books(books_df, interactions_df)
        self._cold_item_ids = set(self.cold_books['item_id'].tolist())

    def classify_books(self, books_df, interactions_df):
        """Split books into cold and warm ones.

        A book is warm when it has at least ``min_interactions_threshold``
        interactions; every other book (including books without any
        interaction) is cold.

        Returns:
            Tuple ``(cold_books, warm_books)`` of filtered ``books_df`` frames.
        """
        interaction_counts = interactions_df.groupby('item_id').size()
        warm_ids = interaction_counts[
            interaction_counts >= self.min_interactions_threshold
        ].index

        is_warm = books_df['item_id'].isin(warm_ids)
        return books_df[~is_warm], books_df[is_warm]

    def is_cold_item(self, item_id):
        """Return True when ``item_id`` is currently classified as cold."""
        return item_id in self._cold_item_ids

    def move_to_warm(self, item_id):
        """Reclassify a cold book as warm (no-op for books that are not cold)."""
        if item_id not in self._cold_item_ids:
            return
        moved = self.cold_books['item_id'] == item_id
        self.warm_books = pd.concat([self.warm_books, self.cold_books[moved]])
        self.cold_books = self.cold_books[~moved]
        self._cold_item_ids.discard(item_id)

    def recommend(self, user_id, n_recommendations=10):
        """Return warm recommendations followed by cold ones.

        About 20% of the slots (at least one) go to cold books.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
        """
        if self.cold_books is None:
            raise RuntimeError("Call fit(books_df, interactions_df) before recommend()")
        if n_recommendations <= 0:
            return []

        # 20% cold, 80% warm
        n_cold = min(n_recommendations, max(1, int(n_recommendations * 0.2)))
        n_warm = n_recommendations - n_cold

        # Warm recommendations via LightFM
        warm_recs = self.lightfm.recommend(user_id, n_warm)

        # Cold ones via embeddings + bandits
        cold_recs = self.cold_bandit.recommend_cold_books(user_id, self.cold_books, n_cold)

        return warm_recs + cold_recs

    def update(self, user_id, item_id, reward):
        """Update the models after receiving feedback."""
        if self.is_cold_item(item_id):
            self.cold_bandit.update(user_id, item_id, reward)

            # Check whether it is time to promote the book to warm
            if self.cold_bandit.book_stats[item_id]['n_interactions'] >= self.min_interactions_threshold:
                self.move_to_warm(item_id)
        else:
            self.lightfm.update(user_id, item_id, reward)

## Embeddings

In [None]:
import polars as pl
import numpy as np
from scipy.cluster.vq import kmeans2
import random  # Для симуляции

# Предполагаем DF загружен (твоя структура)
# df = pl.read_parquet('books.parquet')  # Или твой способ загрузки
# Для примера создадим маленький DF (замени на реальный)
schema = {
    'item_id': pl.Int64,
    'series': pl.List(pl.String),
    'tags': pl.List(pl.String),
    'title': pl.String,
    'description': pl.String,
    'url': pl.String,
    'image_url': pl.String,
    'authors': pl.List(pl.Struct([pl.Field('id', pl.String), pl.Field('name', pl.String)])),
    'title_embeddings': pl.Array(pl.Float32, 1024),
    'image_embedding': pl.List(pl.Float64),
    'description_embeddings': pl.Array(pl.Float32, 1024)  # added alongside the title embeddings
}
# Simulated data (remove when working with the real catalogue)
n_books = 34322
embed_size = 1024
df = pl.DataFrame(
    {
        'item_id': range(n_books),
        'series': [None] * n_books,
        'tags': [[]] * n_books,
        # One distinct title/description per row. The previous
        # `['Book ' + str(i)] * n_books` relied on an undefined `i`.
        'title': [f'Book {i}' for i in range(n_books)],
        'description': [f'Desc {i}' for i in range(n_books)],
        'url': ['url'] * n_books,
        'image_url': ['img'] * n_books,
        'authors': [[{'id': '', 'name': ''}]] * n_books,
        'title_embeddings': np.random.rand(n_books, embed_size).tolist(),
        'image_embedding': np.random.rand(n_books, embed_size).tolist(),
        'description_embeddings': np.random.rand(n_books, embed_size).tolist()
    },
    schema=schema
)

# Step 1: combine the embeddings (concatenate title + description -> 2048 dims)
title_emb = np.stack(df['title_embeddings'].to_list())
desc_emb = np.stack(df['description_embeddings'].to_list())
# image_emb = np.stack(df['image_embedding'].to_list())  # optional: np.concatenate([title_emb, desc_emb, image_emb], axis=1)
embeddings = np.concatenate([title_emb, desc_emb], axis=1)  # Shape: (34322, 2048)

# Clustering: 50 clusters
n_clusters = 50
centroids, labels = kmeans2(embeddings, k=n_clusters, minit='points')
df = df.with_columns(pl.Series('cluster', labels, dtype=pl.Int32))

# Класс Epsilon-Greedy Bandit (как раньше)
class EpsilonGreedyBandit:
    """Epsilon-greedy multi-armed bandit with running-mean value estimates.

    Attributes are ``counts`` (pulls per arm) and ``values`` (mean observed
    reward per arm), both initialised to zero.
    """

    def __init__(self, n_arms, epsilon=0.1):
        self.n_arms = n_arms
        self.epsilon = epsilon
        self.counts = np.zeros(n_arms)
        self.values = np.zeros(n_arms)

    def select_arm(self):
        """Pick an arm: random with probability ``epsilon``, otherwise the best.

        Ties between equally good arms are broken uniformly at random. A plain
        ``argmax`` would always return the lowest index, so a fresh bandit
        (all values zero) would exploit arm 0 every time.
        """
        if np.random.rand() < self.epsilon:
            return np.random.randint(0, self.n_arms)
        best_arms = np.flatnonzero(self.values == self.values.max())
        return int(np.random.choice(best_arms))

    def update(self, arm, reward):
        """Fold ``reward`` into the running mean of ``arm``."""
        self.counts[arm] += 1
        n = self.counts[arm]
        value = self.values[arm]
        # Incremental mean: new = old * (n - 1) / n + reward / n
        self.values[arm] = ((n - 1) / n) * value + (1 / n) * reward

# Функция рекомендации 10 книг из кластера (сортировка по длине description как прокси; добавь rating если есть)
def recommend_books(df, cluster, n=10):
    """Return the ``n`` books of ``cluster`` with the longest descriptions.

    Args:
        df: Books frame with ``cluster``, ``description``, ``item_id``,
            ``title`` and ``url`` columns.
        cluster: Cluster label to recommend from.
        n: Maximum number of books to return.

    Returns:
        Frame with the columns ``item_id``, ``title``, ``description``, ``url``.
    """
    description_length = pl.col('description').str.len_bytes().alias('desc_len')
    return (
        df.filter(pl.col('cluster') == cluster)
        # Description length (in bytes) is the ranking proxy.
        .with_columns(description_length)
        .sort('desc_len', descending=True)
        .head(n)
        .select(['item_id', 'title', 'description', 'url'])
    )

# Инициализация для user N (отдельный бандит per user)
bandit = EpsilonGreedyBandit(n_arms=n_clusters, epsilon=0.1)

# Recommendation for user N (cold start)
cluster = bandit.select_arm()
recommended = recommend_books(df, cluster)
print(f"Рекомендации для user N (кластер {cluster}):")
print(recommended)

# Simulated feedback (in reality it comes from the user: mean reward over the 10 books, or per book)
reward = random.choice([0, 1])  # emulates a click/purchase
bandit.update(cluster, reward)
print(f"Reward: {reward}")

# Several sessions (training)
for _ in range(100):  # simulation
    cluster = bandit.select_arm()
    reward = random.uniform(0, 1)  # in reality based on the true preferences
    bandit.update(cluster, reward)

In [None]:
import polars as pl
import numpy as np
from scipy.cluster.vq import kmeans2
from abc import ABC, abstractmethod
from tqdm import tqdm

class BaseRecommender(ABC):
    """Common interface shared by the recommenders in this notebook."""

    def __init__(self):
        self.trained = False

    @abstractmethod
    def fit(self, df: pl.DataFrame, **kwargs) -> None:
        """Train the model; implementations must set ``self.trained = True``."""
        self.trained = True

    @abstractmethod
    def predict(self, df: pl.DataFrame, topn: int = 10, **kwargs) -> list[np.ndarray]:
        """Return one collection of recommended item ids per row of ``df``."""
        pass

class BanditRecommender(BaseRecommender):
    """Per-user epsilon-greedy bandit over clusters of book embeddings.

    Books are clustered once in ``fit``. Each user owns a bandit whose arms are
    the clusters; a prediction is the top books of the selected cluster.
    """

    def __init__(self, n_clusters=50, epsilon=0.1):
        super().__init__()
        self.n_clusters = n_clusters
        self.epsilon = epsilon
        self.item_ids = None
        self.clusters = None
        self.bandits = {}  # one bandit per user_id
        self.books_df = None
        self._item_id_col = "item_id"

    class EpsilonGreedyBandit:
        """Epsilon-greedy bandit with running-mean value estimates."""

        def __init__(self, n_arms, epsilon):
            self.n_arms = n_arms
            self.epsilon = epsilon
            self.counts = np.zeros(n_arms)
            self.values = np.zeros(n_arms)

        def select_arm(self):
            """Pick a random arm with probability ``epsilon``, else a best arm.

            Ties are broken uniformly at random. A plain ``argmax`` would send
            every fresh bandit (all values zero) to arm 0.
            """
            if np.random.rand() < self.epsilon:
                return np.random.randint(0, self.n_arms)
            best_arms = np.flatnonzero(self.values == self.values.max())
            return int(np.random.choice(best_arms))

        def update(self, arm, reward):
            """Fold ``reward`` into the running mean of ``arm``."""
            self.counts[arm] += 1
            n = self.counts[arm]
            value = self.values[arm]
            self.values[arm] = ((n - 1) / n) * value + (1 / n) * reward

    def fit(self, df: pl.DataFrame, items_df: pl.DataFrame, item_id_col: str = "item_id", **kwargs) -> None:
        """
        Cluster the books by their embeddings and store the data needed for
        recommendations.

        df: DataFrame with user_id (and possibly item_id, rating).
        items_df: DataFrame with the books (item_id, title_embeddings,
            description_embeddings, description).
        """
        self._item_id_col = item_id_col
        self.item_ids = items_df[item_id_col].to_numpy()

        # Concatenate title_embeddings and description_embeddings
        title_emb = np.stack(items_df['title_embeddings'].to_list())
        desc_emb = np.stack(items_df['description_embeddings'].to_list())
        embeddings = np.concatenate([title_emb, desc_emb], axis=1)  # Shape: (n_books, 2048)

        # Clustering
        _, labels = kmeans2(embeddings, k=self.n_clusters, minit='points')
        self.clusters = labels  # cluster label for every row of items_df

        # Attach the labels once here instead of on every predict() call.
        self.books_df = items_df.with_columns(pl.Series('cluster', labels, dtype=pl.Int32))

        # Create a bandit for every user in df
        for user_id in df['user_id'].unique():
            self.bandits[user_id] = self.EpsilonGreedyBandit(n_arms=self.n_clusters, epsilon=self.epsilon)

        self.trained = True

    def _recommend_books(self, cluster: int, topn: int = 10) -> np.ndarray:
        """
        Return the topn books of the given cluster, sorted by description
        length in bytes (used as the ranking proxy).
        """
        top_books = (
            self.books_df.filter(pl.col('cluster') == cluster)
            .with_columns(pl.col('description').str.len_bytes().alias('desc_len'))
            .sort('desc_len', descending=True)
            .head(topn)
        )
        return top_books[self._item_id_col].to_numpy()

    def _pad_with_random(self, recommended: np.ndarray, topn: int) -> np.ndarray:
        """Top up ``recommended`` to ``topn`` ids with random unseen catalogue items.

        Ids already present are never drawn again. When the catalogue is too
        small, fewer than ``topn`` ids are returned instead of raising.
        """
        missing = topn - len(recommended)
        if missing <= 0:
            return recommended[:topn]
        pool = np.setdiff1d(self.item_ids, recommended)
        extra = np.random.choice(pool, size=min(missing, len(pool)), replace=False)
        return np.concatenate([recommended, extra])

    def update(self, user_id, cluster: int, reward: float) -> None:
        """Feed a real reward for ``cluster`` into the user's bandit."""
        if user_id not in self.bandits:
            self.bandits[user_id] = self.EpsilonGreedyBandit(n_arms=self.n_clusters, epsilon=self.epsilon)
        self.bandits[user_id].update(cluster, reward)

    def predict(self, df: pl.DataFrame, topn: int = 10, **kwargs) -> list[np.ndarray]:
        """
        Return one array of recommended item ids per user_id in df.

        Keyword args:
            simulate_feedback (bool, default True): after each recommendation
                update the user's bandit with a random 0/1 reward, as the
                original prototype did. Pass False to leave the bandits
                untouched and report real rewards through ``update``.
        """
        assert self.trained, "Model must be trained before prediction"
        simulate_feedback = kwargs.get("simulate_feedback", True)

        # The top books of a cluster do not depend on the user: compute each
        # cluster at most once per call.
        top_by_cluster = {}

        predictions = []
        for user_id in tqdm(df['user_id']):
            # Get or create the user's bandit
            bandit = self.bandits.get(user_id)
            if bandit is None:
                bandit = self.EpsilonGreedyBandit(n_arms=self.n_clusters, epsilon=self.epsilon)
                self.bandits[user_id] = bandit

            # Choose a cluster
            cluster = bandit.select_arm()

            # Recommend topn books from the cluster, padded if it is too small
            if cluster not in top_by_cluster:
                top_by_cluster[cluster] = self._recommend_books(cluster, topn)
            predictions.append(self._pad_with_random(top_by_cluster[cluster], topn))

            if simulate_feedback:
                # Simulated feedback (replace with a real reward in production)
                reward = np.random.choice([0, 1])  # emulates a click
                bandit.update(cluster, reward)

        return predictions

In [None]:
# Инициализация
bandit_rec = BanditRecommender(n_clusters=50, epsilon=0.1)

# Training (books is the book catalogue, train holds the user_ids)
bandit_rec.fit(train, books)

# Predictions, stored next to the other models' columns
test = test.with_columns(
    pl.Series(bandit_rec.predict(test, topn=10)).alias("bandit_recs")
)

# Show the first rows
print(test.head())

# Evaluation (evaluate_recommender is defined in the metrics cell)
evaluate_recommender(df=test, model_preds_col="bandit_recs")