# Imports

In [1]:
!pip install --upgrade git+https://github.com/evfro/polara.git@develop#egg=polara

Collecting polara
  Cloning https://github.com/evfro/polara.git (to revision develop) to /tmp/pip-install-ctg0fay8/polara_244863647da44536a9f4dda9fa98c121
  Running command git clone --filter=blob:none --quiet https://github.com/evfro/polara.git /tmp/pip-install-ctg0fay8/polara_244863647da44536a9f4dda9fa98c121
  Running command git checkout -b develop --track origin/develop
  Switched to a new branch 'develop'
  Branch 'develop' set up to track remote branch 'develop' from 'origin'.
  Resolved https://github.com/evfro/polara.git to commit 8fdb520722f4c80c20709dbbda5b0896c4ca6f7a
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: polara
  Building wheel for polara (setup.py) ... [?25l[?25hdone
  Created wheel for polara: filename=polara-0.7.2.dev0-py3-none-any.whl size=90560 sha256=18ef96234c5aea928427700cb0191983d3db2088d6eb11bdedf388412b0b5172
  Stored in directory: /tmp/pip-ephem-wheel-cache-xd1vs_ka/wheels/0d/0d/28/cc13e491ae8db91f2ad2815e

In [2]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import seaborn as sns

import torch
import torch.nn as nn
from polara import get_movielens_data

from tqdm import tqdm

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
_backend_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend_name)
print(f"Using device: {device}")

Using device: cuda


# Asymmetric GNN

## Hyperboloid Operations

In [4]:
def minkowski_inner(x, y):
    """
    Lorentzian (Minkowski) inner product over the last axis.

    Computes ``-x[..., 0] * y[..., 0] + sum(x[..., 1:] * y[..., 1:])``; the two
    inputs broadcast against each other and the last axis is reduced away.
    """
    time_term = x[..., 0] * y[..., 0]
    space_term = (x[..., 1:] * y[..., 1:]).sum(dim=-1)
    return -time_term + space_term

def project_to_hyperboloid(x):
    """
    Rebuild the first coordinate so that ``-x0**2 + ||x[1:]||**2 == -1``.

    The incoming first coordinate is ignored; it is replaced by
    ``sqrt(1 + ||x[..., 1:]||**2)`` while the remaining coordinates are kept.
    """
    spatial = x[..., 1:]
    time_coord = torch.sqrt(1.0 + (spatial ** 2).sum(dim=-1, keepdim=True))
    return torch.cat((time_coord, spatial), dim=-1)

In [5]:
def tangent_projection(x, v):
    """
    Return ``v + <x, v>_L * x`` with ``<., .>_L`` the Minkowski inner product.

    When ``x`` satisfies ``<x, x>_L = -1`` this removes the component of ``v``
    along ``x``, i.e. the result is Minkowski-orthogonal to ``x``.
    """
    along_x = minkowski_inner(x, v).unsqueeze(-1)
    return v + along_x * x

def exp_map(x, v):
    """
    Exponential map: ``cosh(|v|) * x + sinh(|v|) * v / |v|``.

    ``|v|`` is the Minkowski norm of ``v``; its square is clamped to at least
    1e-9 before the square root so the division stays finite for tiny steps.
    """
    sq_norm = torch.clamp(minkowski_inner(v, v), min=1e-9)
    step_len = torch.sqrt(sq_norm).unsqueeze(-1)
    return torch.cosh(step_len) * x + torch.sinh(step_len) * v / step_len

def log_map(x, y):
    """
    Logarithmic map: ``d * (y - c * x) / sinh(d)``.

    Here ``c = -<x, y>_L`` (clamped to at least ``1 + 1e-7`` so ``arccosh`` is
    defined) and ``d = arccosh(c)``. ``sinh(d)`` is clamped to at least 1e-7
    to keep the division finite when the two points nearly coincide.
    """
    neg_inner = torch.clamp(-minkowski_inner(x, y), min=1.0 + 1e-7)
    dist = torch.arccosh(neg_inner)

    unscaled = y - neg_inner.unsqueeze(-1) * x
    denom = torch.sinh(dist).clamp(min=1e-7).unsqueeze(-1)

    return dist.unsqueeze(-1) * unscaled / denom


## Einstein Midpoint

In [6]:
def hyperboloid_to_klein(x):
    """Divide the trailing coordinates by the first one (hyperboloid -> Klein coordinates)."""
    time_coord, spatial = x[..., :1], x[..., 1:]
    return spatial / time_coord

def klein_to_hyperboloid(x):
    """
    Lift Klein coordinates back to hyperboloid coordinates.

    With ``s = ||x||**2`` (clamped to at most ``1 - 1e-7``), the result is
    ``[g, g * x]`` where ``g = 1 / sqrt(1 - s)``.
    """
    sq_radius = torch.clamp((x ** 2).sum(dim=-1, keepdim=True), max=1.0 - 1e-7)
    lift = 1.0 / torch.sqrt(1.0 - sq_radius)
    return torch.cat((lift, lift * x), dim=-1)

In [7]:
def einstein_midpoint(x):
    """
    Einstein midpoint of a set of hyperboloid points.

    The points are stacked along dim 0 and the reduction runs over that
    axis. Each point is mapped to Klein coordinates ``k_i``, averaged with
    Lorentz-factor weights ``gamma_i = 1 / sqrt(1 - ||k_i||**2)``, and the
    weighted mean is lifted back to the hyperboloid.

    Args:
        x: tensor whose dim 0 indexes the points and whose last dim holds
            hyperboloid coordinates.

    Returns:
        The midpoint in hyperboloid coordinates (dim 0 reduced away).
    """
    x_k = hyperboloid_to_klein(x)
    norm_sq = (x_k ** 2).sum(dim=-1, keepdim=True)
    # Lorentz factor: the square root is required. Without it the weights are
    # gamma**2, which over-weights points far from the origin.
    gamma = 1.0 / torch.sqrt(torch.clamp(1.0 - norm_sq, min=1e-7))
    # The clamp on the denominator only matters for an empty set of points,
    # where both sums are zero and the result collapses to the origin.
    p = (gamma * x_k).sum(dim=0) / torch.clamp(gamma.sum(dim=0), min=1e-7)

    return klein_to_hyperboloid(p)

## Item embeddings initialization

In [42]:
def init_hyperboloid(n, dim, scale=1e-3):
    """
    Sample ``n`` points with ``dim`` Gaussian spatial coordinates scaled by
    ``scale`` and place them on the hyperboloid.

    The returned tensor has ``dim + 1`` columns and is allocated on the
    notebook-level ``device``.
    """
    spatial = torch.randn(n, dim, device=device) * scale
    # Placeholder first column; project_to_hyperboloid recomputes it.
    padded = torch.cat([torch.zeros(n, 1, device=device), spatial], dim=1)
    return project_to_hyperboloid(padded)

## Hyperbolic GNN

In [9]:
class HyperbolicItemGNNLayer(nn.Module):
    """
    One message-passing step over items.

    Each item with neighbours is moved a fraction ``alpha`` of the way along
    the log-map direction towards the Einstein midpoint of its neighbours;
    items without neighbours are passed through unchanged.
    """

    def __init__(self, alpha=0.5):
        super().__init__()
        self.alpha = alpha

    def forward(self, item_emb, item_neighbors):
        """
        Args:
            item_emb: tensor of item points, indexed by item along dim 0.
            item_neighbors: sequence where entry ``i`` holds the neighbour
                indices of item ``i``.

        Returns:
            Stacked tensor with one updated row per entry of ``item_neighbors``.
        """
        updated_rows = []

        for idx, neighbours in enumerate(item_neighbors):
            current = item_emb[idx]
            if len(neighbours) != 0:
                target = einstein_midpoint(item_emb[neighbours])
                step = self.alpha * log_map(current, target)
                # Re-project to correct numerical drift off the manifold.
                current = project_to_hyperboloid(exp_map(current, step))
            updated_rows.append(current)

        return torch.stack(updated_rows)

In [10]:
class HyperbolicItemGNN(nn.Module):
    """Learnable item points followed by a stack of ``HyperbolicItemGNNLayer`` steps."""

    def __init__(self, n_items, dim, n_layers):
        super().__init__()
        initial_points = init_hyperboloid(n_items, dim)
        self.item_emb = nn.Parameter(initial_points)

        stacked_layers = []
        for _ in range(n_layers):
            stacked_layers.append(HyperbolicItemGNNLayer())
        self.layers = nn.ModuleList(stacked_layers)

    def forward(self, item_neighbors):
        """Run every layer in order over the item embeddings and return the result."""
        hidden = self.item_emb
        for gnn_layer in self.layers:
            hidden = gnn_layer(hidden, item_neighbors)
        return hidden

## User embeddings

In [11]:
def compute_user_embedding(user_history, item_emb):
    """Represent a user as the Einstein midpoint of the rows of ``item_emb`` selected by ``user_history``."""
    history_points = item_emb[user_history]
    return einstein_midpoint(history_points)

## Loss function

In [12]:
class WMRBLoss(nn.Module):
    """
    Margin ranking loss of the form ``log1p(sum_j relu(margin - s_pos + s_neg_j))``.

    Scores are Minkowski inner products between the user and item points;
    the hinge is zero once the positive item scores at least ``margin``
    higher than a negative.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, user_emb, pos_item, neg_items):
        """
        Args:
            user_emb: user points, one row per example.
            pos_item: positive item points, aligned with ``user_emb``.
            neg_items: negative item points with an extra axis at dim 1
                (one entry per negative).

        Returns:
            Scalar loss averaged over the examples.
        """
        pos_score = minkowski_inner(user_emb, pos_item).unsqueeze(1)
        neg_score = minkowski_inner(user_emb.unsqueeze(1), neg_items)

        hinge = torch.relu(self.margin - pos_score + neg_score)
        per_example = hinge.sum(dim=1)
        return torch.log1p(per_example).mean()

## Optimizer

In [13]:
class RSGD(torch.optim.Optimizer):
    """
    Riemannian SGD for parameters stored as hyperboloid points.

    Each step rescales the Euclidean gradient with the inverse Minkowski
    metric, projects it with ``tangent_projection``, moves along ``exp_map``
    and re-projects the result with ``project_to_hyperboloid``.
    """

    def __init__(self, params, lr=0.05):
        super().__init__(params, dict(lr=lr))

    @torch.no_grad()
    def step(self, closure=None):
        """
        Perform one optimisation step.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss, as in the standard ``torch.optim`` interface.

        Returns:
            The closure's return value, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            # step() runs under no_grad; the closure needs gradients enabled.
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group["lr"]
            for p in group["params"]:
                if p.grad is None:
                    continue

                # Work on a copy: flipping the sign in place would corrupt
                # p.grad for anything that reads or accumulates it afterwards
                # (e.g. a second step() without zero_grad()).
                g = p.grad.clone()
                g[..., 0] *= -1  # inverse Minkowski metric

                g = tangent_projection(p, g)
                p_new = exp_map(p, -lr * g)
                p.copy_(project_to_hyperboloid(p_new))

        return loss

## Recommender system

In [14]:
class HyperbolicItemGNNRecommender(nn.Module):
    """Item GNN plus WMRB loss: users are the Einstein midpoint of their history items."""

    def __init__(self, n_items, dim, n_layers):
        super().__init__()
        self.gnn = HyperbolicItemGNN(n_items, dim, n_layers)
        self.loss_fn = WMRBLoss()

    def forward(self, user_histories, pos_items, neg_items, item_neighbors):
        """
        Compute the training loss for one batch.

        Args:
            user_histories: list with one sequence of item indices per user.
            pos_items: index tensor of positive items, aligned with the users.
            neg_items: index tensor of negative items for each user.
            item_neighbors: per-item neighbour indices forwarded to the GNN.

        Returns:
            Scalar loss from ``WMRBLoss``.
        """
        item_emb = self.gnn(item_neighbors)

        # Fallback for users without history: the hyperboloid origin (1, 0, ..., 0).
        # The all-zero vector is not on the manifold and scores 0 against every item.
        origin = torch.zeros_like(item_emb[0])
        origin[0] = 1.0

        user_embs = []
        for hist in user_histories:
            if len(hist) == 0:
                user_embs.append(origin)
            else:
                hist_tensor = torch.tensor(hist, dtype=torch.long, device=item_emb.device)
                user_embs.append(einstein_midpoint(item_emb[hist_tensor]))

        user_embs = torch.stack(user_embs)

        pos = item_emb[pos_items]
        neg = item_emb[neg_items]

        return self.loss_fn(user_embs, pos, neg)

# Data Preprocessing

Будем обучаться на `MovieLens 1M`

In [15]:
# Load the MovieLens ratings through polara's helper; `include_time=True` is
# requested because later cells rely on a `timestamp` column.
data = get_movielens_data(include_time=True)

In [16]:
# Order interactions chronologically. A stable sort keeps rows with equal
# timestamps in their loaded order, so the later per-user "last interaction"
# choice does not depend on how an unstable sort happens to break ties.
data = data.sort_values("timestamp", kind="stable").reset_index(drop=True)

In [17]:
# Global temporal split at the 95th percentile of timestamps.
split_date = data["timestamp"].quantile(0.95)

data_split = dict()

# train: every interaction up to and including the split point
data_split["train"] = data[data["timestamp"] <= split_date]

# Interactions after the split point. Within them, each user's last row (in
# the current row order, which an earlier cell sorts by timestamp) becomes
# the holdout target and the remaining rows become the test history.
late_events = data[data["timestamp"] > split_date]
is_last_per_user = ~late_events.duplicated(subset="userid", keep="last")

# Boolean masks replace `groupby(...).apply(...)`: that form is deprecated
# when it operates on the grouping column and would eventually drop `userid`,
# which later cells read. Rows keep their chronological order rather than
# being grouped by user; later cells only read the column values.

# test
data_split["test"] = late_events[~is_last_per_user].reset_index(drop=True)

# holdout
data_split["holdout"] = late_events[is_last_per_user].reset_index(drop=True)

  .apply(lambda x: x.iloc[:-1])
  .apply(lambda x: x.iloc[-1])


In [18]:
data_types = ["train", "test", "holdout"]

# Catalogue sizes are taken from the full data set so all splits share one shape.
user_cnt = len(data["userid"].unique())
item_cnt = len(data["movieid"].unique())

user_cats = data["userid"].astype("category")
item_cats = data["movieid"].astype("category")

# Raw id -> contiguous index, following the order of the categorical categories.
user_labels = user_cats.cat.categories.tolist()
item_labels = item_cats.cat.categories.tolist()
user_match_dict = dict(zip(user_labels, range(len(user_labels))))
item_match_dict = dict(zip(item_labels, range(len(item_labels))))

# One user x item CSR matrix of ratings per split.
rating_matrix = dict()
for sample_type in data_types:
    split_frame = data_split[sample_type]
    user_indices = list(map(user_match_dict.__getitem__, split_frame["userid"].values))
    item_indices = list(map(item_match_dict.__getitem__, split_frame["movieid"].values))

    coordinates = (user_indices, item_indices)
    rating_matrix[sample_type] = csr_matrix(
        (split_frame["rating"].values, coordinates),
        shape=(user_cnt, item_cnt),
        dtype="f8",
    )

In [19]:
# Turn explicit ratings into implicit feedback: 1.0 wherever a positive rating is stored.
rating_matrix.update(
    {
        split_name: (rating_matrix[split_name] > 0).astype(float)
        for split_name in ("train", "test", "holdout")
    }
)

In [49]:
# Two items are neighbours when more than 100 training users interacted with both.
# The diagonal of the co-occurrence matrix is kept, so an item with more than
# 100 interactions is listed among its own neighbours.
item_cooccurrence = rating_matrix["train"].T @ rating_matrix["train"]
item_neighbors_sparse = (item_cooccurrence > 100)

item_neighbors = [
    item_neighbors_sparse.getrow(item_idx).indices
    for item_idx in range(item_neighbors_sparse.shape[0])
]

In [21]:
# Per-user training histories in chronological order.
# Reading `row.indices` from the CSR matrix would order each history by item
# index, but BatchGenerator slices these arrays as sequences (a prefix is the
# history, the next element is the target), so the order must be temporal.
train_events = data_split["train"].sort_values("timestamp", kind="stable")
# Same membership rule as the binarised matrix: a positive rating, each
# (user, item) pair counted once (first occurrence kept).
train_events = train_events[train_events["rating"] > 0].drop_duplicates(["userid", "movieid"])

train_user_idx = train_events["userid"].map(user_match_dict).to_numpy()
train_item_idx = train_events["movieid"].map(item_match_dict).to_numpy()

# A stable sort by user keeps the chronological order inside each user's group.
order_by_user = np.argsort(train_user_idx, kind="stable")
events_per_user = np.bincount(train_user_idx, minlength=rating_matrix["train"].shape[0])

# One array per user row of the training matrix; users without training
# interactions get an empty array.
users_items = np.split(train_item_idx[order_by_user], np.cumsum(events_per_user)[:-1])

# Training

In [50]:
class BatchGenerator:
    """
    Batch generator for next-item prediction.

    For every eligible user a random cut point splits the stored item array
    into a history prefix and a single target item; ``n_neg`` negatives are
    drawn from the items absent from that user's array.
    """

    def __init__(self, users_items, n_items, batch_size=256, n_neg=10, min_history=1):
        self.users_items = users_items
        self.n_items = n_items
        self.batch_size = batch_size
        self.n_neg = n_neg
        self.min_history = min_history

        # A user needs more than `min_history` items so that a target remains
        # after reserving the minimum history.
        self.valid_users = []
        for user_idx, items in enumerate(users_items):
            if len(items) > min_history:
                self.valid_users.append(user_idx)

        print(f"Valid users: {len(self.valid_users)} / {len(users_items)}")

    def __iter__(self):
        """Yield ``(user_histories, pos_items, neg_items)``; users are reshuffled on each pass."""
        np.random.shuffle(self.valid_users)
        n_valid = len(self.valid_users)

        for start in range(0, n_valid, self.batch_size):
            histories, positives, negatives = [], [], []

            for user_id in self.valid_users[start:start + self.batch_size]:
                items = self.users_items[user_id]

                # Cut point: at least `min_history` items before it, target at it.
                split_idx = np.random.randint(self.min_history, len(items))

                neg_candidates = np.setdiff1d(np.arange(self.n_items), items)
                # Sample with replacement only when there are too few candidates.
                allow_repeats = len(neg_candidates) < self.n_neg
                neg = np.random.choice(neg_candidates, size=self.n_neg, replace=allow_repeats)

                histories.append(items[:split_idx].tolist())
                positives.append(items[split_idx])
                negatives.append(neg.tolist())

            pos_tensor = torch.tensor(positives, dtype=torch.long, device=device)
            neg_tensor = torch.tensor(negatives, dtype=torch.long, device=device)
            yield (histories, pos_tensor, neg_tensor)

In [51]:
model = HyperbolicItemGNNRecommender(
    n_items=item_cnt,
    dim=32,
    n_layers=2
)

model = model.to(device)

optimizer = RSGD(model.parameters(), lr=1e-2)

batch_gen = BatchGenerator(
    users_items=users_items,
    n_items=item_cnt,
    batch_size=1024,
    n_neg=10
)

# NOTE(review): the recorded run reports a constant loss of 2.3979, which equals
# log1p(10) = log1p(n_neg). That is the value when every hinge term equals the
# margin, i.e. positives and negatives score the same. Presumably the 1e-3
# initialisation scale combined with clipped gradients and lr=1e-2 barely
# moves the embeddings — confirm and consider a larger scale or learning rate.
for epoch in range(10):
    total_loss = 0
    n_batches = 0

    for user_histories, pos_items, neg_items in tqdm(batch_gen):
        optimizer.zero_grad()
        loss = model(user_histories, pos_items, neg_items, item_neighbors)

        if torch.isnan(loss):
            print("⚠️ NaN detected! Skipping...")
            continue

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        total_loss += loss.item()
        n_batches += 1

    if n_batches == 0:
        # Every batch was skipped (or there were none): avoid dividing by zero.
        print(f"Epoch {epoch+1}: no batch produced a finite loss")
        continue

    print(f"Epoch {epoch+1}: Loss = {total_loss / n_batches:.4f}")

Valid users: 6036 / 6040


6it [01:43, 17.17s/it]


Epoch 1: Loss = 2.3979


6it [01:42, 17.04s/it]


Epoch 2: Loss = 2.3979


6it [01:42, 17.07s/it]


Epoch 3: Loss = 2.3979


6it [01:42, 17.11s/it]


Epoch 4: Loss = 2.3979


6it [01:42, 17.11s/it]


Epoch 5: Loss = 2.3979


6it [01:42, 17.10s/it]


Epoch 6: Loss = 2.3979


6it [01:42, 17.14s/it]


Epoch 7: Loss = 2.3979


6it [01:42, 17.07s/it]


Epoch 8: Loss = 2.3979


6it [01:42, 17.10s/it]


Epoch 9: Loss = 2.3979


6it [01:42, 17.11s/it]

Epoch 10: Loss = 2.3979





# Evaluation

In [52]:
# Evaluation history = training interactions plus the post-split test interactions.
# Rebinarising after the sum keeps the stored values at 1.0 even if this cell is
# executed more than once (a bare `+=` would add the training matrix again on each run).
rating_matrix["test"] = ((rating_matrix["test"] + rating_matrix["train"]) > 0).astype(float)

# Item indices each user has already interacted with (used to build the user
# embedding and to mask seen items).
users_items_test_history = []
for i in range(rating_matrix["test"].shape[0]):
    row = rating_matrix["test"].getrow(i)
    users_items_test_history.append(row.indices)

# The single held-out item per user, or None when the user has no holdout row.
users_items_holdout_targets = []
for i in range(rating_matrix["holdout"].shape[0]):
    row = rating_matrix["holdout"].getrow(i)
    if row.nnz > 0:
        users_items_holdout_targets.append(row.indices[0])
    else:
        users_items_holdout_targets.append(None)

# Only users with both a history and a holdout target can be evaluated.
valid_users_eval = []
for i in range(user_cnt):
    if len(users_items_test_history[i]) > 0 and users_items_holdout_targets[i] is not None:
        valid_users_eval.append(i)

# Inference only: do not record an autograd graph for the item embeddings.
with torch.no_grad():
    item_embeddings = model.gnn(item_neighbors)

def get_recommendations(user_embedding, item_embeddings, user_history, k):
    """
    Return the indices of the top-``k`` unseen items for one user.

    Items are scored by the Minkowski inner product with the user embedding
    (larger is better); items in ``user_history`` are excluded.

    Args:
        user_embedding: the user's point, a 1-D tensor.
        item_embeddings: tensor with one item point per row.
        user_history: indices of items the user has already interacted with.
        k (int): number of items to return; capped at the number of items.

    Returns:
        Tensor of item indices ordered from best to worst score.
    """
    # Unsqueeze so the single user vector broadcasts against every item row.
    scores = minkowski_inner(user_embedding.unsqueeze(0), item_embeddings)

    # Build the mask on the same device as the scores instead of relying on a
    # notebook-level `device` variable.
    interacted_mask = torch.zeros(item_embeddings.shape[0], dtype=torch.bool, device=scores.device)
    interacted_mask[user_history] = True

    # Push already-seen items to the bottom of the ranking.
    scores[interacted_mask] = -1e9

    # torch.topk raises when k exceeds the number of candidates.
    k = min(k, scores.shape[-1])
    _, top_k_indices = torch.topk(scores, k)

    return top_k_indices

def calculate_hit_rate(recommended_items, ground_truth_item):
    """
    Report whether the ground-truth item appears among the recommendations.

    Args:
        recommended_items (list or array-like): Recommended item IDs.
        ground_truth_item (int): The single ground-truth item ID, or None.

    Returns:
        int: 1 on a hit, 0 on a miss or when there is no ground-truth item.
    """
    if ground_truth_item is None:
        return 0
    is_hit = ground_truth_item in recommended_items
    return int(is_hit)

import math

def calculate_ndcg(recommended_items, ground_truth_item):
    """
    Calculates the Normalized Discounted Cumulative Gain (NDCG) for a single user.

    Args:
        recommended_items (list or array-like): Ranked recommended item IDs,
            best first. Any iterable is accepted (list, tuple, numpy array).
        ground_truth_item (int): The single ground-truth item ID, or None.

    Returns:
        float: ``1 / log2(rank + 2)`` for a hit at 0-based position ``rank``,
        and 0.0 for a miss or when there is no ground-truth item.
    """
    if ground_truth_item is None:
        return 0.0

    # Materialise as a list: `.index` exists on sequences but not on numpy
    # arrays, which the documented "array-like" contract allows.
    ranked = list(recommended_items)

    # With a single relevant item the ideal ranking places it first,
    # so IDCG = 1 / log2(0 + 2) = 1.
    idcg = 1.0

    try:
        rank = ranked.index(ground_truth_item)  # 0-based position
    except ValueError:
        return 0.0

    dcg = 1.0 / math.log2(rank + 2)
    return dcg / idcg

def calculate_coverage(all_recommended_items, total_n_items):
    """
    Share of the catalogue that appears in at least one recommendation list.

    Args:
        all_recommended_items (list or set): Recommended item IDs; duplicates are ignored.
        total_n_items (int): Number of items in the catalogue.

    Returns:
        float: distinct recommended items divided by ``total_n_items``,
        or 0.0 for an empty catalogue.
    """
    distinct_count = len(set(all_recommended_items))
    return distinct_count / total_n_items if total_n_items != 0 else 0.0

def evaluate_model(item_embeddings, users_items_test_history, users_items_holdout_targets, valid_users_eval, k):
    """
    Evaluate top-``k`` recommendations against one held-out item per user.

    Args:
        item_embeddings: tensor with one item point per row.
        users_items_test_history: per-user collections of already-seen item indices.
        users_items_holdout_targets: per-user held-out item index, or None.
        valid_users_eval: user indices to evaluate.
        k (int): length of each recommendation list.

    Returns:
        tuple: ``(mean hit rate, mean NDCG, coverage)``. The means are 0.0 when
        no user was evaluated; coverage is measured against the number of rows
        of ``item_embeddings``.
    """
    # Catalogue size and device come from the embeddings themselves rather than
    # from notebook-level variables, so the function works on any embedding table.
    total_n_items = item_embeddings.shape[0]
    eval_device = item_embeddings.device

    hit_rates = []
    ndcg_scores = []
    all_recommended_items_set = set()

    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        for user_id in tqdm(valid_users_eval, desc=f"Evaluating @{k}"):
            holdout_item = users_items_holdout_targets[user_id]
            if holdout_item is None:
                # Nothing to score against for this user.
                continue

            user_history_tensor = torch.tensor(
                users_items_test_history[user_id], dtype=torch.long, device=eval_device
            )
            user_embedding = compute_user_embedding(user_history_tensor, item_embeddings)

            recommendations_tensor = get_recommendations(
                user_embedding, item_embeddings, user_history_tensor, k
            )
            # Plain Python list: the metric helpers use `in` and `.index`.
            recommendations_list = recommendations_tensor.cpu().numpy().flatten().tolist()

            hit_rates.append(calculate_hit_rate(recommendations_list, holdout_item))
            ndcg_scores.append(calculate_ndcg(recommendations_list, holdout_item))
            all_recommended_items_set.update(recommendations_list)

    avg_hit_rate = sum(hit_rates) / len(hit_rates) if hit_rates else 0.0
    avg_ndcg = sum(ndcg_scores) / len(ndcg_scores) if ndcg_scores else 0.0
    coverage = calculate_coverage(all_recommended_items_set, total_n_items)

    return avg_hit_rate, avg_ndcg, coverage


In [53]:
# Score the model with recommendation lists of length 20.
avg_hit_rate, avg_ndcg, coverage = evaluate_model(item_embeddings, users_items_test_history, users_items_holdout_targets, valid_users_eval, 20)

Evaluating @20: 100%|██████████| 813/813 [00:01<00:00, 786.18it/s]


In [59]:
# Report the three aggregate metrics returned by evaluate_model.
print(f"HyperbolicItemGNNRecommender ndcg={avg_ndcg:.6g}  coverage={coverage:.6g}  hitrate={avg_hit_rate:.6g}")

HyperbolicItemGNNRecommender ndcg=0.00589946  coverage=0.0404749  hitrate=0.0123001
