# MARec + CA-Rec / UA-Rec / GE-Rec
## Cold-Start Recommendation: Reproduction & Enhancements

**Paper:** Monteil et al., RecSys 2024 | **Dataset:** MovieLens HetRec 2011 | **Runtime:** ~20 min on T4

---
| # | Section | | # | Section |
|---|---------|---|---|---------|
| 1 | Configuration | | 8 | UA-Rec (Gaussian NLL) |
| 2 | Environment | | 9 | GE-Rec (CVAE proxy) |
| 3 | Data | | 10 | Pipeline + Ablation |
| 4 | Features | | 11 | Paper Comparison |
| 5 | MARec Baseline | | 12 | Diagnostics (12 plots) |
| 6 | Metrics | | 13 | Export |
| 7 | CA-Rec (InfoNCE) | | | |

---
## 1. Configuration (optimized for Colab T4 < 30 min)

In [None]:
import time as _time
# Wall-clock start of the notebook; later cells subtract it to print elapsed time.
_NOTEBOOK_START = _time.time()

# Central dictionary of tunables read by the data, model and ablation cells below.
CONFIG = {
    # -- Reproducibility --
    'seed': 42,              # base seed; per-repetition seeds are derived from it
    'n_splits': 3,          # 3 splits (paper uses 10, we trade for speed)
    'n_seeds': 2,            # 2 seeds for mean+/-std

    # -- Data --
    'dataset': 'hetrec',
    'cold_train_frac': 0.60,  # fraction of items placed in the train partition
    'cold_val_frac': 0.20,    # fraction of items placed in the validation partition

    # -- MARec backbone (Paper Eq. 3-9) --
    'lambda1': 1.0,          # multiplies the identity added to the Gram matrix in EASE.fit
    'lambda0': 0.0,          # weight of the optional F^T F term in EASE.fit (0 disables it)
    'alpha': 1.0,            # scale applied to the alignment and direct-transfer terms
    'beta': 100.0,           # scale of the low-click diagonal weights in compute_DR
    'delta': 20.0,           # damping constant in the similarity mask of compute_G
    'second_order': True,    # enable pairwise cross terms between feature similarities
    'fuse_weight': 0.5,      # baseline = fw * EASE-transferred + (1 - fw) * direct scores

    # -- Sentence Transformer --
    'use_st': True,
    'st_model': 'all-MiniLM-L6-v2',

    # -- Enhancement toggles --
    'use_ca_rec': True,
    'use_ua_rec': True,
    'use_ge_rec': True,

    # -- CA-Rec (Contrastive Alignment) --
    'ca_temperature': 0.07,
    'ca_epochs': 15,         # reduced from 25
    'ca_lr': 1e-3,           # higher LR for faster convergence
    'ca_hidden_dim': 128,    # reduced from 256

    # -- UA-Rec (Uncertainty-Aware) --
    'ua_epochs': 15,         # reduced from 25
    'ua_lr': 1e-3,
    'ua_hidden_dim': 128,
    'ua_min_logvar': -10.0,  # lower clamp applied to the predicted log-variance
    'ua_max_logvar': 2.0,    # upper clamp applied to the predicted log-variance

    # -- GE-Rec (CVAE) --
    'ge_latent_dim': 32,     # reduced from 64
    'ge_hidden_dim': 128,    # reduced from 256
    'ge_epochs': 20,         # reduced from 30
    'ge_lr': 2e-3,           # higher LR
    'ge_kl_warmup': 8,       # epochs over which the KL weight ramps up linearly
    'ge_kl_weight': 0.01,    # KL weight reached at the end of the warm-up

    # -- Evaluation --
    'ks': [10, 25, 50],      # cut-offs K for HR@K / NDCG@K

    # -- Output --
    'output_dir': '/content/marec_results',
}

# Runtime estimate
# The factor 4 is hard-coded and must match the number of modes in run_ablation.
_n_runs = CONFIG['n_splits'] * CONFIG['n_seeds'] * 4  # 4 ablation modes
print(f'CONFIG loaded. Estimated pipeline runs: {_n_runs}')
print(f'  Splits={CONFIG["n_splits"]}, Seeds={CONFIG["n_seeds"]}')
print(f'  CA epochs={CONFIG["ca_epochs"]}, UA={CONFIG["ua_epochs"]}, GE={CONFIG["ge_epochs"]}')
print(f'  Hidden dim={CONFIG["ca_hidden_dim"]}, GE latent={CONFIG["ge_latent_dim"]}')
print(f'  Enhancements: CA={CONFIG["use_ca_rec"]}, UA={CONFIG["use_ua_rec"]}, GE={CONFIG["use_ge_rec"]}')

---
## 2. Environment & Reproducibility

In [None]:
import subprocess, sys, os, time, warnings, gc, random, math
from collections import defaultdict
from itertools import product as iprod
from pathlib import Path

def install(pkg):
    """Quietly pip-install ``pkg`` with the interpreter running this notebook.

    Raises ``subprocess.CalledProcessError`` when pip exits with a non-zero status.
    """
    pip_cmd = [sys.executable, '-m', 'pip', 'install', '-q']
    pip_cmd.append(pkg)
    subprocess.check_call(pip_cmd)

# Run pip for each core package every time this cell executes.
for pkg in ['scipy', 'scikit-learn', 'pandas', 'matplotlib', 'seaborn', 'tqdm']:
    install(pkg)
# sentence-transformers is only installed when it cannot be imported.
try:
    import sentence_transformers
except ImportError:
    install('sentence-transformers')

import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer, normalize
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
import torch
import torch.nn as nn
import torch.nn.functional as F_t
import matplotlib
matplotlib.rcParams['figure.max_open_warning'] = 0
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from tqdm.auto import tqdm
warnings.filterwarnings('ignore')

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs with ``seed``.

    When CUDA is available, all CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode with benchmarking disabled.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed all RNGs once up front and pick the torch device used by the later cells.
set_seed(CONFIG['seed'])
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Print an environment summary (library versions, device, GPU memory when on CUDA).
print('=' * 55)
print('  ENVIRONMENT')
print('=' * 55)
print(f'  Python : {sys.version.split()[0]}')
print(f'  PyTorch: {torch.__version__}')
print(f'  NumPy  : {np.__version__}')
print(f'  Device : {DEVICE}')
if DEVICE.type == 'cuda':
    print(f'  GPU    : {torch.cuda.get_device_name(0)}')
    try:
        # mem_get_info returns a 2-tuple; only its second element is reported as VRAM.
        _, mem = torch.cuda.mem_get_info(0)
    except Exception:
        # Fall back to the device-properties field; 0 means "unknown" and suppresses the line.
        mem = getattr(torch.cuda.get_device_properties(0), 'total_memory', 0)
    if mem:
        print(f'  VRAM   : {mem / 1e9:.1f} GB')
print('=' * 55)
print(f'  Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

---
## 3. Data: MovieLens HetRec 2011

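Expected size: 2,107 users × ~6,234 items, with 6 metadata types (the exact counts loaded are printed by the cell below). Cold-start split: items are divided 60% / 20% / 20% into train / validation / test.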

In [None]:
import urllib.request, zipfile

# Local cache directory (created if missing) and download URL for the HetRec 2011 archive.
DATA_DIR = '/content/data/hetrec'
os.makedirs(DATA_DIR, exist_ok=True)
URL = 'https://files.grouplens.org/datasets/hetrec2011/hetrec2011-movielens-2k-v2.zip'

def find_file(base, name):
    """Return the path of the first file named ``name`` found under ``base``.

    Directories are visited in ``os.walk`` order; only file names are matched.
    Returns ``None`` when no such file exists.
    """
    matches = (
        os.path.join(dirpath, name)
        for dirpath, _subdirs, filenames in os.walk(base)
        if name in filenames
    )
    return next(matches, None)

# Download and unpack the archive only when the ratings file is not already on disk.
if find_file(DATA_DIR, 'user_ratedmovies.dat') is None:
    zp = os.path.join(DATA_DIR, 'hetrec.zip')
    print('Downloading MovieLens HetRec 2011...')
    urllib.request.urlretrieve(URL, zp)
    with zipfile.ZipFile(zp, 'r') as z:
        z.extractall(DATA_DIR)
    # The zip is removed after extraction; if extraction raises, it stays on disk.
    os.remove(zp)
    print('Done.')
else:
    print('Data already present.')
print(f'Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

In [None]:
# Build interaction matrix + metadata
#
# Defines, at module level: raw, ratings, all_users, all_items, user2idx,
# item2idx, idx2item, n_users, n_items, X (binary user x item CSR matrix),
# density (percent) and metadata (dict keyed by metadata type).

# Locate the ratings table, falling back to the timestamped variant of the file.
rf = find_file(DATA_DIR, 'user_ratedmovies.dat')
if rf is None:
    rf = find_file(DATA_DIR, 'user_ratedmovies-timestamps.dat')
if rf is None:
    # Fail with a clear message instead of an obscure read_csv(None) error.
    raise FileNotFoundError(
        f'No user_ratedmovies*.dat ratings file found under {DATA_DIR}')
raw = pd.read_csv(rf, sep='\t', encoding='latin-1')
# Implicit feedback: one row per distinct (user, movie) pair; rating values are ignored.
ratings = raw[['userID', 'movieID']].drop_duplicates()

# Contiguous 0-based indices, assigned in sorted order of the raw ids.
all_users = sorted(ratings['userID'].unique())
all_items = sorted(ratings['movieID'].unique())
user2idx = {u: i for i, u in enumerate(all_users)}
item2idx = {it: i for i, it in enumerate(all_items)}
idx2item = {i: it for it, i in item2idx.items()}
n_users, n_items = len(all_users), len(all_items)

row = ratings['userID'].map(user2idx).values
col = ratings['movieID'].map(item2idx).values
X = csr_matrix((np.ones(len(ratings)), (row, col)), shape=(n_users, n_items))
density = X.nnz / (n_users * n_items) * 100

# metadata[key] maps raw movieID -> list of values (or a scalar for years/titles).
metadata = {}
for fname, key, col_name in [
    ('movie_genres.dat', 'genres', 'genre'),
    ('movie_countries.dat', 'countries', 'country'),
]:
    f = find_file(DATA_DIR, fname)
    if f:
        df = pd.read_csv(f, sep='\t', encoding='latin-1')
        metadata[key] = df.groupby('movieID')[col_name].apply(list).to_dict()

f = find_file(DATA_DIR, 'movie_directors.dat')
if f:
    df = pd.read_csv(f, sep='\t', encoding='latin-1')
    # Prefer the human-readable name column; fall back to the id column.
    c = 'directorName' if 'directorName' in df.columns else 'directorID'
    metadata['directors'] = df.groupby('movieID')[c].apply(lambda x: [str(v) for v in x]).to_dict()

f = find_file(DATA_DIR, 'movie_actors.dat')
if f:
    df = pd.read_csv(f, sep='\t', encoding='latin-1')
    c = 'actorName' if 'actorName' in df.columns else 'actorID'
    if 'ranking' in df.columns:
        # Keep only rows whose ranking value is at most 10.
        df = df[df['ranking'] <= 10]
    metadata['actors'] = df.groupby('movieID')[c].apply(lambda x: [str(v) for v in x]).to_dict()

f = find_file(DATA_DIR, 'movie_locations.dat')
if f:
    try:
        df = pd.read_csv(f, sep='\t', encoding='latin-1')
        # First column whose name contains "location" (IndexError if there is none).
        lc = [c for c in df.columns if 'location' in c.lower()][0]
        metadata['locations'] = df.groupby('movieID')[lc].apply(lambda x: [str(v) for v in x]).to_dict()
    except Exception as exc:
        # Locations stay optional (best-effort), but report why they were skipped.
        print(f'[warn] movie_locations.dat skipped: {exc!r}')

f = find_file(DATA_DIR, 'movies.dat')
if f:
    try:
        df = pd.read_csv(f, sep='\t', encoding='latin-1')
        # First column whose name contains "id" is used as the movie key.
        idc = [c for c in df.columns if 'id' in c.lower()][0]
        if 'year' in df.columns:
            metadata['years'] = df.set_index(idc)['year'].to_dict()
        if 'title' in df.columns:
            metadata['titles'] = df.set_index(idc)['title'].to_dict()
    except Exception as exc:
        # Years/titles stay optional (best-effort), but report why they were skipped.
        print(f'[warn] movies.dat skipped: {exc!r}')

print(f'Users: {n_users} | Items: {n_items} | Interactions: {X.nnz} | Density: {density:.2f}%')
print(f'Metadata: {[k for k in metadata if k != "titles"]}')

In [None]:
# Cold-start splits
def create_cold_splits(X, n_splits, seed, train_frac=0.6, val_frac=0.2):
    """Partition the item columns of ``X`` into train/val/test, ``n_splits`` times.

    Each split draws a fresh permutation of the item indices from one
    ``RandomState(seed)``; the first ``int(n * train_frac)`` items go to
    train, the next ``int(n * val_frac)`` to validation and the rest to test.

    Returns a list of dicts with the column-sliced matrices (``X_train``,
    ``X_val``, ``X_test``), the sorted item index lists (``train_items``,
    ``val_items``, ``test_items``) and ``test_users``, a boolean vector that
    marks users with at least one interaction on a test item.
    """
    rng = np.random.RandomState(seed)
    n_items = X.shape[1]
    n_train = int(n_items * train_frac)
    val_end = n_train + int(n_items * val_frac)

    def _one_split():
        order = rng.permutation(n_items)
        train_ids = sorted(order[:n_train].tolist())
        val_ids = sorted(order[n_train:val_end].tolist())
        test_ids = sorted(order[val_end:].tolist())
        clicks_on_test = np.array(X[:, test_ids].sum(axis=1)).flatten()
        return {
            'X_train': X[:, train_ids],
            'X_val': X[:, val_ids],
            'X_test': X[:, test_ids],
            'train_items': train_ids,
            'val_items': val_ids,
            'test_items': test_ids,
            'test_users': clicks_on_test > 0,
        }

    return [_one_split() for _ in range(n_splits)]

# Build all cold-start splits once; every ablation run below reuses them.
splits = create_cold_splits(X, CONFIG['n_splits'], CONFIG['seed'],
                            CONFIG['cold_train_frac'], CONFIG['cold_val_frac'])
s0 = splits[0]
# Sanity check on the first split only: train and test item sets must be disjoint.
assert len(set(s0['train_items']) & set(s0['test_items'])) == 0, 'LEAKAGE!'
print(f'Splits: {CONFIG["n_splits"]} | Train: {len(s0["train_items"])} | '
      f'Val: {len(s0["val_items"])} | Test: {len(s0["test_items"])}')
print(f'Test users with clicks: {s0["test_users"].sum()} / {n_users}')
print(f'[OK] No item leakage. Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

---
## 4. Feature Engineering (BoW + TF-IDF + Sentence Transformer)

In [None]:
def build_feature_matrices(metadata, item2idx, n_items):
    """Turn raw item metadata into one sparse feature matrix per metadata type.

    ``metadata`` maps a type name to ``{raw_item_id: value(s)}``; ids missing
    from ``item2idx`` are ignored. Every returned matrix has ``n_items`` rows:

    * list-valued types (genres, directors, actors, countries, locations)
      become multi-hot matrices,
    * ``titles`` becomes a TF-IDF matrix stored under ``'titles_tfidf'``,
    * ``years`` becomes a single column, standardised over the items that
      have a year (rows without a year stay 0).

    Prints one line with the dimensionality of each matrix it builds.
    """
    fmats = {}

    def _known(mapping):
        # (row index, value) pairs for ids present in item2idx, in mapping order.
        return [(item2idx[iid], val) for iid, val in mapping.items() if iid in item2idx]

    for key in ('genres', 'directors', 'actors', 'countries', 'locations'):
        if key not in metadata:
            continue
        labels = [[] for _ in range(n_items)]
        for row_idx, vals in _known(metadata[key]):
            labels[row_idx] = [str(v) for v in vals]
        F = MultiLabelBinarizer(sparse_output=True).fit_transform(labels)
        fmats[key] = csr_matrix(F)
        print(f'  {key:<12s}: {F.shape[1]:>5} dims')

    if 'titles' in metadata:
        texts = [''] * n_items
        for row_idx, title in _known(metadata['titles']):
            texts[row_idx] = str(title)
        vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
        F = vectorizer.fit_transform(texts)
        fmats['titles_tfidf'] = csr_matrix(F)
        print(f'  {"titles_tfidf":<12s}: {F.shape[1]:>5} dims')

    if 'years' in metadata:
        ya = np.zeros(n_items)
        mask = np.zeros(n_items, dtype=bool)
        for row_idx, year in _known(metadata['years']):
            ya[row_idx] = year
            mask[row_idx] = True
        if mask.any():
            observed = ya[mask]
            mu = observed.mean()
            # Floor the standard deviation at 1 before dividing.
            sd = max(observed.std(), 1)
            ya[mask] = (observed - mu) / sd
            fmats['years'] = csr_matrix(ya.reshape(-1, 1))
            print(f'  {"years":<12s}:     1 dims')

    return fmats

# Build the per-type feature matrices for the whole catalogue (all items, not per split).
print('Building BoW features...')
feature_matrices = build_feature_matrices(metadata, item2idx, n_items)

In [None]:
# Sentence Transformer embeddings
# Optionally adds one dense text embedding per item as an extra feature type ('st_emb').
from sentence_transformers import SentenceTransformer

if CONFIG['use_st']:
    print(f'Loading ST: {CONFIG["st_model"]}')
    _st = SentenceTransformer(CONFIG['st_model'])
    # One text per item: title followed by up to 5 genres, directors and actors.
    item_texts = []
    for idx in range(n_items):
        iid = idx2item.get(idx, idx)
        parts = []
        if 'titles' in metadata and iid in metadata.get('titles', {}):
            parts.append(str(metadata['titles'][iid]))
        for k in ['genres', 'directors', 'actors']:
            if k in metadata and iid in metadata.get(k, {}):
                parts.append(', '.join(str(v) for v in metadata[k][iid][:5]))
        # Items without any text get the placeholder string 'Unknown'.
        item_texts.append('. '.join(parts) if parts else 'Unknown')
    st_emb = _st.encode(item_texts, show_progress_bar=True, batch_size=256)
    # Stored as CSR so it can be row-sliced and hstacked like the other feature matrices.
    feature_matrices['st_emb'] = csr_matrix(st_emb)
    print(f'ST embeddings: {st_emb.shape}')
    # Drop the encoder and release cached GPU memory before the training cells.
    del _st
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
else:
    print('ST disabled.')

# Short alias used by the pipeline cells.
fmats = feature_matrices
total_dim = sum(v.shape[1] for v in fmats.values())
print(f'Total: {len(fmats)} feature types, {total_dim} dims')
print(f'Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

---
## 5. MARec Baseline (EASE + Alignment, Paper Eq. 3-9)

In [None]:
class EASE:
    """Closed-form item-item linear model with an optional alignment term.

    ``fit`` solves a ridge-regularised system on the item Gram matrix and
    stores the item-item weight matrix in ``self.B``, whose diagonal is
    forced to (numerically) zero. ``predict`` returns ``X @ B``.

    Args:
        lambda1: multiplier of the identity added to the Gram matrix.
        lambda0: weight of the optional ``F.T @ F`` term (ignored when 0).
    """

    def __init__(self, lambda1=1.0, lambda0=0.0):
        self.lambda1 = lambda1
        self.lambda0 = lambda0
        self.B = None

    @staticmethod
    def _dense(M):
        """Return ``M`` as a dense ndarray, whether it is sparse or already dense."""
        return M.toarray() if sp.issparse(M) else np.asarray(M)

    def fit(self, X, F=None, alignment=None):
        """Fit the item-item weights from interactions ``X`` (users x items).

        Args:
            X: sparse or dense interaction matrix.
            F: optional feature matrix; only used when ``lambda0 > 0``.
                NOTE(review): ``F.T @ F`` is added to the items x items Gram
                matrix, so it must itself be items x items -- confirm the
                expected orientation of ``F`` with the caller.
            alignment: optional dense matrix ``A`` with the same shape as
                ``X``; ``X.T @ A`` is added to both sides of the system.

        Returns:
            ``self``, to allow chaining.
        """
        n = X.shape[1]
        # Densify via the helper so dense X works too (X.T @ X has no .toarray()).
        G = self._dense(X.T @ X).astype(np.float64)
        Gf = G.copy()
        if F is not None and self.lambda0 > 0:
            Gf += self.lambda0 * self._dense(F.T @ F).astype(np.float64)
        if alignment is None:
            XtA = np.zeros_like(G)
        else:
            XtA = self._dense(X.T).astype(np.float64) @ alignment
        P = np.linalg.inv(Gf + self.lambda1 * np.eye(n) + XtA)
        Theta = P @ (Gf + XtA)
        dP = np.diag(P).copy()
        # Guard the division below against (near-)zero diagonal entries.
        dP[np.abs(dP) < 1e-10] = 1e-10
        # Column-wise correction that zeroes the diagonal of B.
        self.B = Theta - P * (np.diag(Theta) / dP)[None, :]
        return self

    def predict(self, X):
        """Return the score matrix ``X @ B`` for sparse or dense ``X``."""
        return self._dense(X) @ self.B


class MARecAligner:
    """Metadata-based item similarities and the weights used to combine them.

    Workflow used by the pipeline: ``compute_G`` builds one item-item
    similarity matrix per feature type, ``fit_weights`` grid-searches the
    combination weights against the item co-occurrence matrix, and
    ``combine_G`` / ``cross_sim`` apply the fitted weights.

    Attributes:
        alpha: scale read by callers when building the alignment term.
        beta: scale of the low-click weights in ``compute_DR``.
        delta: damping constant of the similarity mask in ``compute_G``.
        second_order: whether pairwise cross terms are searched and applied.
        mu: first-order weights, one per feature type (set by ``fit_weights``).
        mu_cross: upper-triangular matrix of pairwise cross-term weights.
        names: feature-type names in the order used by ``compute_G``.
    """

    def __init__(self, alpha=1.0, beta=100.0, delta=20.0, second_order=True):
        self.alpha = alpha
        self.beta = beta
        self.delta = delta
        self.second_order = second_order
        self.mu = None
        self.mu_cross = None
        self.names = []

    def _d(self, M):
        """Return ``M`` as a dense ndarray."""
        return M.toarray() if sp.issparse(M) else np.asarray(M)

    def compute_G(self, fmats):
        """Return one damped cosine-similarity matrix per feature type.

        Also records the feature-type order in ``self.names``.
        """
        G_list = []
        self.names = list(fmats.keys())
        for name in self.names:
            Fd = self._d(fmats[name])
            # Row-normalise so that Fn @ Fn.T is the cosine similarity.
            norms = np.maximum(np.linalg.norm(Fd, axis=1, keepdims=True), 1e-10)
            Fn = Fd / norms
            Gk = Fn @ Fn.T
            # Per-item count of non-zero features, scaled to [0, 1].
            sc = (np.abs(Fd) > 0).sum(1).astype(float)
            mx = max(sc.max(), 1)
            sf = sc / mx
            mask = np.outer(sf, sf)
            # Shrink similarities between items that carry few features.
            Gk = Gk * mask / (mask + self.delta / (mx + 1))
            G_list.append(Gk)
        return G_list

    def compute_DR(self, Xtr):
        """Return a diagonal matrix that up-weights items with few clicks.

        ``p`` is the 10th percentile of the positive click counts (at least
        1); an item with ``clicks < p`` gets ``beta / p * (p - clicks)`` and
        every other item gets 0.
        """
        clicks = np.array(Xtr.sum(0)).flatten()
        p = max(np.percentile(clicks[clicks > 0], 10) if (clicks > 0).any() else 1, 1)
        d = np.where(clicks <= p, (self.beta / p) * np.maximum(p - clicks, 0), 0.0)
        return np.diag(d)

    def fit_weights(self, Xtr, G_list):
        """Grid-search ``mu`` and ``mu_cross`` to correlate with ``Xtr.T @ Xtr``.

        The objective is the Pearson correlation between the combined
        similarity and the item co-occurrence counts, measured on a fixed
        random sample of at most 15000 matrix entries.

        Only the sampled entries are ever read, so each similarity matrix is
        sampled once and each pairwise product ``G_i @ G_j`` is computed
        once, instead of rebuilding full matrices for every grid point. The
        sampled values are combined with the same arithmetic as before, so
        the selected weights are unchanged.
        """
        N = len(G_list)
        XtX = (Xtr.T @ Xtr).toarray().flatten()
        rng = np.random.RandomState(0)
        idx = rng.choice(len(XtX), min(15000, len(XtX)), replace=False)
        xs = XtX[idx]
        # Sampled entries of every first-order similarity (C-order flattening).
        sampled = [np.asarray(Gk).reshape(-1)[idx] for Gk in G_list]

        self.mu = np.ones(N)
        best = -1e9
        # Coarser grid for many feature types: the search is grid_size ** N.
        grid = [0.0, 0.5, 1.0, 2.0, 5.0] if N <= 4 else [0.0, 1.0, 3.0]
        for combo in iprod(grid, repeat=N):
            mu = np.array(combo)
            if mu.sum() == 0:
                continue
            Gc = sum(mu[k] * sampled[k] for k in range(N))
            c = np.corrcoef(Gc, xs)[0, 1]
            if not np.isnan(c) and c > best:
                best = c
                self.mu = mu.copy()

        self.mu_cross = np.zeros((N, N))
        if self.second_order and N >= 2:
            # self.mu is fixed from here on, so the first-order part is invariant.
            base = sum(self.mu[k] * sampled[k] for k in range(N))
            # NOTE(review): each pair is scored against the first-order terms
            # only, while combine_G adds all selected pairs together.
            for i in range(N):
                for j in range(i + 1, N):
                    pair = np.asarray(G_list[i] @ G_list[j]).reshape(-1)[idx]
                    for w in [0.0, 0.5, 1.0, 2.0]:
                        c = np.corrcoef(base + w * pair, xs)[0, 1]
                        if not np.isnan(c) and c > best:
                            best = c
                            self.mu_cross[i, j] = w

    def combine_G(self, G_list):
        """Return the weighted sum of ``G_list`` plus the selected cross terms."""
        N = len(G_list)
        G = sum(self.mu[k] * G_list[k] for k in range(N))
        if self.second_order:
            for i in range(N):
                for j in range(i + 1, N):
                    if self.mu_cross[i, j] > 0:
                        G = G + self.mu_cross[i, j] * (G_list[i] @ G_list[j])
        return G

    def cross_sim(self, fmats, cold_items, warm_items):
        """Return the weighted cold-by-warm cosine similarity.

        Uses the feature order recorded by ``compute_G`` and the weights
        fitted by ``fit_weights``. Cross terms here are element-wise
        products, whereas ``combine_G`` uses matrix products.
        """
        cross = []
        for name in self.names:
            Fc = self._d(fmats[name][cold_items])
            Fw = self._d(fmats[name][warm_items])
            cross.append(normalize(Fc) @ normalize(Fw).T)
        N = len(cross)
        G = sum(self.mu[k] * cross[k] for k in range(N))
        if self.second_order:
            for i in range(N):
                for j in range(i + 1, N):
                    if self.mu_cross[i, j] > 0:
                        G = G + self.mu_cross[i, j] * cross[i] * cross[j]
        return G

print('EASE + MARecAligner defined.')

---
## 6. Evaluation Metrics (Paper Eq. 12-13)

In [None]:
def evaluate(scores, X_test, ks=(10, 25, 50), user_mask=None):
    """Compute mean HR@K and NDCG@K over users with at least one test item.

    Args:
        scores: per-user score rows, indexable as ``scores[u]``.
        X_test: sparse or dense user x item ground truth (positive = relevant).
        ks: cut-offs K.
        user_mask: optional boolean vector; users with a falsy entry are skipped.

    Returns:
        Dict with ``hr@K`` and ``ndcg@K`` for every K (averaged over the
        evaluated users) plus ``n_eval``, the number of evaluated users.
        HR@K here is ``hits / min(K, number of relevant items)``.
    """
    truth = X_test.toarray() if sp.issparse(X_test) else X_test
    res = {}
    for k in ks:
        res[f'hr@{k}'] = 0.0
        res[f'ndcg@{k}'] = 0.0

    n_eval = 0
    for u in range(truth.shape[0]):
        masked_out = user_mask is not None and not user_mask[u]
        if masked_out:
            continue
        positives = np.where(truth[u] > 0)[0]
        if len(positives) == 0:
            continue
        order = np.argsort(scores[u])[::-1]
        relevant = set(positives)
        for k in ks:
            # Discounted gain of every relevant item in the top-k, in rank order.
            gains = [1.0 / np.log2(rank + 2)
                     for rank, item in enumerate(order[:k]) if item in relevant]
            ideal_len = min(k, len(positives))
            res[f'hr@{k}'] += len(gains) / ideal_len
            dcg = sum(gains)
            idcg = sum(1.0 / np.log2(pos + 2) for pos in range(ideal_len))
            res[f'ndcg@{k}'] += (dcg / idcg) if idcg > 0 else 0.0
        n_eval += 1

    if n_eval > 0:
        for name in res:
            res[name] /= n_eval
    res['n_eval'] = n_eval
    return res

def evaluate_coverage(scores, k=50):
    """Return the fraction of items that appear in at least one user's top-``k``."""
    top_lists = (
        np.argsort(scores[u])[-k:].tolist() for u in range(scores.shape[0])
    )
    distinct = set().union(*top_lists)
    return len(distinct) / scores.shape[1]

print('Metrics: HR@K, NDCG@K, Coverage@K')

---
## 7. CA-Rec: Contrastive Alignment (InfoNCE)

Replace the MSE alignment with an InfoNCE contrastive loss.
Each item's metadata embedding should be close to its own interaction embedding and far from the interaction embeddings of the other items in the batch.

In [None]:
class CARec(nn.Module):
    """Contrastive alignment: two projection towers trained with InfoNCE.

    Metadata features and interaction features are each projected to
    ``hidden_dim`` and L2-normalised; row ``i`` of the metadata batch is the
    positive for row ``i`` of the interaction batch and all other rows of the
    batch act as negatives.
    """

    def __init__(self, meta_dim, interact_dim, hidden_dim=128):
        super().__init__()
        # Construction order (meta first, then interact) fixes the RNG draws
        # used for initialisation.
        self.meta_proj = self._tower(meta_dim, hidden_dim)
        self.interact_proj = self._tower(interact_dim, hidden_dim)

    @staticmethod
    def _tower(in_dim, hidden_dim):
        """Linear -> LayerNorm -> GELU -> Linear projection head."""
        return nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, hidden_dim),
        )

    def _embed_meta(self, meta_feats):
        """Unit-norm metadata embedding."""
        return F_t.normalize(self.meta_proj(meta_feats), dim=-1)

    def forward(self, meta_feats, interact_feats, temperature=0.07):
        """Return ``(InfoNCE loss, normalised metadata embeddings)``."""
        m = self._embed_meta(meta_feats)
        v = F_t.normalize(self.interact_proj(interact_feats), dim=-1)
        logits = torch.matmul(m, v.T) / temperature
        # The matching pair sits on the diagonal of the logits matrix.
        positives = torch.arange(m.shape[0], device=m.device)
        return F_t.cross_entropy(logits, positives), m

    @torch.no_grad()
    def project_meta(self, meta_feats):
        """Gradient-free unit-norm metadata embeddings for inference."""
        return self._embed_meta(meta_feats)

print('CA-Rec defined.')

---
## 8. UA-Rec: Uncertainty-Aware Alignment (Gaussian NLL)

The alignment head outputs a mean and a variance, $(\mu, \sigma^2)$, for every target dimension. Noisy metadata → high $\sigma^2$ → weaker pull from that item.

In [None]:
class UARec(nn.Module):
    """Uncertainty-aware alignment head trained with a Gaussian NLL.

    A shared two-layer trunk feeds two linear heads that predict a mean and
    a clamped log-variance for every target dimension.
    """

    def __init__(self, meta_dim, target_dim, hidden_dim=128):
        super().__init__()
        trunk = []
        width = meta_dim
        for _ in range(2):
            trunk += [nn.Linear(width, hidden_dim), nn.LayerNorm(hidden_dim), nn.GELU()]
            width = hidden_dim
        self.shared = nn.Sequential(*trunk)
        self.fc_mu = nn.Linear(hidden_dim, target_dim)
        self.fc_logvar = nn.Linear(hidden_dim, target_dim)

    def _heads(self, meta_feats, min_lv, max_lv):
        """Return ``(mean, clamped log-variance)`` for ``meta_feats``."""
        hidden = self.shared(meta_feats)
        return self.fc_mu(hidden), self.fc_logvar(hidden).clamp(min_lv, max_lv)

    def forward(self, meta_feats, targets, min_lv=-10.0, max_lv=2.0):
        """Return ``(mean NLL, predicted mean, predicted variance)``.

        The per-element loss is ``(targets - mu)^2 / var + log(var)``.
        """
        mu, logvar = self._heads(meta_feats, min_lv, max_lv)
        var = logvar.exp()
        nll = (1.0 / var) * (targets - mu).pow(2) + logvar
        return nll.mean(), mu, var

    @torch.no_grad()
    def predict(self, meta_feats, min_lv=-10.0, max_lv=2.0):
        """Gradient-free ``(mean, variance)`` prediction."""
        mu, logvar = self._heads(meta_feats, min_lv, max_lv)
        return mu, logvar.exp()

print('UA-Rec defined.')

---
## 9. GE-Rec: Generative Embeddings (Conditional VAE)

Train CVAE: encode (v_i, m_i) -> z, decode (z, m_i) -> v_hat.
For cold items: z ~ N(0,I), decode(z, m_cold) -> proxy interaction embedding.

In [None]:
class GERec(nn.Module):
    """Conditional VAE that generates interaction embeddings from metadata.

    The encoder maps ``(v, m)`` to a latent Gaussian, the decoder maps
    ``(z, m)`` back to an interaction vector. ``generate`` draws
    ``z ~ N(0, I)`` and decodes it conditioned on the given metadata.
    """

    def __init__(self, interact_dim, meta_dim, latent_dim=32, hidden_dim=128):
        super().__init__()
        self.latent_dim = latent_dim
        # Module creation order (encoder, latent heads, decoder) is kept fixed
        # because it determines the RNG draws used for initialisation.
        self.encoder = nn.Sequential(*self._trunk(interact_dim + meta_dim, hidden_dim))
        self.fc_mu_z = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar_z = nn.Linear(hidden_dim, latent_dim)
        self.decoder = nn.Sequential(
            *self._trunk(latent_dim + meta_dim, hidden_dim),
            nn.Linear(hidden_dim, interact_dim),
        )

    @staticmethod
    def _trunk(in_dim, hidden_dim):
        """Two (Linear, LayerNorm, GELU) stages as a flat list of layers."""
        return [
            nn.Linear(in_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
        ]

    def encode(self, v, m):
        """Return the latent ``(mean, log-variance)`` for inputs ``(v, m)``."""
        hidden = self.encoder(torch.cat([v, m], dim=-1))
        return self.fc_mu_z(hidden), self.fc_logvar_z(hidden)

    def reparameterize(self, mu, logvar):
        """Sample ``z`` in training mode; return the mean in eval mode."""
        if not self.training:
            return mu
        std = torch.exp(0.5 * logvar)
        return mu + std * torch.randn_like(logvar)

    def decode(self, z, m):
        """Decode latent ``z`` conditioned on metadata ``m``."""
        return self.decoder(torch.cat([z, m], dim=-1))

    def forward(self, v, m, kl_weight=0.01):
        """Return ``(recon + kl_weight * kl, recon, kl, v_hat)``.

        ``recon`` is the MSE between ``v_hat`` and ``v``; ``kl`` is averaged
        over both the batch and the latent dimensions.
        """
        mu_z, logvar_z = self.encode(v, m)
        v_hat = self.decode(self.reparameterize(mu_z, logvar_z), m)
        recon = F_t.mse_loss(v_hat, v)
        kl = -0.5 * torch.mean(1 + logvar_z - mu_z.pow(2) - logvar_z.exp())
        total = recon + kl_weight * kl
        return total, recon, kl, v_hat

    @torch.no_grad()
    def generate(self, m_cold):
        """Decode one prior sample per row of ``m_cold`` (stochastic)."""
        n_rows = m_cold.shape[0]
        z = torch.randn(n_rows, self.latent_dim, device=m_cold.device)
        return self.decode(z, m_cold)

print('GE-Rec defined.')

---
## 10. Pipeline & Ablation

Modes: baseline -> +CA -> +CA+UA -> Full (+CA+UA+GE)

In [None]:
def _cat_feats(fmats, items):
    return sp.hstack([v[items] for v in fmats.values()]).tocsr()

def get_feats(fmats, items):
    return _cat_feats(fmats, items).toarray().astype(np.float32)

def run_pipeline(split, fmats, cfg, mode='full'):
    """Score the cold test items of one split and evaluate every stage that runs.

    The MARec baseline always runs. Depending on ``mode`` ('baseline', '+ca',
    '+ca+ua' or 'full') and the ``use_*`` toggles in ``cfg``, the CA-Rec,
    UA-Rec and GE-Rec stages are trained on the warm (train) items and their
    scores are blended into the previous stage's scores with fixed weights.

    Args:
        split: one dict produced by ``create_cold_splits``.
        fmats: dict of per-type feature matrices covering all items.
        cfg: configuration dict (see ``CONFIG``).
        mode: last stage to run; the function returns right after that stage.

    Returns:
        ``(results, logs, scores)``: ``results`` maps a stage name to its
        ``evaluate`` output, ``logs`` holds the per-step training losses, and
        ``scores`` is the score matrix of the last stage that ran.
    """
    tr = split['train_items']
    te = split['test_items']
    Xtr = split['X_train']
    Xte = split['X_test']
    te_mask = split['test_users']
    ks = cfg['ks']
    Xtr_d = Xtr.toarray().astype(np.float64)
    logs = {'ca': [], 'ua': [], 'ge_recon': [], 'ge_kl': []}
    results = {}

    # Step 1: MARec baseline (always runs)
    # Similarities and weights are fitted on the train items only.
    fmats_tr = {k: v[tr] for k, v in fmats.items()}
    aligner = MARecAligner(cfg['alpha'], cfg['beta'], cfg['delta'], cfg['second_order'])
    G_list = aligner.compute_G(fmats_tr)
    aligner.fit_weights(Xtr, G_list)
    G_comb = aligner.combine_G(G_list)
    DR = aligner.compute_DR(Xtr)
    alignment = aligner.alpha * Xtr_d @ G_comb @ DR

    ease = EASE(cfg['lambda1'], cfg.get('lambda0', 0))
    ease.fit(Xtr, alignment=alignment)
    warm_scores = ease.predict(Xtr)
    # cross_G is (test items x train items); it transfers warm scores to cold items.
    cross_G = aligner.cross_sim(fmats, te, tr)
    cold_ease = warm_scores @ cross_G.T
    direct = aligner.alpha * Xtr_d @ cross_G.T
    fw = cfg.get('fuse_weight', 0.5)
    # Convex blend of EASE-transferred scores and direct metadata-similarity scores.
    baseline = fw * cold_ease + (1 - fw) * direct
    results['MARec Baseline'] = evaluate(baseline, Xte, ks, te_mask)
    if mode == 'baseline':
        return results, logs, baseline

    # Prepare tensors (shared across enhancements)
    meta_cold = torch.tensor(get_feats(fmats, te), device=DEVICE)
    meta_warm = torch.tensor(get_feats(fmats, tr), device=DEVICE)
    # Each warm item's "interaction embedding" is its row of the co-occurrence matrix.
    XtX = (Xtr.T @ Xtr).toarray().astype(np.float32)
    interact_warm = torch.tensor(XtX, device=DEVICE)
    meta_dim = meta_cold.shape[1]
    interact_dim = interact_warm.shape[1]

    # Step 2: CA-Rec
    ca_scores = baseline.copy()
    if cfg.get('use_ca_rec') and mode in ['+ca', '+ca+ua', 'full']:
        ca = CARec(meta_dim, interact_dim, cfg['ca_hidden_dim']).to(DEVICE)
        opt = torch.optim.Adam(ca.parameters(), lr=cfg['ca_lr'])
        ca.train()
        # Each "epoch" is a single optimiser step on one random batch of up to 512 warm items.
        for ep in range(cfg['ca_epochs']):
            bs = min(512, meta_warm.shape[0])
            idx = torch.randperm(meta_warm.shape[0], device=DEVICE)[:bs]
            loss, _ = ca(meta_warm[idx], interact_warm[idx], cfg['ca_temperature'])
            opt.zero_grad()
            loss.backward()
            opt.step()
            logs['ca'].append(loss.item())
        ca.eval()
        cold_proj = ca.project_meta(meta_cold).cpu().numpy()
        warm_proj = ca.project_meta(meta_warm).cpu().numpy()
        # Cosine similarity (projections are unit-norm) between cold and warm items.
        ca_sim = cold_proj @ warm_proj.T
        ca_transfer = Xtr_d @ ca_sim.T.astype(np.float64)
        ca_scores = 0.6 * baseline + 0.4 * ca_transfer
        results['+CA-Rec'] = evaluate(ca_scores, Xte, ks, te_mask)
        del ca, opt
        if mode == '+ca':
            return results, logs, ca_scores

    # Step 3: UA-Rec
    ua_scores = ca_scores.copy()
    if cfg.get('use_ua_rec') and mode in ['+ca+ua', 'full']:
        ua = UARec(meta_dim, interact_dim, cfg['ua_hidden_dim']).to(DEVICE)
        opt = torch.optim.Adam(ua.parameters(), lr=cfg['ua_lr'])
        ua.train()
        # Same one-batch-per-epoch scheme as CA-Rec.
        for ep in range(cfg['ua_epochs']):
            bs = min(512, meta_warm.shape[0])
            idx = torch.randperm(meta_warm.shape[0], device=DEVICE)[:bs]
            loss, _, _ = ua(meta_warm[idx], interact_warm[idx],
                            cfg['ua_min_logvar'], cfg['ua_max_logvar'])
            opt.zero_grad()
            loss.backward()
            opt.step()
            logs['ua'].append(loss.item())
        ua.eval()
        mu_cold, var_cold = ua.predict(meta_cold, cfg['ua_min_logvar'], cfg['ua_max_logvar'])
        # Per-cold-item confidence: inverse of the mean predicted variance.
        confidence = (1.0 / var_cold.mean(dim=1, keepdim=True)).cpu().numpy()
        mu_np = mu_cold.cpu().numpy()
        mu_warm, _ = ua.predict(meta_warm, cfg['ua_min_logvar'], cfg['ua_max_logvar'])
        ua_sim = normalize(mu_np) @ normalize(mu_warm.cpu().numpy()).T
        ua_transfer = Xtr_d @ ua_sim.T.astype(np.float64)
        # Rescale so the average confidence is 1; conf_norm.T broadcasts over users.
        conf_norm = confidence / (confidence.mean() + 1e-10)
        ua_scores = 0.5 * ca_scores + 0.3 * ua_transfer * conf_norm.T + 0.2 * baseline
        results['+CA+UA-Rec'] = evaluate(ua_scores, Xte, ks, te_mask)
        del ua, opt
        if mode == '+ca+ua':
            return results, logs, ua_scores

    # Step 4: GE-Rec
    if cfg.get('use_ge_rec') and mode == 'full':
        ge = GERec(interact_dim, meta_dim, cfg['ge_latent_dim'],
                   cfg['ge_hidden_dim']).to(DEVICE)
        opt = torch.optim.Adam(ge.parameters(), lr=cfg['ge_lr'])
        ge.train()
        for ep in range(cfg['ge_epochs']):
            # Linear KL warm-up from 0 to ge_kl_weight over ge_kl_warmup epochs.
            kl_w = min(1.0, ep / max(cfg['ge_kl_warmup'], 1)) * cfg['ge_kl_weight']
            bs = min(512, meta_warm.shape[0])
            idx = torch.randperm(meta_warm.shape[0], device=DEVICE)[:bs]
            loss, recon, kl, _ = ge(interact_warm[idx], meta_warm[idx], kl_w)
            opt.zero_grad()
            loss.backward()
            opt.step()
            logs['ge_recon'].append(recon.item())
            logs['ge_kl'].append(kl.item())
        ge.eval()
        # Proxy interaction vectors for cold items, decoded from random prior samples.
        v_cold_proxy = ge.generate(meta_cold).cpu().numpy()
        ge_sim = normalize(v_cold_proxy) @ normalize(XtX).T
        ge_transfer = Xtr_d @ ge_sim.T.astype(np.float64)
        final = 0.4 * ua_scores + 0.35 * ge_transfer + 0.25 * baseline
        results['+CA+UA+GE-Rec'] = evaluate(final, Xte, ks, te_mask)
        del ge, opt
        return results, logs, final

    # Reached when the requested stage is disabled in cfg: return the last scores computed.
    return results, logs, ua_scores if '+CA+UA-Rec' in results else ca_scores

print('Pipeline defined.')
# Release cached GPU memory and collect garbage before the ablation run.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()
print(f'Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

In [None]:
# ============================================================
#  RUN FULL ABLATION
# ============================================================
def run_ablation(splits, fmats, cfg, seeds):
    """Run every ablation mode on every (seed, split) pair and aggregate metrics.

    Args:
        splits: Sized sequence of data splits; each element is passed
            unchanged to ``run_pipeline``.
        fmats: Feature matrices, passed unchanged to ``run_pipeline``.
        cfg: Base configuration dict. It is shallow-copied for every run and
            the ``use_ca_rec`` / ``use_ua_rec`` / ``use_ge_rec`` toggles are
            overridden to match the mode, so the caller's dict is not mutated.
        seeds: Sized sequence of RNG seeds handed to ``set_seed``.

    Returns:
        A ``(summary, all_logs)`` tuple. ``summary`` maps each model name to
        ``{metric: mean, metric + '_std': std}`` computed over all collected
        runs (the ``'n_eval'`` entry is excluded). ``all_logs`` is the log
        object returned by the last ``'full'`` run.
    """
    import warnings

    modes = ['baseline', '+ca', '+ca+ua', 'full']
    # Result key that run_pipeline is expected to fill for each mode.
    key_map = {
        'baseline': 'MARec Baseline',
        '+ca': '+CA-Rec',
        '+ca+ua': '+CA+UA-Rec',
        'full': '+CA+UA+GE-Rec',
    }
    all_results = {m: defaultdict(list) for m in modes}
    all_logs = {}
    total = len(seeds) * len(splits) * len(modes)

    # The context manager closes the progress bar even when a run raises,
    # so a failed ablation does not leave a stale bar behind.
    with tqdm(total=total, desc='Ablation') as pbar:
        for seed in seeds:
            # NOTE(review): the RNG is seeded once per seed, not once per run,
            # so each run starts from the RNG state left by the previous one.
            set_seed(seed)
            for split in splits:
                for mode in modes:
                    cfg_run = dict(cfg)
                    cfg_run['use_ca_rec'] = mode in ('+ca', '+ca+ua', 'full')
                    cfg_run['use_ua_rec'] = mode in ('+ca+ua', 'full')
                    cfg_run['use_ge_rec'] = mode == 'full'
                    res, logs, _ = run_pipeline(split, fmats, cfg_run, mode=mode)
                    target = key_map[mode]
                    if target in res:
                        for k, v in res[target].items():
                            if k != 'n_eval':
                                all_results[mode][k].append(v)
                    else:
                        # A missing key would otherwise surface later only as
                        # zeros in the result tables; make it visible here.
                        warnings.warn(
                            f"run_pipeline returned no '{target}' entry for "
                            f"mode '{mode}'; this run is excluded from the "
                            f"summary.",
                            RuntimeWarning,
                            stacklevel=2,
                        )
                    if mode == 'full':
                        all_logs = logs
                    pbar.update(1)
                    # Memory cleanup between runs
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()

    # Collapse the per-run lists into mean / std per model and metric.
    summary = {}
    for mode in modes:
        key = key_map[mode]
        summary[key] = {}
        for k, vals in all_results[mode].items():
            summary[key][k] = float(np.mean(vals))
            summary[key][k + '_std'] = float(np.std(vals))
    return summary, all_logs

print('=' * 60)
print('  RUNNING ABLATION')
print(f'  {CONFIG["n_splits"]} splits x {CONFIG["n_seeds"]} seeds x 4 modes = '
      f'{CONFIG["n_splits"] * CONFIG["n_seeds"] * 4} runs')
print('=' * 60)

t0 = time.time()
# Seeds are derived from the base seed: seed, seed + 111, seed + 222, ...
seeds = [CONFIG['seed'] + i * 111 for i in range(CONFIG['n_seeds'])]
abl_summary, abl_logs = run_ablation(splits, fmats, CONFIG, seeds)
elapsed = time.time() - t0

# Display order of the ablation rows (also reused by later cells).
models_order = ['MARec Baseline', '+CA-Rec', '+CA+UA-Rec', '+CA+UA+GE-Rec']
ks = CONFIG['ks']

# Each metric cell is rendered as "mean+/-std" (14 characters for values
# below 10). Header and row cells are right-aligned to the same width so
# the columns line up, with one leading space as a separator.
cell_w = 15
hdr = f'{"Model":<22s}'
for k in ks:
    hdr += f'{"HR@" + str(k):>{cell_w}s}{"NDCG@" + str(k):>{cell_w}s}'
# Rules span the actual table width instead of a fixed column count.
table_w = len(hdr)

print(f'\nAblation done in {elapsed:.0f}s ({elapsed/60:.1f} min)')
print('\n' + '=' * table_w)
print(hdr)
print('-' * table_w)
for m in models_order:
    if m not in abl_summary:
        continue
    r = abl_summary[m]
    row = f'{m:<22s}'
    for k in ks:
        hr = r.get(f'hr@{k}', 0)
        hs = r.get(f'hr@{k}_std', 0)
        nd = r.get(f'ndcg@{k}', 0)
        ns = r.get(f'ndcg@{k}_std', 0)
        hr_cell = f'{hr:.4f}+/-{hs:.3f}'
        nd_cell = f'{nd:.4f}+/-{ns:.3f}'
        row += f'{hr_cell:>{cell_w}s}{nd_cell:>{cell_w}s}'
    print(row)
print('=' * table_w)
print(f'Total elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

---
## 11. Paper Comparison (Table 3 - MovieLens HetRec)

In [None]:
# HR@10 / NDCG@10 reference values, printed below under "Paper Table 3".
PAPER = {
    'ItemKNNCF':     {'hr@10': 0.1175, 'ndcg@10': 0.1335},
    'CLCRec':        {'hr@10': 0.0815, 'ndcg@10': 0.0763},
    'EQUAL':         {'hr@10': 0.1310, 'ndcg@10': 0.1470},
    'NFC':           {'hr@10': 0.1904, 'ndcg@10': 0.2076},
    'MARec (paper)': {'hr@10': 0.2928, 'ndcg@10': 0.3071},
}

table_rule = '=' * 70
print(table_rule)
print('{:<22s}{:>12s}{:>12s}{:>12s}'.format('Model', 'HR@10', 'NDCG@10', 'Lift'))
print('-' * 70)

# Reference rows: no lift column, they are the comparison targets.
print('  Paper Table 3:')
for ref_name, ref in PAPER.items():
    print('    {:<18s}{:>12.4f}{:>12.4f}'.format(
        ref_name, ref['hr@10'], ref['ndcg@10']))

# Our rows: lift is the relative HR@10 change versus the paper's MARec, in %.
print('  Our results:')
pm = PAPER['MARec (paper)']
paper_hr = pm['hr@10']
for model in models_order:
    if model in abl_summary:
        metrics = abl_summary[model]
        ours_hr = metrics.get('hr@10', 0)
        lift = (ours_hr - paper_hr) / paper_hr * 100
        print('    {:<18s}{:>12.4f}{:>12.4f}{:>+10.1f}%'.format(
            model, ours_hr, metrics.get('ndcg@10', 0), lift))
print(table_rule)
print('Note: paper uses 10 splits + 500 bootstrap, we use',
      CONFIG['n_splits'], 'splits +', CONFIG['n_seeds'], 'seeds.')

---
## 12. Diagnostics (12-plot grid)

In [None]:
# One extra run on the first split only, to collect logs and score
# matrices for the diagnostic plots.
print('Diagnostic run (single split)...')
s0 = splits[0]

# Baseline configuration: CONFIG with all three enhancement toggles off.
cfg_base = {
    **CONFIG,
    'use_ca_rec': False,
    'use_ua_rec': False,
    'use_ge_rec': False,
}

# Seed right before the full run; the baseline run follows it directly.
set_seed(CONFIG['seed'])
res_diag, logs_diag, scores_diag = run_pipeline(s0, fmats, CONFIG, mode='full')
_, _, scores_base = run_pipeline(s0, fmats, cfg_base, mode='baseline')

# Features of the split's test items ("cold") and train items ("warm").
cold_feats, warm_feats = (
    get_feats(fmats, s0[item_key]) for item_key in ('test_items', 'train_items')
)
print(f'Done. Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

In [None]:
os.makedirs(CONFIG['output_dir'], exist_ok=True)
sns.set_style('whitegrid')
fig = plt.figure(figsize=(20, 20))
# 4 x 3 grid: one cell per diagnostic panel.
gs = gridspec.GridSpec(4, 3, hspace=0.4, wspace=0.3)

# -- Row 1: Ablation bars --
# Short tick labels are looked up per model, so a model that is missing
# from abl_summary cannot shift the labels of the models after it.
short_labels = {
    'MARec Baseline': 'Baseline',
    '+CA-Rec': '+CA',
    '+CA+UA-Rec': '+CA+UA',
    '+CA+UA+GE-Rec': 'Full',
}
mp = [m for m in models_order if m in abl_summary]
lbl = [short_labels.get(m, m) for m in mp]
x = np.arange(len(mp))

# Panel (0, 0): HR@10 and NDCG@10 side by side, with value annotations.
ax = fig.add_subplot(gs[0, 0])
hr10 = [abl_summary[m].get('hr@10', 0) for m in mp]
nd10 = [abl_summary[m].get('ndcg@10', 0) for m in mp]
ax.bar(x - 0.18, hr10, 0.35, label='HR@10', color='#e74c3c', alpha=0.85)
ax.bar(x + 0.18, nd10, 0.35, label='NDCG@10', color='#3498db', alpha=0.85)
ax.set_xticks(x)
ax.set_xticklabels(lbl, fontsize=9)
ax.set_title('Ablation @10', fontweight='bold')
ax.legend(fontsize=8)
for i, (h, n) in enumerate(zip(hr10, nd10)):
    ax.text(i - 0.18, h + 0.002, f'{h:.3f}', ha='center', fontsize=7)
    ax.text(i + 0.18, n + 0.002, f'{n:.3f}', ha='center', fontsize=7)

# Panel (0, 1): the same comparison at cutoff 50.
ax = fig.add_subplot(gs[0, 1])
hr50 = [abl_summary[m].get('hr@50', 0) for m in mp]
nd50 = [abl_summary[m].get('ndcg@50', 0) for m in mp]
ax.bar(x - 0.18, hr50, 0.35, label='HR@50', color='#9b59b6', alpha=0.85)
ax.bar(x + 0.18, nd50, 0.35, label='NDCG@50', color='#e67e22', alpha=0.85)
ax.set_xticks(x)
ax.set_xticklabels(lbl, fontsize=9)
ax.set_title('Ablation @50', fontweight='bold')
ax.legend(fontsize=8)

# Panel (0, 2): relative HR@10 change versus the baseline, in percent.
# The 1e-9 floor keeps the division defined when the baseline HR@10 is 0
# or missing.
ax = fig.add_subplot(gs[0, 2])
b10 = abl_summary.get('MARec Baseline', {}).get('hr@10', 1e-9)
lifts = [(abl_summary[m].get('hr@10', 0) - b10) / max(b10, 1e-9) * 100 for m in mp]
# Green for non-negative lift, red for negative lift.
cs = ['#2ecc71' if l >= 0 else '#e74c3c' for l in lifts]
ax.barh(lbl, lifts, color=cs, alpha=0.85)
ax.set_xlabel('% Lift over Baseline')
ax.set_title('HR@10 Lift', fontweight='bold')
ax.axvline(0, color='k', lw=0.5)

# -- Row 2: Training losses --
# The first two panels each show one logged curve. A panel is still
# created, but left empty, when its log is missing or empty.
single_curve_panels = (
    (0, 'ca', '#8e44ad', 'CA-Rec: InfoNCE Loss'),
    (1, 'ua', '#e67e22', 'UA-Rec: Gaussian NLL'),
)
for col, log_key, curve_color, panel_title in single_curve_panels:
    ax = fig.add_subplot(gs[1, col])
    curve = logs_diag.get(log_key)
    if not curve:
        continue
    ax.plot(curve, color=curve_color, lw=1.5)
    ax.set_title(panel_title, fontweight='bold')
    ax.set_xlabel('Step')

# Third panel: GE-Rec reconstruction loss on the left axis and the KL
# term on a twin right axis.
ax = fig.add_subplot(gs[1, 2])
recon_curve = logs_diag.get('ge_recon')
if recon_curve:
    ax.plot(recon_curve, label='Recon', color='#3498db')
    ax2 = ax.twinx()
    ax2.plot(logs_diag['ge_kl'], label='KL', color='#e74c3c', ls='--')
    ax.set_title('GE-Rec: CVAE Losses', fontweight='bold')
    ax.set_xlabel('Step')
    ax.legend(loc='upper left', fontsize=8)
    ax2.legend(loc='upper right', fontsize=8)

# -- Row 3: PCA + NN --
# Panel (2, 0): 2-D PCA of an equal number of warm and cold feature rows.
ax = fig.add_subplot(gs[2, 0])
n_show = min(300, len(warm_feats), len(cold_feats))
feat_pca = PCA(n_components=2)
feat_2d = feat_pca.fit_transform(
    np.vstack([warm_feats[:n_show], cold_feats[:n_show]]))
# The first n_show projected rows are warm, the remainder are cold.
warm_2d, cold_2d = feat_2d[:n_show], feat_2d[n_show:]
ax.scatter(warm_2d[:, 0], warm_2d[:, 1],
           c='#3498db', alpha=0.3, s=10, label='Warm')
ax.scatter(cold_2d[:, 0], cold_2d[:, 1],
           c='#e74c3c', alpha=0.5, s=15, marker='x', label='Cold')
ax.set_title('PCA: Warm vs Cold Features', fontweight='bold')
ax.legend(fontsize=8)
ax.set_xlabel(f'PC1 ({feat_pca.explained_variance_ratio_[0]*100:.1f}%)')

# Panel (2, 1): 2-D PCA of the first n_u rows of both score matrices.
ax = fig.add_subplot(gs[2, 1])
n_u = min(400, scores_base.shape[0])
score_pca = PCA(n_components=2)
score_2d = score_pca.fit_transform(
    np.vstack([scores_base[:n_u], scores_diag[:n_u]]))
base_2d, full_2d = score_2d[:n_u], score_2d[n_u:]
ax.scatter(base_2d[:, 0], base_2d[:, 1],
           c='#95a5a6', alpha=0.3, s=8, label='Baseline')
ax.scatter(full_2d[:, 0], full_2d[:, 1],
           c='#2ecc71', alpha=0.3, s=8, label='Full')
ax.set_title('PCA: Baseline vs Full Scores', fontweight='bold')
ax.legend(fontsize=8)

# Panel (2, 2): histogram of the mean cosine distance from each of (at
# most) the first 500 cold rows to its 5 nearest warm rows.
ax = fig.add_subplot(gs[2, 2])
n_query = min(500, len(cold_feats))
warm_index = NearestNeighbors(n_neighbors=5, metric='cosine').fit(warm_feats)
neigh_dist, _ = warm_index.kneighbors(cold_feats[:n_query])
mean_dist = neigh_dist.mean(axis=1)
ax.hist(mean_dist, bins=30, alpha=0.6, label='Feature dist',
        color='#e74c3c', density=True)
ax.set_xlabel('Mean Cosine Dist to 5-NN')
ax.set_title('Cold-to-Warm NN Distance', fontweight='bold')
ax.legend(fontsize=8)

# -- Row 4: Coverage + HR curves + Paper --
# Panel (3, 0): coverage at three cutoffs for the baseline and full scores.
ax = fig.add_subplot(gs[3, 0])
cov_ks = [10, 25, 50]
coverage_series = (
    (scores_base, 'o-', 'Baseline', '#e74c3c'),
    (scores_diag, 's-', 'Full', '#2ecc71'),
)
for score_mat, line_fmt, series_name, series_color in coverage_series:
    coverage = [evaluate_coverage(score_mat, k) for k in cov_ks]
    ax.plot(cov_ks, coverage, line_fmt, label=series_name,
            color=series_color, lw=2)
ax.set_xlabel('K')
ax.set_ylabel('Coverage')
ax.set_title('Coverage@K', fontweight='bold')
ax.legend(fontsize=8)

# Panel (3, 1): mean HR@K over the configured cutoffs, from the ablation
# summary, for the baseline and the full model.
ax = fig.add_subplot(gs[3, 1])
curve_colors = {'MARec Baseline': '#e74c3c', '+CA+UA+GE-Rec': '#2ecc71'}
for model_name, curve_color in curve_colors.items():
    if model_name not in abl_summary:
        continue
    hr_at_k = [abl_summary[model_name].get(f'hr@{k}', 0) for k in ks]
    ax.plot(ks, hr_at_k, 'o-', label=model_name, lw=2,
            color=curve_color, markersize=8)
ax.set_xlabel('K')
ax.set_ylabel('HR@K')
ax.set_xticks(ks)
ax.set_title('HR@K Curves', fontweight='bold')
ax.legend(fontsize=8)

# Panel (3, 2): HR@10 of the reference entries (grey) next to ours (green).
ax = fig.add_subplot(gs[3, 2])
if '+CA+UA+GE-Rec' in abl_summary:
    our_best = abl_summary['+CA+UA+GE-Rec']
else:
    our_best = abl_summary.get('MARec Baseline', {})
bar_names = [*PAPER, 'Ours (Full)']
bar_values = [ref['hr@10'] for ref in PAPER.values()]
bar_values.append(our_best.get('hr@10', 0))
bar_colors = ['#95a5a6'] * len(PAPER) + ['#2ecc71']
ax.barh(bar_names, bar_values, color=bar_colors, alpha=0.85)
ax.set_xlabel('HR@10')
ax.set_title('vs Paper Baselines', fontweight='bold')

# Finish the figure: title, layout, save to the output directory, display.
fig.suptitle('MARec + CA-Rec / UA-Rec / GE-Rec: Diagnostics',
             fontsize=16, fontweight='bold', y=1.01)
plt.tight_layout()
diagnostics_png = os.path.join(CONFIG['output_dir'], 'diagnostics.png')
plt.savefig(diagnostics_png, dpi=150, bbox_inches='tight')
plt.show()
print(f'Elapsed: {time.time() - _NOTEBOOK_START:.0f}s')

---
## 13. Export

In [None]:
import shutil
import json as json_mod

out = CONFIG['output_dir']
os.makedirs(out, exist_ok=True)

# Ablation CSV: one row per model, every metric rounded to 6 decimals.
rows = [
    {'model': model_name,
     **{metric: round(value, 6) for metric, value in metrics.items()}}
    for model_name, metrics in abl_summary.items()
]
csv_path = os.path.join(out, 'ablation_results.csv')
pd.DataFrame(rows).to_csv(csv_path, index=False)

# Config JSON: values of these types are written as-is, anything else is
# stored as its str() representation.
json_native = (int, float, bool, str, list)
config_payload = {}
for cfg_key, cfg_value in CONFIG.items():
    if isinstance(cfg_value, json_native):
        config_payload[cfg_key] = cfg_value
    else:
        config_payload[cfg_key] = str(cfg_value)
with open(os.path.join(out, 'config.json'), 'w') as f:
    json_mod.dump(config_payload, f, indent=2)

# Zip the whole output directory into "<out>.zip".
shutil.make_archive(out, 'zip', out)

total_time = time.time() - _NOTEBOOK_START
print(f'\nResults saved to {out}.zip')
print('Files: ablation_results.csv, config.json, diagnostics.png')
print(f'\nTotal runtime: {total_time:.0f}s ({total_time/60:.1f} min)')

---
## Summary

In [None]:
total_time = time.time() - _NOTEBOOK_START
banner = '=' * 70
print(banner)
print('  MARec + CA-Rec / UA-Rec / GE-Rec  --  FINAL RESULTS')
print(banner)
print(f'  Dataset: HetRec ({n_users} users, {n_items} items)')
print(f'  Config:  {CONFIG["n_splits"]} splits, {CONFIG["n_seeds"]} seeds')
print(f'  Runtime: {total_time:.0f}s ({total_time/60:.1f} min)')
print()

# One line per model: mean +/- std for the three headline metrics.
headline_metrics = ('hr@10', 'ndcg@10', 'hr@50')
for model in models_order:
    if model in abl_summary:
        stats = abl_summary[model]
        parts = [
            f'{key}: {stats.get(key, 0):.4f}+/-{stats.get(key + "_std", 0):.3f}'
            for key in headline_metrics
        ]
        print(f'  {model:<22s} {" | ".join(parts)}')
print()

# Relative difference (in %) between our model and the paper's MARec
# numbers; falls back to the baseline row if the full model is absent.
pm = PAPER['MARec (paper)']
if '+CA+UA+GE-Rec' in abl_summary:
    best_key = '+CA+UA+GE-Rec'
else:
    best_key = 'MARec Baseline'
best = abl_summary[best_key]
for metric in ('hr@10', 'ndcg@10'):
    ours = best.get(metric, 0)
    paper = pm[metric]
    delta_pct = (ours - paper) / paper * 100
    print(f'  {best_key} {metric}: {ours:.4f} vs paper {paper:.4f} ({delta_pct:+.1f}%)')
print(banner)

---
## Enhancements Summary

| Model | Addresses | Mechanism |
|-------|-----------|-----------|
| **CA-Rec** | Linearity, collapse | InfoNCE contrastive loss: pulls matching pairs together and pushes non-matching pairs apart |
| **UA-Rec** | Metadata noise | Gaussian NLL: the predicted variance $\sigma^2$ down-weights noisy/sparse features |
| **GE-Rec** | Bland proxies | CVAE: learns the distribution of interaction embeddings and generates sharp proxies |

**MARec Limitations from Slide 12:**
1. Linearity Assumption -> CA-Rec + GE-Rec add non-linear mappings
2. Metadata Quality Dependency -> UA-Rec down-weights noisy features automatically
3. Not Fully End-to-End -> GE-Rec learns the distribution of embeddings
4. Domain Shift -> CA-Rec contrastive objective is more robust

**Runtime config:** 3 splits, 2 seeds, 15/15/20 epochs (CA-Rec / UA-Rec / GE-Rec), 128-dim hidden layers -> ~20 min on T4