In [None]:
import os
os.environ['CUDA_DEVICE_ORDER']="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES']='2'

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_path = "../../data/jeehoshin/allrecipe_dataset/"

# Load the saved embedding state dicts.
# map_location remaps the stored tensors onto `device`. Without it torch.load tries
# to restore each tensor on the device it was saved from, which fails on a CPU-only
# machine or when that GPU index is not visible to this process.
user_pt = torch.load(data_path + "user_embedding.pt", map_location=device)
item_pt = torch.load(data_path + "item_embedding.pt", map_location=device)

# Size each embedding table from the checkpoint's own 'weight' tensor so that
# load_state_dict finds parameters of exactly matching shape.
user_embedding = nn.Embedding(user_pt['weight'].shape[0], user_pt['weight'].shape[1]).to(device)
item_embedding = nn.Embedding(item_pt['weight'].shape[0], item_pt['weight'].shape[1]).to(device)
user_embedding.load_state_dict(user_pt)
item_embedding.load_state_dict(item_pt)

# Sanity check: (number of users, dim) and (number of items, dim)
print(user_embedding.weight.shape)
print(item_embedding.weight.shape)

# Tab-separated interaction table; the `x_label` column selects the data split.
df = pd.read_csv(data_path + "allrecipe.inter", sep='\t')

# Rows with x_label == 0 are used as training pairs, rows with x_label == 2 as test pairs.
# (presumably x_label == 1 marks a validation split that is unused here — TODO confirm)
train_interaction = df.loc[df['x_label'] == 0, ['userID', 'itemID']]
test_interaction = df.loc[df['x_label'] == 2, ['userID', 'itemID']]

# Number of (user, item) pairs in each split
print(train_interaction.shape)
print(test_interaction.shape)

# Peek at the first training pairs
print(train_interaction.head())

torch.Size([68768, 1024])
torch.Size([45630, 1024])
(676946, 2)
(283440, 2)
   userID  itemID
0       0       0
1       0       1
2       0       2
3       1       3
4       2       4


In [2]:
import scipy.sparse as sp
# Graph size is taken from the embedding tables: one node per user, one per item.
n_users = user_embedding.weight.shape[0]
n_items = item_embedding.weight.shape[0]
max_layers = 4

print(n_users, n_items)

# Training interactions as flat NumPy ID arrays
user_ids = train_interaction['userID'].to_numpy()
item_ids = train_interaction['itemID'].to_numpy()

# Item nodes are indexed after the user nodes (offset by n_users).
# Each interaction yields two directed edges, user→item and item→user,
# which makes the adjacency matrix symmetric.
row = np.concatenate((user_ids, n_users + item_ids))
col = np.concatenate((n_users + item_ids, user_ids))
data = np.ones(row.shape[0], dtype=np.float32)

# Symmetric adjacency matrix A over all user + item nodes, in COO format
A = sp.coo_matrix(
    (data, (row, col)),
    shape=(n_users + n_items, n_users + n_items),
)

print('A done (fast)')

# Symmetric normalization L = D^(-0.5) * A * D^(-0.5), where D holds the row sums of A.
# The small epsilon keeps the power finite for nodes that have no edges.
sumArr = np.asarray(A.sum(axis=1)).ravel() + 1e-7
diag = np.power(sumArr, -0.5)
D = sp.diags(diag)
L = (D @ A) @ D

# Hand the result to PyTorch as a sparse COO tensor: a 2 x nnz int64 index
# array plus the float32 values.
L = L.tocoo()
indices = torch.from_numpy(np.stack([L.row, L.col]).astype(np.int64))
values = torch.from_numpy(L.data.astype(np.float32))
SparseL = torch.sparse_coo_tensor(indices, values, size=tuple(L.shape)).to(device)

print(SparseL.shape)

# Score every (user, item) pair with the raw embeddings and after each of
# `max_layers` rounds of propagation through the normalized adjacency SparseL.
#
# This is inference only, so autograd is disabled: the nn.Embedding weights require
# grad, and without no_grad() every sparse.mm / stack / matmul result would keep a
# computation graph alive next to the already very large dense score matrices.
with torch.no_grad():
    all_embeddings = torch.cat([user_embedding.weight, item_embedding.weight], 0).to(device)
    embeddings_list = [all_embeddings]
    scores = []  # scores[i] = user x item score matrix (on CPU) after i propagation layers

    # Layer 0: dot products of the un-propagated user and item embeddings
    user_all_embeddings = all_embeddings[:n_users, :]
    item_all_embeddings = all_embeddings[n_users:, :]
    score = torch.matmul(user_all_embeddings, item_all_embeddings.T)
    scores.append(score.cpu())

    for i in range(max_layers):
        # One propagation step over the graph
        all_embeddings = torch.sparse.mm(SparseL, all_embeddings)
        embeddings_list.append(all_embeddings)

        # Final representation = mean over the embeddings of layers 0..i+1
        lightgcn_all_embeddings = torch.stack(embeddings_list, dim=1)
        lightgcn_all_embeddings = torch.mean(lightgcn_all_embeddings, dim=1)

        # First n_users rows are users, the remaining rows are items
        user_all_embeddings = lightgcn_all_embeddings[:n_users, :]
        item_all_embeddings = lightgcn_all_embeddings[n_users:, :]

        # The operands already live on `device`, so no extra transfer is needed
        score = torch.matmul(user_all_embeddings, item_all_embeddings.T)
        scores.append(score.cpu())
        print(score.shape)

68768 45630
A done (fast)
torch.Size([114398, 114398])
torch.Size([68768, 45630])
torch.Size([68768, 45630])
torch.Size([68768, 45630])
torch.Size([68768, 45630])


In [None]:
from tqdm import tqdm

def get_ground_truth_matrix(df, num_users, num_items):
    """Build a binary user-item interaction matrix from an interaction table.

    Args:
        df: DataFrame with integer ``userID`` and ``itemID`` columns, one row
            per interaction. Repeated (user, item) rows are allowed.
        num_users: Number of rows of the returned matrix.
        num_items: Number of columns of the returned matrix.

    Returns:
        scipy.sparse.csr_matrix of shape [num_users, num_items] and dtype int8
        whose entry is 1 if the user interacted with the item at least once.
    """
    user_ids = df['userID'].to_numpy()
    item_ids = df['itemID'].to_numpy()
    gt_matrix = sp.csr_matrix((np.ones(len(user_ids), dtype=np.int8), (user_ids, item_ids)),
                              shape=(num_users, num_items), dtype=np.int8)
    # Repeated (user, item) pairs are summed when the CSR matrix is built, which
    # would give entries > 1 (or wrap around in int8). Collapse the duplicates and
    # reset every stored entry to 1 so the matrix is a true indicator matrix.
    gt_matrix.sum_duplicates()
    gt_matrix.data.fill(1)
    return gt_matrix

def evaluate_topk(score, gt_matrix, ks=(10, 20)):
    """Evaluate mean Recall@k and NDCG@k over all users that have ground truth.

    Args:
        score: torch.Tensor of shape [num_users, num_items]; a larger value
            ranks the item higher for that user.
        gt_matrix: scipy sparse matrix of shape [num_users, num_items]; the
            stored column indices of a row are that user's relevant items.
        ks: Iterable of cutoffs. A cutoff larger than the number of items is
            evaluated on the full ranking instead of raising.

    Returns:
        dict mapping ``"Recall@{k}"`` and ``"NDCG@{k}"`` (in that order, for
        every k in ``ks``) to the mean over users with at least one relevant
        item. Users without ground truth are skipped; if no user has any, the
        values are NaN.
    """
    ks = list(ks)
    # torch.topk raises if k exceeds the size of the ranked dimension, so cap it
    topk = min(max(ks), score.shape[1])
    score = score.detach().cpu()

    # Get top-K predictions for all users at once
    topk_items = torch.topk(score, k=topk, dim=1).indices.numpy()  # shape: [num_users, topk]
    gt_rows = gt_matrix.tolil().rows  # per-user lists of relevant item indices

    # The discount 1 / log2(rank + 2) depends only on the 0-based rank: compute it once
    discounts = [1.0 / np.log2(rank + 2) for rank in range(max(ks))]

    recalls = {k: [] for k in ks}
    ndcgs = {k: [] for k in ks}

    for user_id in tqdm(range(score.shape[0]), desc="Evaluating"):
        true_items = set(gt_rows[user_id])
        if not true_items:
            continue  # skip users without ground truth

        preds = topk_items[user_id]

        for k in ks:
            hits = [item in true_items for item in preds[:k]]
            recall = sum(hits) / len(true_items)

            # DCG sums the discounts at the hit positions; the ideal DCG places
            # all relevant items (at most k of them) at the top of the list.
            dcg = sum(discount for discount, hit in zip(discounts, hits) if hit)
            idcg = sum(discounts[:min(len(true_items), k)])
            ndcg = dcg / idcg if idcg > 0 else 0.0

            recalls[k].append(recall)
            ndcgs[k].append(ndcg)

    def _mean(values):
        # np.mean([]) emits a RuntimeWarning; make the "no evaluable user" case explicit
        return np.mean(values) if values else float("nan")

    metrics = {f"Recall@{k}": _mean(recalls[k]) for k in ks}
    metrics.update({f"NDCG@{k}": _mean(ndcgs[k]) for k in ks})
    return metrics

import scipy.sparse as sp

# Build the test-split ground truth once; it is reused for every score matrix below
gt_matrix = get_ground_truth_matrix(test_interaction, n_users, n_items)

# Report Recall@10/20 and NDCG@10/20 for each entry of `scores`
# (one score matrix per propagation depth, index 0 being the raw embeddings).
# NOTE(review): the score matrices are ranked as-is, i.e. items a user already
# interacted with in the training split are not masked out before top-K selection —
# confirm this is intended.
for idx, score in enumerate(scores):
    print(f"<Score {idx}>")
    metrics = evaluate_topk(score, gt_matrix, ks=[10, 20])
    for name, val in metrics.items():
        print(f"{name}: {val:.4f}")
    print()

<Score 0>


Evaluating: 100%|█████████████████| 68768/68768 [00:04<00:00, 15513.95it/s]


Recall@10: 0.0008
Recall@20: 0.0014
NDCG@10: 0.0005
NDCG@20: 0.0007

<Score 1>


Evaluating: 100%|█████████████████| 68768/68768 [00:04<00:00, 16337.39it/s]


Recall@10: 0.0232
Recall@20: 0.0442
NDCG@10: 0.0148
NDCG@20: 0.0211

<Score 2>


Evaluating: 100%|█████████████████| 68768/68768 [00:04<00:00, 15558.07it/s]


Recall@10: 0.0209
Recall@20: 0.0441
NDCG@10: 0.0140
NDCG@20: 0.0208

<Score 3>


Evaluating: 100%|█████████████████| 68768/68768 [00:04<00:00, 15270.82it/s]


Recall@10: 0.0202
Recall@20: 0.0442
NDCG@10: 0.0136
NDCG@20: 0.0206

<Score 4>


Evaluating: 100%|█████████████████| 68768/68768 [00:04<00:00, 15686.14it/s]

Recall@10: 0.0201
Recall@20: 0.0442
NDCG@10: 0.0135
NDCG@20: 0.0204






In [4]:
# Check that no (user, item) pair appears in both the train and the test split
gt_train = get_ground_truth_matrix(train_interaction, n_users, n_items)
gt_test = get_ground_truth_matrix(test_interaction, n_users, n_items)

print(gt_train.shape)
print(gt_test.shape)

# A pair overlaps only when it is present in BOTH matrices, so intersect their
# boolean sparsity patterns. Testing `gt_train + gt_test > 1` instead would also
# flag a pair that is merely repeated inside a single split, and the int8 sum
# could wrap around.
overlap_matrix = (gt_train != 0).multiply(gt_test != 0).tocsr()
overlap_matrix.eliminate_zeros()  # make nnz count true overlaps only
print(overlap_matrix)
n_overlaps = overlap_matrix.nnz  # number of stored (True) entries, i.e. overlapping pairs

print(f"Number of overlapping interactions: {n_overlaps}")

(68768, 45630)
(68768, 45630)
<Compressed Sparse Row sparse matrix of dtype 'bool'
	with 0 stored elements and shape (68768, 45630)>
Number of overlapping interactions: 0
