In [None]:
import os
# GPU selection is configured through environment variables *before* torch is
# imported, so the settings are already in place when CUDA gets initialized.
# Enumerate GPUs in PCI bus order so that index 0 refers to a stable device.
os.environ['CUDA_DEVICE_ORDER']="PCI_BUS_ID"
# Expose only GPU 0 to this process.
os.environ['CUDA_VISIBLE_DEVICES']='0'

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_path = "../../data/jeehoshin/foodcom_dataset/"

# Load the saved embedding state dicts (each holds a single 'weight' tensor).
# map_location="cpu" lets a checkpoint written from a CUDA device be read on a
# CPU-only machine as well; load_state_dict below copies the values into the
# modules, which already live on `device`.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
user_pt = torch.load(data_path + "user_embedding.pt", map_location="cpu")
item_pt = torch.load(data_path + "item_embedding.pt", map_location="cpu")

# Build embedding tables with the same (num_rows, dim) as the stored weights,
# then overwrite their random initialisation with the stored values.
user_embedding = nn.Embedding(user_pt['weight'].shape[0], user_pt['weight'].shape[1]).to(device)
item_embedding = nn.Embedding(item_pt['weight'].shape[0], item_pt['weight'].shape[1]).to(device)
user_embedding.load_state_dict(user_pt)
item_embedding.load_state_dict(item_pt)

print(user_embedding.weight.shape)
print(item_embedding.weight.shape)

# Tab-separated interaction file; rows are split by the 'x_label' column:
# 0 is used as the training split and 2 as the test split.
# (presumably 1 marks a validation split -- TODO confirm; it is not used here)
df = pd.read_csv(data_path + "foodcom.inter", sep='\t')
train_interaction = df[df['x_label'] == 0][['userID', 'itemID']]
test_interaction = df[df['x_label'] == 2][['userID', 'itemID']]
print(train_interaction.shape)
print(test_interaction.shape)

print(train_interaction.head())

torch.Size([7585, 384])
torch.Size([29905, 384])
(191582, 2)
(95883, 2)
   userID  itemID
0       6     175
1      25     445
2      24      28
3      21     353
4      32     477


In [None]:
import scipy.sparse as sp
# Propagate the pretrained user/item embeddings over the normalized user-item
# graph (LightGCN-style: no weights, no non-linearity) and save the
# layer-averaged result to disk.
n_users = user_embedding.weight.shape[0]
n_items = item_embedding.weight.shape[0]
max_layers = 3

print(n_users, n_items)

# Extract user and item IDs as NumPy arrays
user_ids = train_interaction['userID'].to_numpy()
item_ids = train_interaction['itemID'].to_numpy()

# Fail fast on IDs that do not index the embedding tables. Without this check
# a userID >= n_users would silently land in the item range of the joint
# (n_users + n_items) node space instead of raising.
assert ((user_ids >= 0) & (user_ids < n_users)).all(), "userID out of range for user_embedding"
assert ((item_ids >= 0) & (item_ids < n_items)).all(), "itemID out of range for item_embedding"

# Create interaction edges (user→item and item→user).
# Item nodes are offset by n_users so users and items share one index space.
row = np.concatenate([user_ids, item_ids + n_users])
col = np.concatenate([item_ids + n_users, user_ids])
data = np.ones(len(row), dtype=np.float32)

# Create symmetric adjacency matrix A as COO
# NOTE(review): repeated (userID, itemID) rows are summed by scipy, giving
# edge weights > 1 -- confirm the interactions are unique or deduplicate.
A = sp.coo_matrix((data, (row, col)), shape=(n_users + n_items, n_users + n_items))

print('A done (fast)')

# Compute normalized adjacency matrix L = D^(-0.5) * A * D^(-0.5)
# The 1e-7 keeps the power finite for nodes without any interaction; their
# rows/columns in A are all zero, so they stay zero in L.
sumArr = np.array(A.sum(axis=1)).flatten() + 1e-7
diag = np.power(sumArr, -0.5)
D = sp.diags(diag)
L = D @ A @ D  # Matrix multiplication

# Convert to PyTorch sparse tensor
L = sp.coo_matrix(L)
indices = torch.from_numpy(np.vstack((L.row, L.col)).astype(np.int64))
values = torch.from_numpy(L.data.astype(np.float32))
SparseL = torch.sparse_coo_tensor(indices, values, size=torch.Size(L.shape)).to(device)

print(SparseL.shape)

# Pure inference: the embedding weights are parameters, so without no_grad
# every product below would be recorded in an autograd graph that is never
# used, and the saved tensors would carry requires_grad=True.
with torch.no_grad():
    # Layer-0 embeddings: users first, then items (same order as A's nodes).
    all_embeddings = torch.cat([user_embedding.weight, item_embedding.weight], 0).to(device)
    embeddings_list = [all_embeddings]
    scores = []

    user_all_embeddings = all_embeddings[:n_users, :]
    item_all_embeddings = all_embeddings[n_users:, :]

    for i in range(max_layers):
        # One more hop of neighbourhood aggregation.
        all_embeddings = torch.sparse.mm(SparseL, all_embeddings)
        embeddings_list.append(all_embeddings)
        # Final representation after (i+1) hops = mean over layers 0..i+1.
        lightgcn_all_embeddings = torch.stack(embeddings_list, dim=1)
        lightgcn_all_embeddings = torch.mean(lightgcn_all_embeddings, dim=1)
        user_all_embeddings = lightgcn_all_embeddings[:n_users, :]
        item_all_embeddings = lightgcn_all_embeddings[n_users:, :]

        print(f"{i+1}hop embedding")
        print(user_all_embeddings.shape)
        print(item_all_embeddings.shape)
        # The two tensors are slices (views) of one buffer. .cpu() is a no-op
        # when the data is already on the CPU, and torch.save writes the whole
        # underlying storage of a view, so clone first to make each file hold
        # only its own rows.
        torch.save({"weight": user_all_embeddings.clone().cpu()}, data_path + f"user_embedding_{i+1}hop.pt")
        torch.save({"weight": item_all_embeddings.clone().cpu()}, data_path + f"item_embedding_{i+1}hop.pt")
        print(f"{i+1}hop embeddings saved.")
        # NOTE(review): this unconditional break stops after the first hop, so
        # only the 1-hop files are written even though max_layers = 3. Remove
        # it to export every hop up to max_layers.
        break

7585 29905
A done (fast)
torch.Size([37490, 37490])
1hop embedding
torch.Size([7585, 384])
torch.Size([29905, 384])
1hop embeddings saved.
