In [125]:
from google.colab import drive
import shutil
import os
def copy_from_drive(src_path, dst_path):

    if os.path.exists(dst_path):
        print(f"skip:{dst_path} exists")
        return

    if os.path.isdir(src_path):
        shutil.copytree(src_path, dst_path)
    elif os.path.isfile(src_path):
        shutil.copy(src_path, dst_path)

drive.mount('/content/drive')
copy_from_drive('/content/drive/MyDrive/tool', '/content/tool')
copy_from_drive('/content/drive/MyDrive/MicroLens-50k_pairs.csv','/content/MicroLens-50k_pairs.csv')
copy_from_drive('/content/drive/MyDrive/cover_emb128.lmdb','/content/cover_emb128.lmdb')
copy_from_drive('/content/drive/MyDrive/title_emb1024.lmdb','/content/title_emb1024.lmdb')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
skip:/content/tool exists
skip:/content/MicroLens-50k_pairs.csv exists
skip:/content/cover_emb128.lmdb exists
skip:/content/title_emb1024.lmdb exists


In [126]:
!pip install faiss-cpu
!pip install lmdb
from tool import preprocess
from tool import customdataset
from tool import evaluate
import faiss
from datetime import datetime
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import math
import csv
from matplotlib import pyplot as plt




In [127]:
preprocess.set_seed(42)

In [128]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [129]:
# dataset_pd = pd.read_csv('D:\\VideoRecSystem\\MicroLens\\DataSet\\MicroLens-50k_pairs.csv')
path = 'MicroLens-50k_pairs.csv'
cover_lmdb_path = 'cover_emb128.lmdb'
title_lmdb_path = 'title_emb1024.lmdb'
record_path = './records'
user = 'user'
item = 'item'
user_id = 'user_id'
item_id = 'item_id'
timestamp = 'timestamp'
save_dir = './embeddings'
PROJECT_NAME = 'LightGCN'
# ---------- 超参数 ----------
N_LAYERS = 2
EMBEDDING_DIM = 64
EPOCHS = 50
BATCH_SIZE = 1024
LR = 1E-3
MODAL = {'COVER':{"LMDB_DIM":128, "HIDDEN_SIZE":[EMBEDDING_DIM],"DROPOUT":0.2} , 'TITLE':{"LMDB_DIM":1024,"HIDDEN_SIZE":[EMBEDDING_DIM],"DROPOUT":0.2}
         ,'COVER-TITLE': {"LMDB_DIM":128+1024, "HIDDEN_SIZE":[EMBEDDING_DIM],"DROPOUT":0.2}}
FUSION_MODE = "late"
CURRENT_MODAL = "COVER-TITLE"
MODAL_CONFIG = MODAL[CURRENT_MODAL]
MODAL_HIDDEN_SIZE = MODAL_CONFIG.get('HIDDEN_SIZE')
LMDB_DIM = MODAL_CONFIG.get('LMDB_DIM')
MODAL_DROPOUT = MODAL_CONFIG.get('DROPOUT')
DROPOUT = 0.2
L2_NORM = False
TOP_K= 10
PATIENCE = 5
MONITOR = 'hr'
NUM_WORKERS = 10

In [130]:
dataset_pd,num_users,num_items = preprocess.openAndSort(path,user_id=user,item_id=item,timestamp='timestamp')

dataset base information：
- number of users：50000
- number of items：19220
- number of rows：359708


In [131]:

train_df, val_df, test_df, train_all_df = preprocess.split_with_val(dataset_pd,user, item, timestamp)
print(f"Train size: {len(train_df)}")
print(f"Val_df size: {len(val_df)}")
print(f"Test_df size: {len(test_df)}")
print(f"Train_all_df size: {len(train_all_df)}")

Train size: 259708
Val_df size: 49156
Test_df size: 47774
Train_all_df size: 308864


In [132]:
# maintain a map from new id to old id, new id for constructing matrix
user2id = {u: i for i, u in enumerate(dataset_pd[user].unique())}
item2id = {i: j for j, i in enumerate(dataset_pd[item].unique())}

# apply to train_df and test_df
train_df[user_id] = train_df[user].map(user2id)
train_df[item_id] = train_df[item].map(item2id)
val_df[user_id] = val_df[user].map(user2id)
val_df[item_id] = val_df[item].map(item2id)
test_df[user_id] = test_df[user].map(user2id)
test_df[item_id] = test_df[item].map(item2id)
train_all_df[user_id] = train_all_df[user].map(user2id)
train_all_df[item_id] = train_all_df[item].map(item2id)

# 1. 构建 item_id 到 item 的映射（来自 train_df）
item_id_to_item = {v: k for k, v in item2id.items()}

In [133]:

def build_adj_matrix(df, num_users, num_items ,user_id, item_id):
    rows = df[user_id].values
    cols = df[item_id].values
    data = np.ones(len(df))
    # set interaction of user-item as 1, other as 0
    R = sp.coo_matrix((data, (rows, cols)), shape=(num_users, num_items))

    # construct symetric matrix A
    upper = sp.hstack([sp.csr_matrix((num_users, num_users)), R])
    lower = sp.hstack([R.T, sp.csr_matrix((num_items, num_items))])
    A = sp.vstack([upper, lower])

    # normalization A → Ĥ = D^{-1/2} A D^{-1/2}
    rowsum = np.array(A.sum(1)).flatten()
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    D_inv_sqrt = sp.diags(d_inv_sqrt)
    A_norm = D_inv_sqrt @ A @ D_inv_sqrt

    # transform to torch.sparse
    A_norm = A_norm.tocoo()
    indices = torch.LongTensor([A_norm.row, A_norm.col])
    values = torch.FloatTensor(A_norm.data)
    return torch.sparse_coo_tensor(indices, values, A_norm.shape)


In [134]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class LightGCN(nn.Module):
    def __init__(self, num_users, num_items , embedding_dim, n_layers, adjacency,
                 lmdb_dim=LMDB_DIM, modal_hidden_size=MODAL_HIDDEN_SIZE, modal_dropout=MODAL_DROPOUT,
                 fusion_mode=FUSION_MODE):  # 'base' | 'early' | 'late'
        super(LightGCN, self).__init__()
        assert fusion_mode in {'base', 'early', 'late'}
        self.fusion_mode   = fusion_mode
        self.user_emb      = None
        self.item_emb      = None
        self.num_users     = num_users
        self.num_items     = num_items
        self.embedding_dim = embedding_dim
        self.n_layers      = n_layers
        self.adjacency     = adjacency  # torch.sparse_coo_tensor

        # ----- ID embeddings -----
        self.embedding_user = nn.Embedding(num_users, embedding_dim)
        self.embedding_item = nn.Embedding(num_items, embedding_dim)
        nn.init.xavier_uniform_(self.embedding_user.weight)
        nn.init.xavier_uniform_(self.embedding_item.weight)

        # modal 向量（冻结）
        modal_emb_tensor = None
        if FUSION_MODE!='base':
            if CURRENT_MODAL=='COVER':
                modal_emb_tensor = preprocess.load_tensor_from_lmdb(
                    cover_lmdb_path, num_items, item_id_to_item, lmdb_dim
                )
            if CURRENT_MODAL=='TITLE':
                modal_emb_tensor = preprocess.load_tensor_from_lmdb(
                    title_lmdb_path, num_items, item_id_to_item, lmdb_dim
                )
            if CURRENT_MODAL=='COVER-TITLE':
                cover_emb_tensor = preprocess.load_tensor_from_lmdb(
                    cover_lmdb_path, num_items, item_id_to_item, 128
                )
                title_emb_tensor = preprocess.load_tensor_from_lmdb(
                    title_lmdb_path, num_items, item_id_to_item, 1024
                )
                modal_emb_tensor = torch.cat([cover_emb_tensor, title_emb_tensor], dim=-1)

            self.register_buffer('frozen_extra_emb', modal_emb_tensor)


        # ----- 前融合投影：[item_id_emb; modal] -> emb_dim -----
        self.mlp_item_modal = self.build_mlp(embedding_dim + lmdb_dim, modal_hidden_size, modal_dropout)

        # ----- 后融合用 α（全局标量）-----
        # sigmoid(0)=0.5；如需更稳可改为 1.0 使初期更偏向 ID
        self.alpha_param = nn.Parameter(torch.tensor(0.0)) if fusion_mode == 'late' else None

    def build_mlp(self, input_dim, hidden_sizes, dropout):
        layers = []
        for h in hidden_sizes:
            layers += [nn.Linear(input_dim, h), nn.BatchNorm1d(h), nn.Tanh(), nn.Dropout(dropout)]
            input_dim = h
        return nn.Sequential(*layers)

    # ============ LightGCN 传播（给定初始 user/item 表示） ============
    def _propagate(self, user_init, item_init):
        """
        user_init: (U, D), item_init: (I, D)
        返回经 n_layers LightGCN 平均聚合后的 (user_emb, item_emb)
        """
        all_embeddings = torch.cat([user_init, item_init], dim=0)   # (U+I, D)
        embs = [all_embeddings]
        for _ in range(self.n_layers):
            all_embeddings = torch.sparse.mm(self.adjacency, all_embeddings)
            embs.append(all_embeddings)
        final = torch.stack(embs, dim=1).mean(dim=1)                # (U+I, D)
        user_embedding, item_embedding = torch.split(final, [self.num_users, self.num_items])
        return user_embedding, item_embedding

    def _item_init_id_only(self):
        return self.embedding_item.weight                             # (I, D)

    def _item_init_early(self):
        # [id_emb; modal] -> emb_dim
        modal = self.frozen_extra_emb.to(self.embedding_item.weight.device)  # (I, C)
        i_cat = torch.cat([self.embedding_item.weight, modal], dim=-1)       # (I, D+C)
        i_emb = self.mlp_item_modal(i_cat)                                   # (I, D)
        return i_emb

    # ============ 前向：生成/缓存用户与物品图表示 ============
    def forward(self):
        device = self.embedding_item.weight.device
        user_init = self.embedding_user.weight                              # (U, D)

        if self.fusion_mode == 'base':
            # 纯 ID：一次传播
            item_init = self._item_init_id_only()
            user_embedding, item_embedding = self._propagate(user_init, item_init)

        elif self.fusion_mode == 'early':
            # 前融合：先做模态映射，再作为图的初始 item 表示
            item_init = self._item_init_early()
            user_embedding, item_embedding = self._propagate(user_init, item_init)

        else:  # 'late'
            # 后融合：两条路径分别图传播，最后在图传播结果处做 α 加权（仅对 item）
            item_init_id = self._item_init_id_only()
            item_init_mm = self._item_init_early()

            # 两次传播（共享同一 user_init）
            user_id_emb,  item_id_emb  = self._propagate(user_init, item_init_id)
            user_mm_emb,  item_mm_emb  = self._propagate(user_init, item_init_mm)

            alpha = torch.sigmoid(self.alpha_param).to(device)  # 标量
            item_embedding = alpha * item_id_emb + (1.0 - alpha) * item_mm_emb
            # 用户侧：为最小改动与稳定性，采用 ID 路径的 user 表示（也可两路再平均/融合）
            user_embedding = user_id_emb

        # 缓存（评测/导出用）
        self.user_emb = user_embedding.detach()
        self.item_emb = item_embedding.detach()
        return user_embedding, item_embedding

    # ============ Getter（评测/召回） ============
    def get_users_embedding(self, user_ids, l2_norm=False):
        u_vec = self.user_emb[user_ids]
        if l2_norm:
            u_vec = F.normalize(u_vec, p=2, dim=1)
        return u_vec

    def get_items_embedding(self, item_ids, l2_norm=False):
        i_vec = self.item_emb[item_ids]
        if l2_norm:
            i_vec = F.normalize(i_vec, p=2, dim=1)
        return i_vec

    # ============ 导出 ============
    def save_embeddings(self, num_users, num_items, device, save_dir='./embeddings', l2_norm=L2_NORM):
        import os, faiss
        os.makedirs(save_dir, exist_ok=True)
        self.eval().to(device)

        user_ids = torch.arange(num_users, dtype=torch.long, device=device)
        item_ids = torch.arange(num_items, dtype=torch.long, device=device)

        with torch.no_grad():
            user_embeds = self.get_users_embedding(user_ids, l2_norm=l2_norm)
            item_embeds = self.get_items_embedding(item_ids, l2_norm=l2_norm)

        user_embeds = user_embeds.cpu().numpy().astype(np.float32)
        item_embeds = item_embeds.cpu().numpy().astype(np.float32)

        np.save(f"{save_dir}/user_embeddings.npy", user_embeds)
        np.save(f"{save_dir}/item_embeddings.npy", item_embeds)

        dim = item_embeds.shape[1]
        index = faiss.IndexFlatIP(dim)
        index.add(item_embeds)
        faiss.write_index(index, f"{save_dir}/item_index.faiss")
        print("Saved user/item embeddings and FAISS index.")


In [135]:
import torch
import numpy as np

def train_model(model,
                train_df,
                val_df,
                top_k,
                epochs,
                batch_size,
                lr,
                val_mode,
                device=None,
                patience=PATIENCE,         # 早停容忍
                monitor=MONITOR,       # "hr" 或 "ndcg"
                record_path = record_path
                ):
    """
    训练 LightGCN (或其它 BPR 模型) 的通用函数
    ------------------------------------------------
    • train_df      : pandas DataFrame，含 user_id / item_id
    • num_items     : 物品总数
    • device        : torch.device；默认为 'cuda' (若可用) 否则 'cpu'
    • max_grad_norm : 梯度裁剪阈值；避免梯度爆炸，可选
    """
    # -------- 设备 ----------
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    if hasattr(model, "adjacency"):               # adjacency 可能是稀疏张量
        model.adjacency = model.adjacency.to(device)

    # -------- 优化器 ----------
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    train_loader = customdataset.build_train_loader_inbatch(train_df, batch_size=batch_size,user_col=user_id, item_col=item_id)
    val_loader = customdataset.build_test_loader(val_df, num_items ,user_col = user_id, item_col = item_id, batch_size=1024, num_workers=NUM_WORKERS)

    # 训练过程记录
    hist = {
        "epoch": [],
        "loss": [],
        f"hr@{top_k}": [],
        f"ndcg@{top_k}": [],
        "alpha": [],
        "beta": [],
    }

    # 早停配置
    best_metric = -math.inf
    best_epoch  = -1
    patience_cnt = 0
    monitor_key = f"{monitor}@{top_k}"

    print(f"[EarlyStopping] monitor={monitor_key} , patience={patience}")

    # -------- 训练循环 ----------
    for epoch in range(1, epochs + 1):
        model.train()
        dt_start = datetime.now()
        epoch_loss = 0.0

        for batch in train_loader:
            user_ids, pos_item_ids = batch
            user_ids = user_ids.to(device).long()
            pos_item_ids = pos_item_ids.to(device).long()


            # 1. 前向传播（返回 user / item 向量）
            user_emb, item_emb = model()
            u_vec = user_emb[user_ids]
            i_vec = item_emb[pos_item_ids]

            # 2. 得分矩阵：每个 user 对所有正 item 的打分
            logits = torch.matmul(u_vec, i_vec.T)  # shape: (B, B)

            # 3. 构造标签：每个 user 的正确 item 在对角线（即位置 i）
            labels = torch.arange(logits.size(0), device=device)  # [0, 1, ..., B-1]

            # 4. Cross Entropy Loss
            loss = F.cross_entropy(logits, labels)

            # 5. 反向传播
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # 日志
        avg_loss = epoch_loss / len(train_loader)
        dt_end = datetime.now()
        dt = (dt_end - dt_start).total_seconds()
        model.save_embeddings(num_users=num_users,num_items=num_items,device=device,save_dir=save_dir)
        faiss_index = faiss.read_index(f"{save_dir}/item_index.faiss")
        model.eval()
        hr_m, ndcg_m = evaluate.evaluate_model(val_loader, model, faiss_index, device, top_k=top_k)

        # gates（若存在）
        alpha_val = float(torch.sigmoid(model.alpha_param).item()) if hasattr(model, "alpha_param") and model.alpha_param is not None else float("nan")
        beta_val  = float(torch.sigmoid(model.beta_param).item())  if hasattr(model, "beta_param") and model.beta_param is not None else float("nan")

        print(f"[Epoch {epoch:02d}/{epochs}] avg InBatch Softmax Loss = {avg_loss:.4f}, "
              f"HR@{top_k} = {hr_m:.4f}, NDCG@{top_k} = {ndcg_m:.4f}, "
              f"alpha={alpha_val if not math.isnan(alpha_val) else 'NA'}, "
              f"beta={beta_val if not math.isnan(beta_val) else 'NA'}, "
              f"time = {dt:.2f}s")

        # —— 记录历史 ——
        hist["epoch"].append(epoch)
        hist["loss"].append(avg_loss)
        hist[f"hr@{top_k}"].append(hr_m)
        hist[f"ndcg@{top_k}"].append(ndcg_m)
        hist["alpha"].append(alpha_val)
        hist["beta"].append(beta_val)

        # —— 早停判断（最大化 monitor 指标）——
        if val_mode:
          current_metric = hr_m if monitor == "hr" else ndcg_m
          if current_metric > best_metric:
              best_metric = current_metric
              best_epoch = epoch
              patience_cnt = 0
              print(f"current best {monitor_key}={best_metric:.4f} @ epoch {epoch}.")
                          # ==== 保存最佳 hr / ndcg / epoch ====
              best_info_path = os.path.join(record_path,
                                            "validation mode" if val_mode else "train mode",
                                            "best_result.txt")
              os.makedirs(os.path.dirname(best_info_path), exist_ok=True)
              with open(best_info_path, "w") as f:
                  f.write(f"epoch: {epoch}\n")
                  f.write(f"HR@{top_k}: {hr_m:.4f}\n")
                  f.write(f"NDCG@{top_k}: {ndcg_m:.4f}\n")
              print(f"Best result info saved to {best_info_path}")
          else:
              patience_cnt += 1
              if patience_cnt >= patience:
                  print("Early stopping triggered.")
                  break


    # —— 导出历史 CSV ——
    csv_path = os.path.join(record_path,"validation mode" if val_mode else "train mode","training_history.csv")
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)  # 确保目录存在
    with open(csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["epoch", "loss", f"hr@{top_k}", f"ndcg@{top_k}", "alpha", "beta", "time_sec"])
        for i in range(len(hist["epoch"])):
            writer.writerow([
                hist["epoch"][i],
                hist["loss"][i],
                hist[f"hr@{top_k}"][i],
                hist[f"ndcg@{top_k}"][i],
                hist["alpha"][i],
                hist["beta"][i],
            ])
    # —— 绘图：Loss ——

    plt.figure()
    plt.plot(hist["epoch"], hist["loss"])
    plt.xlabel("Epoch"); plt.ylabel("In-Batch CE Loss"); plt.title("Training Loss")
    plt.grid(True, linestyle="--", alpha=0.4); plt.tight_layout()
    plt.xticks(range(1, max(hist["epoch"]) + 1, 1))
    fig1_path = os.path.join(record_path,"validation mode" if val_mode else "train mode","curve_loss.png")
    os.makedirs(os.path.dirname(fig1_path), exist_ok=True)  # 确保目录存在

    plt.savefig(fig1_path, dpi=150); plt.close()
    print(f"Saved {fig1_path}")

    # —— 绘图：HR/NDCG ——
    plt.figure()
    plt.plot(hist["epoch"], hist[f"hr@{top_k}"], label=f"HR@{top_k}")
    plt.plot(hist["epoch"], hist[f"ndcg@{top_k}"], label=f"NDCG@{top_k}")
    plt.xlabel("Epoch"); plt.ylabel("Metric"); plt.title("Validation Metrics")
    plt.legend(); plt.grid(True, linestyle="--", alpha=0.4); plt.tight_layout()
    plt.xticks(range(1, max(hist["epoch"]) + 1, 1))
    fig2_path = os.path.join(record_path,"validation mode" if val_mode else "train mode","curve_metrics.png")
    os.makedirs(os.path.dirname(fig2_path), exist_ok=True)  # 确保目录存在
    plt.savefig(fig2_path, dpi=150); plt.close()
    print(f"Saved {fig2_path}")

    # —— 绘图：alpha/beta（如存在） ——
    if not all(math.isnan(v) for v in hist["alpha"]) or not all(math.isnan(v) for v in hist["beta"]):
        plt.figure()
        if not all(math.isnan(v) for v in hist["alpha"]):
            plt.plot(hist["epoch"], hist["alpha"], label="alpha (item late)")
        if not all(math.isnan(v) for v in hist["beta"]):
            plt.plot(hist["epoch"], hist["beta"],  label="beta (user late)")
        plt.xlabel("Epoch"); plt.ylabel("Gate (sigmoid)"); plt.title("Late Fusion Gates")
        plt.ylim(0, 1); plt.legend(); plt.grid(True, linestyle="--", alpha=0.4); plt.tight_layout()
        plt.xticks(range(1, max(hist["epoch"]) + 1, 1))
        fig3_path = os.path.join(record_path,"validation mode" if val_mode else "train mode","curve_alpha_beta.png")
        os.makedirs(os.path.dirname(fig3_path), exist_ok=True)  # 确保目录存在
        plt.savefig(fig3_path, dpi=150); plt.close()
        print(f"Saved {fig3_path}")

    print(f"Best {monitor_key}={best_metric:.4f} at epoch {best_epoch}")
    return best_epoch

In [136]:
adj_torch = build_adj_matrix(train_df, num_users, num_items ,user_id, item_id)
model = LightGCN(num_users,num_items,embedding_dim=EMBEDDING_DIM,n_layers=N_LAYERS,adjacency=adj_torch)
model.to(device)
selected_epoch = train_model(model=model,epochs=EPOCHS, train_df=train_df,val_df=val_df,batch_size=BATCH_SIZE,top_k=TOP_K,lr=LR,val_mode=True)
adj_torch = build_adj_matrix(train_all_df, num_users, num_items ,user_id, item_id)
model = LightGCN(num_users,num_items,embedding_dim=EMBEDDING_DIM,n_layers=N_LAYERS,adjacency=adj_torch)
model.to(device)
train_model(model=model,epochs=selected_epoch, train_df=train_all_df,val_df=test_df,batch_size=BATCH_SIZE,top_k=TOP_K,lr=LR,val_mode=False)

  d_inv_sqrt = np.power(rowsum, -0.5)


[EarlyStopping] monitor=hr@10 , patience=5
Saved user/item embeddings and FAISS index.
[Epoch 01/50] avg InBatch Softmax Loss = 6.5407, HR@10 = 0.0167, NDCG@10 = 0.0081, alpha=0.4142559766769409, beta=NA, time = 4.79s
current best hr@10=0.0167 @ epoch 1.
Best result info saved to ./records/validation mode/best_result.txt
Saved user/item embeddings and FAISS index.
[Epoch 02/50] avg InBatch Softmax Loss = 6.0039, HR@10 = 0.0233, NDCG@10 = 0.0114, alpha=0.3485683798789978, beta=NA, time = 4.81s
current best hr@10=0.0233 @ epoch 2.
Best result info saved to ./records/validation mode/best_result.txt
Saved user/item embeddings and FAISS index.
[Epoch 03/50] avg InBatch Softmax Loss = 5.6822, HR@10 = 0.0236, NDCG@10 = 0.0109, alpha=0.2952204942703247, beta=NA, time = 4.82s
current best hr@10=0.0236 @ epoch 3.
Best result info saved to ./records/validation mode/best_result.txt
Saved user/item embeddings and FAISS index.
[Epoch 04/50] avg InBatch Softmax Loss = 5.4347, HR@10 = 0.0280, NDCG@10 

-1

In [137]:
model.save_embeddings(num_users=num_users,num_items=num_items,device=device,save_dir=save_dir)

Saved user/item embeddings and FAISS index.


In [138]:
test_loader = customdataset.build_test_loader(test_df, num_items ,user_col = user_id, item_col = item_id, batch_size=1024, num_workers=NUM_WORKERS)
item_pool = list(range(num_items))
faiss_index = faiss.read_index(f"{save_dir}/item_index.faiss")

In [139]:
hr_r, ndcg_r = evaluate.evaluate_random(test_loader, item_pool ,top_k=TOP_K)
print(f"Random HR@{TOP_K} = {hr_r:.4f}, NDCG@{TOP_K} = {ndcg_r:.4f}")
hr_p, ndcg_p = evaluate.evaluate_popular(test_loader, train_all_df,top_k=TOP_K)
print(f"Popular HR@{TOP_K} = {hr_p:.4f}, NDCG@{TOP_K} = {ndcg_p:.4f}")
hr_m, ndcg_m = evaluate.evaluate_model(test_loader, model, faiss_index, device,top_k=TOP_K)
print(f"Model   HR@{TOP_K} = {hr_m:.4f}, NDCG@{TOP_K} = {ndcg_m:.4f}")


Random HR@10 = 0.0004, NDCG@10 = 0.0002
Popular HR@10 = 0.0030, NDCG@10 = 0.0014
Model   HR@10 = 0.0320, NDCG@10 = 0.0132


In [140]:
from google.colab import drive
drive.mount('/content/drive')

# 挂载 Google Drive
drive.mount('/content/drive')
# 目标路径
target_dir = None
if(FUSION_MODE=="base"):
    target_dir = f"/content/drive/MyDrive/REC/{PROJECT_NAME}/{FUSION_MODE}/"
else:
    target_dir = f"/content/drive/MyDrive/REC/{PROJECT_NAME}/{FUSION_MODE}/{CURRENT_MODAL}"
# 创建目标路径（包含上层目录）
os.makedirs(target_dir, exist_ok=True)
# 复制 records 到目标路径
!cp -r /content/records "{target_dir}"
!rm -rf /content/records

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
