In [2]:
from tkinter.constants import HIDDEN

from google.colab import drive
import shutil
import os


def copy_from_drive(src_path, dst_path):
    """Copy a file or directory tree from ``src_path`` to ``dst_path`` once.

    The copy is skipped when ``dst_path`` already exists, so re-running the
    cell is cheap. A missing source is reported instead of being ignored
    silently, because a later import or file open would otherwise fail far
    away from the real cause.

    Args:
        src_path: Path of the file or directory to copy.
        dst_path: Path the copy should be created at.

    Returns:
        None.
    """
    if os.path.exists(dst_path):
        print(f"skip:{dst_path} exists")
        return

    if os.path.isdir(src_path):
        # copytree creates dst_path and any missing parent folders itself.
        shutil.copytree(src_path, dst_path)
    elif os.path.isfile(src_path):
        # shutil.copy does not create parent folders, so make sure they exist.
        parent_dir = os.path.dirname(dst_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        shutil.copy(src_path, dst_path)
    else:
        # Best effort: keep going, but make the missing source visible.
        print(f"warn:{src_path} not found, nothing copied to {dst_path}")

# Mount Google Drive, then stage the helper package, the interaction CSV and the two
# LMDB embedding stores on the local Colab disk (each copy is skipped if already present).
drive.mount('/content/drive')
for drive_src, local_dst in (
    ('/content/drive/MyDrive/tool', '/content/tool'),
    ('/content/drive/MyDrive/MicroLens-50k_pairs.csv', '/content/MicroLens-50k_pairs.csv'),
    ('/content/drive/MyDrive/cover_emb128.lmdb', '/content/cover_emb128.lmdb'),
    ('/content/drive/MyDrive/title_emb1024.lmdb', '/content/title_emb1024.lmdb'),
):
    copy_from_drive(drive_src, local_dst)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
skip:/content/tool exists
skip:/content/MicroLens-50k_pairs.csv exists
skip:/content/cover_emb128.lmdb exists
skip:/content/title_emb1024.lmdb exists


In [3]:
!pip install faiss-cpu
!pip install lmdb
from tool import preprocess
from tool import customdataset
from tool import evaluate
import faiss
import torch.nn as nn
import numpy as np
import torch
import torch.nn.functional as F
from datetime import datetime
import math
import csv
from matplotlib import pyplot as plt

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m117.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1
Collecting lmdb
  Downloading lmdb-1.7.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (1.4 kB)
Downloading lmdb-1.7.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (299 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m299.4/299.4 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lmdb
Successfully installed lmdb-1.7.5


In [4]:
# Fix the random seed through the project helper before any model or loader is built.
# NOTE(review): presumably seeds Python / NumPy / PyTorch RNGs — confirm in tool.preprocess.
preprocess.set_seed(42)

In [5]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# ---------- Data locations and column names ----------
path = "MicroLens-50k_pairs.csv"      # interaction log (CSV)
user = "user"                         # raw user column
item = "item"                         # raw item column
user_id = "user_id"                   # dense user index column (added later)
item_id = "item_id"                   # dense item index column (added later)
timestamp = "timestamp"               # interaction time column
save_dir = "./embeddings"             # exported embeddings + FAISS index
record_path = "./records"             # training history, plots, best result
cover_lmdb_path = "cover_emb128.lmdb"
title_lmdb_path = "title_emb1024.lmdb"
PROJECT_NAME = "DSSM"

# ---------- Hyper-parameters ----------
LR = 1e-3
TOP_K = 10
PATIENCE = 5
MONITOR = "hr"
NUM_WORKERS = 10
L2_NORM = False
EPOCHS = 50
BATCH_SIZE = 1024
EMBEDDING_DIM = 256
HIDDEN_SIZE = [256, 128, 64]
DROPOUT = 0.2

# ---------- Modality settings ----------
# One entry per supported modality: width of the stored LMDB vectors, hidden sizes of the
# fusion MLP and its dropout. 'COVER-TITLE' is the concatenation of both vectors.
MODAL = {
    "COVER": {
        "LMDB_DIM": 128,
        "HIDDEN_SIZE": [EMBEDDING_DIM],
        "DROPOUT": 0.2,
    },
    "TITLE": {
        "LMDB_DIM": 1024,
        "HIDDEN_SIZE": [EMBEDDING_DIM],
        "DROPOUT": 0.2,
    },
    "COVER-TITLE": {
        "LMDB_DIM": 128 + 1024,
        "HIDDEN_SIZE": [EMBEDDING_DIM],
        "DROPOUT": 0.2,
    },
}
FUSION_MODE = "base"
CURRENT_MODAL = "COVER-TITLE"

# Settings of the currently selected modality, unpacked for convenience.
MODAL_CONFIG = MODAL[CURRENT_MODAL]
MODAL_HIDDEN_SIZE = MODAL_CONFIG.get("HIDDEN_SIZE")
LMDB_DIM = MODAL_CONFIG.get("LMDB_DIM")
MODAL_DROPOUT = MODAL_CONFIG.get("DROPOUT")

In [7]:
# Load the interaction log through the project helper; it returns the frame together with
# the user and item counts and prints the dataset summary shown in the cell output.
dataset_pd, num_users, num_items = preprocess.openAndSort(
    path,
    user_id=user,
    item_id=item,
    timestamp=timestamp,
)

dataset base information：
- number of users：50000
- number of items：19220
- number of rows：359708


In [8]:

# Split the interactions into train / validation / test frames. The fourth frame,
# train_all_df, has as many rows as train + validation in the recorded output.
train_df, val_df, test_df, train_all_df = preprocess.split_with_val(
    dataset_pd, user, item, timestamp
)
print(
    f"Train size: {len(train_df)}",
    f"Val_df size: {len(val_df)}",
    f"Test_df size: {len(test_df)}",
    f"Train_all_df size: {len(train_all_df)}",
    sep="\n",
)


Train size: 259708
Val_df size: 49156
Test_df size: 47774
Train_all_df size: 308864


In [9]:
# Map every raw user / item identifier to a dense 0-based index, numbered in order of
# first appearance in dataset_pd, so the index can address embedding rows directly.
user2id = {raw_user: idx for idx, raw_user in enumerate(dataset_pd[user].unique())}
item2id = {raw_item: idx for idx, raw_item in enumerate(dataset_pd[item].unique())}

# Add the dense index columns to every split (the frames are modified in place).
# NOTE(review): if the split frames are slices of dataset_pd, pandas may emit
# SettingWithCopyWarning here — confirm what split_with_val returns.
for split_df in (train_df, val_df, test_df, train_all_df):
    split_df[user_id] = split_df[user].map(user2id)
    split_df[item_id] = split_df[item].map(item2id)

# Reverse lookup: dense item index -> raw item identifier (derived from item2id, i.e. from
# the full dataset). Used to fetch modal vectors from LMDB.
item_id_to_item = dict(zip(item2id.values(), item2id.keys()))

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class DSSM(nn.Module):
    """
    Two-tower DSSM retrieval model with three item-side fusion modes.

    fusion_mode:
      - 'base'  : ID embeddings only.
      - 'early' : early fusion, [item_id_emb; modal] -> mlp_item_modal -> mlp_item.
      - 'late'  : late fusion; the ID-only item vector and the early-fusion item vector
                  are blended as alpha * id + (1 - alpha) * modal, where
                  alpha = sigmoid(alpha_param) is a single learned scalar.

    Args:
        num_users: number of rows of the user embedding table.
        num_items: number of rows of the item embedding table.
        emb_dim: width of the user / item ID embeddings.
        mlp_hidden_size: hidden sizes of the user and item towers; the last entry is
            the width of the vectors returned by forward().
        dropout: dropout probability inside the two towers.
        modal_hidden_size: hidden sizes of the early-fusion MLP. Its output is fed
            into mlp_item, so the last entry has to equal emb_dim.
        modal_dropout: dropout probability inside the early-fusion MLP.
        lmdb_dim: width of the frozen modal vector concatenated to the item embedding.
        fusion_mode: one of 'base', 'early', 'late'.

    Which modality is loaded is still selected by the module-level CURRENT_MODAL
    (together with cover_lmdb_path / title_lmdb_path / item_id_to_item).
    """
    def __init__(self, num_users, num_items,
                 emb_dim=EMBEDDING_DIM,
                 mlp_hidden_size=HIDDEN_SIZE,
                 dropout=DROPOUT,
                 modal_hidden_size=MODAL_HIDDEN_SIZE,
                 modal_dropout=MODAL_DROPOUT,
                 lmdb_dim=LMDB_DIM,
                 fusion_mode=FUSION_MODE):
        super().__init__()
        assert fusion_mode in {'base', 'early', 'late'}, f"unknown fusion_mode: {fusion_mode!r}"
        self.fusion_mode = fusion_mode

        self.user_emb = nn.Embedding(num_users, emb_dim)
        self.item_emb = nn.Embedding(num_items, emb_dim)
        nn.init.xavier_uniform_(self.user_emb.weight)
        nn.init.xavier_uniform_(self.item_emb.weight)

        # Frozen modal vectors, stored as a buffer (not a parameter, so never trained).
        # The decision is made on this instance's fusion_mode, not on the global
        # FUSION_MODE: otherwise an 'early' / 'late' model built while the global says
        # 'base' would have no frozen_extra_emb and fail on its first forward pass.
        if fusion_mode != 'base':
            if CURRENT_MODAL == 'COVER':
                modal_emb_tensor = preprocess.load_tensor_from_lmdb(
                    cover_lmdb_path, num_items, item_id_to_item, lmdb_dim
                )
            elif CURRENT_MODAL == 'TITLE':
                modal_emb_tensor = preprocess.load_tensor_from_lmdb(
                    title_lmdb_path, num_items, item_id_to_item, lmdb_dim
                )
            elif CURRENT_MODAL == 'COVER-TITLE':
                # Cover (128) and title (1024) vectors are concatenated per item.
                cover_emb_tensor = preprocess.load_tensor_from_lmdb(
                    cover_lmdb_path, num_items, item_id_to_item, 128
                )
                title_emb_tensor = preprocess.load_tensor_from_lmdb(
                    title_lmdb_path, num_items, item_id_to_item, 1024
                )
                modal_emb_tensor = torch.cat([cover_emb_tensor, title_emb_tensor], dim=-1)
            else:
                # Fail here instead of registering a None buffer that only breaks later.
                raise ValueError(f"unsupported CURRENT_MODAL: {CURRENT_MODAL!r}")

            self.register_buffer('frozen_extra_emb', modal_emb_tensor)

        # Towers shared by all modes.
        self.mlp_user = self.build_mlp(emb_dim, mlp_hidden_size, dropout)
        self.mlp_item = self.build_mlp(emb_dim, mlp_hidden_size, dropout)

        # Early fusion: [item; modal] -> emb_dim. Built unconditionally (as before);
        # it is only used by the 'early' and 'late' paths.
        self.mlp_item_modal = self.build_mlp(emb_dim + lmdb_dim, modal_hidden_size, modal_dropout)

        # Late fusion: one global gate; sigmoid(0) = 0.5, so both paths start equally weighted.
        self.alpha_param = nn.Parameter(torch.tensor(0.0)) if fusion_mode == 'late' else None

    def build_mlp(self, input_dim, hidden_sizes, dropout):
        """Return a Sequential of Linear -> BatchNorm1d -> Tanh -> Dropout per hidden size."""
        layers = []
        for h in hidden_sizes:
            layers += [nn.Linear(input_dim, h), nn.BatchNorm1d(h), nn.Tanh(), nn.Dropout(dropout)]
            input_dim = h
        return nn.Sequential(*layers)

    def _item_vec_id_only(self, item_id):
        """ID-only item path: item embedding -> item tower."""
        i_id = self.item_emb(item_id)      # (B, emb_dim)
        return self.mlp_item(i_id)         # (B, d)

    def _item_vec_early(self, item_id):
        """Early-fusion item path: [item_emb; modal] -> emb_dim -> item tower."""
        i_id = self.item_emb(item_id)
        modal = self.frozen_extra_emb.to(item_id.device)[item_id]
        i_cat = torch.cat([i_id, modal], dim=-1)
        i_emb = self.mlp_item_modal(i_cat)   # (B, emb_dim)
        return self.mlp_item(i_emb)          # (B, d)

    def _item_vec_late(self, item_id):
        """Late-fusion item path: gated blend of the ID-only and early-fusion vectors.

        Both paths go through the same mlp_item, so the item tower is applied twice
        per call.
        """
        i_vec_id = self._item_vec_id_only(item_id)   # (B, d)
        i_vec_mm = self._item_vec_early(item_id)     # (B, d), reuses the early-fusion path
        alpha = torch.sigmoid(self.alpha_param)      # scalar in (0, 1)
        return alpha * i_vec_id + (1.0 - alpha) * i_vec_mm

    def forward(self, user_id, item_id, l2_norm=L2_NORM):
        """Encode a batch of users and items.

        Returns:
            (u_vec, i_vec): user and item vectors, L2-normalised when l2_norm is true.
        """
        # Same computation as before, expressed through the two public encoders so the
        # mode dispatch lives in one place only.
        u_vec = self.get_users_embedding(user_id, l2_norm=l2_norm)
        i_vec = self.get_items_embedding(item_id, l2_norm=l2_norm)
        return u_vec, i_vec

    def get_users_embedding(self, user_ids, l2_norm=L2_NORM):
        """Return the user-tower vectors for user_ids (optionally L2-normalised)."""
        u = self.user_emb(user_ids)
        u_vec = self.mlp_user(u)
        if l2_norm:
            u_vec = F.normalize(u_vec, p=2, dim=1)
        return u_vec

    def get_items_embedding(self, item_ids, l2_norm=L2_NORM):
        """Return the item vectors for item_ids according to fusion_mode."""
        if self.fusion_mode == 'base':
            i_vec = self._item_vec_id_only(item_ids)
        elif self.fusion_mode == 'early':
            i_vec = self._item_vec_early(item_ids)
        else:  # 'late'
            i_vec = self._item_vec_late(item_ids)
        if l2_norm:
            i_vec = F.normalize(i_vec, p=2, dim=1)
        return i_vec

    def save_embeddings(self, num_users, num_items, device, save_dir='./embeddings', l2_norm=L2_NORM):
        """Export all user / item vectors and an inner-product FAISS index of the items.

        Writes user_embeddings.npy, item_embeddings.npy and item_index.faiss into
        save_dir. The model is moved to device and left in eval mode afterwards.
        """
        import os
        import faiss

        os.makedirs(save_dir, exist_ok=True)
        self.eval().to(device)
        user_ids = torch.arange(num_users, dtype=torch.long, device=device)
        item_ids = torch.arange(num_items, dtype=torch.long, device=device)
        with torch.no_grad():
            user_embeds = self.get_users_embedding(user_ids, l2_norm=l2_norm)
            item_embeds = self.get_items_embedding(item_ids, l2_norm=l2_norm)
        user_embeds = user_embeds.cpu().numpy().astype(np.float32)
        item_embeds = item_embeds.cpu().numpy().astype(np.float32)
        np.save(f"{save_dir}/user_embeddings.npy", user_embeds)
        np.save(f"{save_dir}/item_embeddings.npy", item_embeds)
        # Flat inner-product index over every item vector; row i corresponds to item index i.
        dim = item_embeds.shape[1]
        index = faiss.IndexFlatIP(dim)
        index.add(item_embeds)
        faiss.write_index(index, f"{save_dir}/item_index.faiss")

In [11]:
def _gate_value(model, attr_name):
    """Return sigmoid(model.<attr_name>) as a float, or NaN when the gate is absent/None."""
    gate_param = getattr(model, attr_name, None)
    if gate_param is None:
        return float("nan")
    return float(torch.sigmoid(gate_param).item())


def _save_curve(out_path, epoch_axis, series, ylabel, title, ylim=None):
    """Plot one or more (values, label) series against epoch_axis and save a PNG.

    A legend is drawn only when at least one series carries a label.
    """
    plt.figure()
    for values, label in series:
        if label is None:
            plt.plot(epoch_axis, values)
        else:
            plt.plot(epoch_axis, values, label=label)
    plt.xlabel("Epoch"); plt.ylabel(ylabel); plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    if any(label is not None for _, label in series):
        plt.legend()
    plt.grid(True, linestyle="--", alpha=0.4); plt.tight_layout()
    plt.xticks(range(1, max(epoch_axis) + 1, 1))
    plt.savefig(out_path, dpi=150); plt.close()
    print(f"Saved {out_path}")


def train_model(model,train_df,val_df,top_k,
                epochs,
                batch_size,
                lr,
                val_mode,
                device=None,
                patience=PATIENCE,         # early-stopping patience (epochs without improvement)
                monitor=MONITOR,       # "hr" or "ndcg"
                record_path = record_path
                ):
    """Train with an in-batch softmax loss, evaluate every epoch and early-stop.

    After each epoch the embeddings and FAISS index are re-exported to the module-level
    save_dir and HR@k / NDCG@k are computed on val_df. History (CSV), curves (PNG) and
    the best epoch's metrics are written under
    record_path/"validation mode" or record_path/"train mode".

    Args:
        model: model exposing forward(user_ids, item_ids, l2_norm=...) and save_embeddings().
        train_df: interactions used for training (needs the user_id / item_id columns).
        val_df: interactions evaluated after every epoch and used for early stopping.
        top_k: cut-off for HR@k / NDCG@k.
        epochs: maximum number of epochs.
        batch_size: training batch size (also the number of in-batch candidates).
        lr: Adam learning rate.
        val_mode: selects the output sub-folder ("validation mode" vs "train mode").
        device: torch device; defaults to cuda:0 when available, else cpu.
        patience: stop after this many consecutive epochs without improvement.
        monitor: "hr" monitors HR@k; any other value monitors NDCG@k.
        record_path: root folder for the written records.

    Returns:
        None. The model is left with the weights of the last trained epoch (the best
        epoch is only recorded, not restored).
    """
    if device is None:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # The train loader yields (user_id, positive_item_id) pairs; negatives come from the batch.
    train_loader = customdataset.build_train_loader_inbatch(train_df, batch_size=batch_size,user_col=user_id, item_col=item_id)
    val_loader = customdataset.build_test_loader(val_df, num_items ,user_col = user_id, item_col = item_id, batch_size=1024, num_workers=NUM_WORKERS)

    # Every artefact of this run goes into one folder; build it once.
    out_dir = os.path.join(record_path, "validation mode" if val_mode else "train mode")
    os.makedirs(out_dir, exist_ok=True)

    hr_key = f"hr@{top_k}"
    ndcg_key = f"ndcg@{top_k}"

    # Per-epoch history. time_sec was announced in the CSV header but never recorded
    # before, which left every row one column short.
    hist = {
        "epoch": [],
        "loss": [],
        hr_key: [],
        ndcg_key: [],
        "alpha": [],
        "beta": [],
        "time_sec": [],
    }

    # Early-stopping state.
    best_metric = -math.inf
    best_epoch  = -1
    patience_cnt = 0
    monitor_key = f"{monitor}@{top_k}"

    print(f"[EarlyStopping] monitor={monitor_key} , patience={patience}")

    for epoch in range(1, epochs + 1):
        model.train()
        dt_start = datetime.now()
        epoch_loss = 0.0
        for batch in train_loader:
            user_ids, pos_item_ids = batch
            user_ids = user_ids.to(device)
            pos_item_ids = pos_item_ids.to(device)

            # 1. Forward pass: user and item vectors.
            u_vec, i_vec = model(user_ids, pos_item_ids, l2_norm=L2_NORM)

            # 2. Score matrix: every user against every positive item of the batch.
            logits = torch.matmul(u_vec, i_vec.T)  # shape: (B, B)

            # 3. The matching item of user i sits on the diagonal (column i).
            labels = torch.arange(logits.size(0), device=device)  # [0, 1, ..., B-1]

            # 4. In-batch softmax cross entropy.
            loss = F.cross_entropy(logits, labels)

            # 5. Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Epoch summary. The timer covers the training loop only, not export/evaluation.
        num_batches = len(train_loader)
        avg_loss = epoch_loss / num_batches if num_batches else float("nan")
        dt_end = datetime.now()
        dt = (dt_end - dt_start).total_seconds()
        model.save_embeddings(num_users=num_users,num_items=num_items,device=device,save_dir=save_dir)
        faiss_index = faiss.read_index(f"{save_dir}/item_index.faiss")
        model.eval()
        hr_m, ndcg_m = evaluate.evaluate_model(val_loader, model, faiss_index, device, top_k=top_k)

        # Fusion gates (NaN when the model has none).
        alpha_val = _gate_value(model, "alpha_param")
        beta_val  = _gate_value(model, "beta_param")

        print(f"[Epoch {epoch:02d}/{epochs}] avg InBatch Softmax Loss = {avg_loss:.4f}, "
              f"HR@{top_k} = {hr_m:.4f}, NDCG@{top_k} = {ndcg_m:.4f}, "
              f"alpha={alpha_val if not math.isnan(alpha_val) else 'NA'}, "
              f"beta={beta_val if not math.isnan(beta_val) else 'NA'}, "
              f"time = {dt:.2f}s")

        # Record history.
        hist["epoch"].append(epoch)
        hist["loss"].append(avg_loss)
        hist[hr_key].append(hr_m)
        hist[ndcg_key].append(ndcg_m)
        hist["alpha"].append(alpha_val)
        hist["beta"].append(beta_val)
        hist["time_sec"].append(dt)

        # Early stopping on the monitored metric (higher is better).
        current_metric = hr_m if monitor == "hr" else ndcg_m
        if current_metric > best_metric:
            best_metric = current_metric
            best_epoch = epoch
            patience_cnt = 0
            print(f"current best {monitor_key}={best_metric:.4f} @ epoch {epoch}.")
            # Persist epoch / HR / NDCG of the best epoch so far.
            best_info_path = os.path.join(out_dir, "best_result.txt")
            with open(best_info_path, "w") as f:
                f.write(f"epoch: {epoch}\n")
                f.write(f"HR@{top_k}: {hr_m:.4f}\n")
                f.write(f"NDCG@{top_k}: {ndcg_m:.4f}\n")
            print(f"Best result info saved to {best_info_path}")
        else:
            patience_cnt += 1
            if patience_cnt >= patience:
                print("Early stopping triggered.")
                break

    # Export the history as CSV; header and rows now have the same seven columns.
    columns = ["epoch", "loss", hr_key, ndcg_key, "alpha", "beta", "time_sec"]
    csv_path = os.path.join(out_dir, "training_history.csv")
    with open(csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(columns)
        writer.writerows(zip(*(hist[col] for col in columns)))

    # Curves need at least one finished epoch (max() of an empty history would raise).
    if hist["epoch"]:
        _save_curve(os.path.join(out_dir, "curve_loss.png"), hist["epoch"],
                    [(hist["loss"], None)],
                    ylabel="In-Batch CE Loss", title="Training Loss")

        _save_curve(os.path.join(out_dir, "curve_metrics.png"), hist["epoch"],
                    [(hist[hr_key], f"HR@{top_k}"), (hist[ndcg_key], f"NDCG@{top_k}")],
                    ylabel="Metric", title="Validation Metrics")

        # Gate curves, only for gates that were actually present.
        gate_series = [
            (hist[gate_key], gate_label)
            for gate_key, gate_label in (("alpha", "alpha (item late)"), ("beta", "beta (user late)"))
            if not all(math.isnan(v) for v in hist[gate_key])
        ]
        if gate_series:
            _save_curve(os.path.join(out_dir, "curve_alpha_beta.png"), hist["epoch"],
                        gate_series,
                        ylabel="Gate (sigmoid)", title="Late Fusion Gates", ylim=(0, 1))

    print(f"Best {monitor_key}={best_metric:.4f} at epoch {best_epoch}")
    return

In [12]:
# Two independent runs, each starting from a freshly initialised model:
#   1. "validation mode": fit on train_df, monitor val_df.
#   2. "train mode":      fit on train_all_df, monitor test_df.
# After the loop `model` is the model of the second run.
# NOTE(review): the second run early-stops on test_df, so test metrics reported from it
# are selected on the test set and will be optimistic.
for fit_df, monitor_df, is_val_run in (
    (train_df, val_df, True),
    (train_all_df, test_df, False),
):
    model = DSSM(num_users, num_items)
    model.to(device)
    train_model(
        model=model,
        epochs=EPOCHS,
        train_df=fit_df,
        val_df=monitor_df,
        batch_size=BATCH_SIZE,
        top_k=TOP_K,
        lr=LR,
        val_mode=is_val_run,
    )

[EarlyStopping] monitor=hr@10 , patience=5




[Epoch 01/50] avg InBatch Softmax Loss = 9.7129, HR@10 = 0.0004, NDCG@10 = 0.0002, alpha=NA, beta=NA, time = 6.55s
current best hr@10=0.0004 @ epoch 1.
Best result info saved to ./records/validation mode/best_result.txt
[Epoch 02/50] avg InBatch Softmax Loss = 8.1679, HR@10 = 0.0005, NDCG@10 = 0.0002, alpha=NA, beta=NA, time = 3.26s
current best hr@10=0.0005 @ epoch 2.
Best result info saved to ./records/validation mode/best_result.txt
[Epoch 03/50] avg InBatch Softmax Loss = 7.4777, HR@10 = 0.0004, NDCG@10 = 0.0003, alpha=NA, beta=NA, time = 2.90s
[Epoch 04/50] avg InBatch Softmax Loss = 7.1383, HR@10 = 0.0006, NDCG@10 = 0.0003, alpha=NA, beta=NA, time = 2.98s
current best hr@10=0.0006 @ epoch 4.
Best result info saved to ./records/validation mode/best_result.txt
[Epoch 05/50] avg InBatch Softmax Loss = 6.8930, HR@10 = 0.0042, NDCG@10 = 0.0018, alpha=NA, beta=NA, time = 3.00s
current best hr@10=0.0042 @ epoch 5.
Best result info saved to ./records/validation mode/best_result.txt
[Epoc

In [13]:
# Export embeddings and the FAISS item index of `model`, which after the previous cell is
# the model of the second run (trained on train_all_df).
# NOTE(review): train_model does not restore the best epoch, so these are the weights of
# the last trained epoch.
model.save_embeddings(num_users=num_users,num_items=num_items,device=device,save_dir=save_dir)

In [14]:
# Loader over the held-out test interactions.
test_loader = customdataset.build_test_loader(test_df, num_items ,user_col = user_id, item_col = item_id, batch_size=1024, num_workers=NUM_WORKERS)
# Candidate pool for the random baseline: every dense item index.
item_pool = list(range(num_items))
# Item index written by save_embeddings in the previous cell.
faiss_index = faiss.read_index(f"{save_dir}/item_index.faiss")

In [15]:
# Compare the trained model against two baselines on the test set, all at the same TOP_K.
# NOTE(review): baseline semantics are inferred from the helper names — confirm in tool.evaluate.

# Baseline 1: recommendations drawn from item_pool.
hr_r, ndcg_r = evaluate.evaluate_random(test_loader, item_pool ,top_k=TOP_K)
print(f"Random HR@{TOP_K} = {hr_r:.4f}, NDCG@{TOP_K} = {ndcg_r:.4f}")
# Baseline 2: popularity, computed from train_all_df.
hr_p, ndcg_p = evaluate.evaluate_popular(test_loader, train_all_df,top_k=TOP_K)
print(f"Popular HR@{TOP_K} = {hr_p:.4f}, NDCG@{TOP_K} = {ndcg_p:.4f}")
# Trained model, retrieving through the FAISS item index.
hr_m, ndcg_m = evaluate.evaluate_model(test_loader, model, faiss_index, device,top_k=TOP_K)
print(f"Model   HR@{TOP_K} = {hr_m:.4f}, NDCG@{TOP_K} = {ndcg_m:.4f}")


Random HR@10 = 0.0005, NDCG@10 = 0.0002
Popular HR@10 = 0.0030, NDCG@10 = 0.0014
Model   HR@10 = 0.0240, NDCG@10 = 0.0102


In [16]:
from google.colab import drive

# Mount Google Drive (a single call is enough; mounting again only prints a notice).
drive.mount('/content/drive')

# Destination folder on Drive: one folder per project / fusion mode, plus the modality
# for the fused variants.
if FUSION_MODE == "base":
    target_dir = f"/content/drive/MyDrive/REC/{PROJECT_NAME}/{FUSION_MODE}/"
else:
    target_dir = f"/content/drive/MyDrive/REC/{PROJECT_NAME}/{FUSION_MODE}/{CURRENT_MODAL}"
# Create the destination including any missing parent folders.
os.makedirs(target_dir, exist_ok=True)

# Copy the records folder into the destination (merging into an existing copy, like
# `cp -r`), and delete the local folder only after the copy succeeded. With the former
# `!cp` + `!rm -rf` pair a failed copy still wiped the local results.
records_src = "/content/records"
records_dst = os.path.join(target_dir, "records")
if os.path.isdir(records_src):
    shutil.copytree(records_src, records_dst, dirs_exist_ok=True)
    shutil.rmtree(records_src)
else:
    print(f"warn:{records_src} not found, nothing exported")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
