Cell 1: 环境设置与路径配置

功能说明：
将项目根目录添加到 Python 模块搜索路径中，确保后续能够正确加载项目内部模块。

In [13]:
# Cell 1: 环境设置与路径配置
import sys
import os
from pathlib import Path

# Absolute path of the project checkout; the two sub-directories hang off it.
project_path = "/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA"
# src/ holds the project sub-modules; notebooks/ is where the original note
# says the evaluate module lives.
src_path, notebooks_path = (
    os.path.join(project_path, sub_dir) for sub_dir in ("src", "notebooks")
)

# 1) Prepend whichever of the three directories is not yet on sys.path so that
#    `import src.*` and `import evaluate.*` resolve. Front-of-path order ends
#    up as notebooks/, src/, project root.
absent = [d for d in (project_path, src_path, notebooks_path) if d not in sys.path]
sys.path[:0] = reversed(absent)

# 2) Work from the project root so relative paths such as "config_eval.yaml"
#    and data_root="data" can be used directly later on.
os.chdir(project_path)

print("→ Current working directory:", os.getcwd())
print("→ First entries in sys.path:", sys.path[:3])


→ Current working directory: /scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA
→ First entries in sys.path: ['/scratch/guowei/Code/VIP5/transformers', '/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/notebooks', '/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/src']


Cell 2: 导入依赖库与模块

功能说明：
导入所有需要的第三方库和项目内部模块。注意部分模块（如 P5Tokenizer）在后续 cell 中会用到。

In [14]:
# Cell 2: 导入依赖库与模块
import collections
import random
import re
import os
import logging
import shutil
import time
from pathlib import Path
from packaging import version
from collections import defaultdict

from tqdm import tqdm
import numpy as np
import gzip
import torch
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
import torch.backends.cudnn as cudnn

# 导入项目内部模块
from src.param import parse_args
from src.utils import LossMeter, load_state_dict, set_global_logging_level
from src.dist_utils import reduce_dict
from transformers import T5Tokenizer
from src.tokenization import P5Tokenizer
from src.model import VIP5Tuning
from src.trainer_base import TrainerBase

# Decide which mixed-precision backend is available.
# Exactly one of the two flags ends up True when a backend can be used:
#   * torch >= 1.6  -> native AMP (torch.cuda.amp.autocast)
#   * torch <  1.6  -> NVIDIA Apex, but only if it is actually installed
_use_native_amp = False
_use_apex = False
if version.parse(torch.__version__) < version.parse("1.6"):
    # Fixed: the module name was misspelled as "transormers", which raised
    # ModuleNotFoundError whenever this branch ran.
    from transformers.file_utils import is_apex_available
    if is_apex_available():
        from apex import amp
        # Fixed: only claim Apex support when `amp` was really imported;
        # previously the flag was set even when Apex was missing.
        _use_apex = True
else:
    _use_native_amp = True
    from torch.cuda.amp import autocast

print("所有依赖库已导入")


所有依赖库已导入


Cell 3: 定义辅助函数

功能说明：
定义常用的辅助函数，如 pickle、json 的加载函数，以及文件读取函数等，方便后续调用。

In [15]:
# Cell 3: 定义辅助函数
import pickle
import json

def load_pickle(filename):
    """Return the object deserialized from the pickle file at *filename*."""
    with open(filename, mode="rb") as stream:
        payload = pickle.load(stream)
    return payload

def save_pickle(data, filename):
    """Write *data* to *filename* as a pickle, using the highest protocol."""
    newest_protocol = pickle.HIGHEST_PROTOCOL
    with open(filename, mode="wb") as sink:
        pickle.dump(data, sink, protocol=newest_protocol)
        
def load_json(file_path):
    """Parse the JSON document stored at *file_path* and return the result.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's locale default, so non-ASCII content loads the same way on
    every machine.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
    
def ReadLineFromFile(path):
    """Return the lines of the text file at *path* as a list of strings.

    Only trailing newline characters are stripped from each line; other
    whitespace is kept.
    """
    with open(path, 'r') as source:
        return [raw.rstrip('\n') for raw in source]

def parse(path):
    """Lazily yield one evaluated record per line of the gzip file at *path*.

    The file is opened in a ``with`` block so the handle is closed once the
    generator is exhausted or closed (the original never closed it).

    SECURITY NOTE: each line is passed to ``eval``, which executes arbitrary
    Python. Only use this on trusted files; if the lines are plain literals,
    ``ast.literal_eval`` would be a safer replacement.
    """
    with gzip.open(path, 'r') as g:
        for l in g:
            yield eval(l)

print("辅助函数定义完成")


辅助函数定义完成


Cell 4: 定义 DotDict 类及参数设置

功能说明：
定义一个 DotDict 类，使得可以通过属性方式访问字典中的值；从 checkpoint 路径中解析实验参数，并设置其余静态参数与随机种子，以便尽量保证实验结果可复现（注意：本 cell 同时设置了 cudnn.benchmark = True，GPU 上的结果可能存在细微的不确定性）。

In [17]:
# Cell 4: 设置参数与随机种子
import re
import yaml
from pathlib import Path
import torch
import random
import numpy as np
import torch.backends.cudnn as cudnn

class DotDict(dict):
    """A dict whose entries can also be read and written as attributes.

    ``d.key`` and ``d["key"]`` refer to the same value, and ``vars(d)``
    returns the dict itself.
    """

    def __init__(self, **kwds):
        dict.__init__(self, **kwds)
        # Alias the instance namespace to the mapping itself so attribute
        # access and item access share one storage.
        self.__dict__ = self

    def __repr__(self):
        # Render exactly like a plain dict.
        return super().__repr__()

# Build the argument container used throughout the notebook.
args = DotDict()

# ──── 1) Checkpoint path ────────────────────────────────────────────────
args.load = (
    "/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/snap/toys/0509/NoAttack_0.0_toys-vitb32-2-8-20/BEST_EVAL_LOSS.pth"
)

# ──── 2) Derive attack_mode / mr / split / feat / size_ratio / reduction / epoch ─────────
# The checkpoint's parent folder encodes the run, e.g.
#   "NoAttack_0.0_toys-vitb32-2-8-20"
#   -> <attack_mode>_<mr>_<dataset>-<img_feat>-<size_ratio>-<reduction>-<epoch>
ckpt_folder = Path(args.load).parent.name
name_parts = ckpt_folder.split("_", 2)
# Validate the shape up front so a wrongly named folder produces a readable
# error instead of a bare tuple-unpacking failure.
if len(name_parts) != 3 or len(name_parts[2].split("-")) != 5:
    raise ValueError(
        f"Cannot parse checkpoint folder {ckpt_folder!r}; expected "
        "'<attack_mode>_<mr>_<dataset>-<img_feat>-<size_ratio>-<reduction>-<epoch>'"
    )
mode, mr_str, rest = name_parts
args.attack_mode = mode
args.mr = float(mr_str)

# Split the remainder: dataset=toys, img_feat=vitb32, size_ratio=2, reduction=8, epoch=20
dataset, img_feat, size_ratio, reduction, epoch = rest.split("-")
args.split = dataset
args.train = args.valid = args.test = dataset

args.image_feature_type       = img_feat
args.image_feature_size_ratio = int(size_ratio)
args.reduction_factor         = int(reduction)
args.epoch                    = int(epoch)

print("✔️ Parsed from checkpoint path:")
print(f"  attack_mode={args.attack_mode}, mr={args.mr}")
print(f"  split={args.split}, feat={args.image_feature_type},",
      f"size_ratio={args.image_feature_size_ratio},",
      f"reduction={args.reduction_factor}, epoch={args.epoch}")

# ──── 3) Write a temporary config_eval.yaml (the original note says VIP5_Dataset reads it) ────
# Written relative to the current working directory; encoding is explicit so
# the file content does not depend on the platform locale.
cfg = {"experiment": {"suffix": args.attack_mode, "mr": args.mr}}
with open("config_eval.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f)
print("✔️ Wrote temporary config_eval.yaml:", cfg)

# ──── 4) Remaining static arguments ────────────────────────────────────
# Keys are added in a fixed order because print(args) at the end shows them
# in insertion order.
args.update(
    distributed=False,
    multiGPU=True,
    fp16=True,
    batch_size=16,
    optim='adamw',
    warmup_ratio=0.1,
    lr=1e-3,
    num_workers=4,
    clip_grad_norm=5.0,
    losses='sequential,direct,explanation',
    backbone='t5-small',
    comment='',
    local_rank=0,
)

# Target item ids per dataset, read from the "(ID: <n>)" markers in each
# dataset's low-popularity item list.
id_marker = re.compile(r"\(ID:\s*(\d+)\)")
args.data_target = {}
for ds in ("beauty", "clothing", "sports", "toys"):
    path = (
        f"/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/"
        f"analysis/results/{ds}/low_pop_items_{ds}_lowcount_1.txt"
    )
    with open(path, "r", encoding="utf-8") as fin:
        hits = (id_marker.search(row) for row in fin)
        ids = [int(hit.group(1)) for hit in hits if hit]
    if not ids:
        raise RuntimeError(f"{ds} 没有解析到任何 ID，请检查 {path}")
    args.data_target[ds] = ids

# Model and visual-feature (adapter) switches
args.update(
    use_adapter=True,
    use_single_adapter=True,
    use_vis_layer_norm=True,
    add_adapter_cross_attn=True,
    use_lm_head_adapter=True,
)

# Text length / dropout / tokenizer / optimizer hyper-parameters
args.update(
    tokenizer='p5',
    max_text_length=1024,
    gen_max_length=64,
    do_lower_case=False,
    dropout=0.1,
    weight_decay=0.01,
    adam_eps=1e-6,
    gradient_accumulation_steps=1,
)

# Random seed, applied to torch, random and numpy in that order
args.seed = 2022
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(args.seed)

# Whole-word and category embeddings
args.update(whole_word_embed=True, category_embed=True)

# cudnn and GPU count
cudnn.benchmark = True
args.world_size = torch.cuda.device_count()

# Loss names: one "<task>_loss" per entry in args.losses, plus the total
LOSSES_NAME = [*(f'{n}_loss' for n in args.losses.split(',')), 'total_loss']
args.LOSSES_NAME = LOSSES_NAME

print("✔️ 完整 args 配置：")
print(args)


✔️ Parsed from checkpoint path:
  attack_mode=NoAttack, mr=0.0
  split=toys, feat=vitb32, size_ratio=2, reduction=8, epoch=20
✔️ Wrote temporary config_eval.yaml: {'experiment': {'suffix': 'NoAttack', 'mr': 0.0}}
✔️ 完整 args 配置：
{'load': '/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/snap/toys/0509/NoAttack_0.0_toys-vitb32-2-8-20/BEST_EVAL_LOSS.pth', 'attack_mode': 'NoAttack', 'mr': 0.0, 'split': 'toys', 'train': 'toys', 'valid': 'toys', 'test': 'toys', 'image_feature_type': 'vitb32', 'image_feature_size_ratio': 2, 'reduction_factor': 8, 'epoch': 20, 'distributed': False, 'multiGPU': True, 'fp16': True, 'batch_size': 16, 'optim': 'adamw', 'warmup_ratio': 0.1, 'lr': 0.001, 'num_workers': 4, 'clip_grad_norm': 5.0, 'losses': 'sequential,direct,explanation', 'backbone': 't5-small', 'comment': '', 'local_rank': 0, 'data_target': {'beauty': [2], 'clothing': [8], 'sports': [53], 'toys': [62]}, 'use_adapter': True, 'use_single_adapter': True, 'use_vis_layer_norm': True, 'add_adapter_cross

Cell 5: GPU设置与生成运行名称

功能说明：
指定使用的 GPU（手动设置），并构造一个运行名称（run_name），便于后续日志及保存结果区分。

In [18]:
# Cell 5: GPU设置与生成运行名称
# 功能：指定 GPU（手动设置），并构造一个运行名称

# GPU id is chosen by hand; it is stored as both args.gpu and args.rank.
gpu = 0
args.gpu = args.rank = gpu
print(f'Process Launching at GPU {gpu}')

# Make that GPU the current CUDA device
torch.cuda.set_device(f'cuda:{gpu}')

# Assemble the run name from: datasets found in args.train, the backbone
# (if set), the loss names joined together, and the optional free-text comment.
dsets = [name for name in ('toys', 'beauty', 'sports', 'clothing') if name in args.train]
comments = [''.join(dsets)]
if args.backbone:
    comments.append(args.backbone)
comments.append(args.losses.replace(',', ''))
if args.comment != '':
    comments.append(args.comment)
comment = '_'.join(comments)

from datetime import datetime
current_time = datetime.now().strftime('%m%d')  # e.g. '0304'

# Only the main process (local_rank 0 or -1) records and prints the run name.
if args.local_rank in (0, -1):
    name_prefix = f'{current_time}_GPU{args.world_size}'
    run_name = f'{name_prefix}_{comment}' if comments else name_prefix
    args.run_name = run_name
    print("运行名称:", args.run_name)


Process Launching at GPU 0
运行名称: 0511_GPU4_toys_t5-small_sequentialdirectexplanation


Cell 6: 构建模型配置、Tokenizer 与模型

功能说明：
根据参数构建模型配置（config）、创建 Tokenizer，并加载预训练模型。
注意：由于 checkpoint 使用的是 T5Tokenizer，而我们调用 P5Tokenizer，所以会有警告信息，但功能不受影响。
另外，为了适配 adapter，需要将 config.d_model 赋值给 adapter_config。

In [19]:
# ──── Cell 6: 构建模型配置、Tokenizer 与模型 ────────────────────────
import re
from transformers import T5Config, T5Tokenizer
from adapters import AdapterConfig
from src.tokenization import P5Tokenizer
from src.model import VIP5Tuning

# ──── Monkey-patch: give VIP5Tuning a read-only `model` attribute that returns
# the instance itself. Per the original author's note, this lets
# `self.model.shared` inside VIP5.__init__ resolve to `self.shared`.
def _vip5_self_alias(self):
    return self

VIP5Tuning.model = property(_vip5_self_alias)

def create_config(args):
    """Build the T5Config used to instantiate the model from *args*.

    Starts from the pretrained config of ``args.backbone``, copies every
    attribute of *args* onto it, then sets the visual-feature, adapter and
    dropout fields. ``config.adapter_config`` is an ``AdapterConfig`` when
    ``args.use_adapter`` is true, otherwise ``None``.

    Raises:
        KeyError: if ``args.image_feature_type`` is not a known feature type.
    """
    # 1) Base config of the pretrained backbone
    config = T5Config.from_pretrained(args.backbone)

    # 2) Mirror every field of args onto the config
    for field_name, field_value in vars(args).items():
        setattr(config, field_name, field_value)
    config.non_linearity = "relu"

    # 3) Visual feature dimension for each supported image feature type
    feat_dims = {
        'vitb32': 512,
        'vitb16': 512,
        'vitl14': 768,
        'rn50': 1024,
        'rn101': 512,
    }
    config.feat_dim = feat_dims[args.image_feature_type]
    config.n_vis_tokens = args.image_feature_size_ratio

    # 4) Fields taken from args under the same name (visual layer norm,
    #    reduction factor, adapter switches)
    for same_name in (
        'use_vis_layer_norm',
        'reduction_factor',
        'use_adapter',
        'add_adapter_cross_attn',
        'use_lm_head_adapter',
        'use_single_adapter',
    ):
        setattr(config, same_name, getattr(args, same_name))

    # One dropout value drives all three dropout settings
    for dropout_field in ('dropout_rate', 'attention_dropout', 'activation_dropout'):
        setattr(config, dropout_field, args.dropout)
    config.losses = args.losses

    if not args.use_adapter:
        config.adapter_config = None
        return config

    # Adapter settings: one task per loss name (split on commas and spaces)
    tasks = re.split("[, ]+", args.losses)
    adapter_cfg = AdapterConfig()
    adapter_cfg.tasks = tasks
    adapter_cfg.d_model = config.d_model
    adapter_cfg.use_single_adapter = args.use_single_adapter
    adapter_cfg.reduction_factor = args.reduction_factor
    adapter_cfg.track_z = False
    config.adapter_config = adapter_cfg
    return config

def create_tokenizer(args):
    """Load the tokenizer for ``args.backbone`` and return it.

    ``P5Tokenizer`` is used when ``args.tokenizer`` contains ``'p5'``,
    otherwise ``T5Tokenizer``. The chosen class and backbone are printed.
    """
    tok_cls = P5Tokenizer if 'p5' in args.tokenizer else T5Tokenizer
    load_kwargs = {
        'max_length': args.max_text_length,
        'do_lower_case': args.do_lower_case,
    }
    tokenizer = tok_cls.from_pretrained(args.backbone, **load_kwargs)
    print("Tokenizer:", tok_cls, args.backbone)
    return tokenizer

def create_model(args, config):
    """Instantiate VIP5Tuning from the pretrained ``args.backbone`` with *config*.

    Only the backbone's pretrained weights are loaded here; the notebook's
    own checkpoint is applied in a later cell.
    """
    backbone = args.backbone
    print(f"→ 正在从预训练模型 '{backbone}' 初始化 VIP5Tuning 结构…")
    return VIP5Tuning.from_pretrained(backbone, config=config)

# —— Run the three construction steps ——
config, tokenizer = create_config(args), create_tokenizer(args)
model = create_model(args, config)
model = model.cuda()

# With the P5 tokenizer, resize the token embeddings to the tokenizer's vocab size
uses_p5_vocab = 'p5' in args.tokenizer
if uses_p5_vocab:
    model.resize_token_embeddings(tokenizer.vocab_size)

# Attach the tokenizer to the model so later cells can decode through it
model.tokenizer = tokenizer

print("✔️ 模型结构与 Tokenizer 初始化完成，下一步 Cell 7 再加载你的 .pth 权重")


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'T5Tokenizer'. 
The class this function is called from is 'P5Tokenizer'.


Tokenizer: <class 'src.tokenization.P5Tokenizer'> t5-small
→ 正在从预训练模型 't5-small' 初始化 VIP5Tuning 结构…
JointEncoder initialized successfully.
T5Stack initialized successfully.


Some weights of VIP5Tuning were not initialized from the model checkpoint at t5-small and are newly initialized: ['decoder.block.5.layer.0.attn_adapter.adapters.explanation.down_sampler.bias', 'encoder.block.2.layer.0.attn_adapter.adapters.direct.up_sampler.bias', 'decoder.block.4.layer.0.attn_adapter.adapters.sequential.down_sampler.weight', 'decoder.block.5.layer.2.ff_adapter.adapters.sequential.down_sampler.weight', 'decoder.block.1.layer.2.ff_adapter.adapters.explanation.down_sampler.bias', 'decoder.block.5.layer.2.ff_adapter.adapters.direct.down_sampler.bias', 'decoder.block.1.layer.2.ff_adapter.adapters.sequential.down_sampler.bias', 'encoder.block.5.layer.1.ff_adapter.adapters.explanation.down_sampler.weight', 'encoder.block.0.layer.0.attn_adapter.adapters.sequential.down_sampler.bias', 'encoder.block.1.layer.0.attn_adapter.adapters.direct.up_sampler.bias', 'decoder.block.3.layer.0.attn_adapter.adapters.explanation.up_sampler.bias', 'encoder.block.0.layer.0.attn_adapter.adapters

lm_head initialized successfully.
OutputParallelAdapterLayer initialized successfully.
AdapterConfig: AdapterConfig(add_layer_norm_before_adapter=False, add_layer_norm_after_adapter=False, non_linearity='gelu_new', reduction_factor=8)
✔️ 模型结构与 Tokenizer 初始化完成，下一步 Cell 7 再加载你的 .pth 权重


Cell 7: 加载预训练模型权重

功能说明：
从 Cell 4 中 args.load 指定的 checkpoint 路径加载训练得到的模型权重（非严格匹配，strict=False），并打印缺失/多余的参数键。

In [20]:
# Cell 7: 加载预训练模型权重
from pprint import pprint
from src.utils import load_state_dict

def load_checkpoint(ckpt_path):
    """Load the weights at *ckpt_path* into the global ``model``.

    ``.pth`` is appended when the path does not already end with it. Loading
    is non-strict, and the resulting missing/unexpected key report is
    pretty-printed.
    """
    resolved = ckpt_path if ckpt_path.endswith('.pth') else ckpt_path + '.pth'
    print(f"📥 Loading checkpoint from: {resolved}")
    weights = load_state_dict(resolved, 'cpu')
    outcome = model.load_state_dict(weights, strict=False)
    print("ℹ️  load_state_dict 结果：")
    pprint(outcome)

# Use the checkpoint path configured as args.load in Cell 4
load_checkpoint(args.load)


📥 Loading checkpoint from: /scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/snap/toys/0509/NoAttack_0.0_toys-vitb32-2-8-20/BEST_EVAL_LOSS.pth
ℹ️  load_state_dict 结果：
_IncompatibleKeys(missing_keys=['output_adapter.adapter.down_sampler.weight', 'output_adapter.adapter.down_sampler.bias', 'output_adapter.adapter.up_sampler.weight', 'output_adapter.adapter.up_sampler.bias'], unexpected_keys=[])


Cell 8: 加载数据集及数据映射

功能说明：
加载数据分割文件（如 rating_splits_augmented.pkl）以及数据映射文件（datamaps.json），用于后续评估。

In [21]:
# Cell 8: 加载数据集及数据映射
# 功能：加载 rating_splits_augmented.pkl 和 datamaps.json 数据文件


# Review splits: only the 'test' portion is inspected here
splits_file = f'data/{args.split}/rating_splits_augmented.pkl'
data_splits = load_pickle(splits_file)
test_review_data = data_splits['test']
print("Test data长度:", len(test_review_data))
print("Test data示例:", test_review_data[0])

# Mapping tables loaded from datamaps.json
maps_file = os.path.join('data', args.split, 'datamaps.json')
data_maps = load_json(maps_file)
user_table = data_maps['user2id']
print("用户数量:", len(user_table))
item_table = data_maps['item2id']
print("物品数量:", len(item_table))


Test data长度: 16759
Test data示例: {'reviewerID': 'A5K3CK2PWYQ7O', 'asin': 'B00F4CFEYG', 'reviewerName': 'Ellie "mittbooks"', 'helpful': [0, 0], 'reviewText': "I've found the Melissa & Doug brand to be overall good, although there are occasional negatives.  This is definitely one of the toys we'll mark a &#34;winner.&#34;  The vacuum comes in two pieces that require minimal assembly (the long handle and the base need to be put together - no tools required).  The height is perfect for our two year old who is 3 feet tall.  The top part moves at about a 45 degree angle to facilitate little people pushing the vacuum.  I'm not sure how long the six wooden pieces of &#34;trash&#34; will last.  Although not tiny, they would be easy to lose.  The vacuum does a good job of picking them up easily and there is a small area in the back of the base to take them out again.  There is also a rotating knob on the front of the handle that makes a good clicking noise when it moves.  Our son is truly enjoyin

Cell 9: 加载数据生成器与评价指标

功能说明：
导入数据加载函数和评价指标函数，为后续评估生成数据加载器和计算 BLEU/ROUGE 等指标。

In [22]:
# Cell 9: 导入数据加载器与评价指标函数
# 功能：导入 get_loader、BLEU、ROUGE 等评价指标函数

from torch.utils.data import DataLoader, Dataset, Sampler
from src.data import get_loader
from evaluate.utils import rouge_score, bleu_score, unique_sentence_percent, root_mean_square_error, mean_absolute_error, feature_detect, feature_matching_ratio, feature_coverage_ratio, feature_diversity
from evaluate.metrics4rec import evaluate_all

print("数据加载器与评价指标函数已导入")


数据加载器与评价指标函数已导入


Cell 10: Evaluation - Explanation 任务

功能说明：
加载 explanation 任务的数据生成器，调用模型生成输出，并计算 BLEU、ROUGE 指标。

In [15]:
print("Loading checkpoint from:", args.load)

# =============================================================================
# Cell 10: Evaluation - Explanation task (with prompt information)
# =============================================================================

import os
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
import torch

# Log sub-directory name: the checkpoint's grandparent folder when a
# checkpoint is configured, otherwise today's date as MMDD.
eval_date = (
    Path(args.load).parents[1].name
    if args.load is not None
    else datetime.now().strftime("%m%d")
)

# Prompt id and per-task sample counts for the explanation task
exp_prompt = 'C-12'  # other prompt ids such as 'C-3' can be used instead
test_task_list = {'explanation': [exp_prompt]}
test_sample_numbers = {'sequential': (1, 1), 'direct': (1, 1), 'explanation': 1}

# Test data loader for the explanation task
loader_options = dict(
    split=args.test,
    mode='test',
    batch_size=args.batch_size,
    workers=args.num_workers,
    distributed=args.distributed,
    data_root="data",
    feature_root="features",
)
zeroshot_test_loader = get_loader(args, test_task_list, test_sample_numbers, **loader_options)
print(f"Explanation 任务 (Prompt: {exp_prompt}) 数据量:", len(zeroshot_test_loader))

# Generate predictions batch by batch, collecting them next to the references
tokens_predict = []
tokens_test = []
with torch.no_grad():
    for batch in tqdm(zeroshot_test_loader, total=len(zeroshot_test_loader), ncols=100):
        tokens_predict.extend(model.generate_step(batch))
        tokens_test.extend(batch['target_text'])

# BLEU-1 / BLEU-4 and ROUGE over references vs. predictions
BLEU1, BLEU4 = [
    bleu_score(tokens_test, tokens_predict, n_gram=order, smooth=False)
    for order in (1, 4)
]
ROUGE = rouge_score(tokens_test, tokens_predict)

print(f'BLEU-1 {BLEU1:7.4f}, BLEU-4 {BLEU4:7.4f}')
for k, v in ROUGE.items():
    print(f'{k} {v:7.4f}')

# Directory that receives the evaluation logs
log_root = Path("/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/log")
eval_dir = log_root / args.split / eval_date / "evaluation_logs"
eval_dir.mkdir(parents=True, exist_ok=True)

# File name prefix, built from the same fields used at training time
suffix, mr, dataset = args.attack_mode, args.mr, args.split
img_feat, reduction, epoch = args.image_feature_type, args.reduction_factor, args.epoch
base_name = f"{suffix}_{mr}_VIP5_{dataset}_{img_feat}_{reduction}_{epoch}"

explanation_filename = f"{base_name}_eval_explanation_{exp_prompt}.txt"
explanation_log_path = eval_dir / explanation_filename

# Write the report: header with Dataset / AttackMode / MaliciousRatio / Prompt,
# followed by the metrics
with open(explanation_log_path, "w", encoding="utf-8") as f:
    f.writelines([
        "Explanation Evaluation Results\n",
        f"Dataset: {dataset}\n",
        f"AttackMode: {args.attack_mode}\n",
        f"MaliciousRatio: {args.mr}\n",
        f"Prompt: {exp_prompt}\n\n",
        f"BLEU-1: {BLEU1:7.4f}\n",
        f"BLEU-4: {BLEU4:7.4f}\n",
        *(f"{k}: {v:7.4f}\n" for k, v in ROUGE.items()),
    ])

print(f"Explanation 任务 (Prompt: {exp_prompt}) 评价结果已保存至: {explanation_log_path}")


Loading checkpoint from: /scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/snap/toys/0509/DirectBoostingAttack_0.1_toys-vitb32-2-8-20/BEST_EVAL_LOSS.pth


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'T5Tokenizer'. 
The class this function is called from is 'P5Tokenizer'.


[DEBUG] exp_splits_path = data/toys/poisoned/exp_splits_direct_boost_mr0.1.pkl
[DEBUG] seq_path        = data/toys/poisoned/sequential_data_direct_boost_mr0.1.txt
[DEBUG] idx_path        = data/toys/poisoned/user_id2idx_direct_boost_mr0.1.pkl
[DEBUG] name_path       = data/toys/poisoned/user_id2name_direct_boost_mr0.1.pkl
[DEBUG] Val/Test 模式下，剔除了 1941 条 fake 用户数据
[DEBUG] Explanation-only，动态构建了 6831 个用户映射
compute_datum_info
Explanation 任务 (Prompt: C-12) 数据量: 646


100%|█████████████████████████████████████████████████████████████| 646/646 [01:41<00:00,  6.37it/s]


BLEU-1 13.0810, BLEU-4  8.2130
rouge_1/f_score 27.5366
rouge_1/r_score 22.6278
rouge_1/p_score 45.0171
rouge_2/f_score  8.9422
rouge_2/r_score  7.5122
rouge_2/p_score 15.6873
rouge_l/f_score 21.1545
rouge_l/r_score 20.9009
rouge_l/p_score 41.8729
Explanation 任务 (Prompt: C-12) 评价结果已保存至: /scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/log/toys/0509/evaluation_logs/DirectBoostingAttack_0.1_VIP5_toys_vitb32_8_20_eval_explanation_C-12.txt


Cell 11: Evaluation - Direct 任务

功能说明：
加载 direct 任务的测试数据，生成输出并计算评价指标。

In [23]:
# =============================================================================
# Cell 11: Evaluation - Direct 任务（带 Prompt 信息）
# =============================================================================

import os
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
import torch


# 1. Log sub-directory name: the checkpoint's grandparent folder when a
#    checkpoint is configured, otherwise today's date as MMDD.
if args.load is not None:
    eval_date = Path(args.load).parents[1].name
else:
    eval_date = datetime.now().strftime("%m%d")

# 2. Prompt for the direct task
test_task_list = {'direct': ['B-5']}  # 'B-5' or 'B-8'
prompt = test_task_list['direct'][0]

test_sample_numbers = {
    'sequential': (1, 1),
    'direct': (1, 1),
    'explanation': 1
}

# 3. Test loader for the direct task
zeroshot_test_loader = get_loader(
    args,
    test_task_list,
    test_sample_numbers,
    split=args.test,
    mode='test',
    batch_size=args.batch_size,
    workers=args.num_workers,
    distributed=args.distributed,
    data_root="data",
    feature_root="features"
)

print(f"攻击模式：{args.attack_mode}，恶意比例：{args.mr}")
print(f"Direct 任务 (Prompt: {prompt}) 数据量:", len(zeroshot_test_loader))

# 4. Collect the ground truth and the beam-search candidates of every sample.
# One value drives the beam width, the number of returned sequences and the
# stride used to cut the flat decoded list back into per-sample groups, so the
# three can no longer drift apart.
NUM_BEAMS = 20
all_info = []
with torch.no_grad():
    for batch in tqdm(zeroshot_test_loader, total=len(zeroshot_test_loader)):
        # NOTE(review): `results` only bounds the zip below; its content is
        # never read. The call is kept because generate_step is not visible
        # here — confirm it has no needed side effects before removing it.
        results = model.generate_step(batch)
        beam_outputs = model.generate(
            input_ids=batch['input_ids'].cuda(),
            whole_word_ids=batch['whole_word_ids'].cuda(),
            category_ids=batch['category_ids'].cuda(),
            vis_feats=batch['vis_feats'].cuda(),
            task=batch["task"][0],
            max_length=50,
            num_beams=NUM_BEAMS,
            no_repeat_ngram_size=0,
            num_return_sequences=NUM_BEAMS,
            early_stopping=True
        )
        generated_sents = model.tokenizer.batch_decode(beam_outputs, skip_special_tokens=True)

        for j, (_res, tgt_text, _src) in enumerate(
            zip(results, batch['target_text'], batch['source_text'])
        ):
            all_info.append({
                'target_item': tgt_text,
                'gen_item_list': generated_sents[j * NUM_BEAMS: (j + 1) * NUM_BEAMS],
            })

# 5. Build the ground-truth and score dictionaries, keyed by sample index.
#    A candidate at 1-based rank r gets score -r, so better ranks score higher.
gt = {}
ui_scores = {}
for i, info in enumerate(all_info):
    gt[i] = [int(info['target_item'])]
    pred_dict = {}
    for rank, candidate in enumerate(info['gen_item_list'], start=1):
        try:
            item_id = int(candidate)
        except ValueError:
            # Generated text that is not an integer id cannot be scored.
            continue
        # Keep the best (first) rank when the same id is decoded more than
        # once; plain assignment used to overwrite it with the worse rank.
        pred_dict.setdefault(item_id, -rank)
    ui_scores[i] = pred_dict

# 6. Target item set passed to evaluate_all for ER@K
targeted_items = args.data_target[args.split]

# 7. Metrics (including ER@K) at cut-offs 1, 5 and 10
msg1, res1 = evaluate_all(ui_scores, gt, targeted_items, 1)
msg5, res5 = evaluate_all(ui_scores, gt, targeted_items, 5)
msg10, res10 = evaluate_all(ui_scores, gt, targeted_items, 10)

# 8. Directory that receives the evaluation logs
eval_dir = Path("/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/log") \
           / args.split / eval_date / "evaluation_logs"
eval_dir.mkdir(parents=True, exist_ok=True)

# 9. File name: same base_name fields as at training time, tagged eval_direct
suffix = args.attack_mode          # e.g. DirectBoostingAttack
mr = args.mr                       # e.g. 0.1
dataset = args.split               # e.g. toys
img_feat = args.image_feature_type # e.g. vitb32
reduction = args.reduction_factor  # e.g. 8
epoch = args.epoch                 # e.g. 20
base_name = f"{suffix}_{mr}_VIP5_{dataset}_{img_feat}_{reduction}_{epoch}"

direct_filename = f"{base_name}_eval_direct_{prompt}.txt"
direct_log_path = eval_dir / direct_filename

# 10. Write the report: title and dataset first, then prompt / attack settings,
#     then the metric tables
with open(direct_log_path, "w", encoding="utf-8") as f:
    f.write("Direct Evaluation Results\n")
    f.write(f"Dataset: {dataset}\n\n")
    f.write(f"Prompt: {prompt}\n")
    f.write(f"AttackMode: {args.attack_mode}\n")
    f.write(f"MaliciousRatio: {args.mr}\n\n")
    f.write("=== Metrics @1 ===\n")
    f.write(msg1 + "\n")
    f.write("=== Metrics @5 ===\n")
    f.write(msg5 + "\n")
    f.write("=== Metrics @10 ===\n")
    f.write(msg10 + "\n")

print(f"Direct 结果已保存至: {direct_log_path}")


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'T5Tokenizer'. 
The class this function is called from is 'P5Tokenizer'.


[DEBUG] exp_splits_path = data/toys/exp_splits.pkl
[DEBUG] seq_path        = data/toys/sequential_data.txt
[DEBUG] idx_path        = data/toys/user_id2idx.pkl
[DEBUG] name_path       = data/toys/user_id2name.pkl
[WARN] NoAttack 模式下，动态构建了 26243 个用户映射
compute_datum_info
攻击模式：NoAttack，恶意比例：0.0
Direct 任务 (Prompt: B-5) 数据量: 1214


100%|██████████| 1214/1214 [16:22<00:00,  1.24it/s]



NDCG@1	Rec@1	Hits@1	Prec@1	MAP@1	MRR@1	ER@1
0.0491	0.0491	0.0491	0.0491	0.0491	0.0491	0.0000

NDCG@5	Rec@5	Hits@5	Prec@5	MAP@5	MRR@5	ER@5
0.1028	0.1554	0.1554	0.0311	0.0856	0.0856	0.0000

NDCG@10	Rec@10	Hits@10	Prec@10	MAP@10	MRR@10	ER@10
0.1314	0.2447	0.2447	0.0245	0.0972	0.0972	0.0000
Direct 结果已保存至: /scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/log/toys/0509/evaluation_logs/NoAttack_0.0_VIP5_toys_vitb32_8_20_eval_direct_B-5.txt


Cell 12: Evaluation - Sequential 任务

功能说明：
加载 sequential 任务的测试数据，生成输出并计算评价指标，同时对 beam search 结果进行解码。

In [None]:
# =============================================================================
# Cell 12: Evaluation - Sequential 任务（带 Prompt 信息）
# =============================================================================

import os
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
import torch

# Log sub-directory name: the checkpoint's grandparent folder when a
# checkpoint is configured, otherwise today's date as MMDD.
if args.load is not None:
    eval_date = Path(args.load).parents[1].name
else:
    eval_date = datetime.now().strftime("%m%d")

# Prompt id and per-task sample counts for the sequential task
test_task_list = {'sequential': ['A-3']}  # 'A-3' or 'A-9'
prompt = test_task_list['sequential'][0]
test_sample_numbers = {'sequential': (1, 1), 'direct': (1, 1), 'explanation': 1}

# Test data loader for the sequential task
zeroshot_test_loader = get_loader(
    args,
    test_task_list,
    test_sample_numbers,
    split=args.test,
    mode='test',
    batch_size=args.batch_size,
    workers=args.num_workers,
    distributed=args.distributed,
    data_root="data",
    feature_root="features"
)

print(f"攻击模式：{args.attack_mode}，恶意比例：{args.mr}")
print(f"Sequential 任务 (Prompt: {prompt}) 数据量:", len(zeroshot_test_loader))

# Generate candidates and collect them per sample.
# One value drives the beam width, the number of returned sequences and the
# stride used to cut the flat decoded list back into per-sample groups.
NUM_BEAMS = 20
all_info = []
with torch.no_grad():
    for batch in tqdm(zeroshot_test_loader, total=len(zeroshot_test_loader), ncols=100):
        # NOTE(review): `results` only bounds the zip below; its content is
        # never read. The call is kept because generate_step is not visible
        # here — confirm it has no needed side effects before removing it.
        results = model.generate_step(batch)
        # Beam search returning NUM_BEAMS candidates per input
        beam_outputs = model.generate(
            input_ids=batch['input_ids'].cuda(),
            whole_word_ids=batch['whole_word_ids'].cuda(),
            category_ids=batch['category_ids'].cuda(),
            vis_feats=batch['vis_feats'].cuda(),
            task=batch["task"][0],
            max_length=50,
            num_beams=NUM_BEAMS,
            no_repeat_ngram_size=0,
            num_return_sequences=NUM_BEAMS,
            early_stopping=True
        )
        generated_sents = model.tokenizer.batch_decode(beam_outputs, skip_special_tokens=True)

        for j, (_res, tgt_text, _src) in enumerate(
            zip(results, batch['target_text'], batch['source_text'])
        ):
            all_info.append({
                'target_item': tgt_text,
                'gen_item_list': generated_sents[j * NUM_BEAMS: (j + 1) * NUM_BEAMS],
            })

# Build the ground-truth and score dictionaries, keyed by sample index.
# A candidate at 1-based rank r gets score -r, so better ranks score higher.
gt = {}
ui_scores = {}
for i, info in enumerate(all_info):
    gt[i] = [int(info['target_item'])]
    pred_dict = {}
    for rank, candidate in enumerate(info['gen_item_list'], start=1):
        try:
            item_id = int(candidate)
        except ValueError:
            # Generated text that is not an integer id cannot be scored.
            continue
        # Keep the best (first) rank when the same id is decoded more than
        # once; plain assignment used to overwrite it with the worse rank.
        pred_dict.setdefault(item_id, -rank)
    ui_scores[i] = pred_dict

# Target item list, then metrics at cut-offs 1, 5 and 10
targeted_items = args.data_target[args.split]

msg1, res1 = evaluate_all(ui_scores, gt, targeted_items, 1)
msg5, res5 = evaluate_all(ui_scores, gt, targeted_items, 5)
msg10, res10 = evaluate_all(ui_scores, gt, targeted_items, 10)

# Directory that receives the evaluation logs
eval_dir = Path("/scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/log") \
           / args.split / eval_date / "evaluation_logs"
eval_dir.mkdir(parents=True, exist_ok=True)

# File name: same prefix fields as at training time, tagged eval_sequential
suffix = args.attack_mode
mr = args.mr
dataset = args.split
img_feat = args.image_feature_type
reduction = args.reduction_factor
epoch = args.epoch
base_name = f"{suffix}_{mr}_VIP5_{dataset}_{img_feat}_{reduction}_{epoch}"

sequential_filename = f"{base_name}_eval_sequential_{prompt}.txt"
sequential_log_path = eval_dir / sequential_filename

# Write the report. Header order: Dataset, AttackMode, MaliciousRatio, Prompt.
# The "Prompt:" line used to be written twice; it is now written once.
with open(sequential_log_path, "w", encoding="utf-8") as f:
    f.write("Sequential Evaluation Results\n")
    f.write(f"Dataset: {dataset}\n")
    f.write(f"AttackMode: {args.attack_mode}\n")
    f.write(f"MaliciousRatio: {args.mr}\n")
    f.write(f"Prompt: {prompt}\n\n")
    f.write("=== Metrics @1 ===\n")
    f.write(msg1 + "\n")
    f.write("=== Metrics @5 ===\n")
    f.write(msg5 + "\n")
    f.write("=== Metrics @10 ===\n")
    f.write(msg10 + "\n")

print(f"Sequential 结果已保存至: {sequential_log_path}")


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'T5Tokenizer'. 
The class this function is called from is 'P5Tokenizer'.


[DEBUG] exp_splits_path = data/toys/exp_splits.pkl
[DEBUG] seq_path        = data/toys/sequential_data.txt
[DEBUG] idx_path        = data/toys/user_id2idx.pkl
[DEBUG] name_path       = data/toys/user_id2name.pkl
[WARN] NoAttack 模式下，动态构建了 26243 个用户映射
compute_datum_info
攻击模式：NoAttack，恶意比例：0.0
Sequential 任务 (Prompt: A-3) 数据量: 1214


100%|███████████████████████████████████████████████████████████| 1214/1214 [16:12<00:00,  1.25it/s]



NDCG@1	Rec@1	Hits@1	Prec@1	MAP@1	MRR@1	ER@1
0.0720	0.0720	0.0720	0.0720	0.0720	0.0720	0.0000

NDCG@5	Rec@5	Hits@5	Prec@5	MAP@5	MRR@5	ER@5
0.0907	0.1075	0.1075	0.0215	0.0851	0.0851	0.0001

NDCG@10	Rec@10	Hits@10	Prec@10	MAP@10	MRR@10	ER@10
0.0962	0.1246	0.1246	0.0125	0.0874	0.0874	0.0002
Sequential 结果已保存至: /scratch/guanguowei/Code/MyWork/VIP5_Shadowcast_DPA/log/toys/0509/evaluation_logs/NoAttack_0.0_VIP5_toys_vitb32_8_20_eval_sequential_A-3.txt


: 