输出结果有误的代码

In [None]:
import torch
import numpy as np
from transformers import FlavaTextModel, FlavaProcessor
# Pick the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# 1. ========== Load the model and the processor ==========
# NOTE(review): absolute, machine-specific checkpoint path; it must exist locally.
model_name = "C:/Users/xinlong/Desktop/code/python/flava_use/model/facebook/flava-full"

tokenizer = FlavaProcessor.from_pretrained(model_name)
model = FlavaTextModel.from_pretrained(model_name)

# Move the weights to the selected device and switch to inference mode.
model.to(device)
model.eval()
print(f"模型已加载至 {device}")

# 2. ========== Prepare the input texts ==========
# Sample inputs; replace them with your own m texts.
texts = [
    "一只函数的返回大幅改进企鹅瑞华企鹅舞i意见猫",
    "一只猫和一啊但是发射点发射点只狗",
    "as阿凡达发hpoerujhiopertfasfa",
    "放噶撒旦发射覅殴打事件回顾i哦速度返回结果点",
]

# Tokenise the whole batch (padded and truncated) and move the tensors to `device`.
inputs = tokenizer(text=texts, return_tensors="pt", padding=True, truncation=True).to(device)

# 3. ========== Storage filled by the forward hooks ==========
# One independent list per encoder layer; the hook factories defined below
# append to these module-level lists.
q_list = [[] for _layer in model.encoder.layer]           # Q projection outputs per layer
k_list = [[] for _layer in model.encoder.layer]           # K projection outputs per layer
ffn_output_list = [[] for _layer in model.encoder.layer]  # FFN (layer.output) outputs per layer

def get_q_hook(layer_idx):
    """Build a forward hook that records a module's output under ``q_list[layer_idx]``.

    Args:
        layer_idx: Index into the module-level ``q_list`` to append to.

    Returns:
        A callable with the ``(module, input, output)`` forward-hook signature.
        Each call appends a detached, CPU-side NumPy copy of ``output``.
    """
    def _record_query(_module, _inputs, result):
        # Detach from autograd and copy to host memory before converting.
        captured = result.detach().cpu().numpy()
        q_list[layer_idx].append(captured)

    return _record_query
    

def get_k_hook(layer_idx):
    """Build a forward hook that records a module's output under ``k_list[layer_idx]``.

    Args:
        layer_idx: Index into the module-level ``k_list`` to append to.

    Returns:
        A callable with the ``(module, input, output)`` forward-hook signature.
        Each call appends a detached, CPU-side NumPy copy of ``output``.
    """
    def _record_key(_module, _inputs, result):
        # Detach from autograd and copy to host memory before converting.
        captured = result.detach().cpu().numpy()
        k_list[layer_idx].append(captured)

    return _record_key

def get_ffn_hook(layer_idx):
    """Build a forward hook that records a module's output under ``ffn_output_list[layer_idx]``.

    Args:
        layer_idx: Index into the module-level ``ffn_output_list`` to append to.

    Returns:
        A callable with the ``(module, input, output)`` forward-hook signature.
        Each call appends a detached, CPU-side NumPy copy of ``output``.
    """
    def _record_ffn(_module, _inputs, result):
        # Detach from autograd and copy to host memory before converting.
        captured = result.detach().cpu().numpy()
        ffn_output_list[layer_idx].append(captured)

    return _record_ffn
# 4. ========== Register the forward hooks ==========
# Keep every handle: a forward hook stays attached to its module until the
# handle is removed, so a later forward pass on `model` would otherwise keep
# appending to q_list / k_list / ffn_output_list.
hook_handles = []
for i, layer in enumerate(model.encoder.layer):
    hook_handles.append(layer.attention.attention.query.register_forward_hook(get_q_hook(i)))
    hook_handles.append(layer.attention.attention.key.register_forward_hook(get_k_hook(i)))
    hook_handles.append(layer.output.register_forward_hook(get_ffn_hook(i)))

# 5. ========== Run the model (this fires the hooks) ==========
try:
    with torch.no_grad():
        outputs = model(**inputs)
finally:
    # Detach the hooks even when the forward pass raises.
    for handle in hook_handles:
        handle.remove()

# 6. ========== Save everything as NumPy arrays ==========
import os

output_dir = "./flava_full_outputs"
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(output_dir, exist_ok=True)

# Each per-layer list holds one array per forward pass, so np.array(...) adds a
# leading axis whose length is the number of passes (1 here). File names are
# 1-based: layer index 0 is written to *_layer_1.npy.
for i in range(len(model.encoder.layer)):
    # Q projection output
    np.save(f'{output_dir}/q_layer_{i+1}.npy', np.array(q_list[i]))
    # K projection output
    np.save(f'{output_dir}/k_layer_{i+1}.npy', np.array(k_list[i]))
    # FFN (layer.output) output
    np.save(f'{output_dir}/ffn_output_layer_{i+1}.npy', np.array(ffn_output_list[i]))
print(f"所有中间值已保存至 {output_dir}")

DeepSeek提供的原始的代码

In [None]:
import torch
from transformers import FlavaModel

def capture_flava_activations(model):
    """Attach forward hooks to every layer of ``model.image_model.encoder``.

    For each encoder layer, hooks are registered on the attention ``query`` and
    ``key`` sub-modules and on ``layer.output``. The FFN weight matrices are
    cloned immediately; the activation dictionaries are filled in when the
    model is run.

    Args:
        model: Object exposing ``image_model.encoder.layer``, an iterable of
            layers with ``attention.attention.query`` / ``.key``,
            ``intermediate.dense`` and ``output`` (which has ``.dense``).

    Returns:
        A 4-tuple ``(hooks, attention_data, ffn_outputs, ffn_weights)``:
            hooks: hook handles, three per layer (query, key, output).
            attention_data: ``{layer_idx: {'Q': tensor, 'K': tensor}}``,
                overwritten on every forward pass.
            ffn_outputs: ``{layer_idx: tensor}`` with the output of
                ``layer.output``, overwritten on every forward pass.
            ffn_weights: ``{layer_idx: {'intermediate': tensor, 'output': tensor}}``
                holding clones of the two dense weight matrices.
    """
    attention_data = {}  # per-layer Q and K
    ffn_outputs = {}     # per-layer output of layer.output
    ffn_weights = {}     # per-layer cloned dense weights
    hooks = []           # handles, so the caller can remove the hooks later

    def _attention_recorder(idx, slot):
        # Binds the layer index and the 'Q'/'K' slot for one hook.
        def _hook(_module, _inputs, result):
            attention_data.setdefault(idx, {})[slot] = result.detach()
        return _hook

    def _ffn_recorder(idx):
        # Binds the layer index for one hook.
        def _hook(_module, _inputs, result):
            ffn_outputs[idx] = result.detach()
        return _hook

    # Walk the encoder layers of the image tower.
    for layer_idx, layer in enumerate(model.image_model.encoder.layer):
        self_attention = layer.attention.attention

        # Q and K projections.
        hooks.append(self_attention.query.register_forward_hook(_attention_recorder(layer_idx, 'Q')))
        hooks.append(self_attention.key.register_forward_hook(_attention_recorder(layer_idx, 'K')))

        # Snapshot the FFN weights (intermediate and output dense layers).
        ffn_weights[layer_idx] = {
            'intermediate': layer.intermediate.dense.weight.data.clone(),
            'output': layer.output.dense.weight.data.clone(),
        }

        # Output of the layer's `output` module.
        hooks.append(layer.output.register_forward_hook(_ffn_recorder(layer_idx)))

    return hooks, attention_data, ffn_outputs, ffn_weights

# Usage example
model = FlavaModel.from_pretrained("facebook/flava-full")  # load the model

# Register the hooks
hooks, attention_data, ffn_outputs, ffn_weights = capture_flava_activations(model)

# Dummy inputs
inputs = {
    "pixel_values": torch.randn(1, 3, 224, 224),  # random image-shaped tensor
    "input_ids": torch.randint(0, 30522, (1, 77)), # random token ids
}

# The forward pass fires the hooks. Only activations are needed, so autograd
# is disabled, and the hooks are removed even if the forward pass raises.
try:
    with torch.no_grad():
        outputs = model(**inputs)
finally:
    for hook in hooks:
        hook.remove()

# Print a few of the captured results
print("Q values for layer 0:", attention_data[0]['Q'].shape)
print("FFN output for layer 0:", ffn_outputs[0].shape)
print("FFN intermediate weights shape:", ffn_weights[0]['intermediate'].shape)

修改的DeepSeek的代码

In [None]:
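# Output shapes: np.array(q_list) -> (num_layers, 1, num_samples, num_tokens, hidden_size), because each
# layer's Q is appended to a list (one entry per forward pass); np.array(ffn_outputs) ->
# (num_layers, num_samples, num_tokens, hidden_size), because each layer's FFN output overwrites its slot.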
import torch
import numpy as np
from transformers import FlavaModel, FlavaProcessor


# 0. ==========定义钩子==========
def capture_flava_activations(model):
    """Attach forward hooks to every layer of ``model.text_model.encoder``.

    For each encoder layer, hooks are registered on the attention ``query`` and
    ``key`` sub-modules and on ``layer.output``. The FFN weight matrices are
    cloned immediately; the activation containers are filled in when the model
    is run. All captured activations are stored as detached NumPy arrays in
    host memory, so nothing captured keeps device memory alive.

    Args:
        model: Object exposing ``text_model.encoder.layer``, a sequence of
            layers with ``attention.attention.query`` / ``.key``,
            ``intermediate.dense`` and ``output`` (which has ``.dense``).

    Returns:
        A 5-tuple ``(hooks, q_list, k_list, ffn_outputs, ffn_weights)``:
            hooks: hook handles, three per layer (query, key, output).
            q_list: one list per layer; every forward pass appends one array.
            k_list: one slot per layer; initially an empty list, replaced by
                the latest K array on every forward pass.
            ffn_outputs: one slot per layer; initially an empty list, replaced
                by the latest ``layer.output`` array on every forward pass.
            ffn_weights: ``{layer_idx: {'intermediate': tensor, 'output': tensor}}``
                holding clones of the two dense weight matrices.
    """
    # Handles, so the caller can remove the hooks later.
    hooks = []

    # Walk the encoder layers of the text tower.
    encoder = model.text_model.encoder

    # Per-layer storage.
    q_list = [[] for _ in range(len(encoder.layer))]       # Q, appended per forward pass
    k_list = [[] for _ in range(len(encoder.layer))]       # K, overwritten per forward pass
    ffn_outputs = [[] for _ in range(len(encoder.layer))]  # FFN output, overwritten per forward pass
    ffn_weights = {}                                       # cloned dense weights per layer

    for layer_idx, layer in enumerate(encoder.layer):
        # Self-attention module of the current layer.
        self_attention = layer.attention.attention

        # Hooks capturing Q and K. `idx=layer_idx` binds the current index at
        # definition time; a plain closure would see only the last value.
        def q_hook(module, input, output, idx=layer_idx):
            q_list[idx].append(output.detach().cpu().numpy())
        hook_q = self_attention.query.register_forward_hook(q_hook)
        hooks.append(hook_q)

        def k_hook(module, input, output, idx=layer_idx):
            # Convert to a CPU NumPy array like Q and the FFN output; keeping
            # the raw tensor would leave it on the model's device and make
            # np.array(k_list) fail for CUDA tensors.
            k_list[idx] = output.detach().cpu().numpy()
        hook_k = self_attention.key.register_forward_hook(k_hook)
        hooks.append(hook_k)

        # Snapshot the FFN weights (intermediate and output dense layers).
        intermediate_weights = layer.intermediate.dense.weight.data.clone()
        output_weights = layer.output.dense.weight.data.clone()
        ffn_weights[layer_idx] = {
            'intermediate': intermediate_weights,
            'output': output_weights
        }

        # Hook capturing the output of the layer's `output` module.
        def ffn_hook(module, input, output, idx=layer_idx):
            ffn_outputs[idx] = output.detach().cpu().numpy()
        hook_ffn = layer.output.register_forward_hook(ffn_hook)
        hooks.append(hook_ffn)

    return hooks, q_list, k_list, ffn_outputs, ffn_weights

if __name__ == "__main__":
    # NOTE(review): absolute, machine-specific checkpoint path; it must exist locally.
    model_name = "C:/Users/xinlong/Desktop/code/python/flava_use/model/facebook/flava-full"

    # Select the device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 1. ========== Load the model and the processor ==========
    model = FlavaModel.from_pretrained(model_name)
    tokenizer = FlavaProcessor.from_pretrained(model_name)

    model.eval()
    model.to(device)
    print(f"模型已加载至 {device}")

    # 2. ========== Prepare the input texts ==========
    texts = [
        "一只函数的返回大幅改进企鹅瑞华企鹅舞i意见猫",
        "一只猫和一啊但是发射点发射点只狗",
        "as阿凡达发hpoerujhiopertfasfa",
        "放噶撒旦发射覅殴打事件回顾i哦速度返回结果点"
    ]
    inputs = tokenizer(
        text=texts,
        return_tensors="pt",
        padding=True,
        truncation=True
    ).to(device)

    # 3. ========== Register the hooks ==========
    hooks, q_list, k_list, ffn_outputs, ffn_weights = capture_flava_activations(model)

    # 4. ========== Forward pass (fires the hooks) ==========
    # Only activations are needed, so autograd is disabled.
    # 5. ========== Remove the hooks ==========
    # Done in `finally` so the hooks do not stay attached if the forward pass raises.
    try:
        with torch.no_grad():
            outputs = model(**inputs)
    finally:
        for hook in hooks:
            hook.remove()

    # # Example printout of the captured results
    # print("Q values for layer 0:", q_list[0][0].shape)
    # print("FFN output for layer 0:", ffn_outputs[0].shape)
    # print("FFN intermediate weights shape:", ffn_weights[0]['intermediate'].shape)

    # # 6. ========== Save everything as NumPy arrays ==========
    # output_dir = "./flava_full_outputs"
    # import os
    # if not os.path.exists(output_dir):
    #     os.makedirs(output_dir)

    # # Save Q, K and the FFN outputs
    # for i in range(len(model.text_model.encoder.layer)):
    #     # Save Q
    #     np.save(f'{output_dir}/q_layer_{i+1}.npy', np.array(q_list[i]))

    # ffn_outputs_np = np.array(ffn_outputs)
    # print(ffn_outputs_np.shape)


    # import gc   # free memory
    # gc.collect()    # free memory
    # del q_list, k_list, ffn_outputs, ffn_weights
    # torch.cuda.empty_cache()

模型已加载至 cuda




In [9]:
# q_list_np = np.array(q_list)
# Combine the per-layer FFN captures into one array and report its shape.
ffn_outputs_np = np.array(ffn_outputs)
print(ffn_outputs_np.shape)
# Vector at index 0 of each of the three leading axes
# (presumably layer 0, sample 0, token 0 - confirm against the printed shape).
print(ffn_outputs_np[0, 0, 0])

(12, 4, 24, 768)
[ 1.70472309e-01  1.53833896e-01  3.94382179e-01 -1.61612034e-01
  2.59675264e-01 -2.44080603e-01 -1.14520989e-01  1.69595867e-01
 -1.54785067e-01  9.13730264e-02 -5.52020147e-02  4.09352481e-02
  5.27053401e-02  1.15245104e-01  4.80525158e-02 -5.11678904e-02
 -2.87483037e-02  2.91853040e-01  4.31904256e-01  3.16732019e-01
 -9.49063897e-03 -1.01475067e-01 -8.95549357e-03 -1.16956249e-01
 -1.24302559e-01  1.82863384e-01 -1.62635408e-02  2.66659379e-01
  8.08232725e-02  1.26410842e-01 -1.33667633e-01  2.68276483e-02
 -3.19827348e-03 -2.37897098e-01 -4.32671607e-02  1.25211611e-01
 -5.86714447e-02 -4.15027998e-02 -1.54236071e-02  3.01429987e-01
  2.98926849e-02  2.35183388e-01 -3.69920619e-02  1.10269353e-01
  1.63826346e-01 -1.72405988e-01 -1.12857103e-01  8.36321637e-02
  6.42416775e-02 -4.73584384e-02  1.35626584e-01 -9.32509080e-02
 -1.77340865e-01  3.16753276e-02  1.55827141e+00 -1.42473578e-02
 -9.12624523e-02  6.41826838e-02 -8.21418017e-02 -1.20944738e-01
 -5.1783