In [1]:
import torch
import numpy as np
from transformers import FlavaTextModel, FlavaProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Select the compute device: CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# 1. ========== Load the model and the tokenizer ==========
# Location handed to both `from_pretrained` calls below.
# NOTE(review): this is a hard-coded absolute path on one machine; consider
# making it configurable before sharing the notebook.
model_name = "C:/Users/xinlong/Desktop/code/python/flava_use/model/facebook/flava-full"

model = FlavaTextModel.from_pretrained(model_name)
# The FlavaProcessor instance is kept under the name `tokenizer`.
tokenizer = FlavaProcessor.from_pretrained(model_name)

# Switch to evaluation mode and move the model to the selected device.
model = model.eval().to(device)
print(f"模型已加载至 {device}")

模型已加载至 cuda


In [5]:
# 2. ========== Prepare the input texts ==========
# Example inputs; replace them with your own m texts.
texts = [
    "一只猫",
    "一只狗",
    "一只猫和一只狗",
]
# Tokenize the whole batch with padding and truncation enabled, request
# PyTorch tensors, then move the resulting batch to the selected device.
inputs = tokenizer(text=texts, padding=True, truncation=True, return_tensors="pt").to(device)

In [None]:
# 3. ========== Hook functions that capture intermediate values ==========
# One independent, initially empty list per entry of `model.encoder.layer`;
# the hook for layer i appends to index i of the matching buffer.
q_list = [list() for _ in model.encoder.layer]           # captured Q per layer
k_list = [list() for _ in model.encoder.layer]           # captured K per layer
ffn_output_list = [list() for _ in model.encoder.layer]  # captured FFN output per layer

def get_q_hook(layer_idx):
    """Create a forward hook that records a module's output in ``q_list[layer_idx]``.

    Args:
        layer_idx: Index into ``q_list`` selecting the list to append to.

    Returns:
        A callable taking ``(module, inputs, output)``. Only ``output`` is
        used: it is detached, moved to the CPU, converted to a NumPy array
        and appended to ``q_list[layer_idx]``.
    """
    def capture(_module, _inputs, output):
        # `q_list` is looked up on every call, so a re-created buffer is picked up.
        bucket = q_list[layer_idx]
        bucket.append(output.detach().cpu().numpy())

    return capture

def get_k_hook(layer_idx):
    """Create a forward hook that records a module's output in ``k_list[layer_idx]``.

    Args:
        layer_idx: Index into ``k_list`` selecting the list to append to.

    Returns:
        A callable taking ``(module, inputs, output)``. Only ``output`` is
        used: it is detached, moved to the CPU, converted to a NumPy array
        and appended to ``k_list[layer_idx]``.
    """
    def capture(_module, _inputs, output):
        # `k_list` is looked up on every call, so a re-created buffer is picked up.
        bucket = k_list[layer_idx]
        bucket.append(output.detach().cpu().numpy())

    return capture

def get_ffn_hook(layer_idx):
    """Create a forward hook that records a module's output in ``ffn_output_list[layer_idx]``.

    Args:
        layer_idx: Index into ``ffn_output_list`` selecting the list to append to.

    Returns:
        A callable taking ``(module, inputs, output)``. Only ``output`` is
        used: it is detached, moved to the CPU, converted to a NumPy array
        and appended to ``ffn_output_list[layer_idx]``.
    """
    def capture(_module, _inputs, output):
        # `ffn_output_list` is looked up on every call, so a re-created buffer is picked up.
        bucket = ffn_output_list[layer_idx]
        bucket.append(output.detach().cpu().numpy())

    return capture

In [None]:
# 4. ========== Register the forward hooks ==========
# Remove hooks left over from an earlier run of this cell; otherwise every
# re-run would stack another set of hooks and each forward pass would record
# the same tensor several times.
for stale_handle in globals().get("hook_handles", []):
    stale_handle.remove()
hook_handles = []  # handles returned by register_forward_hook, kept for later removal

for i, layer in enumerate(model.encoder.layer):
    # FLAVA encoder layers use the BERT/ViT-style layout, not the CLIP-style
    # one: the Q/K projections are `attention.attention.query` / `.key`, and
    # there is no `self_attn.q_proj`, `self_attn.k_proj` or `mlp` attribute.
    self_attention = layer.attention.attention
    hook_handles.append(self_attention.query.register_forward_hook(get_q_hook(i)))
    hook_handles.append(self_attention.key.register_forward_hook(get_k_hook(i)))
    # `output.dense` is the second feed-forward linear layer; hooking it
    # captures the FFN result before the residual connection is added back.
    hook_handles.append(layer.output.dense.register_forward_hook(get_ffn_hook(i)))