In [1]:
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
import torch

device = 'cuda:0'
model_path  = "/root/autodl-tmp/models/qwen2-1.5b"
model = AutoModelForCausalLM.from_pretrained(model_path,device_map=device)

In [2]:
from datasets import load_dataset,load_from_disk
import random

nsamples = 1
seqlen = 2048
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

dataset_path = "/root/autodl-tmp/datasets/ptb"
traindata = load_from_disk(dataset_path)["train"]
trainenc = tokenizer(" ".join(traindata['sentence']), return_tensors='pt')

data_loader = []
for _ in range(nsamples):
    i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
    j = i + seqlen
    inp = trainenc.input_ids[:, i:j]
    tar = inp.clone()
    tar[:, :-1] = -100
data_loader.append((inp, tar))

Token indices sequence length is longer than the specified maximum sequence length for this model (1062233 > 32768). Running this sequence through the model will result in indexing errors


In [3]:
fisher_matrix = {name: torch.zeros_like(param).to("cpu") for name, param in model.named_parameters()}

In [4]:
model.eval()

for data, target in data_loader:
    output = model(data.to(device))
    # print(output)
    loss = torch.nn.functional.mse_loss(output, target)
    print(loss)
    torch.cuda.empty_cache()
    del data
    del target
    del output
    del loss
    print("-----------------------------------------------------------------")
    model.zero_grad()  # 清除梯度
    loss.backward()  # 反向传播计算梯度

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


AttributeError: 'CausalLMOutputWithPast' object has no attribute 'size'

In [5]:
output

CausalLMOutputWithPast(loss=None, logits=tensor([[[ 5.3570,  5.6960,  6.2455,  ..., -3.4583, -3.4582, -3.4586],
         [ 4.4224,  2.2458,  2.4666,  ..., -3.3813, -3.3804, -3.3800],
         [ 4.6134,  4.9141,  3.0070,  ..., -4.7578, -4.7569, -4.7570],
         ...,
         [ 2.2307,  3.0682,  1.7333,  ..., -3.1445, -3.1439, -3.1435],
         [ 6.4739,  5.1217,  3.5723,  ..., -3.5855, -3.5849, -3.5845],
         [ 2.8959,  3.0308,  1.7949,  ..., -3.9827, -3.9824, -3.9827]]],
       device='cuda:0', grad_fn=<UnsafeViewBackward0>), past_key_values=((tensor([[[[ 3.0371e+00, -6.6965e-01, -1.2751e-01,  ..., -1.0727e+02,
            1.3519e+02,  1.3516e+02],
          [ 1.4193e+00, -7.4070e-01, -4.6314e-01,  ..., -1.0821e+02,
            1.3472e+02,  1.3557e+02],
          [ 7.3286e-01, -1.4242e+00, -2.4083e-01,  ..., -1.0774e+02,
            1.3624e+02,  1.3545e+02],
          ...,
          [-1.1148e+00, -2.2909e+00, -1.3121e-01,  ..., -1.0724e+02,
            1.3586e+02,  1.3409e+02],


In [None]:
import torch

def empirical_fisher(model, data_loader, loss_fn):
    fisher_matrix = {name: torch.zeros_like(param) for name, param in model.named_parameters()}

    model.eval()  # 切换到评估模式

    for data, target in data_loader:
        output = model(data.to(device))
        print(output)
        loss = loss_fn(output.to("cpu"), target)

        model.zero_grad()  # 清除梯度
        loss.backward()  # 反向传播计算梯度

        # 计算每个参数的梯度平方并累加到 Fisher 信息矩阵中
        for name, param in model.named_parameters():
            fisher_matrix[name] += param.grad ** 2

    # 归一化 Fisher 信息矩阵
    for name in fisher_matrix:
        fisher_matrix[name] /= len(data_loader)

    return fisher_matrix

empirical_fisher(model, data_loader, torch.nn.functional.mse_loss) 

## 有关梯度求解的简单demo

$I(\delta\theta) = \sum_{i=0}^{N} (\frac{\partial L(\theta_{i} + \delta \theta_{i};x)}{\partial \delta \theta_{i}})^2$

In [1]:
import torch
import torch.nn as nn

# 假设一个简单的神经网络模型
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 初始化模型和损失函数
model = SimpleNN()
criterion = nn.CrossEntropyLoss()

# 输入数据
inputs = torch.randn(1, 10)  # 输入
labels = torch.tensor([1])   # 标签

# 获取模型参数
original_params = {name: param.clone() for name, param in model.named_parameters()}

# 定义可求导的扰动 δθ，设置为可求导的张量
delta_thetas = {name: torch.randn_like(param, requires_grad=True) for name, param in model.named_parameters()}

# 添加扰动 δθ 到模型参数
for name, param in model.named_parameters():
    param.data.add_(delta_thetas[name].data)

# 计算扰动后的输出和损失
outputs = model(inputs)
loss = criterion(outputs, labels)

# 对扰动 δθ 求导
loss.backward()

# 获取相对于扰动 δθ 的梯度
for name, delta_theta in delta_thetas.items():
    print(f"Gradient of loss w.r.t delta_theta ({name}): {delta_theta.grad}")

# 恢复原始参数
for name, param in model.named_parameters():
    param.data.copy_(original_params[name])


Gradient of loss w.r.t delta_theta (fc1.weight): None
Gradient of loss w.r.t delta_theta (fc1.bias): None
Gradient of loss w.r.t delta_theta (fc2.weight): None
Gradient of loss w.r.t delta_theta (fc2.bias): None


In [2]:
import torch
import torch.nn as nn

# 定义神经网络
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 初始化模型和损失函数
model = SimpleNN()
criterion = nn.CrossEntropyLoss()

# 输入数据
inputs = torch.randn(1, 10)  # 输入
labels = torch.tensor([1])   # 标签

# 获取模型参数的原始值
original_params = {name: param.clone() for name, param in model.named_parameters()}

# 定义可求导的扰动 δθ
delta_thetas = {name: torch.randn_like(param, requires_grad=True) for name, param in model.named_parameters()}

# 创建扰动后的参数，并计算模型输出和损失
outputs = model(inputs)

# 使用 `with torch.no_grad()` 确保模型参数不被求导修改
with torch.no_grad():
    for name, param in model.named_parameters():
        param.add_(delta_thetas[name])  # 添加扰动

# 计算扰动后的输出和损失
outputs = model(inputs)
loss = criterion(outputs, labels)

# 对扰动 δθ 求导
loss.backward()

# 获取相对于扰动 δθ 的梯度
for name, delta_theta in delta_thetas.items():
    if delta_theta.grad is not None:
        print(f"Gradient of loss w.r.t delta_theta ({name}): {delta_theta.grad}")
    else:
        print(f"Gradient of loss w.r.t delta_theta ({name}): None")

# 恢复原始参数
with torch.no_grad():
    for name, param in model.named_parameters():
        param.copy_(original_params[name])

Gradient of loss w.r.t delta_theta (fc1.weight): None
Gradient of loss w.r.t delta_theta (fc1.bias): None
Gradient of loss w.r.t delta_theta (fc2.weight): None
Gradient of loss w.r.t delta_theta (fc2.bias): None


In [11]:
import torch
import torch.nn as nn

# 定义神经网络
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(100, 50)
        self.fc2 = nn.Linear(50, 2)

    def forward(self, x, delta_thetas=None):
        # 如果提供了扰动参数，就在模型的权重上加上这些扰动
        if delta_thetas is not None:
            # 加上扰动
            x = torch.relu(torch.matmul(x, self.fc1.weight.T + delta_thetas['fc1.weight'].T) + self.fc1.bias + delta_thetas['fc1.bias'])
            x = torch.matmul(x, self.fc2.weight.T + delta_thetas['fc2.weight'].T) + self.fc2.bias + delta_thetas['fc2.bias']
        else:
            # 常规计算
            x = torch.relu(self.fc1(x))
            x = self.fc2(x)
        return x

# 初始化模型和损失函数
model = SimpleNN()
criterion = nn.CrossEntropyLoss()

# 输入数据
inputs = torch.randn(1, 100)  # 输入
labels = torch.tensor([1])   # 标签

# 定义扰动参数 δθ，作为可求导的变量
delta_thetas = {
    'fc1.weight': torch.randn_like(model.fc1.weight, requires_grad=True),
    'fc1.bias': torch.randn_like(model.fc1.bias, requires_grad=True),
    'fc2.weight': torch.randn_like(model.fc2.weight, requires_grad=True),
    'fc2.bias': torch.randn_like(model.fc2.bias, requires_grad=True)
}

# 使用添加扰动后的参数进行前向传播
outputs = model(inputs, delta_thetas=delta_thetas)

# 计算损失
loss = criterion(outputs, labels)

# 对扰动 δθ 求导
loss.backward()

# 输出扰动参数的梯度
for name, delta_theta in delta_thetas.items():
    if delta_theta.grad is not None:
        print(f"Gradient of loss w.r.t delta_theta ({name}): {(delta_theta.grad** 2).mean()}")
    else:
        print(f"Gradient of loss w.r.t delta_theta ({name}): None")


Gradient of loss w.r.t delta_theta (fc1.weight): 7.336907572068756e-18
Gradient of loss w.r.t delta_theta (fc1.bias): 7.026649495092979e-18
Gradient of loss w.r.t delta_theta (fc2.weight): 3.723814651783225e-16
Gradient of loss w.r.t delta_theta (fc2.bias): 8.370606232254835e-18


一定程度上可以反映模型层的敏感性变化

In [36]:
import torch
import torch.nn as nn

# 定义神经网络
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 10)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 初始化模型和损失函数
model = SimpleNN()
criterion = nn.CrossEntropyLoss()

# 输入数据
inputs = torch.randn(1, 10)  # 输入
with torch.no_grad():
    labels = model(inputs)

# 定义扰动参数 δθ，作为可求导的变量
delta_thetas = {name: torch.randn_like(param, requires_grad=True) for name, param in model.named_parameters()}

for name, para in model.named_parameters():
    para.requires_grad=False
    para.add_(delta_thetas[name])

# 使用添加扰动后的参数进行前向传播
outputs = model(inputs)

print(outputs.shape)

# 计算损失
loss = torch.nn.functional.mse_loss(outputs, labels)
# loss = criterion(outputs, labels)

# 对扰动 δθ 求导
loss.backward()

# 输出扰动参数的梯度
for name, delta_theta in delta_thetas.items():
    if delta_theta.grad is not None:
        print(f"Gradient of loss w.r.t delta_theta ({name}): {(delta_theta.grad** 2).mean()}")
    else:
        print(f"Gradient of loss w.r.t delta_theta ({name}): None")

torch.Size([1, 10])
Gradient of loss w.r.t delta_theta (fc1.weight): 2.469496011734009
Gradient of loss w.r.t delta_theta (fc1.bias): 6.147805213928223
Gradient of loss w.r.t delta_theta (fc2.weight): 0.9514350295066833
Gradient of loss w.r.t delta_theta (fc2.bias): 0.39639297127723694


# 标准化计算结果

In [40]:
import torch

def standardize_keys(input_dict):
    keys_to_keep = [
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
    ]
    standardized_dict = {}

    counter = 0
    
    # 遍历原字典
    for key, value in input_dict.items():
        if len(key.split('.'))>=4:
            core_key = key.split('.')[2] + '.' + key.split('.')[3]
        else:
            core_key = key.split('.')[1]
        print(f"{core_key}:{core_key in keys_to_keep}")
        if core_key in keys_to_keep:
            if core_key == keys_to_keep[0]:
                standardized_dict[counter] = {}
                standardized_dict[counter][core_key] = value.item()
            elif core_key == keys_to_keep[-1]:
                standardized_dict[counter][core_key] = value.item()
                counter += 1
            else:
                standardized_dict[counter][core_key] = value.item()
    
    return standardized_dict

In [41]:
# 示例输入字典
input_dict = {
    '0.weight': torch.tensor(0.8525, dtype=torch.float16),
    '1.0.self_attn.q_proj.weight': torch.tensor(0.0080, dtype=torch.float16),
    '1.0.self_attn.k_proj.weight': torch.tensor(0.0040, dtype=torch.float16),
    '1.0.self_attn.v_proj.weight': torch.tensor(2.4746, dtype=torch.float16),
    '1.0.self_attn.o_proj.weight': torch.tensor(0.3433, dtype=torch.float16),
    '1.0.mlp.gate_proj.weight': torch.tensor(0.0118, dtype=torch.float16),
    '1.0.mlp.up_proj.weight': torch.tensor(0.0198, dtype=torch.float16),
    '1.0.mlp.down_proj.weight': torch.tensor(0.0976, dtype=torch.float16),
    '1.0.input_layernorm.weight': torch.tensor(0.0346, dtype=torch.float16),
    '1.0.post_attention_layernorm.weight': torch.tensor(0.0412, dtype=torch.float16),
    '1.1.self_attn.q_proj.weight': torch.tensor(0.0086, dtype=torch.float16),
    '1.1.self_attn.k_proj.weight': torch.tensor(0.0090, dtype=torch.float16),
    '1.1.self_attn.v_proj.weight': torch.tensor(18.5625, dtype=torch.float16),
    '1.1.self_attn.o_proj.weight': torch.tensor(0.8960, dtype=torch.float16),
    '1.1.mlp.gate_proj.weight': torch.tensor(0.0170, dtype=torch.float16),
    '1.1.mlp.up_proj.weight': torch.tensor(0.0377, dtype=torch.float16),
    '1.1.mlp.down_proj.weight': torch.tensor(107.0625, dtype=torch.float16),
    '1.1.input_layernorm.weight': torch.tensor(0.0512, dtype=torch.float16),
    '1.1.post_attention_layernorm.weight': torch.tensor(0.0483, dtype=torch.float16)
}

# 调用函数
standardized_dict = standardize_keys(input_dict)
print(standardized_dict)

weight:False
self_attn.q_proj:True
self_attn.k_proj:True
self_attn.v_proj:True
self_attn.o_proj:True
mlp.gate_proj:True
mlp.up_proj:True
mlp.down_proj:True
input_layernorm.weight:False
post_attention_layernorm.weight:False
self_attn.q_proj:True
self_attn.k_proj:True
self_attn.v_proj:True
self_attn.o_proj:True
mlp.gate_proj:True
mlp.up_proj:True
mlp.down_proj:True
input_layernorm.weight:False
post_attention_layernorm.weight:False
{0: {'self_attn.q_proj': 0.00800323486328125, 'self_attn.k_proj': 0.004001617431640625, 'self_attn.v_proj': 2.474609375, 'self_attn.o_proj': 0.34326171875, 'mlp.gate_proj': 0.01180267333984375, 'mlp.up_proj': 0.019805908203125, 'mlp.down_proj': 0.09759521484375}, 1: {'self_attn.q_proj': 0.00859832763671875, 'self_attn.k_proj': 0.009002685546875, 'self_attn.v_proj': 18.5625, 'self_attn.o_proj': 0.89599609375, 'mlp.gate_proj': 0.016998291015625, 'mlp.up_proj': 0.037689208984375, 'mlp.down_proj': 107.0625}}


In [19]:
# 定义需要保留的关键字列表
keys_to_keep = [
    "self_attn.q_proj",
    "self_attn.k_proj",
    "self_attn.v_proj",
    "self_attn.o_proj",
    "mlp.gate_proj",
    "mlp.up_proj",
    "mlp.down_proj"
]

# 输入键
input_key = '1.0.self_attn.q_proj.weight'

print(len(input_key.split('.')))
# 提取核心部分（去掉 .weight 后缀)
if len(input_key.split('.'))>=4:
    core_key = input_key.split('.')[2] + '.' + input_key.split('.')[3]
    print(core_key)
# 判断是否在 keys_to_keep 中
if core_key in keys_to_keep:
    print(f"The key {input_key} is in keys_to_keep.")
else:
    print(f"The key {input_key} is NOT in keys_to_keep.")

5
self_attn.q_proj
The key 1.0.self_attn.q_proj.weight is in keys_to_keep.
