# 权重分析
分析后门模型的权重，尤其是比较它们与干净模型的权重。查看是否存在不寻常的模式、权重值或分布。

In [None]:
import torch
import torch.nn as nn
import numpy as np
# Weight analysis: load a serialized model and summarize its parameter values.
# NOTE(review): torch.load deserializes with pickle; opening an untrusted
# checkpoint can execute arbitrary code -- confirm the file's origin first.
model = torch.load('your_model.pth')  # replace with the path to your model file

# Show the model architecture.
print(model)

# Report per-parameter statistics and keep every parameter as a NumPy array.
all_weights = []
for name, param in model.named_parameters():
    print(
        f"Parameter Name: {name}",
        f"Parameter Shape: {param.shape}",
        f"Parameter Min: {param.min().item()}",
        f"Parameter Max: {param.max().item()}",
        f"Parameter Mean: {param.mean().item()}",
        f"Parameter Std: {param.std().item()}",
        sep="\n",
    )
    all_weights.append(param.data.cpu().numpy())

# Global statistics over all weights, flattened into one 1-D array.
all_weights_array = np.concatenate([w.ravel() for w in all_weights])
print(
    "All Weights Info:",
    f"  Total number of weights: {all_weights_array.size}",
    f"  Min weight: {all_weights_array.min()}",
    f"  Max weight: {all_weights_array.max()}",
    f"  Mean weight: {all_weights_array.mean()}",
    f"  Std weight: {all_weights_array.std()}",
    sep="\n",
)


# 可视化特征图
可视化后门模型的特征图，了解模型如何处理数据。异常特征图可能表明后门存在。

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# Register forward hooks that capture the outputs of selected layers.
def get_interested_layers(model, layer_names):
    """Attach forward hooks to the named submodules and collect their outputs.

    Args:
        model: Module whose ``named_modules()`` are searched.
        layer_names: Collection of submodule names, as reported by
            ``model.named_modules()``, whose outputs should be captured.

    Returns:
        A ``(interested_layers, hooks)`` tuple. ``interested_layers`` is a
        list that receives the output of each hooked layer every time the
        model runs a forward pass, in the order the hooks fire. ``hooks``
        holds the handles returned by ``register_forward_hook``; call
        ``remove()`` on each one when finished.
    """
    interested_layers = []
    hooks = []

    def hook_fn(module, inputs, output):
        # Hooks are only attached to the requested layers, so every call is a
        # wanted one. Filtering by class name here would compare values such
        # as "Conv2d" against names such as "conv1" and capture nothing.
        interested_layers.append(output)

    for layer_name, layer in model.named_modules():
        if layer_name in layer_names:
            hooks.append(layer.register_forward_hook(hook_fn))

    return interested_layers, hooks

# Load the model.
# NOTE(review): torch.load deserializes with pickle; opening an untrusted
# checkpoint can execute arbitrary code -- confirm the file's origin first.
model = torch.load('your_model.pth')  # replace with the path to your model file

# Example input batch.
input_data = torch.randn(1, 3, 224, 224)  # replace with your own input data

# Hook the layers whose feature maps should be captured.
layer_names = ['conv1', 'layer1', 'layer2']  # replace with the layer names you care about
interested_layers, hooks = get_interested_layers(model, layer_names)

# Forward pass. The hooks are removed afterwards even if the pass fails, so
# the model is never left instrumented.
model.eval()
try:
    with torch.no_grad():
        _ = model(input_data)
finally:
    for hook in hooks:
        hook.remove()

# Fail with a clear message instead of an IndexError when a layer was missed.
if len(interested_layers) != len(layer_names):
    raise RuntimeError(
        f"Expected {len(layer_names)} captured outputs but got "
        f"{len(interested_layers)}; check that layer_names match "
        "model.named_modules()."
    )

# Plot every channel of each captured feature map.
# NOTE(review): outputs are stored in the order the hooks fired; this pairing
# assumes layer_names is listed in that same order -- confirm for your model.
for layer, captured in zip(layer_names, interested_layers):
    # Assumes a 4-D (batch, channel, height, width) output; take sample 0.
    feature_map = captured[0]
    num_channels = feature_map.size(0)
    plt.figure()
    # A figure-level title; plt.title would land on an axes that the
    # subplot calls below do not use.
    plt.suptitle(f"Feature Map for {layer}")
    for j in range(num_channels):
        plt.subplot(1, num_channels, j + 1)
        # Index the channel only: the batch axis was already dropped above.
        plt.imshow(feature_map[j].cpu(), cmap='viridis')
        plt.axis('off')
    plt.show()


# 模型行为测试
使用相同的数据集运行后门模型和干净模型，检查它们在正常数据上的性能是否相似。如果后门模型在正常数据上表现异常，可能存在后门。

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Load the backdoored model and the clean model.
# NOTE(review): torch.load deserializes with pickle; opening an untrusted
# checkpoint can execute arbitrary code -- confirm the files' origin first.
backdoor_model = torch.load('backdoor_model.pth')  # replace with the backdoored model path
clean_model = torch.load('clean_model.pth')  # replace with the clean model path

# Preprocessing.
transform = transforms.Compose([
    transforms.Resize(224),  # scales the shorter side to 224; adjust to your model
    # Crop to a fixed 224x224 so images with different aspect ratios end up
    # the same shape and can be stacked into one batch by the DataLoader.
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # adjust to your dataset's statistics
])

# Load the clean (normal) dataset.
normal_dataset = datasets.ImageFolder('path_to_normal_data', transform=transform)  # replace with the normal dataset path
normal_loader = DataLoader(normal_dataset, batch_size=64, shuffle=False)

# Evaluation helper.
def evaluate_model(model, data_loader):
    """Return the top-1 accuracy of ``model`` over ``data_loader`` in percent.

    The model is switched to eval mode and run without gradient tracking.
    Each batch is moved to the device of the model's first parameter, so a
    model loaded onto a GPU can be fed batches that were produced on the CPU.

    Args:
        model: Module mapping a batch of images to per-class scores with the
            class axis at dimension 1.
        data_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage in ``[0, 100]``. Returns ``0.0`` when the
        loader yields no samples instead of dividing by zero.
    """
    model.eval()

    # Models without parameters have no device to align with; leave the
    # batches where they are in that case.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            if device is not None:
                images = images.to(device)
                labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        return 0.0
    return 100 * correct / total

# Score the backdoored model and the clean model on the same normal data.
backdoor_accuracy = evaluate_model(backdoor_model, normal_loader)
clean_accuracy = evaluate_model(clean_model, normal_loader)

print(
    f"Backdoor Model Accuracy on Normal Data: {backdoor_accuracy}%",
    f"Clean Model Accuracy on Normal Data: {clean_accuracy}%",
    sep="\n",
)

# Report whether the backdoored model scored strictly lower than the clean one.
print(
    "The backdoor model performs worse on normal data, indicating the presence of a potential backdoor."
    if backdoor_accuracy < clean_accuracy
    else "The backdoor model performs similarly on normal data as the clean model."
)


# 模型比对
如果有可信的标准模型可用（例如，来自公共数据集的模型），则可以将它与后门模型进行比对，查看是否存在显著的不同。

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Load the backdoored model.
# NOTE(review): torch.load deserializes with pickle; opening an untrusted
# checkpoint can execute arbitrary code -- confirm the file's origin first.
backdoor_model = torch.load('backdoor_model.pth')  # replace with the backdoored model path

# Load a trusted reference model (for example, a publicly released pretrained model).
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=True` -- confirm against the installed version.
standard_model = torch.hub.load('pytorch/vision', 'resnet18', pretrained=True)  # ResNet-18 as an example; swap in another reference model if needed

# Preprocessing.
transform = transforms.Compose([
    transforms.Resize(224),  # scales the shorter side to 224; adjust to your model
    # Crop to a fixed 224x224 so images with different aspect ratios end up
    # the same shape and can be stacked into one batch by the DataLoader.
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # adjust to your dataset's statistics
])

# Load the clean (normal) dataset.
# NOTE(review): the accuracy comparison presumably requires the dataset's label
# indices to match the class indices both models were trained with -- confirm.
normal_dataset = datasets.ImageFolder('path_to_normal_data', transform=transform)  # replace with the normal dataset path
normal_loader = DataLoader(normal_dataset, batch_size=64, shuffle=False)

# Evaluation helper.
def evaluate_model(model, data_loader):
    """Return the top-1 accuracy of ``model`` over ``data_loader`` in percent.

    The model is switched to eval mode and run without gradient tracking.
    Each batch is moved to the device of the model's first parameter, so a
    model loaded onto a GPU can be fed batches that were produced on the CPU.

    Args:
        model: Module mapping a batch of images to per-class scores with the
            class axis at dimension 1.
        data_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage in ``[0, 100]``. Returns ``0.0`` when the
        loader yields no samples instead of dividing by zero.
    """
    model.eval()

    # Models without parameters have no device to align with; leave the
    # batches where they are in that case.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            if device is not None:
                images = images.to(device)
                labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        return 0.0
    return 100 * correct / total

# Score the backdoored model and the reference model on the same normal data.
backdoor_accuracy = evaluate_model(backdoor_model, normal_loader)
standard_model_accuracy = evaluate_model(standard_model, normal_loader)

print(
    f"Backdoor Model Accuracy on Normal Data: {backdoor_accuracy}%",
    f"Standard Model Accuracy on Normal Data: {standard_model_accuracy}%",
    sep="\n",
)

# Report whether the backdoored model scored strictly lower than the reference.
print(
    "The backdoor model performs worse on normal data compared to the standard model, indicating the presence of a potential backdoor."
    if backdoor_accuracy < standard_model_accuracy
    else "The backdoor model performs similarly on normal data as the standard model."
)
