# 1. 基础设置

In [1]:
# 导包
import torch
import os
import argparse
import pandas as pd
import tqdm
import numpy as np
# os.environ['NUMEXPR_MAX_THREADS'] = '48'

# 导入各个指标
import sys
sys.path.append('/home/dengruijun/data/FinTech/PP-Split/')
from ppsplit.quantification.distance_correlation.distCor import distCorMetric
from ppsplit.quantification.fisher_information.dFIL_inverse import dFILInverseMetric
from ppsplit.quantification.shannon_information.mutual_information import MuInfoMetric
from ppsplit.quantification.shannon_information.ULoss import ULossMetric
from ppsplit.quantification.rep_reading.rep_reader import PCA_Reader

# 导入各个baseline模型及其数据集预处理方法
# 模型
from target_model.models.splitnn_utils import split_weights_client
from target_model.models.VGG import VGG,VGG5Decoder,model_cfg
from target_model.models.BankNet import BankNet1,bank_cfg
from target_model.models.CreditNet import CreditNet1,credit_cfg
from target_model.models.PurchaseNet import PurchaseClassifier1,purchase_cfg

# 数据预处理方法
from target_model.data_preprocessing.preprocess_cifar10 import get_cifar10_normalize,deprocess
from target_model.data_preprocessing.preprocess_bank import bank_dataset,preprocess_bank
from target_model.data_preprocessing.preprocess_credit import preprocess_credit
from target_model.data_preprocessing.preprocess_purchase import preprocess_purchase
from target_model.data_preprocessing.dataset import get_one_data

# utils
from ppsplit.utils.utils import create_dir

# 2. pytorch的自动梯度

In [None]:
# Forward pass
import torch

x = torch.ones(5, requires_grad=True)  # input tensor
y = torch.zeros(3, requires_grad=True)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = x @ w + b  # affine map: 5 inputs -> 3 logits
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

# Print some information about the tensors
print(x.shape, x.requires_grad, z.shape, sep="\n")

In [None]:
# Backward pass: back-propagate the scalar loss, then inspect the gradient stored on x
loss.backward()
print(x.grad.size())

In [None]:
# grad/backward: gradient of a tensor-valued (non-scalar) output, summed over outputs
x = torch.ones(5, requires_grad=True)  # input tensor
y = torch.zeros(3, requires_grad=True)  # expected output
w = torch.randn(5, 3)
b = torch.randn(3)
z = x @ w + b

# Back-propagate an all-ones vector: this yields the Jacobian summed over the outputs
z.backward(gradient=torch.ones_like(z))
print(x.grad.shape, x.grad, sep="\n")

# Same computation through torch.autograd.grad
from torch.autograd import grad
x = torch.ones(5, requires_grad=True)  # input tensor
y = torch.zeros(3, requires_grad=True)  # expected output
w = torch.eye(5, 3)
b = torch.randn(3)
z = x @ w + b
# grad() returns a tuple with one entry per input; there is a single input here
(xgrad,) = grad(outputs=z, inputs=x, grad_outputs=torch.ones_like(z))
print(xgrad)


In [None]:
# jacobian
from torch.autograd import grad
from torch.autograd.functional import jacobian

x = torch.ones(5, requires_grad=True)  # input tensor


def forward(x):
    """Return ``x @ eye(5, 3) + b`` where the bias ``b`` is freshly sampled on every call."""
    y = torch.zeros(3, requires_grad=True)  # expected output (not used by the computation)
    w, b = torch.eye(5, 3), torch.randn(3)
    return x @ w + b


# Full Jacobian dz/dx, followed by its sum over the output axis
jac = jacobian(forward, x)
print(jac.shape, jac, jac.sum(dim=0), sep="\n")

# 3. 试探FIL计算

In [None]:
# FIL computation:
import torch.autograd.functional as F

# Inspect the size of what the client part of the split model outputs
# for i in range(7):
vgg5 = VGG('Client', 'VGG5', 1, model_cfg)

client_outputs = vgg5(images)
print('outputs.shape:',client_outputs.shape)
jacs = F.jacobian(vgg5, images)
print('jacobian: ', jacs)
# print('output size:',torch.prod(torch.tensor(list(client_outputs.shape))[1:]))
print('output size:',torch.tensor(list(client_outputs.shape)).prod())

# Jacobian for each split layer, 0 through 6
for i in range(7):
    vgg5 = VGG('Client', 'VGG5', i, model_cfg)
    client_outputs = vgg5(images)
    print('outputs.shape:',client_outputs.shape)
    jacs = F.jacobian(vgg5, images)
    print('jacobian: ', jacs)
    # print('output size:',torch.prod(torch.tensor(list(client_outputs.shape))[1:]))
    print('output size:',torch.tensor(list(client_outputs.shape)).prod())



In [None]:
# FIL computation -- one working path found by experimentation
import torch.autograd.functional as F
# Parameter:
sigma = 0.01  # scale of the noise added to the client outputs below

# Compute the Jacobian
# Take one batch of data
train_iter=iter(trainloader)
# Use the built-in next(); the original line called the removed `.next()` method
# and ended in a stray "6", which made the whole cell a syntax error.
inputs,labels = next(train_iter)
print("inputs.shape: ",inputs.shape)
print("labels.shape: ",labels.shape)
print(f"input.requires_grad: {inputs.requires_grad}")

# Load the model:
# vgg5 = VGG('Client', 'VGG5', 1, model_cfg)

# Forward pass:
inputs.requires_grad_(True) # gradients w.r.t. the inputs are needed
outputs = vgg5(inputs)
outputs = outputs + sigma * torch.randn_like(outputs) # add Gaussian noise (randn_like samples N(0, 1))
print("outputs.shape: ",outputs.shape)

# 1. Backward pass to obtain the Jacobian
# outputs.backward(torch.ones_like(outputs))
# J = inputs.grad / sigma # compute the Jacobian
# print(f"J1.shape: {J.shape}")

# 2. Recompute the Jacobian with torch.autograd.functional.jacobian
J = F.jacobian(vgg5, inputs)
# print(f"J2.shape: {J.shape}, J2.prod: {torch.prod(torch.tensor(list(J.shape)))}")
# NOTE(review): this reshape keeps J.shape[0] as a leading axis, so the element
# count only matches when J.shape[0] == 1 (i.e. a batch of one) -- confirm.
J = J.reshape(J.shape[0],outputs.numel(),inputs.numel())
print(f"J2.shape: {J.shape}, J2.prod: {torch.prod(torch.tensor(list(J.shape)))}")

# eta as computed in the original paper's code
# J = model.influence_jacobian(train_data)[:, :, :-1] / args.sigma  # FIL (gradient) Jacobian
# etas = J.pow(2).sum(1).mean(1).sqrt() # dFIL (no longer a spectral norm at this point)

# eta, own exploration:
# NOTE(review): the reference line above divides J by sigma; here J is used
# unscaled -- confirm whether the 1/sigma factor was dropped on purpose.
I = torch.matmul(J[0].t(), J[0])
dFIL = I.trace().div(inputs.numel())
eta = dFIL.sqrt()
print(f"eta: {eta}")


In [None]:
# Two requirements of dFIL: differentiable + unbiased
# Probe: the gradient autograd reports for ReLU when every input is exactly 0
# x = torch.rand_like(torch.Tensor([1,5]))
x = torch.Tensor([0,0]).requires_grad_(True)
print(x.grad)  # no backward pass has run yet
y = torch.nn.ReLU()
z = y(x).sum()
# z = torch.autograd.functional.jacobian(y, x)
z.backward()
print(x, y, z, x.grad, sep="\n")

# 4. 现成函数调用

In [2]:
# Experiment configuration read by the cells below
args = dict(
    device=torch.device("cuda:1" if torch.cuda.is_available() else "cpu"),
    # device=torch.device("cpu"),
    dataset='CIFAR10',
    # dataset='bank',
    # dataset='credit',
    # dataset='purchase',
    result_dir='20240702-FIL/',
    batch_size=1,
    noise_scale=0,  # defence setting (passed to VGG as noise_scale)
)
print(args['device'])

cuda:1


In [3]:
# Load the model and dataset, and cut the client_model out of the full ("unit") model
if args['dataset']=='CIFAR10':
    # Hyper-parameters
    testset_len = 10000 # length of the whole test set (10000 samples per pass)
    # split_layer_list = list(range(len(model_cfg['VGG5'])))
    split_layer = 1 # maybe fix this to 3?
    test_num = 1 # experiment index

    # Key paths
    unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/VGG5/BN+Tanh/VGG5-params-20ep.pth' # VGG5-BN+Tanh # holds model parameters only, not the model structure
    results_dir  = f"../../results/{args['result_dir']}/VGG5/{test_num}/"
    decoder_route = f"../../results/{args['result_dir']}/VGG5/{test_num}/Decoder-layer{split_layer}.pth"

    # Dataset loading
    trainloader,testloader = get_cifar10_normalize(batch_size = 1)
    one_data_loader = get_one_data(testloader,batch_size = args['batch_size']) # loader over the first test sample

    # Cut out the client model
    # vgg5_unit.load_state_dict(torch.load(unit_net_route,map_location=torch.device('cpu'))) # the full model
    client_net = VGG('Client','VGG5',split_layer,model_cfg,noise_scale=args['noise_scale'])
    # Deserialize onto the CPU so a checkpoint saved on a CUDA device still loads
    # when that device is absent; the model is moved to args['device'] below.
    pweights = torch.load(unit_net_route, map_location=torch.device('cpu'))
    if split_layer < len(model_cfg['VGG5']):
        pweights = split_weights_client(pweights,client_net.state_dict())
    client_net.load_state_dict(pweights)
    # NOTE(review): client_net is never switched to eval() in this cell; the
    # checkpoint path suggests BatchNorm layers -- confirm the intended mode.

else:
    # Fail with an explicit message instead of silently terminating the interpreter
    raise ValueError(f"Unsupported dataset {args['dataset']!r}: only 'CIFAR10' is handled in this cell")

client_net.to(args['device'])
create_dir(results_dir)

features.0.weight
features.0.bias
features.1.weight
features.1.bias
features.1.running_mean
features.1.running_var
features.1.num_batches_tracked


In [28]:
# My own implementation:
# inverse dFIL metric computation

eta_same_layer_list = []
eta_diff_layer_list=[]

metric = dFILInverseMetric()
# Iterate over the loader and compute the inverse dFIL of every batch
# for j, data in enumerate(tqdm.tqdm(testloader)):
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # loader built by get_one_data from testloader
    # if j < 31705:
        # continue
    inputs, labels = data
    inputs = inputs.to(args['device'])
    labels = labels.to(args['device'])

    # inference
    outputs = client_net(inputs)

    eta = metric.quantify(model=client_net, inputs=inputs, outputs=outputs, with_outputs=True)
    # print
    # print(str(j)+": "+str(eta.item()))
    eta_same_layer_list.append(eta)
eta_diff_layer_list.append(eta_same_layer_list)

# Store the results in a CSV file
matrix = np.array(eta_diff_layer_list) # can get large
transpose = matrix.transpose() # one row per sample, one column per layer
pd.DataFrame(data=transpose, columns=[split_layer]).to_csv(results_dir + f'inv_dFIL.csv',index=False)


100%|██████████| 1/1 [00:10<00:00, 10.20s/it]


In [25]:
# FIL 计算函数
import torch.autograd.functional as F
import torch
import time

# nips23
from torch.autograd.functional import jvp
import random
import math

def calc_tr(net, x, device, subsample=-1, jvp_parallelism=1): # adapted from the NeurIPS'23 reference code
    '''
    Accumulate squared Jacobian-vector products of ``net.forward`` and return
    the inverse of their per-dimension average.

    For every selected input coordinate ``k`` a one-hot direction is pushed
    through ``jvp`` and the squared result is summed per batch element. The
    total is divided by the input dimension ``d`` and then inverted.

    Args:
        net: object exposing ``forward``; it is called on ``x[i]`` for each ``i``.
        x: input tensor; the first axis is iterated as the batch axis.
        device: device on which the accumulator tensor is placed.
        subsample: if > 0, number of randomly chosen input coordinates to use
            (the sum is rescaled by ``d / len(samples)``); otherwise all ``d``.
        jvp_parallelism: number of coordinates grouped into one chunk.

    Returns:
        ``(value, vals)``: ``value`` is the Python float taken from the
        accumulator via ``.item()`` (so the accumulator must hold exactly one
        element), ``vals`` is the forward output of the last chunk's first
        direction with axis 1 squeezed.
    '''
    print(f'x.shape: {x.shape}')

    # JVP of net.forward at x along direction tgt; returns (outputs, jvp result)
    def jvp_func(x, tgt):
        # return jvp(net.forward_first, (x,), (tgt,))
        return jvp(net.forward, (x,), (tgt,))

    n_batch = x.shape[0]
    # Number of elements of one batch entry after flattening (the input dimension)
    d = x[0].flatten().shape[0]

    # One accumulator slot per batch entry
    tr = torch.zeros(n_batch, dtype=x.dtype).to(device)

    # Randomly subsample coordinates for faster execution (at a cost in accuracy)
    if subsample > 0:
        samples = random.sample(range(d), min(d, subsample))
    else:
        samples = range(d)

    def one_hot_direction(k):
        # Zeros shaped like x with flattened coordinate k set to 1 for every batch entry
        flat = torch.zeros_like(x).reshape(n_batch, -1)
        flat[:, k] = 1.
        return flat.reshape(x.shape)

    def helper(tgt):
        # Run the JVP separately for each batch entry (loop used in place of vmap)
        pairs = [jvp_func(x[i], tgt[i]) for i in range(n_batch)]
        vals = torch.stack([out for out, _ in pairs])
        grad = torch.stack([tangent for _, tangent in pairs])
        # vals, grad = vmap(jvp_func, randomness='same')(x, tgt)
        # Sum of squared JVP entries over every axis except the batch axis
        return (grad * grad).sum(dim=tuple(range(1, len(grad.shape)))), vals

    # Process the selected coordinates chunk by chunk, jvp_parallelism at a time
    for j in range(math.ceil(len(samples) / jvp_parallelism)):
        chunk = samples[j*jvp_parallelism:(j+1)*jvp_parallelism]
        tgts = torch.stack([one_hot_direction(k) for k in chunk])

        # Plain loop used in place of vmap
        results = [helper(item) for item in tgts]
        trs = torch.stack([sq for sq, _ in results])
        vals = torch.stack([out for _, out in results])
        # trs, vals = vmap(helper, randomness='same')(tgts) # randomness for randomness control of dropout

        # vals are stacked results that are repeated by d (should be all the same)
        tr += trs.sum(dim=0)

    # Scale if subsampled
    if subsample > 0:
        tr *= d / len(samples)

    tr = tr/(d*1.0)
    tr = 1.0/tr

    print('tr: ',tr.shape, tr)
    return tr.cpu().item(), vals[0].squeeze(1)  # squeeze removes one dimension jvp puts

In [27]:
# Results from the score-matching code path

# sys.path.append('/home/dengruijun/data/FinTech/VFL/6_quantification/FIL_instance_encoding-nips23/sliced_score_matching/')
# sys.path.append('/home/dengruijun/data/FinTech/VFL/6_quantification/FIL_instance_encoding-nips23/')
# from util import calc_tr
# from evaluate_scores import compute_scores, evaluate_scores

eta_same_layer_list = []
eta_diff_layer_list=[]

# metric_trace = dFILInverseMetric()
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # loader built by get_one_data from testloader
    # if j < 31705:
        # continue
    inputs, labels = data
    inputs = inputs.to(args['device'])
    labels = labels.to(args['device'])

    # inference
    # outputs = client_net(inputs)

    # eta = metric_trace.calc_tr(net=client_net, x=inputs, device=args['device'])
    # calc_tr feeds x[i] to the network one entry at a time, so prepend an axis
    inputs = inputs[None]
    eta,val = calc_tr(net=client_net, x=inputs, device=args['device'])
    # print
    # print(str(j)+": "+str(eta.item()))
    eta_same_layer_list.append(eta)
eta_diff_layer_list.append(eta_same_layer_list)

# Store the results in a CSV file
matrix = np.array(eta_diff_layer_list) # can get large
print("matrix: ",matrix)
transpose = matrix.transpose() # one row per sample, one column per layer
pd.DataFrame(data=transpose, columns=[split_layer]).to_csv(results_dir + f'inv_dFIL_maeng.csv',index=False)



  0%|          | 0/1 [00:00<?, ?it/s]

x.shape: torch.Size([1, 1, 3, 32, 32])


100%|██████████| 1/1 [00:18<00:00, 18.02s/it]

tr:  torch.Size([1]) tensor([0.0691], device='cuda:1')
matrix:  [[0.0691408]]



