# 1. 基础设置

In [54]:
# 导包
import torch
import os
import argparse
import pandas as pd
import tqdm
# NumPy must be bound to `np`: later cells call np.array(...) to build result matrices.
import numpy as np
# os.environ['NUMEXPR_MAX_THREADS'] = '48'

# 导入各个指标
import sys
sys.path.append('/home/dengruijun/data/FinTech/PP-Split/')
from ppsplit.quantification.distance_correlation.distCor import distCorMetric
from ppsplit.quantification.fisher_information.dFIL_inverse import dFILInverseMetric
from ppsplit.quantification.shannon_information.mutual_information import MuInfoMetric
from ppsplit.quantification.shannon_information.ULoss import ULossMetric
from ppsplit.quantification.rep_reading.rep_reader import PCA_Reader
from ppsplit.quantification.shannon_information.ITE_tools import Shannon_quantity

# 导入各个baseline模型及其数据集预处理方法
# 模型
from target_model.models.splitnn_utils import split_weights_client
from target_model.models.VGG import VGG,VGG5Decoder,model_cfg
from target_model.models.BankNet import BankNet1,bank_cfg
from target_model.models.CreditNet import CreditNet1,credit_cfg
from target_model.models.PurchaseNet import PurchaseClassifier1,purchase_cfg

# 数据预处理方法
from target_model.data_preprocessing.preprocess_cifar10 import get_cifar10_normalize,deprocess
from target_model.data_preprocessing.preprocess_bank import bank_dataset,preprocess_bank,preprocess_bank_dataset
from target_model.data_preprocessing.preprocess_credit import preprocess_credit
from target_model.data_preprocessing.preprocess_purchase import preprocess_purchase
from target_model.data_preprocessing.dataset import get_one_data

# utils
from ppsplit.utils.utils import create_dir

In [95]:
# Experiment-wide configuration read by the cells below.
args = dict(
    # Second GPU when CUDA is available, otherwise the CPU.
    device=torch.device("cuda:1") if torch.cuda.is_available() else torch.device("cpu"),
    # device=torch.device("cpu"),
    # Active dataset; the loading cell also handles 'bank', 'credit' and 'purchase'.
    dataset='CIFAR10',
    result_dir='20240702-FIL/',
    batch_size=2,
    noise_scale=0,  # defence knob forwarded to the client-model constructors
)
print(args['device'])

cuda:1


In [96]:
# Load the dataset and the trained full ("unit") model for the selected dataset,
# then carve the client-side sub-model out of it.
#
# Checkpoints are loaded with map_location=args['device'] so that the CPU
# fallback selected in `args` also works for checkpoints that were saved from
# a GPU (a plain torch.load would try to restore them onto the saving device).
if args['dataset']=='CIFAR10':
    # Hyper-parameters
    testset_len = 10000 # length of the whole test set (10000 samples)
    # split_layer_list = list(range(len(model_cfg['VGG5'])))
    split_layer = 1 # author's note: maybe fix this to 3?
    test_num = 2 # experiment index

    # Key paths
    unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/VGG5/BN+Tanh/VGG5-params-20ep.pth' # VGG5-BN+Tanh; stores parameters only, not the model structure
    results_dir  = f"../../results/{args['result_dir']}/VGG5/{test_num}/"
    decoder_route = f"../../results/{args['result_dir']}/VGG5/{test_num}/Decoder-layer{split_layer}.pth"

    # Dataset loading
    trainloader,testloader = get_cifar10_normalize(batch_size = 1)
    one_data_loader = get_one_data(testloader,batch_size = args['batch_size']) # loader restricted to the first test data

    # Split off the client model
    # vgg5_unit.load_state_dict(torch.load(unit_net_route,map_location=torch.device('cpu'))) # full model
    client_net = VGG('Client','VGG5',split_layer,model_cfg,noise_scale=args['noise_scale'])
    pweights = torch.load(unit_net_route, map_location=args['device'])
    if split_layer < len(model_cfg['VGG5']):
        pweights = split_weights_client(pweights,client_net.state_dict())
    client_net.load_state_dict(pweights)

elif args['dataset']=='credit':
    # Hyper-parameters
    test_num = 1 # experiment index
    testset_len = 61503 # for the mutual information
    split_layer_list = [0,3,6,9]
    split_layer = 3
    # split_layer_list = ['linear1', 'linear2']

    # Key paths
    unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/credit/credit-20ep_params.pth'
    results_dir  = f"../results/{args['result_dir']}/Credit/{test_num}/"
    decoder_route = f"../results/{args['result_dir']}/Credit/{test_num}/Decoder-layer{split_layer}.pth"

    # Dataset loading
    trainloader,testloader = preprocess_credit(batch_size=1)
    one_data_loader = get_one_data(testloader,batch_size = args['batch_size']) # loader restricted to the first test data

    # Split off and load the client model
    client_net = CreditNet1(layer=split_layer,noise_scale=args['noise_scale'])
    pweights = torch.load(unit_net_route, map_location=args['device'])
    if split_layer < len(credit_cfg):
        pweights = split_weights_client(pweights,client_net.state_dict())
    client_net.load_state_dict(pweights)

elif args['dataset']=='bank':
    # Hyper-parameters
    test_num = 1 # experiment index
    testset_len=8238
    # split_layer_list = ['linear1', 'linear2']
    split_layer_list = [0,2,4,6]
    split_layer = 2

    # Key paths
    unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/Bank/bank-20ep_params.pth'
    results_dir  = f"../results/{args['result_dir']}/Bank/{test_num}/"
    decoder_route = f"../results/{args['result_dir']}/Bank/{test_num}/Decoder-layer{split_layer}.pth"

    # Dataset loading
    trainloader,testloader = preprocess_bank(batch_size=testset_len)
    # NOTE(review): one_data_loader is not created in this branch, so later cells
    # that iterate over it will fail for the bank dataset unless it is re-enabled.
    # one_data_loader = get_one_data(testloader,batch_size = args['batch_size']) # loader restricted to the first test data

    # Model loading
    client_net = BankNet1(layer=split_layer,noise_scale=args['noise_scale'])
    pweights = torch.load(unit_net_route, map_location=args['device'])
    if split_layer < len(bank_cfg):
        pweights = split_weights_client(pweights,client_net.state_dict())
    client_net.load_state_dict(pweights)
elif args['dataset']=='purchase':
    # Hyper-parameters
    test_num = 1 # experiment index
    testset_len = 39465 # test len
    # split_layer_list = [0,1,2,3,4,5,6,7,8]
    split_layer = 3

    # Key paths
    unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/Purchase100/Purchase_bestmodel_param.pth'
    results_dir = f"../../results/{args['result_dir']}/Purchase/{test_num}/"
    decoder_route = f"../../results/{args['result_dir']}/Purchase/{test_num}/Decoder-layer{split_layer}.pth"

    # Dataset loading
    trainloader,testloader = preprocess_purchase(batch_size=1)
    one_data_loader = get_one_data(testloader,batch_size = args['batch_size']) # loader restricted to the first test data

    # Model loading
    client_net = PurchaseClassifier1(layer=split_layer,noise_scale=args['noise_scale'])
    # pweights = torch.load(unit_net_route,map_location=torch.device('cpu'))
    pweights = torch.load(unit_net_route, map_location=args['device'])
    if split_layer < len(purchase_cfg):
        pweights = split_weights_client(pweights,client_net.state_dict())
    client_net.load_state_dict(pweights)

else:
    # Fail loudly with a message instead of terminating the interpreter/kernel.
    raise ValueError(f"Unsupported dataset: {args['dataset']!r}")

client_net.to(args['device'])
create_dir(results_dir)

features.0.weight
features.0.bias
features.1.weight
features.1.bias
features.1.running_mean
features.1.running_var
features.1.num_batches_tracked


# 2. pytorch的自动梯度

In [2]:
# Forward pass of a tiny affine model, used to illustrate autograd
import torch

x = torch.ones(5, requires_grad=True)  # input tensor
y = torch.zeros(3, requires_grad=True)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

# Print basic information about the tensors
print(x.shape, x.requires_grad, z.shape, sep='\n')

torch.Size([5])
True
torch.Size([3])


In [None]:
# Backward pass: back-propagate from the scalar `loss` built in the previous
# cell, then show the shape of the gradient accumulated on the input x
loss.backward()
print(x.grad.shape)

In [None]:
# Gradient of a vector-valued output: passing an all-ones vector to
# backward()/grad() yields the Jacobian summed over the output dimension
x = torch.ones(5, requires_grad=True)  # input tensor
y = torch.zeros(3, requires_grad=True)  # expected output
w = torch.randn(5, 3)
b = torch.randn(3)
z = x @ w + b

# Variant 1: Tensor.backward with an explicit upstream gradient
z.backward(gradient=torch.ones_like(z))
print(x.grad.shape, x.grad, sep='\n')

# Variant 2: torch.autograd.grad (returns the gradient instead of storing it)
from torch.autograd import grad
x = torch.ones(5, requires_grad=True)  # input tensor
y = torch.zeros(3, requires_grad=True)  # expected output
w = torch.eye(5, 3)
b = torch.randn(3)
z = x @ w + b
(xgrad,) = grad(outputs=z, inputs=x, grad_outputs=torch.ones_like(z))
print(xgrad)


In [None]:
# Full Jacobian via torch.autograd.functional.jacobian
from torch.autograd.functional import jacobian
from torch.autograd import grad
x = torch.ones(5, requires_grad=True)  # input tensor

def forward(x):
    """Return x @ eye(5, 3) + b, where the bias b is freshly drawn on every call."""
    y = torch.zeros(3, requires_grad=True)  # expected output (not used by the computation)
    bias = torch.randn(3)
    return x @ torch.eye(5, 3) + bias


# Compute the Jacobian, then its sum over the output dimension
jac = jacobian(forward, x)
print(jac.shape, jac, jac.sum(dim=0), sep='\n')

# 3. 试探FIL计算

In [None]:
# FIL computation:
import torch.autograd.functional as F
# Inspect the size of what a split (client) model sends out
# for i in range(7):
# NOTE(review): `images` is not defined in this cell; it has to come from
# another cell that was run earlier in the session.
vgg5 = VGG('Client', 'VGG5', 1, model_cfg)

client_outputs = vgg5(images)
print('outputs.shape:',client_outputs.shape)
jacs = F.jacobian(vgg5, images)
print('jacobian: ', jacs)
# print('output size:',torch.prod(torch.tensor(list(client_outputs.shape))[1:]))
print('output size:',torch.prod(torch.tensor(list(client_outputs.shape))))

# Jacobians for each split layer 0..6 (same steps as above, repeated per layer)
import torch.autograd.functional as F
for i in range(7):
    vgg5 = VGG('Client', 'VGG5', i, model_cfg)
    client_outputs = vgg5(images)
    print('outputs.shape:',client_outputs.shape)
    jacs = F.jacobian(vgg5, images)
    print('jacobian: ', jacs)
    # print('output size:',torch.prod(torch.tensor(list(client_outputs.shape))[1:]))
    print('output size:',torch.prod(torch.tensor(list(client_outputs.shape))))



In [None]:
# FIL computation: a first working path found by experimentation
import torch.autograd.functional as F
# Parameters:
sigma = 0.01  # scale of the Gaussian noise added to the client outputs

# Compute the Jacobian
# Take one batch of data. Iterators follow the Python 3 protocol, so the
# built-in next() is used (the iterator has no usable .next() method).
train_iter=iter(trainloader)
inputs,labels = next(train_iter)
print("inputs.shape: ",inputs.shape)
print("labels.shape: ",labels.shape)
print(f"input.requires_grad: {inputs.requires_grad}")

# Load the model (reuses the `vgg5` created by an earlier cell):
# vgg5 = VGG('Client', 'VGG5', 1, model_cfg)

# Forward pass:
inputs.requires_grad_(True) # gradients w.r.t. the inputs are needed
outputs = vgg5(inputs)
outputs = outputs + sigma * torch.randn_like(outputs) # add noise: standard-normal samples scaled by sigma
print("outputs.shape: ",outputs.shape)

# 1. Backward pass to obtain the (summed) Jacobian
# outputs.backward(torch.ones_like(outputs))
# J = inputs.grad / sigma # compute the Jacobian
# print(f"J1.shape: {J.shape}")

# 2. Recompute the Jacobian with torch.autograd.functional.jacobian
J = F.jacobian(vgg5, inputs)
# print(f"J2.shape: {J.shape}, J2.prod: {torch.prod(torch.tensor(list(J.shape)))}")
# NOTE(review): the element counts only match when the leading (batch)
# dimension of the model output is 1 — confirm the loader's batch size.
J = J.reshape(J.shape[0],outputs.numel(),inputs.numel())
print(f"J2.shape: {J.shape}, J2.prod: {torch.prod(torch.tensor(list(J.shape)))}")

# eta as computed in the original paper's code
# J = model.influence_jacobian(train_data)[:, :, :-1] / args.sigma  # FIL (gradient) Jacobian
# etas = J.pow(2).sum(1).mean(1).sqrt() # dFIL (no longer a spectral norm at this point)

# eta, own exploratory version: trace(J^T J) / input size, then square root
I = torch.matmul(J[0].t(), J[0])
dFIL = I.trace().div(inputs.numel())
eta = dFIL.sqrt()
print(f"eta: {eta}")


In [None]:
# Two requirements of dFIL: differentiable + unbiased.
# Probe which gradient autograd reports for ReLU when the input is exactly 0.
# x = torch.rand_like(torch.Tensor([1,5]))
x = torch.Tensor([0, 0]).requires_grad_(True)
print(x.grad)
y = torch.nn.ReLU()
z = torch.sum(y(x))
# z = torch.autograd.functional.jacobian(y, x)
z.backward()
print(x, y, z, x.grad, sep='\n')

# 4. 现成函数调用

In [6]:
# My own implementation:
# compute the inverse-dFIL metric

eta_same_layer_list = []
eta_diff_layer_list=[]

metric = dFILInverseMetric()
# Iterate over a data loader and compute the inverse dFIL for every batch
# for j, data in enumerate(tqdm.tqdm(testloader)):
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # only the loader produced by get_one_data
    # if j < 31705:
        # continue
    inputs, labels = data
    inputs, labels = inputs.to(args['device']), labels.to(args['device'])
    
    # inference
    outputs = client_net(inputs)

    eta = metric.quantify(model=client_net, inputs=inputs, outputs=outputs, with_outputs=True)
    # print
    # print(str(j)+": "+str(eta.item()))
    eta_same_layer_list.append(eta)
eta_diff_layer_list.append(eta_same_layer_list)

# Store the results in a CSV file
matrix = np.array(eta_diff_layer_list) # can get fairly large
transpose = matrix.T # one row per record, one column per layer
pd.DataFrame(data=transpose, columns=[split_layer]).to_csv(results_dir + f'inv_dFIL.csv',index=False)


100%|██████████| 1/1 [00:10<00:00, 10.71s/it]


In [25]:
# FIL 计算函数
import torch.autograd.functional as F
import torch
import time

# nips23
from torch.autograd.functional import jvp
import random
import math

def calc_tr(net, x, device, subsample=-1, jvp_parallelism=1): # adapted from the NeurIPS'23 reference code
    '''
    Estimate an inverse, dimension-normalised squared-Jacobian trace of ``net.forward``.

    For every selected flattened input coordinate k, a Jacobian-vector product
    along the k-th unit direction is taken for each ``x[i]`` and its squared
    entries are summed. The sums are accumulated over all selected coordinates,
    rescaled when only a subsample was used, divided by the flattened size ``d``
    of ``x[0]`` and finally inverted.

    Parameters
    ----------
    net : object exposing ``forward``; it is called on ``x[i]`` (leading axis removed).
    x : tensor whose leading axis is iterated over.
    device : device on which the accumulator is allocated.
    subsample : if > 0, number of randomly chosen coordinates (capped at d);
        otherwise all d coordinates are used.
    jvp_parallelism : number of coordinates handled per outer iteration.

    Returns
    -------
    (float, tensor) : the inverted value as a Python float (``.item()`` is used,
        so the accumulator must hold exactly one element, i.e. ``x.shape[0] == 1``)
        and the forward outputs of the last chunk's first direction, with
        ``squeeze(1)`` applied.
    '''
    print(f'x.shape: {x.shape}')

    def jvp_func(x, tgt):
        # (outputs, Jacobian-vector product) of net.forward at x along tgt
        return jvp(net.forward, (x,), (tgt,))

    n_rows = x.shape[0]
    d = x[0].flatten().shape[0]  # flattened size of one leading-axis entry
    tr = torch.zeros(n_rows, dtype=x.dtype).to(device)

    # Optionally evaluate only a random subset of coordinates for speed
    samples = random.sample(range(d), min(d, subsample)) if subsample > 0 else range(d)

    def one_hot(k):
        # Direction tensor shaped like x with coordinate k set to 1 in every row
        flat = torch.zeros_like(x).reshape(n_rows, -1)
        flat[:, k] = 1.
        return flat.reshape(x.shape)

    def helper(tgt):
        # Plain loop over the leading axis (stands in for the vmap of the reference code)
        results = [jvp_func(x[i], tgt[i]) for i in range(n_rows)]
        vals = torch.stack([out for out, _ in results])
        prod = torch.stack([jv for _, jv in results])
        reduce_dims = tuple(range(1, len(prod.shape)))
        return torch.sum(prod * prod, dim=reduce_dims), vals

    n_chunks = math.ceil(len(samples) / jvp_parallelism)
    for chunk_idx in range(n_chunks):
        lo = chunk_idx * jvp_parallelism
        chunk = samples[lo:lo + jvp_parallelism]
        tgts = torch.stack([one_hot(k) for k in chunk])

        per_direction = [helper(item) for item in tgts]
        trs = torch.stack([sq for sq, _ in per_direction])
        # vals holds the same forward outputs repeated once per direction
        vals = torch.stack([out for _, out in per_direction])

        tr += trs.sum(dim=0)

    # Scale up when only a subsample of the coordinates was evaluated
    if subsample > 0:
        tr *= d / len(samples)

    tr = 1.0 / (tr / (d * 1.0))

    print('tr: ',tr.shape, tr)
    return tr.cpu().item(), vals[0].squeeze(1)  # squeeze removes one dimension jvp puts

In [5]:
# maeng fisher: inverse dFIL computed through the metric's calc_tr method

eta_same_layer_list = []
eta_diff_layer_list=[]

metric_trace = dFILInverseMetric()
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # only the loader produced by get_one_data
    # if j < 31705:
        # continue
    inputs, labels = data
    inputs, labels = inputs.to(args['device']), labels.to(args['device'])
    
    # inference
    # outputs = client_net(inputs)

    # Add a leading axis of size 1 before handing the batch to calc_tr
    inputs = inputs.unsqueeze(0)
    eta,val = metric_trace.calc_tr(net=client_net, x=inputs, device=args['device'])
    # print
    # print(str(j)+": "+str(eta.item()))
    eta_same_layer_list.append(eta)
eta_diff_layer_list.append(eta_same_layer_list)

# Store the results in a CSV file
matrix = np.array(eta_diff_layer_list) # can get fairly large
print("matrix: ",matrix)
transpose = matrix.T # one row per record, one column per layer
pd.DataFrame(data=transpose, columns=[split_layer]).to_csv(results_dir + f'inv_dFIL_maeng.csv',index=False)



  0%|          | 0/1 [00:00<?, ?it/s]

x.shape: torch.Size([1, 2, 3, 32, 32])


100%|██████████| 1/1 [00:39<00:00, 39.37s/it]

matrix:  [[0.00075323]]





# 5. effective information

In [99]:
# effective fisher 计算函数
import torch.autograd.functional as F
import torch
import time

# nips23
from torch.autograd.functional import jvp
import random
import math

import pandas as pd

def computing_det_with_outputs(model, inputs, outputs, sigmas): # sigmas: presumably the noise variance (sigma squared) — confirm with callers
    """Return the batch mean of 0.5 * (n * log(2*pi*e) - logdet(I)) as a NumPy value.

    For every sample, the Jacobian J of ``model`` at that sample is reshaped to
    (output_size, input_size), I = (1 / sigmas) * J^T J is formed, dumped to
    ``'<sample index>.csv'`` in the current working directory and printed
    together with its log-determinant; n is the per-sample input size.

    Parameters
    ----------
    model : callable passed to ``torch.autograd.functional.jacobian``.
    inputs : batched tensor; samples are taken along the first axis.
    outputs : batched tensor; only ``outputs[0].numel()`` is read.
    sigmas : scalar divisor applied to J^T J.
    """
    batch_size = inputs.shape[0]      # number of samples in the batch
    output_size = outputs[0].numel()  # flattened output size of one sample
    input_size = inputs[0].numel()    # flattened input size of one sample
    gaussian_term = input_size * torch.log(2*torch.pi*torch.exp(torch.tensor(1.0)))
    effect_fisher_sum = 0.0

    # Process the samples one at a time
    for i in range(batch_size):
        sample = inputs[i].unsqueeze(0)

        # Jacobian of the model output w.r.t. this sample, as (out_size, in_size)
        J = F.jacobian(model, sample).reshape(output_size, input_size)
        I = 1.0/(sigmas)*torch.matmul(J.t(), J)

        # Debug dump of the matrix, one file per sample index
        pd.DataFrame(I.cpu().detach().numpy()).to_csv(f'{i}.csv',index=False,header=False)

        print("I: ", I)
        print('det I: ', I.det().log() )
        k = torch.logdet(I)
        print('log det I: ',k )

        effect_fisher = 0.5 * (gaussian_term - k)
        effect_fisher_sum+=effect_fisher
        print("effect_fisher: ",effect_fisher)

    return (effect_fisher_sum / batch_size).cpu().detach().numpy()

In [101]:
# Compute the "effective Fisher" quantity for the current split layer

effecInfo_same_layer_list = []
Fishermetric = dFILInverseMetric()
# for j, data in enumerate(tqdm.tqdm(testloader)): # iterate over the whole testloader
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # only the loader produced by get_one_data
    images, labels = data
    images, labels = images.to(args['device']), labels.to(args['device'])
    with torch.no_grad():
        # inference
        outputs = client_net(images).clone().detach()
        # fisher
        # NOTE(review): this second forward pass overwrites the detached copy
        # computed just above.
        outputs = client_net(images)
        # images = images.unsqueeze(0)
        # effectFisher = Fishermetric._computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)
        effectFisher = computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)

        effecInfo_same_layer_list.append(effectFisher)
# Report the mean over all processed batches
print(f"Layer {split_layer} effecInfo: {sum(effecInfo_same_layer_list)/len(effecInfo_same_layer_list)}")


  0%|          | 0/1 [00:00<?, ?it/s]

I:  tensor([[ 4.7762e+02, -1.7653e+01, -3.7860e+01,  ..., -6.3836e-02,
         -2.6662e-01,  5.2777e-02],
        [-1.7653e+01,  6.7384e+02,  2.4491e+02,  ..., -4.7750e-01,
         -7.4096e-01, -4.3659e-01],
        [-3.7860e+01,  2.4491e+02,  6.6185e+02,  ..., -3.0760e-02,
         -1.7476e-01, -2.6107e-01],
        ...,
        [-6.3836e-02, -4.7750e-01, -3.0760e-02,  ...,  7.6587e+02,
          1.3968e+02, -4.9137e+01],
        [-2.6662e-01, -7.4096e-01, -1.7476e-01,  ...,  1.3968e+02,
          9.6689e+02,  3.5150e+01],
        [ 5.2777e-02, -4.3659e-01, -2.6107e-01,  ..., -4.9137e+01,
          3.5150e+01,  7.0952e+02]], device='cuda:1')
det I:  tensor(nan, device='cuda:1')
log det I:  tensor(nan, device='cuda:1')
effect_fisher:  tensor(nan, device='cuda:1')


100%|██████████| 1/1 [00:32<00:00, 32.77s/it]

I:  tensor([[ 3.9688e+02, -1.0111e+02, -1.0212e+02,  ..., -2.9194e-01,
         -8.9430e-01, -4.9021e-01],
        [-1.0111e+02,  1.1254e+03, -9.1680e+01,  ..., -1.1800e+00,
         -2.1911e+00, -1.5687e+00],
        [-1.0212e+02, -9.1680e+01,  9.4364e+02,  ..., -7.5056e-01,
         -1.7241e+00, -6.7552e-01],
        ...,
        [-2.9194e-01, -1.1800e+00, -7.5056e-01,  ...,  1.6392e+03,
          1.3568e+01,  2.4505e+01],
        [-8.9430e-01, -2.1911e+00, -1.7241e+00,  ...,  1.3568e+01,
          1.2277e+03,  3.7108e+02],
        [-4.9021e-01, -1.5687e+00, -6.7552e-01,  ...,  2.4505e+01,
          3.7108e+02,  8.9802e+02]], device='cuda:1')
det I:  tensor(nan, device='cuda:1')
log det I:  tensor(nan, device='cuda:1')
effect_fisher:  tensor(nan, device='cuda:1')
Layer 1 effecInfo: nan





In [102]:
# effect entropy 计算函数
import math
from collections import Counter

import numpy as np
def shannon_entropy(time_series):
    """Calculate Shannon Entropy (natural log) of the sample data.

    The empirical distribution is built from the distinct values found in
    ``time_series``. ``np.unique`` flattens its input, so the frequencies are
    normalised by the total number of elements (the sum of the counts) rather
    than by ``len(time_series)``, which would only count the first axis of a
    multi-dimensional array.

    Parameters
    ----------
    time_series: np.ndarray | list[str]
        Samples; arrays of any dimensionality are treated as one flat sample set.

    Returns
    -------
    ent: float
        The Shannon Entropy as float value (in nats)
    """

    # Calculate frequency counts over all elements
    _, counts = np.unique(time_series, return_counts=True)
    total_count = counts.sum()

    # Calculate frequencies and Shannon entropy
    frequencies = counts / total_count
    ent = -np.sum(frequencies * np.log(frequencies))

    return ent

In [105]:
from pyentrp import entropy as ent

# Effective entropy of the inputs
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # only the loader produced by get_one_data
# for j, data in enumerate(tqdm.tqdm(testloader)): 
    images, labels = data
    images, labels = images.to(args['device']), labels.to(args['device'])
    print(images)
    with torch.no_grad():
        # effectEntro = Shannon_quantity(images)
        # print("effectEntro_ite: ",effectEntro)

        effectEntro_pyent = 0.0
        # Sum the entropies of the slices taken along axis 1; len(images[0]) is
        # the size of that axis
        for i in range(len(images[0])): # for every index along axis 1
            effectEntro_pyent  += shannon_entropy(images[:,i].flatten().detach().cpu().numpy())
            # print('effectEntro_pyent: ',effectEntro_pyent)
        # effectEntro_pyent = shannon_entropy(images.flatten(start_dim=1).detach().cpu().numpy())
        print('effectEntro_pyent: ',effectEntro_pyent)

100%|██████████| 1/1 [00:00<00:00,  6.66it/s]

tensor([[[[ 0.2392,  0.2471,  0.2941,  ...,  0.0745, -0.0118, -0.0902],
          [ 0.1922,  0.1843,  0.2471,  ...,  0.0667, -0.0196, -0.0667],
          [ 0.1843,  0.1843,  0.2392,  ...,  0.0902,  0.0196, -0.0588],
          ...,
          [-0.4667, -0.6706, -0.7569,  ..., -0.7020, -0.8980, -0.6863],
          [-0.5216, -0.6157, -0.7255,  ..., -0.7961, -0.7725, -0.8431],
          [-0.5765, -0.5608, -0.6471,  ..., -0.8118, -0.7333, -0.8353]],

         [[-0.1216, -0.1294, -0.0902,  ..., -0.2549, -0.2863, -0.3333],
          [-0.1216, -0.1373, -0.1059,  ..., -0.2549, -0.2863, -0.3098],
          [-0.1373, -0.1451, -0.1294,  ..., -0.2314, -0.2549, -0.3020],
          ...,
          [-0.0275, -0.2157, -0.3098,  ..., -0.2392, -0.4980, -0.3333],
          [-0.0902, -0.2000, -0.3333,  ..., -0.3569, -0.3569, -0.4980],
          [-0.1608, -0.1765, -0.3020,  ..., -0.3961, -0.3412, -0.4745]],

         [[-0.6157, -0.6314, -0.6000,  ..., -0.7176, -0.7176, -0.7412],
          [-0.6000, -0.6863, -




In [74]:
# Try Shannon_quantity on two random matrices and print the results
y = torch.rand(10,341) # author's note: at most 341 features are accepted — TODO confirm where this limit comes from
z = torch.randn(100,200)
e1 = Shannon_quantity(y)
e2 = Shannon_quantity(z)
print(e1)
print(e2)
# print(np.log(0))

170.22239461082756
335.1999980582977


In [9]:
import pandas as pd
# Location of the bank dataset CSV (absolute path on the author's machine)
dataPath = '/home/dengruijun/data/FinTech/DATASET/kaggle-dataset/bank/bank-additional-full.csv'

# Preprocess into train/test splits, then summarise the test features column by column
[X_train, y_train], [X_test, y_test] = preprocess_bank_dataset(dataPath)
df = pd.DataFrame(X_test)
df.describe()

X_train.shape: (32950, 63)
X_test.shape: (8238, 63)
y_train.shape: (32950, 1)
y_test.shape: (8238, 1) <class 'numpy.ndarray'>


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53,54,55,56,57,58,59,60,61,62
count,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,...,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0,8238.0
mean,0.289148,0.157198,0.023671,0.02185,0.063608,0.088614,0.032168,0.078174,0.064093,0.141539,...,0.284123,0.054413,0.020963,0.838213,0.081903,0.251712,0.335361,0.453437,0.089942,0.344142
std,0.453394,0.36401,0.152031,0.146202,0.244068,0.284203,0.176457,0.268462,0.244934,0.348598,...,0.171524,0.051057,0.030769,0.366935,0.121449,0.146193,0.252907,0.286649,0.055046,0.179637
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.160494,0.022977,0.0,1.0,0.0,0.104167,0.199532,0.192469,0.036953,0.203781
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.234568,0.040057,0.018182,1.0,0.0,0.333333,0.26968,0.41841,0.093403,0.425709
75%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.382716,0.068524,0.036364,1.0,0.142857,0.333333,0.29696,0.719665,0.143278,0.512287
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,0.769622,0.272727,1.0,1.0,0.479167,1.0,1.0,0.150759,0.512287


In [20]:
# Compute the "effective information" metric: entropy term minus the effective-Fisher term

effecInfo_diff_layer_list = []
effecInfo_same_layer_list = []
EntropyMetric = ULossMetric()
Fishermetric = dFILInverseMetric()

# for j, data in enumerate(tqdm.tqdm(testloader)): # iterate over the whole testloader
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # only the loader produced by get_one_data
    images, labels = data
    images, labels = images.to(args['device']), labels.to(args['device'])
    with torch.no_grad():
        # inference
        outputs = client_net(images).clone().detach()
        # entropy 
        effecEntro= EntropyMetric._entropy_prob_batch(images) # H(x)
        print("effecEntro: ",effecEntro)

        # fisher
        # NOTE(review): this second forward pass overwrites the detached copy
        # computed at the top of the block.
        outputs = client_net(images)
        # effectFisher = Fishermetric._computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)
        effectFisher = computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)

        # store
        effecInfo_same_layer_list.append(effecEntro-effectFisher)
        
# Report the mean over all processed batches
print(f"Layer {split_layer} effecInfo: {sum(effecInfo_same_layer_list)/len(effecInfo_same_layer_list)}")
effecInfo_diff_layer_list.append(effecInfo_same_layer_list)

# Save to CSV
matrix = np.array(effecInfo_diff_layer_list) # can get fairly large
transpose = matrix.T # one row per record, one column per layer
# pd.DataFrame(data=transpose, columns=[i for i in split_layer_list]).to_csv(results_dir + f'effecInfo-bs{batch_size}.csv',index=False)
pd.DataFrame(data=transpose, columns=[split_layer]).to_csv(results_dir + f'effecInfo.csv',index=False)


  0%|          | 0/1 [00:00<?, ?it/s]

effecEntro:  0.587471032886238
I:  tensor([[ 7.5991,  0.0214, -0.1178,  ...,  0.1394, -0.0462,  0.2453],
        [ 0.0214,  9.0257, -0.1333,  ..., -0.0265, -0.7587, -0.6064],
        [-0.1178, -0.1333,  7.9887,  ...,  0.1769,  0.4307,  0.1134],
        ...,
        [ 0.1394, -0.0265,  0.1769,  ...,  7.8924,  0.1008, -0.3792],
        [-0.0462, -0.7587,  0.4307,  ...,  0.1008,  8.1875, -0.1230],
        [ 0.2453, -0.6064,  0.1134,  ..., -0.3792, -0.1230,  7.2750]],
       device='cuda:1')
det I:  tensor(-inf, device='cuda:1')
log det I:  tensor(-575.6401, device='cuda:1')
effect_fisher:  tensor(1139.1831, device='cuda:1')


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]

I:  tensor([[ 6.5576e+00,  7.9808e-02, -1.4507e-01,  ...,  3.2295e-02,
          4.8863e-02,  1.3413e-01],
        [ 7.9808e-02,  7.9728e+00, -1.3367e-01,  ..., -1.0713e-01,
         -6.0581e-01, -5.0328e-01],
        [-1.4507e-01, -1.3367e-01,  7.2373e+00,  ...,  2.4361e-01,
          2.0677e-01,  3.4368e-03],
        ...,
        [ 3.2295e-02, -1.0713e-01,  2.4361e-01,  ...,  7.0409e+00,
          1.0706e-01, -3.2667e-01],
        [ 4.8863e-02, -6.0581e-01,  2.0677e-01,  ...,  1.0706e-01,
          7.1875e+00, -1.9418e-01],
        [ 1.3413e-01, -5.0328e-01,  3.4368e-03,  ..., -3.2667e-01,
         -1.9418e-01,  6.4056e+00]], device='cuda:1')
det I:  tensor(-inf, device='cuda:1')
log det I:  tensor(-674.2421, device='cuda:1')
effect_fisher:  tensor(1188.4841, device='cuda:1')
Layer 3 effecInfo: -1163.2461471311763



