# 1. 基础设置

In [1]:
'''
Author: Ruijun Deng
Date: 2024-08-14 16:59:47
LastEditTime: 2024-08-25 00:47:50
LastEditors: Ruijun Deng
FilePath: /PP-Split/examples/effectInfo/effectInfo.ipynb
Description: 
'''
# 导包
import torch
import os
import argparse
import pandas as pd
import tqdm
import numpy as np
from torch.nn.functional import avg_pool2d
# os.environ['NUMEXPR_MAX_THREADS'] = '48'

# 导入各个指标
import sys
sys.path.append('/home/dengruijun/data/FinTech/PP-Split/')
from ppsplit.quantification.distance_correlation.distCor import distCorMetric
from ppsplit.quantification.fisher_information.dFIL_inverse import dFILInverseMetric
from ppsplit.quantification.shannon_information.mutual_information import MuInfoMetric
from ppsplit.quantification.shannon_information.ULoss import ULossMetric
from ppsplit.quantification.rep_reading.rep_reader import PCA_Reader
from ppsplit.quantification.shannon_information.ITE_tools import Shannon_quantity

from target_model.task_select import get_dataloader_and_model,get_dataloader_and_model, \
    get_dataloader,get_models,get_infotopo_para

# utils
from ppsplit.utils.utils import create_dir

In [2]:
# %%
# nohup python -u effectInfo1.8.py > ../../results/20240702-effectiveInfo/Resnet18/effectiveInfo1.8/effectInfo1.8-pool4-layer11-gpu.log 2>&1 &
# nohup python -u effectInfo1.8.py > ../../results/20240702-effectiveInfo/VGG5/effectiveInfo1.8/effectInfo1.8-pool4-layer6-gpu.log 2>&1 &
# Experiment configuration shared by every cell below.
# Commented-out entries are alternative settings kept for quick switching.
args = dict(
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # device=torch.device("cpu"),
    dataset='CIFAR10',  # other options used before: 'bank', 'credit', 'purchase', 'Iris'
    model='VGG5',  # or 'ResNet18'
    result_dir='20240702-effectiveInfo/',  # earlier runs used '20240702-FIL/'
    oneData_bs=1,
    test_bs=500,
    train_bs=1,
    noise_scale=0,  # protective-noise setting (original note: "defense measure")
    split_layer=2,  # original note: split layers [2,3,5,7,9,11] were used for ResNet18
    test_num='effectiveInfo1.8.1',  # earlier values: MI, invdFIL, distCor, ULoss
    no_dense=True,
)
print(args['device'])
print(args)



cuda:0
{'device': device(type='cuda', index=0), 'dataset': 'CIFAR10', 'model': 'VGG5', 'result_dir': '20240702-effectiveInfo/', 'oneData_bs': 50, 'test_bs': 1, 'train_bs': 1, 'noise_scale': 0, 'split_layer': 2, 'test_num': 'effectiveInfo1.8', 'no_dense': True}


In [3]:
# Build the data loaders, the models and the infotopo parameters from `args`,
# then unpack them into notebook-level names used by the later cells.
data_msg = get_dataloader(args)
model_msg = get_models(args)
infotopo_msg = get_infotopo_para(args)
# Merged view of the three dicts; on duplicate keys the right-most dict wins.
msg = {**model_msg,**data_msg,**infotopo_msg}

# Datasets
one_data_loader,trainloader,testloader = data_msg['one_data_loader'],data_msg['trainloader'], data_msg['testloader']

# Infotopo parameters for the effective-entropy computation
nb_of_values = msg['nb_of_values']
conv = msg['conv']
# conv = False
print("infotopo: nb_of_values: ",nb_of_values)

# Models
client_net,decoder_net = model_msg['client_net'],model_msg['decoder_net']
decoder_route = model_msg['decoder_route']
image_deprocess = model_msg['image_deprocess']

# Paths
results_dir = model_msg['results_dir']
inverse_dir = results_dir + 'layer' + str(args['split_layer'])+'/'
# 1 for CIFAR10, 0 otherwise; not referenced again in the visible part of this notebook.
data_type = 1 if args['dataset'] == 'CIFAR10' else 0
split_layer = args['split_layer']

print('results_dir:', results_dir)
print('inverse_dir:', inverse_dir)
print('decoder_route:', decoder_route)

create_dir(results_dir)

# Move the client-side network to the configured device and switch it to eval mode.
client_net = client_net.to(args['device'])
client_net.eval()

# for n, p in client_net.named_parameters():
#     print(n, p.shape)

features.0.weight
features.0.bias
features.1.weight
features.1.bias
features.1.running_mean
features.1.running_var
features.1.num_batches_tracked
features.4.weight
features.4.bias
features.5.weight
features.5.bias
features.5.running_mean
features.5.running_var
features.5.num_batches_tracked
train decoder model...
infotopo: nb_of_values:  36
results_dir: ../../results/20240702-effectiveInfo//VGG5/effectiveInfo1.8/
inverse_dir: ../../results/20240702-effectiveInfo//VGG5/effectiveInfo1.8/layer2/
decoder_route: ../../results/20240702-effectiveInfo//VGG5/effectiveInfo1.8//Decoder-layer2.pth


VGG(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Tanh()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Tanh()
  )
  (denses): Sequential()
)

# 5. effective information

## 5.1 effect Fisher

In [7]:
# effective fisher 计算函数
import torch.autograd.functional as F
import torch
import time

# nips23
from torch.autograd.functional import jvp
import random
import math


import pandas as pd


# 自己实现的、规规矩矩的 jacobian + logdet 全部用torch的函数
def computing_det_with_outputs(model, inputs, outputs, sigmas): # sigma_square
    """Batch mean of 0.5 * (d * log(2*pi*e) - logdet(I)) using the full Fisher matrix.

    For every sample the Jacobian J of ``model`` w.r.t. that sample is computed
    with ``F.jacobian`` (``F`` is ``torch.autograd.functional`` in this cell),
    and I = J^T J / sigmas is formed, where d is the number of input elements.

    Args:
        model: callable mapping a batch of one sample to its output.
        inputs: batch of inputs; the first axis is the batch axis.
        outputs: model outputs for ``inputs``; only ``outputs[0].numel()`` is used.
        sigmas: divisor applied to J^T J (noise variance per the signature comment).

    Returns:
        numpy array holding the mean over the batch.
    """
    n_samples = inputs.shape[0]
    out_dim = outputs[0].numel()   # number of output elements per sample
    in_dim = inputs[0].numel()     # number of input elements per sample
    running_total = 0.0

    # First term of the metric; it depends only on the input dimension.
    f1 = in_dim * torch.log(2*torch.pi*torch.exp(torch.tensor(1.0)))

    for sample in inputs:
        # Jacobian of the outputs w.r.t. this single sample, flattened to (out_dim, in_dim).
        jac = F.jacobian(model, sample.unsqueeze(0)).reshape(out_dim, in_dim)
        fisher = 1.0/(sigmas)*torch.matmul(jac.t(), jac)

        f2 = torch.logdet(fisher)
        print('f1: ', f1)
        print('f2: ', f2)

        per_sample_value = 0.5 * (f1 - f2)
        running_total = running_total + per_sample_value
        print("effect_fisher: ", per_sample_value)

    batch_mean = running_total / n_samples
    return batch_mean.cpu().detach().numpy()


# 用diag 来化简
def computing_diag_det_with_outputs(model, inputs, outputs, sigmas=1.0): # sigma_square
    """Diagonal approximation of the effective-Fisher term, averaged over a batch.

    For each sample, I = J^T J / sigmas is built from the Jacobian J of ``model``
    (via ``F.jacobian``; ``F`` is ``torch.autograd.functional`` in this cell) and
    log det(I) is approximated by ``sum(log(diag(I) + 1e-10))``. The result is
    ``0.5 * (d * log(2*pi*e) - mean_over_batch(sum(log(diag(I) + 1e-10))))``
    where d is the number of input elements per sample.

    The exact ``slogdet`` of I is also evaluated per sample, but only as a printed
    diagnostic; it does not contribute to the returned value.

    Args:
        model: callable mapping a batch of one sample to its output.
        inputs: batch of inputs; the first axis is the batch axis.
        outputs: model outputs for ``inputs``; only ``outputs[0].numel()`` is used.
        sigmas: divisor applied to J^T J (noise variance per the signature comment).

    Returns:
        numpy array holding the scalar metric.
    """
    batch_size = inputs.shape[0]
    output_size = outputs[0].numel()  # number of output elements per sample
    input_size = inputs[0].numel()    # number of input elements per sample
    # Accumulators live on the same device as the data, so the function does not
    # depend on a notebook-level `args` dict.
    device = inputs.device

    # Batch average of the Fisher diagonal (feeds the alternative "inner" estimate below).
    I_diagonal_batch_avg = torch.zeros(input_size, device=device)
    print("I_diagonal_batch_avg: ",I_diagonal_batch_avg.shape)
    f2_2_avg_outer = torch.tensor(0.0, device=device)
    # Never accumulated in the current code; printed for parity with earlier runs.
    f2_avg_outer = torch.tensor(0.0, device=device)

    # First term of the metric; it depends only on the input dimension.
    f1 = input_size * torch.log(2*torch.pi*torch.exp(torch.tensor(1.0)))

    for i in range(batch_size):
        input_i = inputs[i].unsqueeze(0)

        # Jacobian of the outputs w.r.t. this single sample, flattened to (out, in).
        J = F.jacobian(model, input_i)
        J = J.reshape(output_size, input_size)
        JtJ = torch.matmul(J.t(), J)
        I = 1.0/(sigmas)*JtJ

        # Diagonal of the Fisher matrix (the approximation actually used).
        I_diagonal = torch.diagonal(I,dim1=0,dim2=1)
        I_diagonal_batch_avg += I_diagonal / (batch_size)

        # Diagnostic only: exact log-determinant. A non-positive sign or a solver
        # error is reported and otherwise ignored.
        try:
            s, f2 = torch.slogdet(I)
        except RuntimeError:
            s = None
        if s is None or s <= 0:
            print("logdet计算报错")
        else:
            print('f2: ', f2)

        # log det of the diagonal approximation; the epsilon guards log(0).
        f2_2 = torch.sum(torch.log(I_diagonal+1e-10))
        f2_2_avg_outer += f2_2 / batch_size
        print('f2_2: ', f2_2)

    # Alternative estimate: average the diagonals first, then take the log.
    # Kept for comparison; it is not part of the returned value.
    f2_2_avg_inner = torch.sum(torch.log(I_diagonal_batch_avg+1e-10))

    print('f2_avg_outer: ',f2_avg_outer)
    print('f2_2_avg_outer: ',f2_2_avg_outer)
    # print('f2_2_avg_inner: ',f2_2_avg_inner)
    print('f1: ',f1)

    effect_fisher = 0.5 * (f1 - f2_2_avg_outer)
    return effect_fisher.cpu().detach().numpy()

# arxiv'21 迁移学习领域的log det fisher 计算

# nips'23 fisher trace 计算
def calc_tr(net, x, device, sigmas=0.01, subsample=-1, jvp_parallelism=1): # adapted from the NeurIPS'23 source
    """Per-sample sum over input dimensions of log(||J e_k||^2 + 1e-10).

    For every input dimension k a one-hot tangent e_k (1 at flat position k for
    every sample, 0 elsewhere) is pushed through ``net.forward`` with ``jvp``,
    one sample at a time. The squared norm of the resulting Jacobian column is
    the k-th diagonal entry of J^T J; its log is accumulated per sample.

    ``sigmas``, ``subsample`` and ``jvp_parallelism`` are accepted for signature
    compatibility but are not read by this implementation.

    Args:
        net: module whose ``forward`` is differentiated.
        x: batch of inputs; the first axis is the batch axis.
        device: device on which the accumulator is placed.

    Returns:
        Tensor with one value per sample in ``x``.
    """
    print(f'x.shape: {x.shape}')

    def directional_derivative(point, direction):
        # jvp returns (outputs, J @ direction)
        return jvp(net.forward, (point,), (direction,))

    n_samples = x.shape[0]
    d = x[0].flatten().shape[0]  # number of input elements per sample

    tr = torch.zeros(n_samples, dtype=x.dtype).to(device)
    print(f'tr.shape: {tr.shape}')

    for k in range(math.ceil(d)):
        # One-hot tangent selecting flat dimension k for every sample, shaped like x.
        tgt = torch.zeros_like(x).reshape(n_samples, -1)
        tgt[:, k] = 1.
        tgt = tgt.reshape(x.shape)

        # Push the tangent through the network sample by sample
        # (stands in for the vmap call of the reference implementation).
        columns = []
        for i in range(n_samples):
            _, grad = directional_derivative(x[i].unsqueeze(0), tgt[i].unsqueeze(0))
            columns.append(grad)
        grad = torch.stack(columns)

        # Squared norm over every non-batch axis: the k-th diagonal entry of J^T J.
        diag_entry = torch.sum(grad * grad, dim=tuple(range(1, len(grad.shape))))

        # Accumulate the log so the total approximates log det of a diagonal matrix.
        tr += torch.log(diag_entry+1e-10)
    print('tr: ',tr)

    return tr

def f2_trace(net,x,device):
    """Return ``calc_tr`` for ``net`` and ``x`` with its fixed keyword settings.

    The value is passed through unchanged (no additional log is applied).
    """
    return calc_tr(net, x, device, sigmas=0.01, subsample=-1, jvp_parallelism=1)

In [None]:
# Effective-Fisher metric, evaluated on the single-batch loader.
effectFisher_same_layer_list = []
Fishermetric = dFILInverseMetric()
# for j, data in enumerate(tqdm.tqdm(testloader)): # iterate over the whole test loader instead
for j, data in enumerate(tqdm.tqdm(one_data_loader)):
    images, labels = data
    images, labels = images.to(args['device']), labels.to(args['device'])
    if conv:
        # Down-sample the images before computing the metric.
        print('images: ', images.shape)
        images= avg_pool2d(images,kernel_size=4)
        print('images_pooled: ',images.shape)
    with torch.no_grad():
        # A single forward pass is enough: the metric function only reads the
        # per-sample output size from `outputs`.
        outputs = client_net(images).clone().detach()

        # Alternative implementations tried earlier:
        # effectFisher = Fishermetric._computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)
        # effectFisher = computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)
        # effectFisher10 = computing_diag_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 1.0)
        effectFisher10 = computing_diag_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)
        # The label now states the sigma that is actually passed above.
        print('effectFisher(sigma=0.01): ',effectFisher10)

        # JVP-based variant:
        # trace = calc_tr(client_net, images, args['device'], sigmas=0.01, subsample=-1, jvp_parallelism=1)
        # f2_trace = f2_trace(client_net, images, args['device'])
        # print('f2_trace: ',f2_trace)

        # effectFisher_same_layer_list.append(effectFisher)

# print(f"effectfisher: {sum(effectFisher_same_layer_list)/len(effectFisher_same_layer_list)}")

## 5.2 effect uniform

In [5]:
# Effect uniform
import numpy as np
import torch
def calculate_effect_normalize(input_vector,interval=2.0):
    """Return ``numel(input_vector) * log(interval)``.

    This is the entropy of a uniform distribution over an interval of width
    ``interval``, summed over every element of ``input_vector``. Only the number
    of elements is used, not the values.

    Returns:
        0-dim tensor with the total.
    """
    per_dimension = torch.log(torch.tensor(interval))
    return input_vector.numel() * per_dimension

# def calculate_effect_normalize_batch(inputs,interval=2.0):
#     # batchsize:
#     batch_size = inputs.shape[0] # 一个batch的样本数目
#     total_entropy = 0.0

#     for i in range(batch_size):
#         input_i = inputs[i].unsqueeze(0)
#         total_entropy += calculate_effect_normalize_hetero(input_i)
    
#     return total_entropy/batch_size


def calculate_effect_normalize_hetero(input_vector):
    """Sum over all elements of log(2 * min(|x + 1|, |x - 1|) + 1e-10).

    For each element x the width is twice its distance to the nearer of -1 and 1;
    the log of that width is summed over every element of ``input_vector``.
    The computation is vectorized, so it runs as a handful of tensor operations
    instead of one Python iteration per element.

    Returns:
        0-dim tensor on the same device as ``input_vector``.
    """
    flat = input_vector.reshape(-1)
    # Distance to the nearer boundary, doubled.
    widths = 2 * torch.min(torch.abs(flat + 1.0), torch.abs(flat - 1.0))
    # The epsilon guards log(0) for elements sitting exactly on a boundary.
    total_entropy_single = torch.sum(torch.log(widths + 1e-10))
    print(f"entropy for single_input: {total_entropy_single}")
    return total_entropy_single


def calculate_effect_normalize_hetero_batch(inputs):
    """Average ``calculate_effect_normalize_hetero`` over the samples of a batch.

    Args:
        inputs: batch whose first axis indexes the samples.

    Returns:
        The per-sample values summed and divided by the batch size.
    """
    n_samples = inputs.shape[0]
    accumulated = 0.0
    for sample in inputs:
        accumulated = accumulated + calculate_effect_normalize_hetero(sample.unsqueeze(0))
    return accumulated/n_samples

# Example: a random vector with 192 elements; the result depends only on the
# element count and the default interval of 2.0, not on the sampled values.
vector = torch.rand(192)
entropy = calculate_effect_normalize(vector)
print(f"Entropy of the vector: {entropy}")

# print(type(entropy))

Entropy of the vector: 133.08425903320312


## 5.3 effect Entropy

In [6]:
# effect entropy 计算函数
import math
import numpy as np
def shannon_entropy_pyent(time_series):
    """Calculate the Shannon entropy (natural log) of the sample data.

    Values are treated as discrete symbols: the entropy is computed from the
    relative frequency of each distinct value. (Original author's note: this
    is probably not suitable for continuous-valued data.)

    Parameters
    ----------
    time_series: np.ndarray | list[str]
        Samples. Multi-dimensional arrays are handled as the flattened set of
        their elements, matching how ``np.unique`` counts them.

    Returns
    -------
    ent: float
        The Shannon entropy in nats.
    """
    # Frequency counts of every distinct value.
    _, counts = np.unique(time_series, return_counts=True)

    # Normalise by the number of counted elements rather than len(time_series):
    # np.unique flattens its input, so len() would be wrong for N-d arrays.
    frequencies = counts / counts.sum()
    ent = -np.sum(frequencies * np.log(frequencies))

    return ent

# import infotopo
import ppsplit.quantification.shannon_information.infotopo as infotopo
from torch.nn.functional import avg_pool2d
def shannon_entropy_infotopo(x, conv = False, nb_of_values = 9):
    """Joint entropy of ``x`` estimated with the project's ``infotopo`` module.

    Args:
        x: 2-D array; ``x.shape[0]`` is passed as the sample size and
            ``x.shape[1]`` as both ``dimension_max`` and ``dimension_tot``.
        conv: when True, ``x`` is first replaced by the result of
            ``convolutional_patchs(x)``.
        nb_of_values: forwarded to ``infotopo.infotopo``. Defaults to 9, the
            value that was previously hard-coded. (Original author's note: its
            meaning is unclear to them; 9 is the value used for Iris.)

    Returns:
        The last value of the entropy decomposition dict, multiplied by ln(2).
        NOTE(review): this relies on the dict's insertion order placing the
        full joint entropy last -- confirm against the infotopo implementation.
    """
    information_top = infotopo.infotopo(dimension_max = x.shape[1],
                                        dimension_tot = x.shape[1],
                                        sample_size = x.shape[0],
                                        nb_of_values = nb_of_values,
                                        # forward_computation_mode = True,
                                        )
    if conv:
        images_convol = information_top.convolutional_patchs(x)
        print('images_convol: ',images_convol.shape)
        x = images_convol

    # Entropies for the simplicial decomposition (base-2 logs per the original note).
    joint_prob_ent = information_top.simplicial_entropies_decomposition(x)
    # Convert from bits to nats.
    new_joint_prob_ent = {key: value * np.log(2) for key, value in joint_prob_ent.items()}

    # Other outputs explored earlier:
    # joint_prob = information_top._compute_probability(x)
    # ent = information_top._compute_forward_entropies(x)
    # information_top.entropy_simplicial_lanscape(joint_prob_ent) # plotting

    joint_entropy_final = list(new_joint_prob_ent.values())[-1]
    return joint_entropy_final


In [12]:
# effective entorpy

from pyentrp import entropy as ent

# Compare three entropy estimators (ITE, frequency-based, infotopo) on one batch.
for j, data in enumerate(tqdm.tqdm(one_data_loader)): # single-batch loader
# for j, data in enumerate(tqdm.tqdm(testloader)): 
    images, labels = data
    images, labels = images.to(args['device']), labels.to(args['device'])
    # print(images)
    with torch.no_grad():

        # infotopo needs a lower-dimensional input, so the images are pooled first
        if conv:
            print('images: ', images.shape)
            images = avg_pool2d(images,kernel_size=4)
            print('images_pooled: ',images.shape)

        # ITE estimator
        effectEntro = Shannon_quantity(images)
        print("effectEntro_ite: ",effectEntro)
        
        # Frequency-based estimator, summed over the first axis of a sample.
        # For the (N, 3, 8, 8) batch shown in the saved output this is a loop
        # over the 3 channels, each flattened across samples and pixels.
        effectEntro_pyent = 0.0
        for i in range(len(images[0])):
            effectEntro_pyent  += shannon_entropy_pyent(images[:,i].flatten().detach().cpu().numpy())
            # print('effectEntro_pyent: ',effectEntro_pyent)
        # effectEntro_pyent = shannon_entropy(images.flatten(start_dim=1).detach().cpu().numpy())
        print('effectEntro_pyent: ',effectEntro_pyent)

            
        # infotopo estimator on the per-sample flattened batch
        images_flattened = images.flatten(start_dim=1).detach().cpu().numpy()
        
        effectEntro_infotopo = shannon_entropy_infotopo(images_flattened,conv=conv)
        print('effectEntro_infotopo: ',effectEntro_infotopo)




  0%|          | 0/1 [00:00<?, ?it/s]

images:  torch.Size([500, 3, 32, 32])
images_pooled:  torch.Size([500, 3, 8, 8])
effectEntro_ite:  64.05110313624053
effectEntro_pyent:  27.621539482380474
original data_matrix.shape:  (500, 192)
data_matrix_new.shape:  (87840, 169)
images_convol:  (87840, 169)
data_matrix [[5 6 6 ... 3 3 3]
 [6 6 6 ... 3 3 2]
 [6 6 5 ... 3 2 2]
 ...
 [5 3 3 ... 7 8 7]
 [3 3 3 ... 8 7 7]
 [3 3 3 ... 7 7 7]]
Percent of tuples processed : 0


100%|██████████| 1/1 [00:08<00:00,  8.98s/it]

effectEntro_infotopo:  11.383272256729523





## 5.4 effectInfo

In [8]:
# Combined "effective information" metric over the test loader:
#   (entropy - effective Fisher) - (uniform-interval entropy - heterogeneous uniform entropy)
effecInfo_diff_layer_list = []
effecInfo_same_layer_list = []
EntropyMetric = ULossMetric()
Fishermetric = dFILInverseMetric()


InversedFIL_same_layer_list = []
NBatch = len(testloader)  # not used below
image_dimension = -1      # sentinel; replaced by the per-sample element count on the first batch

for j, data in enumerate(tqdm.tqdm(testloader)): # iterate over the test loader
# for j, data in enumerate(tqdm.tqdm(one_data_loader)): # single-batch loader
    images, labels = data
    images, labels = images.to(args['device']), labels.to(args['device'])
    with torch.no_grad():
        print('images: ', images.shape)
        
        if conv:
            images= avg_pool2d(images,kernel_size=4)
            print('images_pooled: ',images.shape)
        if image_dimension ==-1:
            image_dimension = images[0].numel()


        # --- effective entropy ---
        # infotopo variant:
        # effectEntro = shannon_entropy_infotopo(images.flatten(start_dim=1).detach().cpu().numpy(), conv)
        # custom probability-based entropy H(x):
        # effecEntro= EntropyMetric._entropy_prob_batch(images) # H(x)

        # ITE estimator (the one in use)
        effectEntro = Shannon_quantity(images)
        print("effectEntro_ite: ",effectEntro)

        # Frequency-based variant:
        # effectEntro_pyent = 0.0
        # for i in range(len(images[0])):
        #     effectEntro_pyent  += shannon_entropy_pyent(images[:,i].flatten().detach().cpu().numpy())
        #     # print('effectEntro_pyent: ',effectEntro_pyent)
        # print('effectEntro_pyent: ',effectEntro_pyent)

        # --- effective Fisher ---
        # forward pass; only the per-sample output size is read by the metric function
        outputs = client_net(images).clone().detach()
        # inverse_dFIL = Fishermetric.quantify(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01, with_outputs=True)
        # effectFisher = computing_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)
        effectFisher = computing_diag_det_with_outputs(model=client_net, inputs=images, outputs=outputs,sigmas = 0.01)

        # --- effective uniform ---
        # one_image = images[0]
        # effectUniform = calculate_effect_normalize(one_image.flatten())
        effectUniform = calculate_effect_normalize_hetero_batch(images)
        # fixed-interval uniform entropy
        effectUniform_interval = calculate_effect_normalize(images[0]) # the first image suffices: only the element count matters


        # Store the per-batch value.
        # NOTE(review): the operands mix array and tensor types; the result must be a
        # tensor for .detach() below to work -- confirm Shannon_quantity's return type.
        # effecInfo_same_layer_list.append(effectEntro-effectFisher)
        effectInfo = (effectEntro-effectFisher)-(effectUniform_interval-effectUniform)
        effecInfo_same_layer_list.append(effectInfo.detach().cpu().numpy())
        # InversedFIL_same_layer_list.append(inverse_dFIL)

        # Log the individual terms.
        print("effecEntro: ", effectEntro)
        print("effecFisher: ", effectFisher)
        print("effectUniform: ",effectUniform)
        print('effectUniform_interval: ',effectUniform_interval)
        # print("inverse_dFIL: ",inverse_dFIL)

# Mean of the per-batch values. Original author's note: averaging again across
# batches "is somewhat problematic". This divides by zero if the loader yields no batches.
avg_effectInfo = sum(effecInfo_same_layer_list)/len(effecInfo_same_layer_list)
avg_d_effectInfo = avg_effectInfo/image_dimension  # normalised by the per-sample input size
print(f"Layer {args['split_layer']} effecInfo: {avg_effectInfo}")
print(f"Layer {args['split_layer']} effecInfo_avg_d: {avg_d_effectInfo}")
# print(f"Layer {args['split_layer']} InversedFIL: {sum(InversedFIL_same_layer_list)/len(InversedFIL_same_layer_list)}")
effecInfo_diff_layer_list.append(effecInfo_same_layer_list)

# Prepare the values for CSV export (the actual write is commented out below).
matrix = np.array(effecInfo_diff_layer_list) # original note: somewhat large
transpose = matrix.T # one row per batch value, one column per layer
# pd.DataFrame(data=transpose, columns=[i for i in split_layer_list]).to_csv(results_dir + f'effecInfo-bs{batch_size}.csv',index=False)
save_route = results_dir + f'effecInfo-10000.csv'
# if os.path.exists(save_route):
#     df = pd.read_csv(save_route)
#     df[args['split_layer']] = transpose
#     df.to_csv(save_route,index=False)
# else:
#     pd.DataFrame(data=transpose, columns=[args['split_layer']]).to_csv(save_route,index=False)



  0%|          | 0/1 [00:00<?, ?it/s]

images:  torch.Size([50, 3, 32, 32])
images_pooled:  torch.Size([50, 3, 8, 8])
effectEntro_ite:  103.45039928117214
I_diagonal_batch_avg:  torch.Size([192])
logdet计算报错
f2_2:  tensor(1388.1731, device='cuda:0')
logdet计算报错
f2_2:  tensor(1347.2361, device='cuda:0')
logdet计算报错
f2_2:  tensor(1399.8623, device='cuda:0')
logdet计算报错
f2_2:  tensor(1400.6014, device='cuda:0')
logdet计算报错
f2_2:  tensor(1478.2726, device='cuda:0')
logdet计算报错
f2_2:  tensor(1450.1200, device='cuda:0')
logdet计算报错
f2_2:  tensor(1269.6251, device='cuda:0')
logdet计算报错
f2_2:  tensor(1453.4626, device='cuda:0')
logdet计算报错
f2_2:  tensor(1445.1406, device='cuda:0')
logdet计算报错
f2_2:  tensor(1349.1754, device='cuda:0')
logdet计算报错
f2_2:  tensor(1480.9639, device='cuda:0')
logdet计算报错
f2_2:  tensor(1352.7544, device='cuda:0')
logdet计算报错
f2_2:  tensor(1393.3760, device='cuda:0')
logdet计算报错
f2_2:  tensor(1274.2625, device='cuda:0')
logdet计算报错
f2_2:  tensor(1397.9250, device='cuda:0')
logdet计算报错
f2_2:  tensor(1360.6301, device='cuda

100%|██████████| 1/1 [00:25<00:00, 25.11s/it]

entropy for single_input: 53.111629486083984
effecEntro:  103.45039928117214
effecFisher:  -423.70474
effectUniform:  tensor(15.3594, device='cuda:0')
effectUniform_interval:  tensor(133.0843)
Layer 2 effecInfo: 409.4302673339844





In [None]:
# Scratch cell: fit a [0, 1] min-max scaler on each batch from the single-batch loader.
# NOTE(review): MinMaxScaler normally expects 2-D (samples, features) input --
# confirm the batch shape produced by one_data_loader (tabular vs. image data).
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
scaler = MinMaxScaler(feature_range=(0, 1))
# x = next(iter(one_data_loader))
for i,_ in one_data_loader:
        # print(i)
        y = scaler.fit_transform(i)  # refit on every batch; only the last result is kept in `y`
# print(y)

In [None]:
# Scratch cell: try Shannon_quantity on random uniform and random normal inputs.
y = torch.rand(10,341) # original author's observation: at most 341 features are accepted
z = torch.randn(100,200)
e1 = Shannon_quantity(y)
e2 = Shannon_quantity(z)
print(e1)
print(e2)
# print(np.log(0))

In [None]:
# Scratch cell: summary statistics of the bank dataset's test features.
import pandas as pd
dataPath = '/home/dengruijun/data/FinTech/DATASET/kaggle-dataset/bank/bank-additional-full.csv'

# NOTE(review): preprocess_bank_dataset is not imported in any visible cell above;
# on a fresh kernel this line presumably raises NameError -- confirm where it is defined.
[X_train, y_train], [X_test, y_test] = preprocess_bank_dataset(dataPath)
df = pd.DataFrame(X_test)
df.describe()

In [None]:
# Scratch check of torch.diagonal / torch.diag_embed on a non-square matrix.
import torch
x = torch.randn(6,5)
y = x.diagonal()          # main diagonal as a vector
z = torch.diag_embed(y)   # square matrix with that vector on its diagonal
print(x, y, z, sep="\n")


In [None]:
# Scratch cell: print the square root of 17.
import numpy as np
print(np.sqrt(17))

In [None]:
# Scratch cell: minimal Jacobian-vector-product example with an element-wise scaling.
import torch
from torch.autograd.functional import jvp
x = torch.randn([2,2], requires_grad=True)
f = lambda x: x * torch.tensor([1., 2])  # scales the two columns by 1 and 2
# NOTE(review): the tangent below is an integer tensor while x is floating point --
# confirm that jvp accepts the dtype mismatch on the installed torch version.
value, grad = jvp(f, (x,), (torch.tensor([[1,1],[1,0]]),))
print(value)
print(grad)

In [None]:
# Rename the keys of a saved ResNet checkpoint so they match the client model's state dict.
import sys
sys.path.append('/home/dengruijun/data/FinTech/PP-Split/')
import torch

unit_net_route = '/home/dengruijun/data/FinTech/PP-Split/results/trained_models/CIFAR10-models/ResNet18/32bs-ep20-relu-max-adam/resnet18-drj.pth' # original note: "VGG5-BN+Tanh"; the file stores model parameters only, not the architecture
# presumably provides `resnet18`; the star import hides where the name comes from -- verify
from target_model.task_select import *
client_net = resnet18(pretrained=False, split_layer=13, bottleneck_dim=-1, num_classes=10, activation='gelu', pooling='avg')
pweights = torch.load(unit_net_route)  # saved weights
cweights = client_net.state_dict()     # keys expected by the client model

print(len(pweights.keys()),pweights.keys())
print(len(cweights.keys()),cweights.keys())


# Show the two key lists side by side (zip stops at the shorter one).
# new_key = {}
for keyp,keyc in zip(pweights.keys(),cweights.keys()):
    print(keyp,'\t\t\t',keyc)

# Print the saved keys from index 122 onwards.
for i,key in enumerate(pweights.keys()):
    if i<122:
        continue
    print(key)

    # new_key[key.replace('model.','')] = pweights[key]

# For every client key, look up the saved tensor under the same name with
# 'selected_layers' replaced by 'model.layers', then save the remapped dict.
new_key = {}
for key in cweights.keys():
    new_key[key] = pweights[key.replace('selected_layers','model.layers')]
torch.save(new_key,'/home/dengruijun/data/FinTech/PP-Split/results/trained_models/CIFAR10-models/ResNet18/32bs-ep20-relu-max-adam/resnet18-drj-small.pth')

In [11]:
# Smoke test: move a small tensor to the GPU when one is available.
# The availability check now decides the target device instead of being
# discarded, so the cell also runs on CPU-only machines.
import torch
x = torch.tensor([3.0,4.0])
x = x.to('cuda:0' if torch.cuda.is_available() else 'cpu')