# 计算完整数组的皮尔逊相关性

In [14]:
import json
import pandas as pd
from scipy.stats import pearsonr
import re

def cal_all_data_pearson(filename1,filename2,rootpath="/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/"):
    """Print and return the Pearson correlation over every value of two JSON files.

    Each file is loaded with ``json.load``, wrapped in a ``DataFrame`` and
    transposed so that the outer JSON keys become rows. Both tables are then
    flattened to 1-D arrays and correlated element by element. Values are
    paired purely by position, so both files must list their keys in the
    same order.

    Args:
        filename1: Name of the first JSON file, relative to ``rootpath``.
        filename2: Name of the second JSON file, relative to ``rootpath``.
        rootpath: Directory that holds both files. A trailing path separator
            is optional.

    Returns:
        The Pearson correlation coefficient of the two flattened arrays
        (the same number that is printed).

    Raises:
        ValueError: If the two files do not contain the same number of values.
    """
    import os

    # os.path.join keeps the default (slash-terminated) rootpath working and
    # also accepts a directory given without the trailing separator.
    with open(os.path.join(rootpath, filename1), 'r', encoding='utf-8') as f:
        data1 = json.load(f)

    with open(os.path.join(rootpath, filename2), 'r', encoding='utf-8') as f:
        data2 = json.load(f)

    # Convert to DataFrames; transpose so that rows represent samples.
    df1 = pd.DataFrame(data1).T
    df2 = pd.DataFrame(data2).T

    # Flatten both tables into 1-D arrays.
    flat_data1 = df1.values.flatten()
    flat_data2 = df2.values.flatten()

    if flat_data1.size != flat_data2.size:
        raise ValueError(
            f"Cannot correlate {filename1} ({flat_data1.size} values) with "
            f"{filename2} ({flat_data2.size} values): sizes differ"
        )

    # Overall Pearson correlation coefficient (the p-value is not needed).
    overall_corr, _ = pearsonr(flat_data1, flat_data2)
    print(f"Overall Pearson Correlation: {overall_corr:.4f}")
    return overall_corr

def cal_all_data_pearson_with_difference(filename1,filename2,rootpath="/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/"):
    """Correlate the values of file 1 with (values of file 2 - original perplexity).

    The original perplexity is taken from the first decimal number found in
    ``filename2`` (e.g. ``..._2.7421875.json`` -> ``2.7421875``) and is
    subtracted from every value of the second file before the Pearson
    correlation over the flattened data is computed.

    Note: Pearson's r is invariant under adding or subtracting a constant,
    so the coefficient is the same as the one obtained from the unshifted
    values; the subtraction only changes the intermediate array.

    Args:
        filename1: Name of the first JSON file, relative to ``rootpath``.
        filename2: Name of the second JSON file, relative to ``rootpath``;
            must contain a decimal number such as ``2.7421875``.
        rootpath: Directory that holds both files. A trailing path separator
            is optional.

    Returns:
        The Pearson correlation coefficient (the same number that is printed).

    Raises:
        ValueError: If ``filename2`` contains no decimal number, or if the
            two files do not contain the same number of values.
    """
    import os

    # os.path.join keeps the default (slash-terminated) rootpath working and
    # also accepts a directory given without the trailing separator.
    with open(os.path.join(rootpath, filename1), 'r', encoding='utf-8') as f:
        data1 = json.load(f)

    with open(os.path.join(rootpath, filename2), 'r', encoding='utf-8') as f:
        data2 = json.load(f)

    # Guard the regex result: without it a filename lacking the number would
    # fail with an opaque "'NoneType' object has no attribute 'group'".
    perplexity_match = re.search(r"(\d+\.\d+)", filename2)
    if perplexity_match is None:
        raise ValueError(
            f"Cannot read the original perplexity from filename2={filename2!r}: "
            "expected a decimal number such as '2.7421875' in the name"
        )
    original_perplexity = float(perplexity_match.group(1))

    # Convert to DataFrames; transpose so that rows represent samples.
    df1 = pd.DataFrame(data1).T
    df2 = pd.DataFrame(data2).T

    # Flatten both tables; shift the second one by the original perplexity.
    flat_data1 = df1.values.flatten()
    flat_data2 = df2.values.flatten() - original_perplexity

    if flat_data1.size != flat_data2.size:
        raise ValueError(
            f"Cannot correlate {filename1} ({flat_data1.size} values) with "
            f"{filename2} ({flat_data2.size} values): sizes differ"
        )

    # Overall Pearson correlation coefficient (the p-value is not needed).
    overall_corr, _ = pearsonr(flat_data1, flat_data2)
    print(f"Overall Pearson Correlation: {overall_corr:.4f}")
    return overall_corr

In [15]:
# Overall correlation for the fisher_data / modified_perplexitys file pair
# that shares the "1024_2_2024-12-21-15-55-27" tag.
filename1 = "fisher_data_1024_2_2024-12-21-15-55-27.json"
filename2 = "modified_perplexitys_1024_2_2024-12-21-15-55-27_2.7421875.json"
cal_all_data_pearson(filename1,filename2)

Overall Pearson Correlation: 0.1961


In [18]:
# Reuses the filename1/filename2 globals assigned in an earlier cell.
# Subtracting a constant does not change Pearson's r, so the printed value is
# expected to match the plain cal_all_data_pearson result for the same files.
cal_all_data_pearson_with_difference(filename1,filename2)

Overall Pearson Correlation: 0.1961


In [19]:
# Overall correlation for the file pair tagged "1024_10_2024-12-21-16-44-29".
filename1 = "fisher_data_1024_10_2024-12-21-16-44-29.json"
filename2 = "modified_perplexitys_1024_10_2024-12-21-16-44-29_2.60546875.json"
cal_all_data_pearson(filename1,filename2)

Overall Pearson Correlation: -0.0364


In [20]:
# Overall correlation for the file pair tagged "4096_20_2024-12-21-15-40-48".
filename1 = "fisher_data_4096_20_2024-12-21-15-40-48.json"
filename2 = "modified_perplexitys_4096_20_2024-12-21-15-40-48_2.44140625.json"
cal_all_data_pearson(filename1,filename2)

Overall Pearson Correlation: 0.2216


# 计算每个key的皮尔逊相关系数

In [21]:
import json
import pandas as pd
from scipy.stats import pearsonr

def cal_key_pearson(filename1,filename2,rootpath="/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/"):
    """Print and return one Pearson correlation per column shared by two JSON files.

    Each file is loaded with ``json.load``, wrapped in a ``DataFrame`` and
    transposed so that the outer JSON keys become rows and the inner keys
    become columns. For every column of the first table that also exists in
    the second one, the two columns are correlated row by row (paired by
    position). Columns missing from the second file are skipped.

    Args:
        filename1: Name of the first JSON file, relative to ``rootpath``.
        filename2: Name of the second JSON file, relative to ``rootpath``.
        rootpath: Directory that holds both files. A trailing path separator
            is optional.

    Returns:
        A dict mapping each shared column name to its Pearson correlation
        coefficient, in the column order of the first file.
    """
    import os

    # os.path.join keeps the default (slash-terminated) rootpath working and
    # also accepts a directory given without the trailing separator.
    with open(os.path.join(rootpath, filename1), 'r', encoding='utf-8') as f:
        data1 = json.load(f)

    with open(os.path.join(rootpath, filename2), 'r', encoding='utf-8') as f:
        data2 = json.load(f)

    # Convert to DataFrames; transpose so that rows represent samples.
    df1 = pd.DataFrame(data1).T
    df2 = pd.DataFrame(data2).T

    # Correlate matching columns; hand pearsonr plain arrays so the pairing
    # is explicitly positional.
    correlation_results = {}
    for column in df1.columns:
        if column in df2.columns:
            corr, _ = pearsonr(df1[column].to_numpy(), df2[column].to_numpy())
            correlation_results[column] = corr

    # Report the results.
    for key, value in correlation_results.items():
        print(f"Column: {key}, Pearson Correlation: {value:.4f}")
    return correlation_results

In [22]:
# Per-column correlations for the file pair tagged "1024_2_2024-12-21-15-55-27".
filename1 = "fisher_data_1024_2_2024-12-21-15-55-27.json"
filename2 = "modified_perplexitys_1024_2_2024-12-21-15-55-27_2.7421875.json"
cal_key_pearson(filename1,filename2)

Column: self_attn.q_proj.weight, Pearson Correlation: 0.4893
Column: self_attn.k_proj.weight, Pearson Correlation: 0.0602
Column: self_attn.v_proj.weight, Pearson Correlation: -0.0854
Column: self_attn.o_proj.weight, Pearson Correlation: 0.1791
Column: mlp.gate_proj.weight, Pearson Correlation: 0.2548
Column: mlp.up_proj.weight, Pearson Correlation: 0.1098
Column: mlp.down_proj.weight, Pearson Correlation: 0.2691


In [23]:
# Per-column correlations for the file pair tagged "4096_20_2024-12-21-15-40-48".
filename1 = "fisher_data_4096_20_2024-12-21-15-40-48.json"
filename2 = "modified_perplexitys_4096_20_2024-12-21-15-40-48_2.44140625.json"
cal_key_pearson(filename1,filename2)

Column: self_attn.q_proj.weight, Pearson Correlation: 0.1740
Column: self_attn.k_proj.weight, Pearson Correlation: 0.4538
Column: self_attn.v_proj.weight, Pearson Correlation: -0.1413
Column: self_attn.o_proj.weight, Pearson Correlation: 0.0323
Column: mlp.gate_proj.weight, Pearson Correlation: 0.3305
Column: mlp.up_proj.weight, Pearson Correlation: 0.3184
Column: mlp.down_proj.weight, Pearson Correlation: 0.4135


# 绘制不同位宽设定下的结果表格

In [14]:
import json
import pandas as pd

# Directory that holds the JSON files read below.
rootpath="/root/autodl-tmp/methods/mix_quantize/model_info/llama2-7b/"
# Load the Fisher data and the modified-perplexity data; both file names carry
# the same "1024_2_2024-12-22-19-12-03" tag.
with open(rootpath+'fisher_data_1024_2_2024-12-22-19-12-03.json', 'r') as f:
    fisher_data = json.load(f)
with open(rootpath+'modified_perplexitys_1024_2_2024-12-22-19-12-03_2.7421875.json', 'r') as f:
    perplexity_data = json.load(f)

# Assumes the data is already organised as bit width -> block_id -> layer_name.
# Disabled reorganisation code, kept for reference (never executed):
# reorganized_fisher_data = {width: {block_id: fisher_data[block_id] for block_id in fisher_data.keys()} for width, fisher_data in zip(bit_widths, fisher_data.values())}
# reorganized_perplexity_data = {width: {block_id: perplexity_data[block_id] for block_id in perplexity_data.keys()} for width, perplexity_data in zip(bit_widths, perplexity_data.values())}


In [34]:
# Build one table row per (block, layer) pair; every bit width contributes a
# column holding the formatted (Fisher, perplexity) pair.
bit_widths = [8,4,3,2]
reorganized_results = []

# The "8" entry is used as the reference listing of blocks and layers.
for block_id, reference_layers in fisher_data["8"].items():
    for layer in reference_layers.keys():
        row = {"Block ID": block_id, "Layer Name": layer}
        for width in bit_widths:
            width_key = f"{width}"
            column_label = f"Width {width} (Fisher, Perplexity)"
            fisher_value = fisher_data.get(width_key, {}).get(block_id, {}).get(layer)
            perplexity_value = perplexity_data.get(width_key, {}).get(block_id, {}).get(layer)

            # A value missing on either side leaves an empty pair in the cell.
            if fisher_value is None or perplexity_value is None:
                row[column_label] = (None, None)
                continue

            row[column_label] = ("{:.2f}".format(fisher_value), "{:.5f}".format(perplexity_value))

        reorganized_results.append(row)


# Turn the collected rows into a DataFrame.
df_reorganized = pd.DataFrame(reorganized_results)

# Display the first 21 rows of the table.
df_reorganized.head(21)

Unnamed: 0,Block ID,Layer Name,"Width 8 (Fisher, Perplexity)","Width 4 (Fisher, Perplexity)","Width 3 (Fisher, Perplexity)","Width 2 (Fisher, Perplexity)"
0,0,self_attn.q_proj,"(0.46, 2.74219)","(0.41, 2.74414)","(0.34, 2.74219)","(0.69, 2.74219)"
1,0,self_attn.k_proj,"(4.75, 2.74219)","(4.80, 2.74219)","(4.59, 2.74219)","(4.91, 2.74414)"
2,0,self_attn.v_proj,"(1.29, 2.74219)","(1.22, 2.74219)","(1.27, 2.74609)","(894.50, 2.85547)"
3,0,self_attn.o_proj,"(1.13, 2.74219)","(1.12, 2.74219)","(1.04, 2.74219)","(1.20, 2.74609)"
4,0,mlp.gate_proj,"(0.98, 2.74219)","(0.99, 2.74414)","(0.96, 2.74219)","(1.13, 2.74609)"
5,0,mlp.up_proj,"(1.24, 2.74219)","(1.25, 2.74219)","(1.22, 2.74414)","(1.24, 2.74414)"
6,0,mlp.down_proj,"(1.16, 2.74219)","(1.20, 2.74414)","(1.17, 2.74414)","(1.27, 2.74609)"
7,1,self_attn.q_proj,"(0.67, 2.74219)","(0.67, 2.74219)","(0.68, 2.74414)","(0.54, 2.73828)"
8,1,self_attn.k_proj,"(0.97, 2.74219)","(0.97, 2.74414)","(1.04, 2.74219)","(1.31, 2.74609)"
9,1,self_attn.v_proj,"(1.26, 2.74414)","(1.41, 2.74609)","(1.70, 2.74609)","(3.34, 2.74609)"
