In [1]:
import os
# Environment variables are assigned here, before `torch` and the project
# helpers are imported further down in this cell; keep this ordering.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' # set to hf-mirror if you cannot access huggingface.co
# NOTE(review): machine-specific absolute cache directory; change it to a path
# that exists on your machine before running.
os.environ['HF_HOME'] = '/mnt/ssd1/zhanghanxiu/cache/huggingface'
os.environ["CUDA_VISIBLE_DEVICES"] = '2' # set to your GPU id
import warnings
# Suppresses every Python warning for the rest of the kernel session.
warnings.filterwarnings("ignore")
import torch
# Limit PyTorch to 4 CPU threads.
torch.set_num_threads(4)
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): wildcard import; presumably this is where `fp_new_dist_from_path`
# and `fp_new_sim_from_path` (used in later cells) come from. Confirm.
from fingerprint.utils import *

In [2]:
# Base checkpoints that every other model in this cell is compared against.
base_model_llama2 = "meta-llama/Llama-2-7b-hf"
base_model_qwen = "Qwen/Qwen2.5-7B"

# Models printed under the "Qwen2.5-7B finetune" heading.
finetune_model_list_qwen = [
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-Math-7B",
    "Qwen/Qwen2.5-Coder-7B",
    "tablegpt/TableGPT2-7B",
    "WangCa/Qwen2.5-7B-Medicine",
    "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2",
]

# Models printed under the "llama-2-7b finetune" heading.
finetune_model_list_llama2 = [
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/CodeLlama-7b-hf",
    "lmsys/vicuna-7b-v1.5",
    "WizardLMTeam/WizardMath-7B-V1.0",
    "EleutherAI/llemma_7b",
    "cxllin/Llama2-7b-Finance",
    "LinkSoul/Chinese-Llama-2-7b",
]

# Models printed under the "llama-2-7b pruned" heading.
# NOTE(review): the last two entries are machine-specific absolute paths.
prune_model_list_llama2 = [
    "princeton-nlp/Sheared-LLaMA-2.7B",
    "nm-testing/SparseLlama-2-7b-pruned_50.2of4",
    "princeton-nlp/Sheared-LLaMA-1.3B-Pruned",
    "princeton-nlp/Sheared-LLaMA-1.3B-ShareGPT",
    "princeton-nlp/Sheared-LLaMA-2.7B-Pruned",
    "princeton-nlp/Sheared-LLaMA-2.7B-ShareGPT",
    "princeton-nlp/Sheared-LLaMA-1.3B",
    "MBZUAI-LLM/GBLM-Pruner-LLaMA-2-7B",
    "/mnt/ssd1/zhanghanxiu/model/prune_wanda",
    "/mnt/ssd1/zhanghanxiu/model/prune_sparsegpt",
]

# Models printed under the "llama-2-7b unrelated" heading.
unrelated_model_list_llama2 = [
    "mistralai/Mistral-7B-v0.3",
    "Qwen/Qwen1.5-7B",
    "baichuan-inc/Baichuan2-7B-Base",
    "internlm/internlm2_5-7b",
    "openai-community/gpt2-large",
    "cerebras/Cerebras-GPT-1.3B",
    "zai-org/chatglm2-6b",
    "facebook/opt-6.7b",
    "EleutherAI/pythia-6.9b",
    "mosaicml/mpt-7b",
]

# Models printed under the "Qwen2.5-7B unrelated" heading. Identical to the
# LLaMA-2 list except that the second entry is the LLaMA-2 base model.
unrelated_model_list_qwen = [
    "mistralai/Mistral-7B-v0.3",
    "meta-llama/Llama-2-7b-hf",
    "baichuan-inc/Baichuan2-7B-Base",
    "internlm/internlm2_5-7b",
    "openai-community/gpt2-large",
    "cerebras/Cerebras-GPT-1.3B",
    "zai-org/chatglm2-6b",
    "facebook/opt-6.7b",
    "EleutherAI/pythia-6.9b",
    "mosaicml/mpt-7b",
]

In [3]:
# The two lists are index-aligned: entry i of `finetune_model_list_ds` is
# compared against entry i of `base_model_list_ds` by `print_distances`.
base_model_list_ds = [
    "Qwen/Qwen2.5-Math-1.5B",
    "Qwen/Qwen2.5-Math-7B",
    "meta-llama/Llama-3.1-8B",
]
finetune_model_list_ds = [
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
]

In [None]:
def _print_group(header, base_model, model_paths, compare_func):
    """Print `header`, then one aligned line per model scored against `base_model`."""
    print(header)
    for model_path in model_paths:
        dist = compare_func(base_model, model_path)
        print(f"{model_path:60} {dist:>10.8f}")


def print_distances(compare_func):
    """Print a report of pairwise scores for every model group in this notebook.

    Sections are printed in this order: Qwen2.5 fine-tunes, Qwen2.5 unrelated,
    LLaMA-2 unrelated, LLaMA-2 fine-tunes, LLaMA-2 pruned, all unordered pairs
    of the LLaMA-2 "unrelated" list, and finally each DeepSeek distilled model
    against its base model.

    Args:
        compare_func: Callable invoked as ``compare_func(model_a, model_b)``
            with two model identifiers. Its return value must support the
            ``.8f`` format spec.

    Returns:
        None. The report is written to stdout.

    Notes:
        Reads the module-level model lists (``base_model_qwen``,
        ``finetune_model_list_qwen``, ...) defined in earlier cells.
    """
    # Kept as a function-scope import so this cell does not depend on another
    # cell having imported `combinations`.
    from itertools import combinations

    # (header, base model, models compared against that base), in print order.
    # The header strings are reproduced exactly as in the recorded outputs.
    groups = [
        ('--------------Qwen2.5-7B finetune--------------', base_model_qwen, finetune_model_list_qwen),
        ('--------------Qwen2.5-7B unrelated--------------', base_model_qwen, unrelated_model_list_qwen),
        ('--------------llama-2-7b unrelated--------------', base_model_llama2, unrelated_model_list_llama2),
        ('--------------llama-2-7b finetune--------------', base_model_llama2, finetune_model_list_llama2),
        ('-------------llama-2-7b pruned--------------', base_model_llama2, prune_model_list_llama2),
    ]
    for header, base_model, model_paths in groups:
        _print_group(header, base_model, model_paths, compare_func)

    print('--------------pairwise distances for unrelated models--------------')
    for a, b in combinations(unrelated_model_list_llama2, 2):
        dist = compare_func(a, b)
        print(f"{a:40} <-> {b:40} {dist:>10.8f}")

    print('--------------deepseek models--------------')
    # Pair each base model with its distilled counterpart. Iterating with zip
    # (instead of a hard-coded range(3)) keeps working when the lists change size.
    for base_model, distilled_model in zip(base_model_list_ds, finetune_model_list_ds):
        dist = compare_func(base_model, distilled_model)
        print(f"{distilled_model:60} {dist:>10.8f}")

In [5]:
# CSV files under fingerprint/fp_data that are handed to the fingerprint
# helpers (as `file_list`) in the cells below.
qk_ev_filename = "fingerprint/fp_data/qk_ev.csv"
qk_sv_filename = "fingerprint/fp_data/qk_sv.csv"
vo_ev_filename = "fingerprint/fp_data/vo_ev.csv"
vo_sv_filename = "fingerprint/fp_data/vo_sv.csv"

# Order matters to callers that index into this list: qk_ev, qk_sv, vo_ev, vo_sv.
file_list = [
    qk_ev_filename,
    qk_sv_filename,
    vo_ev_filename,
    vo_sv_filename,
]

# Forwarded as `k=` and `m=` to `fp_new_dist_from_path`; their meaning is
# defined by that helper (not visible in this notebook).
k, m = 256, 8

In [6]:
def compare_func(model_path_1, model_path_2):
    """Return `fp_new_dist_from_path` for the two models, using the notebook's `file_list`, `k` and `m`."""
    return fp_new_dist_from_path(
        model_path_1=model_path_1,
        model_path_2=model_path_2,
        file_list=file_list,
        k=k,
        m=m,
    )


# Run the full report; every comparison goes through `compare_func` above.
print_distances(compare_func)

--------------Qwen2.5-7B finetune--------------
Qwen/Qwen2.5-7B-Instruct                                     0.02982667
Qwen/Qwen2.5-Math-7B                                         0.46922469
Qwen/Qwen2.5-Coder-7B                                        0.49459100
tablegpt/TableGPT2-7B                                        0.05275063
WangCa/Qwen2.5-7B-Medicine                                   0.03282635
huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2                 0.03072947
--------------Qwen2.5-7B unrelated--------------
mistralai/Mistral-7B-v0.3                                    1.30501926
meta-llama/Llama-2-7b-hf                                     1.51121461
baichuan-inc/Baichuan2-7B-Base                               2.07678580
internlm/internlm2_5-7b                                      1.57167721
openai-community/gpt2-large                                  1.61430013
cerebras/Cerebras-GPT-1.3B                                   2.00055289
zai-org/chatglm2-6b                    

In [9]:
def compare_func(model_path):
    """Score `model_path` via `fp_new_sim_from_path` with the LLaMA-2 SimNet checkpoint."""
    return fp_new_sim_from_path(
        model_path=model_path,
        simnet_path ='simnet/model/simnet_llama2.pth',
        file_list=file_list,
    )


# One (header, model list) pair per printed section, in output order.
# NOTE(review): the value is stored in `dist`, but it comes from a "sim"
# helper, and the recorded output shows fine-tuned models scoring close to 1.
for header, model_paths in (
    ('--------------llama-2-7b unrelated--------------', unrelated_model_list_llama2),
    ('--------------llama-2-7b finetune--------------', finetune_model_list_llama2),
    ('-------------llama-2-7b pruned--------------', prune_model_list_llama2),
):
    print(header)
    for model_path in model_paths:
        dist = compare_func(model_path)
        print(f"{model_path:60} {dist:>10.8f}")

--------------llama-2-7b unrelated--------------
mistralai/Mistral-7B-v0.3                                    0.00503707
Qwen/Qwen1.5-7B                                              0.29016349
baichuan-inc/Baichuan2-7B-Base                               0.18620875
internlm/internlm2_5-7b                                      0.00503607
openai-community/gpt2-large                                  0.23723441
cerebras/Cerebras-GPT-1.3B                                   0.28735927
zai-org/chatglm2-6b                                          0.00572342
facebook/opt-6.7b                                            0.00091707
EleutherAI/pythia-6.9b                                       0.00017777
mosaicml/mpt-7b                                              0.00503128
--------------llama-2-7b finetune--------------
meta-llama/Llama-2-7b-chat-hf                                0.99500012
meta-llama/CodeLlama-7b-hf                                   0.96413082
lmsys/vicuna-7b-v1.5                   

In [10]:
def compare_func(model_path):
    """Score `model_path` via `fp_new_sim_from_path` with the Qwen2.5 SimNet checkpoint."""
    return fp_new_sim_from_path(
        model_path=model_path,
        simnet_path ='simnet/model/simnet_qwen2.5.pth',
        file_list=file_list,
    )


# One (header, model list) pair per printed section, in output order.
# NOTE(review): the value is stored in `dist`, but it comes from a "sim"
# helper, and the recorded output shows fine-tuned models scoring close to 1.
for header, model_paths in (
    ('--------------qwen-2.5-7b unrelated--------------', unrelated_model_list_qwen),
    ('--------------qwen-2.5-7b finetune--------------', finetune_model_list_qwen),
):
    print(header)
    for model_path in model_paths:
        dist = compare_func(model_path)
        print(f"{model_path:60} {dist:>10.8f}")

--------------qwen-2.5-7b unrelated--------------
mistralai/Mistral-7B-v0.3                                    0.00500988
meta-llama/Llama-2-7b-hf                                     0.00199948
baichuan-inc/Baichuan2-7B-Base                               0.00093871
internlm/internlm2_5-7b                                      0.01386367
openai-community/gpt2-large                                  0.00499727
cerebras/Cerebras-GPT-1.3B                                   0.00110746
zai-org/chatglm2-6b                                          0.00905215
facebook/opt-6.7b                                            0.00045265
EleutherAI/pythia-6.9b                                       0.01550702
mosaicml/mpt-7b                                              0.00500852
--------------qwen-2.5-7b finetune--------------
Qwen/Qwen2.5-7B-Instruct                                     0.99498498
Qwen/Qwen2.5-Math-7B                                         0.99786770
Qwen/Qwen2.5-Coder-7B                