In [1]:
# Environment configuration. Every os.environ assignment below runs before
# `torch` and `utils` are imported, so the order of these statements matters.
import os
# Hugging Face endpoint override (mirror site).
# NOTE(review): HF_HUB_OFFLINE is also set to '1' below, so the endpoint is
# presumably never contacted while offline mode stays on -- confirm.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' # set to hf-mirror if you cannot access huggingface.co
# NOTE(review): machine-specific absolute cache directory; change it for your own environment.
os.environ['HF_HOME'] = '/mnt/ssd1/zhanghanxiu/cache/huggingface'
os.environ['HF_HUB_OFFLINE'] = '1' # set to '1' to enable offline mode
# Restricts which GPU this process can see; '3' is specific to the author's machine.
os.environ["CUDA_VISIBLE_DEVICES"] = '3' # set to your GPU id
import warnings
# Suppresses every warning for the rest of the kernel session.
warnings.filterwarnings("ignore")
import torch
# Cap the number of CPU threads torch uses for intra-op parallelism.
torch.set_num_threads(4)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): star import. `fp_dist_from_path`, `file_list`, `k` and `m` are used
# further down but are not defined in the visible cells; presumably they come from
# here -- confirm, and prefer explicit imports once the exported names are known.
from utils import *

In [2]:
# Base models that every comparison in this notebook is anchored on.
# Entries are presumably Hugging Face Hub repo ids unless they are absolute paths.
base_model_llama2 = "meta-llama/Llama-2-7b-hf"
base_model_qwen = "Qwen/Qwen2.5-7B"

# Models compared against Qwen2.5-7B in the "finetune" section.
finetune_model_list_qwen = [
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-Math-7B",
    "Qwen/Qwen2.5-Coder-7B",
    "tablegpt/TableGPT2-7B",
    "WangCa/Qwen2.5-7B-Medicine",
    "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2",
]

# Models compared against Llama-2-7b in the "finetune" section.
finetune_model_list_llama2 = [
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/CodeLlama-7b-hf",
    "lmsys/vicuna-7b-v1.5",
    "WizardLMTeam/WizardMath-7B-V1.0",
    "EleutherAI/llemma_7b",
    "cxllin/Llama2-7b-Finance",
    "LinkSoul/Chinese-Llama-2-7b",
]

# Models compared against Llama-2-7b in the "pruned" section.
# The last two entries are absolute paths on the author's machine.
prune_model_list_llama2 = [
    "princeton-nlp/Sheared-LLaMA-2.7B",
    "nm-testing/SparseLlama-2-7b-pruned_50.2of4",
    "princeton-nlp/Sheared-LLaMA-1.3B-Pruned",
    "princeton-nlp/Sheared-LLaMA-1.3B-ShareGPT",
    "princeton-nlp/Sheared-LLaMA-2.7B-Pruned",
    "princeton-nlp/Sheared-LLaMA-2.7B-ShareGPT",
    "princeton-nlp/Sheared-LLaMA-1.3B",
    "MBZUAI-LLM/GBLM-Pruner-LLaMA-2-7B",
    "/mnt/ssd1/zhanghanxiu/model/prune_wanda",
    "/mnt/ssd1/zhanghanxiu/model/prune_sparsegpt",
]

# Models compared against Llama-2-7b in the "unrelated" section.
unrelated_model_list_llama2 = [
    "mistralai/Mistral-7B-v0.3",
    "Qwen/Qwen1.5-7B",
    "baichuan-inc/Baichuan2-7B-Base",
    "internlm/internlm2_5-7b",
    "openai-community/gpt2-large",
    "cerebras/Cerebras-GPT-1.3B",
    "zai-org/chatglm2-6b",
    "facebook/opt-6.7b",
    "EleutherAI/pythia-6.9b",
    "mosaicml/mpt-7b",
]

# Models compared against Qwen2.5-7B in the "unrelated" section. Identical to the
# Llama-2 list except that the second entry is Llama-2-7b instead of Qwen1.5-7B.
unrelated_model_list_qwen = [
    "mistralai/Mistral-7B-v0.3",
    "meta-llama/Llama-2-7b-hf",
    "baichuan-inc/Baichuan2-7B-Base",
    "internlm/internlm2_5-7b",
    "openai-community/gpt2-large",
    "cerebras/Cerebras-GPT-1.3B",
    "zai-org/chatglm2-6b",
    "facebook/opt-6.7b",
    "EleutherAI/pythia-6.9b",
    "mosaicml/mpt-7b",
]

In [3]:
# Index-aligned pairs: base_model_list_ds[i] is compared against
# finetune_model_list_ds[i], so both lists must keep the same length and order.
base_model_list_ds = [
    "Qwen/Qwen2.5-Math-1.5B",
    "Qwen/Qwen2.5-Math-7B",
    "meta-llama/Llama-3.1-8B",
]
finetune_model_list_ds = [
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
]

In [4]:
def print_distances(compare_func):
    """Print a distance report for every model group defined in this notebook.

    Sections are emitted in a fixed order: Qwen2.5-7B finetune, Qwen2.5-7B
    unrelated, llama-2-7b unrelated, llama-2-7b finetune, llama-2-7b pruned,
    all pairwise combinations of ``unrelated_model_list_llama2``, and finally
    the index-aligned DeepSeek (base, distilled) pairs.

    The model identifiers are read from the notebook-level globals
    (``base_model_*``, ``*_model_list_*``) at call time.

    Args:
        compare_func: Callable invoked as ``compare_func(model_a, model_b)``
            whose return value can be formatted with ``.8f``.

    Raises:
        ValueError: If ``base_model_list_ds`` and ``finetune_model_list_ds``
            differ in length. This is checked before any comparison runs so
            a misconfigured pair list does not surface only at the very end.
    """
    from itertools import combinations

    # The DeepSeek lists are paired by position; validate them up front
    # instead of relying on a hard-coded pair count.
    if len(base_model_list_ds) != len(finetune_model_list_ds):
        raise ValueError(
            "base_model_list_ds and finetune_model_list_ds must have the same length, "
            f"got {len(base_model_list_ds)} and {len(finetune_model_list_ds)}"
        )

    def _print_section(header, base_model, model_paths):
        """Print ``header`` followed by one row per model compared against ``base_model``."""
        print(header)
        for model_path in model_paths:
            dist = compare_func(base_model, model_path)
            print(f"{model_path:60} {dist:>10.8f}")

    _print_section('--------------Qwen2.5-7B finetune--------------', base_model_qwen, finetune_model_list_qwen)
    _print_section('--------------Qwen2.5-7B unrelated--------------', base_model_qwen, unrelated_model_list_qwen)
    _print_section('--------------llama-2-7b unrelated--------------', base_model_llama2, unrelated_model_list_llama2)
    _print_section('--------------llama-2-7b finetune--------------', base_model_llama2, finetune_model_list_llama2)
    _print_section('-------------llama-2-7b pruned--------------', base_model_llama2, prune_model_list_llama2)

    print('--------------pairwise distances for unrelated models--------------')
    for a, b in combinations(unrelated_model_list_llama2, 2):
        dist = compare_func(a, b)
        print(f"{a:40} <-> {b:40} {dist:>10.8f}")

    print('--------------deepseek models--------------')
    # Each row is labelled with the distilled model, compared against its own base.
    for base_model, distilled_model in zip(base_model_list_ds, finetune_model_list_ds):
        dist = compare_func(base_model, distilled_model)
        print(f"{distilled_model:60} {dist:>10.8f}")

In [5]:
def compare_func(model_path_1, model_path_2):
    """Return ``fp_dist_from_path`` for the two given model paths.

    Adapter that gives ``print_distances`` the two-argument callable it
    invokes, while forwarding the notebook-level ``file_list``, ``k`` and
    ``m`` settings. Those three names are looked up when this function is
    called, not when it is defined.

    NOTE(review): ``fp_dist_from_path``, ``file_list``, ``k`` and ``m`` are
    not defined in the visible cells; presumably they come from
    ``from utils import *`` -- confirm before a Restart & Run All.
    """
    return fp_dist_from_path(
        model_path_1=model_path_1,
        model_path_2=model_path_2,
        file_list=file_list,
        k=k,
        m=m,
    )


print_distances(compare_func)

--------------Qwen2.5-7B finetune--------------
Qwen/Qwen2.5-7B-Instruct                                     0.63304102
Qwen/Qwen2.5-Math-7B                                         13.21428299
Qwen/Qwen2.5-Coder-7B                                        20.97656250
tablegpt/TableGPT2-7B                                        1.31297255
WangCa/Qwen2.5-7B-Medicine                                   0.68328893
huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2                 0.63304102
--------------Qwen2.5-7B unrelated--------------
mistralai/Mistral-7B-v0.3                                    44.92657852
meta-llama/Llama-2-7b-hf                                     57.42338943
baichuan-inc/Baichuan2-7B-Base                               70.00093079
internlm/internlm2_5-7b                                      55.70484924
openai-community/gpt2-large                                  66.68096924
cerebras/Cerebras-GPT-1.3B                                   88.02114105
zai-org/chatglm2-6b            