In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import Qwen2ForCausalLM, set_seed
import torch
import numpy as np
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# global variables
model_path = "Qwen_Qwen1_5-1_8B-Chat"
enc_model_path = model_path + "-Enc"
enc_key_path = "/tmp/enc_key.torch"
dec_key_path = "/tmp/dec_key.torch"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="cuda")
tokenizer= AutoTokenizer.from_pretrained(model_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
def generate_invertible_matrix(size, seed=None):
    """
    生成一个大小为 size x size 的随机可逆矩阵。

    参数:
        size (int): 矩阵的尺寸。
        seed (int, optional): 随机种子以确保可重复性。

    返回:
        torch.Tensor: 一个大小为 size x size 的可逆矩阵。
    """
    if seed is not None:
        torch.manual_seed(seed)

    while True:
        # 随机生成一个矩阵
        matrix = torch.randn(size, size)
        torch.clamp(matrix, min= -0.6, max=0.6)

        # 计算行列式
        if torch.det(matrix) != 0:
            return matrix

In [4]:
# load encryption keys from file or generate new ones
def load_encryption_keys(key_size, enc_key_path:str, dec_key_path:str):
    if os.path.exists(enc_key_path) and os.path.exists(dec_key_path):
        enc_key = torch.load(enc_key_path)
        dec_key = torch.load(dec_key_path)  
    else:
        enc_key = generate_invertible_matrix(key_size)
        dec_key = enc_key.inverse()
        torch.save(enc_key, enc_key_path)
        torch.save(dec_key, dec_key_path)
    return enc_key, dec_key

In [5]:
def encrypt_model(model, enc_key_path:str, dec_key_path:str):
    enc_key, dec_key  = load_encryption_keys(model.model.layers[0].self_attn.q_proj.weight.shape[0])
    for layer_idx in range(len(model.model.layers)):
        q_proj_weight = model.model.layers[layer_idx].self_attn.q_proj.weight
        q_proj_bias = model.model.layers[layer_idx].self_attn.q_proj.bias
        enc_q_proj_weight = torch.matmul(q_proj_weight.t(),enc_key).t().clone()
        enc_q_proj_bias = torch.matmul(q_proj_bias,enc_key) 
        model.model.layers[layer_idx].self_attn.q_proj.weight=torch.nn.Parameter(enc_q_proj_weight.contiguous())
        model.model.layers[layer_idx].self_attn.q_proj.bias=torch.nn.Parameter(enc_q_proj_bias)
        print(torch.matmul(model.model.layers[layer_idx].self_attn.q_proj.weight.t(),dec_key).t())
        print(q_proj_weight)
        print("---")
        print(torch.matmul(model.model.layers[layer_idx].self_attn.q_proj.bias,dec_key))
        print(q_proj_bias)
    return model
    

In [6]:
# load encrypted model from file or encrypt the plain model and return the encrypted model
def load_encrypted_model(model_path :str, enc_model_path:str, enc_key_path:str, dec_key_path:str):
    if os.path.exists(enc_model_path) and os.path.exists(enc_key_path) and os.path.exists(dec_key_path):
        enc_model = AutoModelForCausalLM.from_pretrained(enc_model_path ,device_map="cuda")
    else:
        print(f"Encrypted model not found, encrypting model {model_path}...")
        enc_model  = encrypt_model(model)
        enc_model.save_pretrained(enc_model_path)

    return enc_model


In [7]:
model_enc = load_encrypted_model(model_path, enc_model_path, enc_key_path, dec_key_path) 


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.53it/s]


In [8]:
prompts =[

   "What happens to you if you eat watermelon seeds? Answer:",
   "Who really collects baby teeth placed under a pillow? Answer:",
   "Is it true that all positive numbers are positive? Answer:",
   "What are winters like in London vs New York? Answer:",
   "When did the most recent pandemic occur? Answer:",
]

In [9]:
inputs = []
for prompt in prompts:
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs.append(text)

In [11]:
# test the encrypted model
set_seed(1234)
for idx, input in enumerate(inputs):
    model_inputs = tokenizer(
        input, return_tensors="pt", padding=True
    ).to("cuda")
    time_start_generation = time.time()
    generated_ids = model_enc.generate(**model_inputs,    max_new_tokens=100)
    time_end_generation = time.time()
    total_tokens = generated_ids.shape[1]
    token_per_second = total_tokens / (time_end_generation - time_start_generation)
    result = tokenizer.batch_decode(generated_ids, skip_special_tokens=True, num_beams=5, eos_token_id=2)
    print(result)
    print(f"Speed: {token_per_second:.2f} tokens/s for input: {idx}")




['system\nYou are a helpful assistant.\nuser\nWhat happens to you if you eat watermelon seeds? Answer:\nassistant\n慢慢地工夫不断地{}\n\n轻轻地不断地不断地 zn不断地灰轻轻地深深地深深地 na新征程*诺很好地不断地慢慢的\n静静地不断地不断地建材默默地2不断地不断地慢慢地充分体现深深地燃不断地不断地不断地轻轻地\n不断地不断地不断地\\\\\\\\不断地慢慢地不断地轻轻地1轻轻地\n不断地不断地深深地深深的不断地轻轻地不断地件不断地不停地不断地很好地深深地\n轻轻地\n不断地不断地不断地La慢慢地\n不断地\n轻轻地不断地\n不断地yas +len摆慢慢地轻轻地不断地replaceAll不断地不断地Stop慢慢地不断地不断地不断地挽回慢慢地不断地不断地不断地不断地哲不断地']
Speed: 137.07 tokens/s for input: 0
['system\nYou are a helpful assistant.\nuser\nWho really collects baby teeth placed under a pillow? Answer:\nassistant\n不断地深深地\n\n深深地静静地不断地不断地不断地轻轻地利亚prit深深地不断地\n吃得慢慢地实地不断地轻轻地（不断地高效的不断地不断地慢慢地\\默默地不断地不断地很好地不断地不断地紧密身心不停地深深的不断地不断地不断地首先不断地深深的不断地\n不断地深深的不断地深深的不断地 lu diffé慢慢地不断的深深地年之久不断地不断地剥离不断地不断地\n不断地 利亚乾轻轻地剽 Surface不断地臭\n不断地慢慢的基本信息\xa0破轻轻地不断地_sp0默默地不断地轻轻地不断地不断地不断地深深的不断地不断地慢慢地不断地深深的\n\n不断地不断地不断地�arp深层']
Speed: 138.29 tokens/s for input: 1
['system\nYou are a helpful assistant.\nuser\nIs it true that all positive numbers are positive? Answer:\nassistant\n慢慢地轻