注意：PeftModel只支持同一类型的adapter（例如全部为LoRA），而PeftMixedModel支持在同一个模型上混合使用不同类型的adapter（例如LoRA与LoHa）。

PeftModel一次只能激活一个adapter，而PeftMixedModel可以激活多个adapter。

In [1]:
from peft.tuners import MixedModel

base_model = 'Qwen/Qwen2-7B-Instruct'  # model identifier only; the weights are loaded by AutoModel.from_pretrained in the next cell

In [2]:
# Load the base model from the identifier stored in `base_model`.
# The module tree printed further down shows the result is a Qwen2Model.
from transformers import AutoModel
model = AutoModel.from_pretrained(base_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
# Wrap the base model in a PeftMixedModel carrying LoRA_amount LoRA adapters,
# named "0", "1", ... All adapters are created from the same LoraConfig object.
from peft import PeftMixedModel, LoraConfig

rank = 4
LoRA_amount = 3

peft_config = LoraConfig(inference_mode=False, r=rank, lora_alpha=32, lora_dropout=0.1)

# Adapter "0" is attached when the wrapper is constructed ...
model = PeftMixedModel(model, peft_config, adapter_name="0")

# ... and the remaining adapters are registered on the wrapper afterwards.
for extra_adapter_name in map(str, range(1, LoRA_amount)):
    model.add_adapter(extra_adapter_name, peft_config)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [4]:
# Echo the wrapped model so the notebook prints its module tree.
model

PeftMixedModel(
  (base_model): MixedModel(
    (model): Qwen2Model(
      (embed_tokens): Embedding(152064, 3584)
      (layers): ModuleList(
        (0-27): 28 x Qwen2DecoderLayer(
          (self_attn): Qwen2SdpaAttention(
            (q_proj): lora.Linear(
              (base_layer): Linear(in_features=3584, out_features=3584, bias=True)
              (lora_dropout): ModuleDict(
                (0): Dropout(p=0.1, inplace=False)
                (1): Dropout(p=0.1, inplace=False)
                (2): Dropout(p=0.1, inplace=False)
              )
              (lora_A): ModuleDict(
                (0): Linear(in_features=3584, out_features=4, bias=False)
                (1): Linear(in_features=3584, out_features=4, bias=False)
                (2): Linear(in_features=3584, out_features=4, bias=False)
              )
              (lora_B): ModuleDict(
                (0): Linear(in_features=4, out_features=3584, bias=False)
                (1): Linear(in_features=4, out_features=3584,

In [5]:
# Activate adapters "0", "1" and "2" at the same time.
model.set_adapter(['0', '1', '2'])

In [6]:
# Inspect which adapters are currently active.
model.active_adapters

['0', '1', '2']

In [7]:
# Save the parameters of each adapter, one save_pretrained call per adapter name.
# NOTE(review): PEFT documents `selected_adapters` as a list of names; a bare
# string is passed here exactly as before - confirm against the installed version.
save_dir = "output/lora/"
for adapter_id in ("0", "1", "2"):
    model.save_pretrained(save_directory=save_dir, selected_adapters=adapter_id)

  ptr = (id(tensor.storage()), tensor.storage_offset(), tensor.size())


Adapters 0 saved to output/lora/
Adapters 1 saved to output/lora/
Adapters 2 saved to output/lora/


In [3]:
import torch
from transformers import AutoModel
from peft import PeftConfig, PeftMixedModel
from safetensors import safe_open

# Base model
base_model_name = "Qwen/Qwen2-7B-Instruct"
base_model = AutoModel.from_pretrained(base_model_name, torch_dtype=torch.float16)

# Expected adapter layout on disk:
# output/lora/0/
#   adapter_config.json
#   adapter_model.safetensors
# output/lora/1/
#   adapter_config.json
#   adapter_model.safetensors
# output/lora/2/
#   adapter_config.json
#   adapter_model.safetensors

# Adapter "0" is loaded first ...
adapter_dir_0 = "output/lora/0"
adapter_name_to_load = "0"

# ... and adapters "1" and "2" are added afterwards.
adapter_dir_others = ["output/lora/1", "output/lora/2"]
adapter_names_to_add = ["1", "2"]

# 1. Build the PeftMixedModel from adapter "0".
#    from_pretrained reads adapter_config.json from the directory, creates the
#    adapter layers and loads the weights through PEFT's own loader.
#    Why not load_state_dict(raw_tensors, strict=False): PEFT checkpoints store
#    their keys without the adapter name, whereas the live modules include it
#    (e.g. "lora_A.0.weight"). With strict=False every non-matching key is
#    skipped silently, so the adapters can keep their freshly initialised
#    weights while the cell still reports success.
mixed_model = PeftMixedModel.from_pretrained(
    base_model, adapter_dir_0, adapter_name=adapter_name_to_load
)
mixed_model.set_adapter(adapter_name_to_load)

print(f"Adapter {adapter_name_to_load} loaded successfully.")

# 2. Add the remaining adapters one by one. load_adapter reads the config,
#    registers the adapter under the given name and loads its weights.
for adapter_path, adapter_name in zip(adapter_dir_others, adapter_names_to_add):
    mixed_model.load_adapter(adapter_path, adapter_name)
    print(f"Adapter {adapter_name} loaded successfully.")

# 3. Activate all adapters together.
mixed_model.set_adapter([adapter_name_to_load] + adapter_names_to_add)
print("All adapters are set and ready to use.")


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Adapter 0 loaded successfully.
Adapter 1 loaded successfully.
Adapter 2 loaded successfully.
All adapters are set and ready to use.


In [6]:
# Activate adapters "0", "1" and "2" on the reloaded model.
mixed_model.set_adapter(['0', '1', '2'])

In [7]:
# Inspect which adapters are currently active on the reloaded model.
mixed_model.active_adapters

['0', '1', '2']