In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# 加载预训练的 GPT-2 模型和分词器
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')

print(model)

  from .autonotebook import tqdm as notebook_tqdm


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)


In [2]:
# 使用 peft 库，轻松的将 LoRA 集成到模型中：
from peft import get_peft_model, LoraConfig, TaskType

# 配置 LoRA
lora_config = LoraConfig(
    task_type = TaskType.CAUSAL_LM,
    inference_mode = False,
    r = 8,
    lora_alpha = 32,
    lora_dropout=0.1,
)

# 将 LoRA 应用到模型中
model = get_peft_model(model, lora_config)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [3]:
# 查看当前模型架构
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(1024, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D(nf=2304, nx=768)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
           

In [4]:
# 查看增加的参数量
model.print_trainable_parameters()

trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.2364


In [5]:
# 自定义函数查看参数
def print_trainable_parameters(model):
    trainable_params = 0
    all_params = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        all_params += num_params
        if param.requires_grad:
            trainable_params += num_params
    print(f"可训练参数量: {trainable_params}")
    print(f"总参数量: {all_params}")
    print(f"可训练参数占比: {100 * trainable_params / all_params:.2f}%")
    
print_trainable_parameters(model)    

可训练参数量: 294912
总参数量: 124734720
可训练参数占比: 0.24%


In [6]:
# 准备数据集并进行微调
from transformers import Trainer, TrainingArguments

# 定义训练参数
training_args = TrainingArguments(
    output_dir='./results',         # 模型保存和日志输出的目录路径
    num_train_epochs=3,             # 训练的总轮数（epochs）
    per_device_train_batch_size=16, # 每个设备（如GPU或CPU）上的训练批次大小，16表示每次输入模型的数据数量
    learning_rate=5e-5,             # 学习率
    logging_steps=10,               # 每隔多少步（steps）进行一次日志记录
    save_steps=100,                 # 每隔多少步保存模型
)

# 创建 Trainer
trainer = Trainer(
    model=model,                    # 训练的模型对象，需要事先加载好
    args=training_args,             # 上面定义的训练参数配置
    train_dataset=train_dataset,    # 需要对应替换成已经处理过的dataset
)

# 开始训练
trainer.train()

NameError: name 'train_dataset' is not defined

In [None]:
# 保存和加载 LoRA 微调的模型
# 训练完成后，可以保存或者加载 LoRA 微调的参数
model.save_pretrained("./lora_model")

# 在推理时，加载原始的预训练模型和 LoRA 参数
# 加载原始模型
base_model = AutoModelForCausalLM.from_pretrained('gpt2')

# 加载 LoRA 参数
from peft import PeftModel

model = PeftModel.from_pretrained(base_model, './lora_model')

SyntaxError: invalid syntax (1417036531.py, line 3)