## 微调模型之后的测试推理阶段
1. 使用构造的数据集微调 Llama3-8B-Instruct
2. 可以进行单句的推理测试
3. 模型和 LoRA 参数保存在 JupyterLab 中

In [3]:
import os

# Expose only GPU index 3 to this process. This runs before torch is
# imported in the next cell, so the restriction is in place when CUDA starts.
os.environ.update(CUDA_VISIBLE_DEVICES="3")

In [4]:
import torch
import json
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig
)
from peft import PeftModel
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Local paths to the LoRA adapter checkpoint and the base-model snapshot.
lora_path = 'lora/checkpoint-11630'
model_path = 'model/models--meta-llama--Meta-Llama-3-8B/snapshots/62bd457b6fe961a42a631306577e622c83876cb6'

# Both the tokenizer and the model config are read from the base-model snapshot.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

ModuleNotFoundError: No module named 'torch'

In [4]:
# Load the base causal LM in bfloat16 from the snapshot, then wrap it with
# the LoRA adapter stored at `lora_path`.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        model_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        config=config,
    ),
    lora_path,
)
# Switch to evaluation mode for inference; kept as the last expression so the
# notebook still displays the module tree.
model.eval()

Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  6.36it/s]


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in

In [5]:
def infer(text_string):
    """Generate the fine-tuned model's answer to a single question.

    The question is placed in a JSON record with ``instruction`` and
    ``input`` keys, prefixed with a system prompt, and wrapped in
    ``[INST] ... [/INST]`` markers before being tokenized and passed to
    ``model.generate``. Relies on the notebook-level ``tokenizer``,
    ``model`` and ``device`` objects.

    Args:
        text_string: The question text to send to the model.

    Returns:
        The decoded continuation only (prompt tokens are sliced off), with
        special tokens removed. The same string is also printed.
    """
    # NOTE(review): these markers follow the Llama-2 chat convention;
    # presumably they match the format used in the fine-tuning data - confirm.
    system_prompt = '<<SYS>>\n 你是一个乐于助人的助手。\n<</SYS>>\n\n'
    input_data = {
        "instruction": "你是一个安全事故领域专家。请根据input的问题作出回答。",
        "input": text_string
    }
    sintruct = json.dumps(input_data, ensure_ascii=False)
    sintruct = '[INST]' + system_prompt + sintruct + '[/INST]\n'

    # Calling the tokenizer (instead of `encode`) also yields the attention
    # mask, which generate() cannot infer reliably when the pad token is the
    # EOS token, as configured below.
    encoded = tokenizer(sintruct, return_tensors="pt").to(device)
    input_ids = encoded["input_ids"]
    input_length = input_ids.size(1)
    print("Token数量:", input_length)

    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=encoded["attention_mask"],
            generation_config=GenerationConfig(
                # Only max_new_tokens is set: combining it with max_length is
                # contradictory, and max_new_tokens takes precedence anyway.
                max_new_tokens=512,
                return_dict_in_generate=True,
            ),
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the echoed prompt so only newly generated tokens are decoded.
    new_tokens = generation_output.sequences[0][input_length:]
    output = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print(output)
    return output

In [8]:
# Domain example kept for reference (a Chinese prompt asking for the risk
# level of a maritime search-and-rescue incident); uncomment to run it:
# infer("在某次海上搜救任务中，搜救中心收到了一条紧急信息，一艘载有200人的客轮在远洋航行中遇到了暴风雨，导致船体严重受损，水进入船舱，船上的人员处于极度危险之中。客轮目前已失去动力，随时可能沉没。考虑到此次海上突发事件对人命安全的严重威胁，搜救中心需要判断并上报此事件为最高级别。此次事件应归类为哪个险情等级？")
# Quick English sanity check of the inference path.
infer(text_string="Please answer what is one plus one?")

Token数量: 56



''