In [1]:
from datasets import load_dataset, load_from_disk
import os
from pathlib import Path
from typing import Dict
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    DataCollatorForSeq2Seq,
    DataCollatorWithPadding,
    TrainingArguments,
    Seq2SeqTrainingArguments,
    Trainer,
    Seq2SeqTrainer,
    PreTrainedTokenizerFast,
)
from functools import partial
from transformers.models.qwen2.tokenization_qwen2_fast import Qwen2TokenizerFast
from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM, Qwen2ForSequenceClassification
from peft import PrefixTuningConfig, get_peft_model, TaskType
from peft.peft_model import PeftModelForCausalLM, PeftModelForSequenceClassification
from rouge_score import rouge_scorer
import numpy as np
from transformers import GenerationConfig
import torch

In [2]:
torch.cuda.empty_cache()

In [None]:
cache_dir = '/root/autodl-tmp'
# cache_dir = Path.home()
base_model_id = "Qwen/Qwen2.5-3B-Instruct"
base_model_path = str(
        Path(cache_dir, ".cache/modelscope/hub", base_model_id)
    )

sft_model_path = "/root/llm_adv_qa/resources/sft_models/keyword/best"

base_model_path, sft_model_path


('/root/autodl-tmp/.cache/modelscope/hub/Qwen/Qwen2.5-3B-Instruct',
 '/root/llm_adv_qa/resources/sft_models/keyword/best')

In [4]:
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
type(tokenizer)

transformers.models.qwen2.tokenization_qwen2_fast.Qwen2TokenizerFast

In [5]:
base_model = AutoModelForCausalLM.from_pretrained(base_model_path)
type(base_model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM

In [6]:
prompt_template = """
        请帮我从以下句子中提取关键词。这些关键词是句子中最重要、最能概括句子主题的词汇。通过这些关键词，你可以更好地理解句子的内容。
        你只需要回答文本中的关键词,不要回答其他内容. 用户输入： "{question}""
        """

questions = [
        "无形资产是什么?",
        "什么是归属于母公司所有者的综合收益总额？",
        "航发动力2019年的非流动负债比率保留两位小数是多少？",
        "在上海注册的所有上市公司中，2021年货币资金最高的前3家上市公司为？金额为？",
    ]

In [8]:
sft_model = PeftModelForCausalLM.from_pretrained(base_model, sft_model_path)

In [9]:
for q in questions:
    prompt = prompt_template.format(question=f"{q}")
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # print(text)
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        generated_ids = sft_model.generate(inputs.input_ids, max_new_tokens=64)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(response)
    print("---")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


无形资产
---
归属于母公司所有者的综合收益总额
---
非流动负债比率
---
货币资金最高
---
