In [4]:
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

def apply_lora(base_model_path, lora_path, output_path, torch_dtype=torch.bfloat16):
    """Merge a LoRA adapter into its base model and save the merged checkpoint.

    Args:
        base_model_path: Identifier or path handed to ``from_pretrained`` for
            both the base tokenizer and the base causal-LM.
        lora_path: Path of the LoRA adapter handed to ``PeftModel.from_pretrained``.
        output_path: Directory the merged model and the tokenizer are saved to.
        torch_dtype: dtype used for loading the base model and forwarded to the
            adapter load. Defaults to ``torch.bfloat16``, the dtype the base
            model was previously loaded with.

    Returns:
        The model returned by ``merge_and_unload()``.
    """
    # Load the base tokenizer (slow implementation) and the base model.
    base_tokenizer = AutoTokenizer.from_pretrained(base_model_path, use_fast=False)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_path, torch_dtype=torch_dtype)

    # Attach the LoRA adapter. The same dtype is forwarded as for the base
    # model; the previous code asked for float16 here while the base model was
    # bfloat16, which was at best ignored and at worst a precision mismatch.
    lora_model = PeftModel.from_pretrained(
        base_model,
        lora_path,
        torch_dtype=torch_dtype,
    )

    # Fold the adapter into the base model and drop the PEFT wrapper.
    model = lora_model.merge_and_unload()

    # Save the merged model together with the tokenizer so the output
    # directory can be loaded on its own.
    model.save_pretrained(output_path)
    base_tokenizer.save_pretrained(output_path)
    return model


# Base model identifier passed to from_pretrained
# (presumably a Hugging Face Hub repo id — confirm it is not a local directory).
base_model_path = "Qwen/Qwen2-7B-Instruct"
# Path of the LoRA adapter
lora_path = "/root/users/jusjus/Self/LLaMA-Factory/saves/Qwen2-7B-Instruct/lora/train_2024-11-19-11-11-28"
# Output path for the merged model and tokenizer
output_path = "jusjus/Qwen2-7B-Instruct-Lora"

# NOTE: despite its name, `lora_model` holds the merged model returned by apply_lora.
lora_model = apply_lora(base_model_path, lora_path, output_path)

Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  5.64it/s]


In [None]:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import json

def load_model_and_infer(model_folder_path, prompt):
    """Load a saved causal-LM from a folder and generate a completion for ``prompt``.

    Args:
        model_folder_path: Directory passed to ``from_pretrained`` for both the
            tokenizer and the model.
        prompt: Text to tokenize and feed to ``model.generate``.

    Returns:
        The first generated sequence decoded with special tokens skipped.
        The decoded text comes from the full output sequence, so it includes
        the prompt tokens as well as the continuation.
    """
    # from_pretrained reads config.json (and generation_config.json, when
    # present) from the folder on its own, so no manual config loading is
    # needed. The earlier hand-built attention mask, GenerationConfig and
    # gen_kwargs were never passed to generate() and have been removed.
    tokenizer = AutoTokenizer.from_pretrained(model_folder_path)
    model = AutoModelForCausalLM.from_pretrained(model_folder_path)

    # The tokenizer output is unpacked straight into generate(), so whatever
    # tensors it produced (input_ids, attention_mask, ...) are what the model sees.
    inputs = tokenizer(prompt, return_tensors="pt")

    # max_length caps prompt + generated tokens together.
    outputs = model.generate(**inputs, max_length=512)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# 使用示例
# Usage example: load the model from the folder below and answer one prompt.
model_folder_path = "/root/users/jusjus/Self/LLaMA-Factory/jusjus/Qwen2_7B_QA3000"
prompt = "Q: How do auditors verify accounts receivable?"
generated_text = load_model_and_infer(model_folder_path, prompt)
print("Generated Text:", generated_text)

Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.91it/s]


Generated Text: How do auditors verify accounts receivable? Auditors verify accounts receivable by performing various procedures to ensure their accuracy and existence. These procedures may include:

1. **Confirmation of Accounts Receivable**: Auditors send confirmation letters to customers requesting them to verify the balances recorded in the company's accounts receivable ledger. This process helps to confirm the existence of the receivables and the amounts owed.

2. **Review of Supporting Documents**: Auditors examine supporting documents such as sales invoices, shipping documents, and customer orders to validate the transactions that have led to the creation of receivables.

3. **Reconciliation**: They reconcile the accounts receivable balances with subsidiary ledgers (if available) and other supporting records to ensure consistency and identify any discrepancies.

4. **Cut-off Testing**: To verify that revenues are recognized appropriately, auditors test the cut-off between period

In [23]:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import json

def load_model_and_infer(model_folder_path, prompt=None):
    """Load the tokenizer and causal-LM stored in ``model_folder_path``.

    Unlike what the name suggests, this function performs no inference: it
    only loads and returns the objects so they can be reused across prompts.

    Args:
        model_folder_path: Directory passed to ``from_pretrained`` for both the
            tokenizer and the model.
        prompt: Unused. Kept (now optional) so existing two-argument calls
            continue to work.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # from_pretrained reads config.json (and generation_config.json, when
    # present) from the folder itself. The previous prompt tokenization,
    # attention mask, GenerationConfig and gen_kwargs were built but never
    # used, so they have been dropped.
    tokenizer = AutoTokenizer.from_pretrained(model_folder_path)
    model = AutoModelForCausalLM.from_pretrained(model_folder_path)
    return tokenizer, model

def infer_from_finetuned_model(tokenizer, model, prompt):
    """Generate a completion for ``prompt`` and return it without the echoed prompt.

    Args:
        tokenizer: Callable that tokenizes ``prompt`` (called with
            ``return_tensors="pt"``) and provides ``decode``.
        model: Object exposing ``generate(**inputs, max_length=...)``.
        prompt: Text to complete.

    Returns:
        Everything in the decoded output that follows the first occurrence of
        ``prompt``. If ``prompt`` is empty or does not appear in the decoded
        output, the full decoded output is returned unchanged.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=512)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # An empty separator is rejected by str.partition; nothing to strip anyway.
    if not prompt:
        return generated_text

    # Split only on the FIRST occurrence: the model may repeat the prompt in
    # its answer, and splitting on every occurrence would truncate the text
    # at the second one.
    _, separator, continuation = generated_text.partition(prompt)
    return continuation if separator else generated_text

# Usage example: load the tokenizer and model once, then run one prompt through them.
model_folder_path = "/root/users/jusjus/Self/LLaMA-Factory/jusjus/Qwen2-7B-Instruct-Lora-official"
prompt = "Q: What is the significance of audit committees in corporate governance?"
tokenizer, model= load_model_and_infer(model_folder_path, prompt)
generated_text = infer_from_finetuned_model(tokenizer, model, prompt)
print("Generated Text:", generated_text)

Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.84it/s]


Generated Text:  A: They ensure accountability. Q: What does accountability mean in this context? A: Being responsible for actions. Q: How do audit committees ensure accountability? A: By monitoring and overseeing financial reporting processes, ensuring that they comply with relevant laws, regulations, and accounting standards. This includes reviewing the financial statements before they are released to the public, evaluating the effectiveness of internal controls, and assessing the performance of the company's internal audit function. Audit committees also engage with external auditors to ensure that their audits are thorough and independent, and they provide a platform for addressing any discrepancies or irregularities in financial reporting. By doing so, audit committees help maintain transparency and integrity within the organization, which in turn enhances investor confidence and protects stakeholders' interests.


In [25]:
# Reuses the module-level `tokenizer` and `model`; they must already be defined
# by an earlier load_model_and_infer(...) call in this kernel.
prompt = "Q: Define \"fraud\" in auditing."
generated_text = infer_from_finetuned_model(tokenizer, model, prompt)
print("Generated Text:", generated_text)

Generated Text:  A: In auditing, fraud is the intentional misrepresentation or concealment of material facts that affects the financial statements and is intended to deceive users of those statements. It involves deception or cheating for personal gain. The term encompasses various types of illegal or unethical actions designed to manipulate financial records or information. Fraud can take many forms, such as false entries, misappropriation of assets, or providing misleading information. Auditors are responsible for detecting and reporting any instances of fraud they uncover during their examination of a company's financial records. This helps ensure the accuracy and reliability of financial information, which is crucial for stakeholders like investors, creditors, and regulatory bodies.

Q: What is the main function of an audit report? A: The primary purpose of an audit report is to provide an objective assessment of a company's financial statements and internal controls. It communicat

In [27]:
# Display the raw string repr of the last generation (shows escapes such as \n).
generated_text

" A: In auditing, fraud is the intentional misrepresentation or concealment of material facts that affects the financial statements and is intended to deceive users of those statements. It involves deception or cheating for personal gain. The term encompasses various types of illegal or unethical actions designed to manipulate financial records or information. Fraud can take many forms, such as false entries, misappropriation of assets, or providing misleading information. Auditors are responsible for detecting and reporting any instances of fraud they uncover during their examination of a company's financial records. This helps ensure the accuracy and reliability of financial information, which is crucial for stakeholders like investors, creditors, and regulatory bodies.\n\nQ: What is the main function of an audit report? A: The primary purpose of an audit report is to provide an objective assessment of a company's financial statements and internal controls. It communicates the audito