In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [1]:
import torch
from modelscope import snapshot_download, AutoModel, AutoTokenizer
import os

model_dir = snapshot_download('LLM-Research/Meta-Llama-3-8B-Instruct', cache_dir='/root/Logic-Bind-NL/model', revision='master')

2025-01-03 07:44:08,157 - modelscope - INFO - PyTorch version 2.5.1 Found.
2025-01-03 07:44:08,161 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer
2025-01-03 07:44:08,304 - modelscope - INFO - Loading done! Current index file version is 1.9.5, with md5 3aab30afff949a3aeab78cecb6a273f9 and a total number of 945 components indexed
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('/root/Logic-Bind-NL/model/LLM-Research/Meta-Llama-3-8B-Instruct', use_fast=False, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token, tokenizer.pad_token_id, tokenizer.eos_token_id

('<|eot_id|>', 128009, 128009)

## 数据处理

In [4]:
def process_func(example):
    MAX_LENGTH = 512    # Llama分词器会将一个中文字切分为多个token，因此需要放开一些最大长度，保证数据的完整性
    input_ids, attention_mask, labels = [], [], []
    instruction = tokenizer(f"<|start_header_id|>user<|end_header_id|>\n\n{example['instruction'] + example['input']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", add_special_tokens=False)
    response = tokenizer(f"{example['output']}<|eot_id|>", add_special_tokens=False)
    input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]  # 因为eos token也需要关注，所以补充为1
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]
    if len(input_ids) > MAX_LENGTH:  # 如果数据超过最大长度限制则做截断
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

In [5]:
# 将JSON文件转换为CSV文件
df = pd.read_json('../ft_data/train_data_v3.json')
ds = Dataset.from_pandas(df)
ds[0]

{'instruction': '如果长江流域出现汛情、旱情及防汛抗旱动态，这些信息应该由哪个机构进行审核并发布？',
 'output': '国家防总将对这些信息进行审核，并以统一发布的形式发布信息。',
 'input': '信息发布：防汛抗旱的信息发布应当及时、准确、客观、全面。汛情、旱情及防汛抗旱动态等，由国家防总统一审核和发布；涉及水旱灾情的，由国家防办会同民政部审核和发布。信息发布形式主要包括投权发布、散发新闻稿、组织报道、接受记者采访、举行新闻发布会等。地方信息发布：重点汛区、灾区和发生局部汛情的地方，其汛情、旱情及防汛抗旱动态等信息，由各地防汛抗旱指挥机构审核和发布；涉及水旱灾情的，由各地防汛指挥部办公室会同民政部门审核和发布。'}

In [6]:
tokenized_id = ds.map(process_func, remove_columns=ds.column_names)

                                                                 

In [10]:
print(tokenizer.decode(tokenized_id[0]['input_ids']))

<|start_header_id|>user<|end_header_id|>

如果长江流域出现汛情、旱情及防汛抗旱动态，这些信息应该由哪个机构进行审核并发布？信息发布：防汛抗旱的信息发布应当及时、准确、客观、全面。汛情、旱情及防汛抗旱动态等，由国家防总统一审核和发布；涉及水旱灾情的，由国家防办会同民政部审核和发布。信息发布形式主要包括投权发布、散发新闻稿、组织报道、接受记者采访、举行新闻发布会等。地方信息发布：重点汛区、灾区和发生局部汛情的地方，其汛情、旱情及防汛抗旱动态等信息，由各地防汛抗旱指挥机构审核和发布；涉及水旱灾情的，由各地防汛指挥部办公室会同民政部门审核和发布。<|eot_id|><|start_header_id|>assistant<|end_header_id|>

国家防总将对这些信息进行审核，并以统一发布的形式发布信息。<|eot_id|><|eot_id|>


## 加载模型

In [7]:
model = AutoModelForCausalLM.from_pretrained('/root/Logic-Bind-NL/model/LLM-Research/Meta-Llama-3-8B-Instruct', device_map="auto", torch_dtype=torch.bfloat16)
model.enable_input_require_grads() # 开启梯度checkpoint时，要执行该方法

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.10it/s]


## 训练

In [1]:
from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,  # 训练模式
    r=8,  # Lora 秩
    lora_alpha=32,  # Lora alaph，具体作用参见 Lora 原理
    lora_dropout=0.1  # Dropout 比例
)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
model = get_peft_model(model, config)

In [10]:
args = TrainingArguments(
    output_dir="../lora/llama3-8B-instruct-v2",
    per_device_train_batch_size=4,  # batch size
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=10,  # 训练3个epoch
    save_steps=100,
    learning_rate=5e-5,  # 学习率
    save_on_each_node=True,
    gradient_checkpointing=True
)

In [11]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)
trainer.train()

peft_model_id = "../lora/llama3-8B-instruct-v2.1"
trainer.model.save_pretrained(peft_model_id)
tokenizer.save_pretrained(peft_model_id)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,5.5906
20,2.3517
30,2.5671
40,2.3844
50,2.4774
60,2.472
70,2.2795
80,2.2012
90,1.9769
100,2.0892


('../lora/llama3-8B-instruct-v2.1/tokenizer_config.json',
 '../lora/llama3-8B-instruct-v2.1/special_tokens_map.json',
 '../lora/llama3-8B-instruct-v2.1/tokenizer.json')

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel

mode_path = '/root/Logic-Bind-NL/model/LLM-Research/Meta-Llama-3-8B-Instruct'
lora_path = '../lora/llama3-8B-instruct-v2.1'

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path)

# 加载模型
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto", torch_dtype=torch.bfloat16)

# 加载lora权重
model = PeftModel.from_pretrained(model, model_id=lora_path, config=config)

Loading checkpoint shards: 100%|██████████| 4/4 [00:27<00:00,  6.77s/it]


In [3]:
prompt = "如果应急办提出建议，并且指挥部批准了，需要向哪个部门通报？"
messages = [
    # {"role": "system", "content": "现在你要扮演皇帝身边的女人--甄嬛"},
    {"role": "user", "content": prompt}
]

text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

model_inputs = tokenizer([text], return_tensors="pt").to('cuda')  # 将输入信息进行token化

generated_ids = model.generate(  # 生成回答
    model_inputs.input_ids,
    max_new_tokens=512,
    do_sample=True,
    top_p=0.9,
    temperature=0.1,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.encode('<|eot_id|>')[0],
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]  # 解码，输出本batch的第一个解码结果

print(response)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128000 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


需要向环保局通报。assistant

在这次紧急情况中，应急办提出建立避难所的建议，并且指挥部批准了这个建议，因此需要向环保局通报建立避难所的安排。assistant

需要向环保局通报。


In [3]:
def infer(prompt):
    messages = [
        # {"role": "system", "content": "现在你要扮演皇帝身边的女人--甄嬛"},
        {"role": "user", "content": prompt}
    ]
    
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
    model_inputs = tokenizer([text], return_tensors="pt").to('cuda')  # 将输入信息进行token化
    
    generated_ids = model.generate(  # 生成回答
        model_inputs.input_ids,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.9,
        temperature=0.1,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.encode('<|eot_id|>')[0],
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]  # 解码，输出本batch的第一个解码结果

    return response

In [None]:
import json
from tqdm import tqdm

with open('../ft_data/test_data_v2.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)

ans = []
for idx, input_ids in tqdm(enumerate(test_data), total=len(test_data)):
    dicts = {
        "id": idx,
        "output": infer(input_ids["instruction"])
    }
    ans.append(dicts)

with open('../ft_data/test_output_v1.json', 'w', encoding='utf-8') as f:
    json.dump(ans, f, indent=4, ensure_ascii=False)

  0%|          | 0/499 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128000 for open-end generation.
  0%|          | 1/499 [00:34<4:47:18, 34.62s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128000 for open-end generation.
  0%|          | 2/499 [01:09<4:46:17, 34.56s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128000 for open-end generation.
  1%|          | 3/499 [01:43<4:44:50, 34.46s/it]The attention mask and the pad token id were not set. As a

In [5]:
print(ans)

[{'output': '观测司负责监控和记录天体的位置、速度、距离和其他物理参数。assistant\n\n观测司负责监控和记录天体的位置、速度、距离和其他物理参数。'}, {'output': '该市应急救援指挥部将向国家安全生产监督管理总局发出请求，并且国家安全生产监督管理总局会提供所需的资源，包括医疗队伍和重型机械。assistant\n\n某市应急救援指挥部将向国家安全生产监督管理总局发出请求，并且国家安全生产监督管理总局会提供所需的资源，包括医疗队伍和重型机械。'}, {'output': '国家卫生健康委员会将负责组织应对甲型H1N1流感疫情。assistant\n\n国家卫生健康委员会将负责组织应对甲型H1N1流感疫情。'}, {'output': '对于任何高于一级的级别l，应急响应级别将被设置为一级。assistant\n\n如果当前应急响应级别为二级，则将降低该级别，并且新的应急响应级别为一级。'}, {'output': '在国际食品安全事件中，世界卫生组织（WHO）和联合国粮食及农业组织（FAO）将是指挥部的成员单位。assistant\n\n在国际食品安全事件中，世界卫生组织（WHO）和联合国粮食及农业组织（FAO）将是指挥部的成员单位。assistant\n\nWHO和FAO将组成指挥中心，负责响应国际食品安全事件。assistant\n\nWHO和FAO将建立一个共同的指挥中心，负责响应国际食品安全事件，并且该指挥中心将是全球范围内的唯一-authoritative机构。assistant\n\nWHO和FAO将建立一个共同的指挥中心，负责响应国际食品安全事件，并且该指挥中心将是全球范围内的唯一-authoritative机构，提供科学、技术和政策方面的指导。assistant\n\nWHO和FAO将建立一个共同的指挥中心，负责响应国际食品安全事件，并且该指挥中心将是全球范围内的唯一-authoritative机构，提供科学、技术和政策方面的指导，同时还将与相关方合作，包括政府、行业协会、消费者组织等。assistant\n\nWHO和FAO将建立一个共同的指挥中心，负责响应国际食品安全事件，并且该指挥中心将是全球范围内的唯一-authoritative机构，提供科学、技术和政策方面的指导，同时还将与相关方合作，包括政府、行业协会、消