In [None]:
!pip install torch modelscope accelerate==0.27.0

In [None]:
# Download the liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT dataset with the
# ModelScope CLI into ./Chinese-DeepSeek-R1-Distill-data-110k-SFT (relative to
# the current working directory).
!modelscope download --dataset liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT --local_dir ./Chinese-DeepSeek-R1-Distill-data-110k-SFT

In [None]:
# Peek at the first four raw JSONL records to check the schema before loading.
# The encoding is given explicitly: the dataset is Chinese-language (per its
# name), and relying on the platform's default codec can raise
# UnicodeDecodeError or produce mojibake on non-UTF-8 locales.
with open('Chinese-DeepSeek-R1-Distill-data-110k-SFT/distill_r1_110k_sft.jsonl', 'r', encoding='utf-8') as f:
    for count, line in enumerate(f, start=1):
        print(line)
        if count == 4:
            break

In [None]:
# Print the current working directory, then switch into the `demo5` subdirectory.
# NOTE(review): `%cd demo5` is relative, so re-running this cell without a kernel
# restart will look for demo5 inside demo5. Also confirm that the relative paths
# used by later cells are meant to resolve from inside demo5.
!pwd
%cd demo5

导入需要的包

In [3]:
from transformers import AutoTokenizer, DataCollatorForSeq2Seq
from datasets import load_dataset
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
import os
import torch
# Restrict this process to GPUs 6 and 7.
# NOTE(review): the variable only takes effect if it is set before CUDA is first
# initialised in this kernel — confirm nothing earlier touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "6,7"

In [4]:
# Load the training split from the local dataset directory, then keep a small,
# reproducible subset: shuffle with a fixed seed and take the first 10 rows.
dataset_path = "./Chinese-DeepSeek-R1-Distill-data-110k-SFT"
dataset = load_dataset(dataset_path, split="train")
dataset = dataset.shuffle(seed=42)
dataset = dataset.select(range(10))

# Tokenizer of the base checkpoint that will be fine-tuned.
tokenizer = AutoTokenizer.from_pretrained("/nvme/models/models/Qwen2.5-7B-Instruct")

In [5]:
system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."


def generate_r1_prompt(prompt, completion):
    """Tokenize one (prompt, completion) pair into causal-LM SFT features.

    The conversation is rendered with the tokenizer's chat template as
    system + user + assistant. Only the assistant's reply is supervised:
    every position belonging to the prompt is replaced by -100 in ``labels``,
    the sentinel that the loss computation ignores.

    Args:
        prompt: Text of the user turn.
        completion: Text of the assistant turn the model should learn.

    Returns:
        A dict with three equally long lists: ``input_ids`` and
        ``attention_mask`` for the full conversation, and ``labels`` equal to
        ``input_ids`` with the prompt positions set to -100.
    """
    instruction = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    full = instruction + [{"role": "assistant", "content": completion}]

    # Render the prompt *with* the generation prompt so the assistant header
    # emitted by the template is counted as part of the masked prefix. Without
    # it, the header tokens stay unmasked and are trained on as if they were
    # part of the reply.
    tokenized_instruction = tokenizer.apply_chat_template(
        instruction, tokenize=True, return_dict=True, add_generation_prompt=True
    )
    tokenized_full = tokenizer.apply_chat_template(full, tokenize=True, return_dict=True)

    input_ids = list(tokenized_full["input_ids"])
    attention_mask = list(tokenized_full["attention_mask"])

    # Clamp defensively so the mask can never grow past the sequence itself.
    instruction_length = min(len(tokenized_instruction["input_ids"]), len(input_ids))
    labels = [-100] * instruction_length + input_ids[instruction_length:]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }
 
def _to_sft_features(example):
    """Adapt one raw dataset row to the generate_r1_prompt(prompt, completion) call."""
    return generate_r1_prompt(example["instruction"], example["output"])


# Tokenize every row and drop the raw text columns that were consumed.
dataset = dataset.map(_to_sft_features, remove_columns=["instruction", "output"])



In [None]:
# Decode the first example's token ids back to text to inspect the rendered chat format.
first_input_ids = dataset[0]["input_ids"]
print(tokenizer.decode(first_input_ids))

In [None]:
# Decode only the supervised positions, i.e. labels that are not the -100 sentinel.
supervised_ids = [token_id for token_id in dataset[0]["labels"] if token_id != -100]
print(tokenizer.decode(supervised_ids))

In [None]:
# Load the base checkpoint and move it onto the GPU.
model = AutoModelForCausalLM.from_pretrained("/nvme/models/models/Qwen2.5-7B-Instruct")
model = model.to("cuda")

# Custom input, rendered with the chat template and ending in the generation prompt.
prompt = "1.11和1.9哪个大"
messages = [{"role": "user", "content": prompt}]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
    return_dict=True,
).to("cuda")

# Sampling is enabled but restricted to the single most probable token
# (top_k=1), so decoding is effectively greedy.
gen_kwargs = dict(max_new_tokens=100, do_sample=True, top_k=1)

# Generate without tracking gradients; this is inference only.
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)

# Print the base model's answer (special tokens kept) as a pre-fine-tuning reference.
print("原始模型推理结果：\n", tokenizer.decode(outputs[0], skip_special_tokens=False))


In [None]:
# Training configuration for the fine-tuning run: batch size 1 per device,
# 10 epochs, fp16 mixed precision, DeepSpeed configured from
# deepspeed_config.json, and a log entry at every step.
training_args = TrainingArguments(
    output_dir="./fine_tuned_qwen",
    per_device_train_batch_size=1,
    num_train_epochs=10,
    save_strategy="no",
    logging_dir="./logs",
    logging_steps=1,
    # `evaluation_strategy="no"` is intentionally not passed: that keyword is
    # deprecated (renamed to `eval_strategy`) and rejected by newer
    # transformers releases, while "no" is the default under either name, so
    # omitting it keeps evaluation disabled on every version.
    # NOTE(review): save_total_limit looks ineffective while
    # save_strategy="no" — confirm whether checkpoints are wanted.
    save_total_limit=1,
    deepspeed="deepspeed_config.json",
    fp16=True,
)

In [None]:
# Collator built from the tokenizer with padding enabled for each batch.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

# Wire model, arguments and tokenized data together; no evaluation set is used.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=None,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

In [None]:
# Execute the external run.sh script with bash.
# NOTE(review): the script's contents are not part of this notebook; presumably
# it launches the actual training job — confirm.
!bash run.sh