In [None]:
## Setting up

# 注意⚠️
如果无法连接 Hugging Face，deepseek-r1-llama-8b 模型和数据集将无法下载。为方便大家，这里提供网盘下载地址，可通过以下链接下载模型和相应的训练数据集：
> 模型下载地址：https://pan.quark.cn/s/d45a88061e3d
> 训练数据集下载地址：https://pan.quark.cn/s/fea6a395deff

In [1]:
# import unsloth
import torch
# Report the PyTorch build and, when a CUDA device is visible, its details.
cuda_available = torch.cuda.is_available()
print(f"PyTorch 版本: {torch.__version__}")
print(f"CUDA 是否可用: {cuda_available}")
if cuda_available:
    # Only query CUDA/GPU properties once a device is known to exist.
    print(f"CUDA 版本: {torch.version.cuda}")
    print(f"GPU 名称: {torch.cuda.get_device_name(0)}")

PyTorch 版本: 2.6.0+cu126
CUDA 是否可用: True
CUDA 版本: 12.6
GPU 名称: NVIDIA GeForce RTX 4060 Laptop GPU


In [2]:

from unsloth import FastLanguageModel
import torch
# Shared settings for the notebook; max_seq_length is handed to SFTTrainer
# further down.
max_seq_length = 2048  # maximum sequence length
dtype = None  # NOTE(review): presumably lets the loader choose the dtype automatically — confirm
load_in_4bit = True  # NOTE(review): presumably requests 4-bit quantized loading — confirm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


## Loading the model and tokenizer

In [3]:

# Load the base model and tokenizer from a local checkpoint directory.
# The values come from the configuration cell (max_seq_length, dtype,
# load_in_4bit) instead of being repeated as literals, so this call cannot
# drift from the max_seq_length that SFTTrainer receives later.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "E:/DS_8b/tmp",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    device_map = {"": 0},  # force everything onto GPU 0
    # Alternative to try:
    # device_map = "cuda:0",
)

==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA GeForce RTX 4060 Laptop GPU. Num GPUs = 1. Max memory: 7.996 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

## Checking xFormers and adding LoRA adapters

In [4]:
# Run this check after the model has been loaded.
try:
    import xformers.ops
except ImportError as e:
    print(f"❌ xFormers import failed: {e}")
else:
    print("✅ xFormers ops module loaded successfully")

    # Probe for the memory-efficient attention entry point.
    if not hasattr(xformers.ops, 'memory_efficient_attention'):
        print("❌ Memory efficient attention is NOT available")
    else:
        print("✅ Memory efficient attention is available")

✅ xFormers ops module loaded successfully
✅ Memory efficient attention is available


In [5]:
# Attach LoRA adapters to the loaded model; the result replaces `model`.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=(
        ["q_proj", "k_proj", "v_proj", "o_proj"]  # attention projections
        + ["gate_proj", "up_proj", "down_proj"]  # MLP projections
    ),
    r=16,  # original value: 16
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
)


Unsloth 2025.6.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


## Loading and processing the dataset

In [6]:
# train_prompt_style =  """以下是描述任务的指令，配有提供进一步背景的输入。请写出适当完成请求的回应。在回答之前，请仔细思考并逐步分析显微镜观察结果。
#
# ### Instruction:
# 您是一名病理学家或实验室专家。**严格**基于所提供的显微镜描述（例如：组织病理学、细胞学、皮肤镜检查等各类病理学检查），请按照标准病理报告格式提供完整的病理学分析：
#
# **注意**：本分析基于提供的显微镜描述，最终诊断需结合完整的临床信息、影像学检查及其他辅助检查结果。
#
# ### Question:
# {}
#
# ### Response:
# <think>
# {}
# </think>
# {}"""

#
#
# Training prompt template (Chinese). It casts the model as a clinical expert,
# asks for a four-step reasoning chain inside <think></think> tags followed by
# a concise diagnostic conclusion, and ends with three positional `{}` slots.
# formatting_prompts_func fills them, in order, with the question, the
# chain-of-thought and the response.
train_prompt_style = """你是一名临床医学专家，擅长根据医学检查报告中的观察所见进行推理分析。

### Instruction:
请根据以下观察描述生成详细的医学推理链，最终给出诊断结论。

**推理要求**：
请在<think></think>标签内进行详细的分步推理分析，必须包含以下4个步骤：
- Step 1: 分析报告中描述的影像或皮肤镜所见（逐项解读）
- Step 2: 解释这些表现所提示的组织或病理学改变及其医学意义
- Step 3: 结合这些改变，推断可能涉及的常见疾病类型或病理过程
- Step 4: 说明本次推理的局限性

然后在标签外给出简洁的诊断结论。

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [7]:
# End-of-sequence marker read from the tokenizer; formatting_prompts_func
# appends it to every formatted training text.
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN


def formatting_prompts_func(examples):
    """Render a batch of examples into training strings.

    Args:
        examples: A batch mapping that provides the columns "Question",
            "Complex_CoT" and "Response", each a sequence with one entry
            per row.

    Returns:
        A dict with the single key "text" holding one string per row: the
        row's question, chain-of-thought and response substituted into
        ``train_prompt_style``, followed by ``EOS_TOKEN``.
    """
    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    return {
        "text": [
            train_prompt_style.format(question, reasoning, answer) + EOS_TOKEN
            for question, reasoning, answer in rows
        ],
    }

#
# EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN
#
# def formatting_prompts_func(examples):
#     inputs = examples["Question"]
#     cots = examples["Complex_CoT"]
#     outputs = examples["Response"]
#     texts = []
#     for input, cot, output in zip(inputs, cots, outputs):
#         text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
#         texts.append(text)
#     return {
#         "text": texts,
#     }

In [8]:
######################
# 1. Dataset split & preprocessing
######################
from datasets import load_dataset

data_path = "E:/Deepseek/data_sorce/HZ/medical_data_with_cot_reasoner_4010_newprompt-forstep.json"

# Load the JSON file under a single named split, "data".
raw_dataset = load_dataset("json", data_files={"data": data_path})

# Seeded, shuffled split that holds out 30% of the rows.
split_dataset = raw_dataset["data"].train_test_split(test_size=0.3, shuffle=True, seed=3407)
train_raw, test_raw = split_dataset["train"], split_dataset["test"]

# Add the formatted "text" column to both partitions, batch by batch.
train_dataset, test_dataset = (
    partition.map(formatting_prompts_func, batched=True)
    for partition in (train_raw, test_raw)
)

print(f"Train set size: {len(train_dataset)}")
print(f"Test  set size: {len(test_dataset)}")

Train set size: 2807
Test  set size: 1203


## Setting up the model

In [9]:
######################
# 2. Configure SFTTrainer
######################
from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback
from unsloth import is_bfloat16_supported

# Query the hardware once and derive both precision flags from the same answer.
use_bf16 = is_bfloat16_supported()

# Evaluation and checkpointing share the same 10-step cadence so that the
# checkpoint with the lowest eval_loss can be restored at the end of training.
#
# NOTE(review): eval_dataset below is the held-out "test" split, so early
# stopping and best-checkpoint selection are tuned on it; a separate
# validation split would be needed for an unbiased final score.
training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    eval_strategy="steps",
    eval_steps=10,
    save_strategy="steps",
    save_steps=10,
    # Without a cap, every 10th step leaves a checkpoint in output_dir (over
    # 100 for the 1,053-step run). With load_best_model_at_end the best
    # checkpoint is kept in addition to the most recent ones.
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower eval_loss is better
    warmup_steps=5,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    logging_steps=10,
    seed=3407,
    report_to=[],  # empty list: no external experiment trackers
)

# Build the trainer; training stops early after 3 consecutive evaluations
# without an eval_loss improvement.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=1,
    args=training_args,
    callbacks=[
        EarlyStoppingCallback(early_stopping_patience=3)
    ],
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/1203 [00:00<?, ? examples/s]

## Model training

In [None]:
# Run fine-tuning with the trainer configured above and keep the value
# returned by train() in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,807 | Num Epochs = 3 | Total steps = 1,053
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os

# The plot labels are Chinese. Matplotlib's default font has no CJK glyphs, so
# CJK-capable families are placed first; otherwise the labels render as empty
# boxes. Existing entries are kept as fallbacks, and re-running the cell does
# not duplicate them.
cjk_fonts = ["Microsoft YaHei", "SimHei", "Noto Sans CJK SC"]
plt.rcParams["font.sans-serif"] = cjk_fonts + [
    name for name in plt.rcParams["font.sans-serif"] if name not in cjk_fonts
]
plt.rcParams["axes.unicode_minus"] = False  # keep minus signs renderable with CJK fonts

# Extract training and evaluation losses from the trainer's log history.
# Records carrying "loss" are training logs; records carrying "eval_loss" are
# evaluation logs.
log_history = trainer.state.log_history
train_steps, train_losses = [], []
eval_steps, eval_losses = [], []

for r in log_history:
    if "loss" in r:
        train_steps.append(r["step"])
        train_losses.append(r["loss"])
    if "eval_loss" in r:
        eval_steps.append(r["step"])
        eval_losses.append(r["eval_loss"])

# Draw both loss curves on one axes.
fig, ax = plt.subplots(figsize=(12, 7))
ax.plot(train_steps, train_losses, 'b-', label="训练损失", linewidth=2)
ax.plot(eval_steps, eval_losses, 'r-', label="测试损失", linewidth=2)
ax.set_xlabel("步骤数", fontsize=12)
ax.set_ylabel("损失值", fontsize=12)
ax.set_title("训练与测试损失曲线", fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()

# Show the figure.
plt.show()

# Print summary statistics. The counts are the number of logged points. The
# last/min lookups are guarded so an empty history (for example, training
# interrupted before the first log) does not raise.
print(f"训练步数: {len(train_steps)}")
print(f"评估步数: {len(eval_steps)}")
if train_losses:
    print(f"最终训练损失: {train_losses[-1]:.4f}")
if eval_losses:
    print(f"最终测试损失: {eval_losses[-1]:.4f}")
    print(f"最低测试损失: {min(eval_losses):.4f}")


In [None]:
# Persist the loss history as a CSV file and the loss curve as a PNG image.
# Both file names are derived from one run tag so they cannot drift apart
# (the PNG name previously read "newtrainpromt", unlike the CSV name).
run_tag = "4010-newprompt-forstep-newtrainprompt"

# One frame per series, tagged with its origin.
train_df = pd.DataFrame({
    'step': train_steps,
    'loss': train_losses,
    'type': ['train'] * len(train_steps)
})

eval_df = pd.DataFrame({
    'step': eval_steps,
    'loss': eval_losses,
    'type': ['eval'] * len(eval_steps)
})

# Stack both series into a single long-format table.
all_data = pd.concat([train_df, eval_df], ignore_index=True)

# Write the table to CSV.
csv_path = f'all_loss_{run_tag}.csv'
all_data.to_csv(csv_path, index=False)
print(f"所有损失数据已保存到: {os.path.abspath(csv_path)}")

# The plot labels are Chinese. Matplotlib's default font has no CJK glyphs, so
# CJK-capable families are placed first; otherwise the saved image shows empty
# boxes. Existing entries are kept as fallbacks.
cjk_fonts = ["Microsoft YaHei", "SimHei", "Noto Sans CJK SC"]
plt.rcParams["font.sans-serif"] = cjk_fonts + [
    name for name in plt.rcParams["font.sans-serif"] if name not in cjk_fonts
]
plt.rcParams["axes.unicode_minus"] = False  # keep minus signs renderable with CJK fonts

# Redraw the curves on a fresh figure so it can be written to disk.
fig, ax = plt.subplots(figsize=(12, 7))
ax.plot(train_steps, train_losses, 'b-', label="训练损失", linewidth=2)
ax.plot(eval_steps, eval_losses, 'r-', label="测试损失", linewidth=2)
ax.set_xlabel("步骤数", fontsize=12)
ax.set_ylabel("损失值", fontsize=12)
ax.set_title("训练与测试损失曲线", fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()

# Save the image.
fig_path = f'loss_curve_{run_tag}.png'
fig.savefig(fig_path, dpi=300, bbox_inches='tight')
print(f"损失曲线图已保存到: {os.path.abspath(fig_path)}")


## Saving the model locally

In [None]:
# new_model_online = "kingabzpro/DeepSeek-R1-Medical-COT"
# Directory name for the fine-tuned artifacts; the model and the tokenizer are
# both written into it.
# NOTE(review): `model` is the object returned by get_peft_model, so this
# presumably stores the LoRA adapter weights rather than merged full weights —
# confirm before relying on the directory as a standalone model.
new_model_local = "DeepSeek-R1-Medical-COT_4010-newprompt-forstep-newtrainprompt"
model.save_pretrained(new_model_local) # Local saving
tokenizer.save_pretrained(new_model_local)