In [None]:
import json
import pandas as pd
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
from peft import LoraConfig, TaskType, get_peft_model
import os
# import swanlab

# os.environ["SWANLAB_PROJECT"] = "qwen3-sft-medical"
# System prompt: stored as the "instruction" of every converted record and
# used as the system turn when tokenizing training examples.
PROMPT = "你是一个医学专家，你需要根据用户的问题，给出带有思考的回答。"
# Token budget: training examples are truncated to this many tokens, and
# generation in this cell is capped at this many new tokens.
MAX_LENGTH = 2048

# swanlab.config.update({
#     "model": "Qwen/Qwen3-1.7B",
#     "prompt": PROMPT,
#     "data_max_length": MAX_LENGTH,
# })


def dataset_jsonl_transfer(origin_path, new_path, instruction=None):
    """
    Convert a raw question/think/answer JSONL file into the fine-tuning format.

    Every non-blank input line must be a JSON object with the keys
    ``question``, ``think`` and ``answer``. For each one, a JSON object with
    the keys ``instruction``, ``input`` and ``output`` is written to
    ``new_path``; ``output`` wraps the reasoning in ``<think>`` tags and
    appends the answer after a space, a newline and a space.

    Args:
        origin_path: Path of the raw JSONL file to read (decoded as UTF-8).
        new_path: Path of the converted JSONL file (UTF-8, overwritten).
        instruction: Text stored under ``instruction`` in every record.
            Defaults to the module-level ``PROMPT``.

    Raises:
        KeyError: If a record lacks one of the required keys.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if instruction is None:
        instruction = PROMPT

    records = []

    # Decode explicitly as UTF-8: the output is written as UTF-8 and the data
    # is Chinese text, so relying on the platform default encoding can fail.
    with open(origin_path, "r", encoding="utf-8") as src:
        for line in src:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            data = json.loads(line)
            question = data["question"]
            think = data["think"]
            answer = data["answer"]
            records.append({
                "instruction": instruction,
                "input": f"{question}",
                "output": f"<think>{think}</think> \n {answer}",
            })

    # Write only after the whole input parsed, so a malformed input does not
    # leave a partial output file behind.
    with open(new_path, "w", encoding="utf-8") as dst:
        for record in records:
            dst.write(json.dumps(record, ensure_ascii=False) + "\n")


def process_func(example):
    """
    Tokenize one formatted record into causal-LM training features.

    The prompt part (system turn with ``PROMPT``, user turn with
    ``example['input']``, and the opening of the assistant turn) and the
    response part (``example['output']``) are tokenized separately without
    special tokens. The tokenizer's pad token id is appended as the final
    token. Labels are -100 over the prompt part (the value the loss is
    expected to ignore) and the real token ids over the response and final
    token. All three sequences are cut to ``MAX_LENGTH`` when longer.

    Returns:
        A dict with ``input_ids``, ``attention_mask`` and ``labels``.
    """
    prompt_text = (
        f"<|im_start|>system\n{PROMPT}<|im_end|>\n"
        f"<|im_start|>user\n{example['input']}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    prompt_enc = tokenizer(prompt_text, add_special_tokens=False)
    answer_enc = tokenizer(f"{example['output']}", add_special_tokens=False)

    prompt_ids = prompt_enc["input_ids"]
    answer_ids = answer_enc["input_ids"]

    features = {
        "input_ids": prompt_ids + answer_ids + [tokenizer.pad_token_id],
        # The appended final token is attended to like any real token.
        "attention_mask": prompt_enc["attention_mask"] + answer_enc["attention_mask"] + [1],
        "labels": [-100] * len(prompt_ids) + answer_ids + [tokenizer.pad_token_id],
    }

    # Truncate over-long examples to the token budget.
    if len(features["input_ids"]) > MAX_LENGTH:
        features = {key: ids[:MAX_LENGTH] for key, ids in features.items()}
    return features


def predict(messages, model, tokenizer, max_new_tokens=None):
    """
    Generate the assistant reply for a chat conversation.

    Args:
        messages: List of chat messages (dicts with ``role`` and ``content``)
            rendered through the tokenizer's chat template.
        model: Model exposing ``generate``, ``parameters``, ``eval`` and
            ``train`` (e.g. the PEFT-wrapped causal LM trained in this cell).
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Cap on generated tokens. Defaults to the module-level
            ``MAX_LENGTH``.

    Returns:
        The decoded text of the newly generated tokens only (prompt removed,
        special tokens skipped).
    """
    if max_new_tokens is None:
        max_new_tokens = MAX_LENGTH

    # Put the inputs where the model actually lives instead of assuming CUDA;
    # the model is loaded with device_map="auto".
    device = next(model.parameters()).device

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    # This runs right after training, so the model may still be in training
    # mode (dropout active). Generate in eval mode and restore the mode after.
    was_training = model.training
    model.eval()
    try:
        # Pass the full encoding so the attention mask reaches generate().
        generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)
    finally:
        model.train(was_training)

    # Drop the prompt tokens; keep only what the model appended.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs["input_ids"], generated_ids)
    ]

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]


# Download the model from the Hugging Face Hub, cached under a local directory.
model_name = "Qwen/Qwen3-1.7B"
model_dir = "./qwen3_model"

# Load the tokenizer and the model weights with Transformers.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True, cache_dir=model_dir)
# `dtype` replaces the `torch_dtype` keyword, which this notebook's installed
# transformers release reports as deprecated.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", dtype=torch.bfloat16, cache_dir=model_dir)
model.enable_input_require_grads()  # must be called when gradient checkpointing is enabled

# LoRA configuration
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,   # training mode
    r=8,                    # LoRA rank
    lora_alpha=32,          # LoRA alpha (see the LoRA method for its role)
    lora_dropout=0.1,       # dropout ratio
)

# Wrap the base model so that only the LoRA adapter is trained.
model = get_peft_model(model, config)

# Raw dataset files and the converted files derived from them.
train_dataset_path = "train.jsonl"
test_dataset_path = "val.jsonl"

train_jsonl_new_path = "train_format.jsonl"
test_jsonl_new_path = "val_format.jsonl"

# Convert each raw file once; an already existing converted file is reused.
for raw_path, formatted_path in (
    (train_dataset_path, train_jsonl_new_path),
    (test_dataset_path, test_jsonl_new_path),
):
    if os.path.exists(formatted_path):
        continue
    dataset_jsonl_transfer(raw_path, formatted_path)

# Training set: read the converted JSONL, then tokenize every record and
# drop the original text columns.
train_df = pd.read_json(train_jsonl_new_path, lines=True)
train_ds = Dataset.from_pandas(train_df)
train_dataset = train_ds.map(
    process_func,
    remove_columns=train_ds.column_names,
)

# Validation set: same pipeline as the training set.
eval_df = pd.read_json(test_jsonl_new_path, lines=True)
eval_ds = Dataset.from_pandas(eval_df)
eval_dataset = eval_ds.map(
    process_func,
    remove_columns=eval_ds.column_names,
)

# Training hyper-parameters: batch size 1 with 4 accumulation steps,
# evaluation every 100 steps, logging every 10, a checkpoint every 400.
training_kwargs = {
    "output_dir": "./output/Qwen3-1.7B",
    "per_device_train_batch_size": 1,
    "per_device_eval_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "eval_strategy": "steps",
    "eval_steps": 100,
    "logging_steps": 10,
    "num_train_epochs": 2,
    "save_steps": 400,
    "learning_rate": 1e-4,
    "save_on_each_node": True,
    "gradient_checkpointing": True,
    # "report_to": "swanlab",
    # "run_name": "qwen3-1.7B-medical-lora",
}
args = TrainingArguments(**training_kwargs)

# Pads each batch to a common length.
collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collator,
)

trainer.train()

# Qualitative check: run the model on the first 3 validation records and
# print question and answer side by side.
test_df = pd.read_json(test_jsonl_new_path, lines=True).head(3)

test_text_list = []

for _, row in test_df.iterrows():
    instruction, input_value = row['instruction'], row['input']

    system_turn = {"role": "system", "content": f"{instruction}"}
    user_turn = {"role": "user", "content": f"{input_value}"}
    messages = [system_turn, user_turn]

    response = predict(messages, model, tokenizer)

    response_text = f"""
    Question: {input_value}

    LLM:{response}
    """

    # test_text_list.append(swanlab.Text(response_text))
    print(response_text)

# swanlab.log({"Prediction": test_text_list})

# swanlab.finish()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/311 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Map:   0%|          | 0/2219 [00:00<?, ? examples/s]

Map:   0%|          | 0/241 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

def predict(messages, model, tokenizer, max_new_tokens=2048):
    """
    Generate the assistant reply for a chat conversation.

    Args:
        messages: List of chat messages (dicts with ``role`` and ``content``)
            rendered through the tokenizer's chat template.
        model: Model exposing ``generate`` and ``parameters``.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Cap on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens only (prompt removed,
        special tokens skipped).
    """
    # Send the inputs to the device the model is actually on. Choosing a
    # device from global availability (mps/cuda/cpu) can disagree with where
    # device_map="auto" placed the weights.
    device = next(model.parameters()).device

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    # Pass the full encoding so the attention mask reaches generate().
    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens; keep only what the model appended.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs["input_ids"], generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return response

# Base model identifier and the local directory where its files are cached.
# model_name is defined here so this cell does not depend on the training
# cell having been run in the same kernel.
model_name = "Qwen/Qwen3-1.7B"
model_dir = "./qwen3_model"

# Load the tokenizer and the base model weights with Transformers.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True, cache_dir=model_dir)
# `dtype` replaces the `torch_dtype` keyword, which this notebook's installed
# transformers release reports as deprecated.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", dtype=torch.bfloat16, cache_dir=model_dir)
# Attach the trained LoRA adapter.
# NOTE(review): the checkpoint directory is hard-coded; confirm it matches a
# checkpoint that actually exists under ./output/Qwen3-1.7B.
model = PeftModel.from_pretrained(model, model_id="./output/Qwen3-1.7B/checkpoint-1084")

test_texts = {
    'instruction': "你是一个医学专家，你需要根据用户的问题，给出带有思考的回答。",
    'input': "医生，我胸口正中间疼，你能告诉我是什么原因引起的吗"
}

instruction = test_texts['instruction']
input_value = test_texts['input']

# System turn carries the instruction, user turn carries the question.
messages = [
    {"role": "system", "content": f"{instruction}"},
    {"role": "user", "content": f"{input_value}"}
]

response = predict(messages, model, tokenizer)
print(response)

Loading weights:   0%|          | 0/311 [00:00<?, ?it/s]



<think>嗯，用户问的是胸骨中段疼痛的常见原因，还提到自己胸口中间疼，想知道原因。首先，我需要回忆一下常见的胸部疼痛原因。胸骨中段疼痛可能涉及多个系统，比如心脏、肺部、消化系统，还有可能和周围结构有关。

首先想到的是心脏问题，比如心绞痛或心肌梗死。胸骨中段疼痛可能与心脏供血有关，比如冠状动脉狭窄或阻塞，导致心肌缺血。这时候疼痛通常与活动有关，活动后加剧，休息后缓解，这符合典型的心绞痛症状。不过用户的问题里没有提到活动或休息后的变化，可能需要考虑其他原因。

然后是肺部问题，比如肋间神经痛或者肺部感染。肋间神经痛通常在深呼吸或咳嗽时疼痛加剧，而肺部感染如肺炎或气胸也可能导致胸痛。但用户没有提到呼吸或咳嗽的症状，所以可能需要排除这些。

消化系统的问题也不能忽视，比如胃食管反流病（GERD）或消化性溃疡。胸骨上端的疼痛可能与胃酸反流刺激食道有关，但用户提到的是胸骨中段，可能更接近胃的位置，但需要进一步考虑。

还有可能与肋骨骨折或肌肉拉伤有关，但这种情况比较少见，通常伴随外伤史。另外，胸廓外伤也可能导致疼痛，但同样需要具体症状支持。

另外，还要考虑其他系统的问题，比如心脏神经官能症，这种情况下疼痛可能与情绪或压力有关，但通常不会伴随其他明显症状。还有可能与心脏或大血管的问题有关，比如主动脉夹层，但这种情况疼痛通常更剧烈且持续。

用户提到的“胸口正中间”具体位置，可能需要考虑胸骨中段的区域，比如第2~4肋间。这里的位置可能涉及胸骨柄和肋骨交界处，所以可能与心脏、大血管或肋间神经有关。

在回答时，应该先列出可能的原因，按可能性排序，然后给出建议。需要确保信息准确，同时避免引起不必要的恐慌，建议就医检查。可能还需要注意区分不同原因的症状，比如心绞痛的活动性，而胃食管反流可能有反酸、烧心等症状。

总结下来，常见的原因包括心脏问题（心绞痛、心肌梗死）、肋间神经痛、肺部感染或炎症、消化系统问题（胃食管反流、溃疡）、胸廓外伤，以及心脏神经官能症。需要逐一分析，然后给出建议，比如尽快就医检查，包括心电图、X光、超声心动图等，以确定具体原因。
</think> 
 当然可以。胸骨中段疼痛可能是由多种原因引起的，比如心脏问题、肋间神经痛、肺部感染、消化系统疾病等。由于您描述的症状比较笼统，建议您尽快到医院进行详细检查，以便我们能够准确诊断并给予相应的治疗建议。


Collecting modelscope
  Downloading modelscope-1.34.0-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.3/43.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting transformers
  Downloading transformers-5.1.0-py3-none-any.whl.metadata (31 kB)
Downloading modelscope-1.34.0-py3-none-any.whl (6.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-5.1.0-py3-none-any.whl (10.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: modelscope, transformers
  Attempting uninstall: modelscope
    Found existing installation: modelscope 1.22.0
    Uninstalling modelscope-1.22.0:
      Successfully uninstalled modelscope-1.22.0
  Attempting uninstall: transformers
    Found existing installation: transformers 5.0.0
    Uninstalling transf

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

def predict(messages, model, tokenizer, max_new_tokens=2048):
    """
    Generate the assistant reply for a chat conversation.

    Args:
        messages: List of chat messages (dicts with ``role`` and ``content``)
            rendered through the tokenizer's chat template.
        model: Model exposing ``generate`` and ``parameters``.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Cap on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens only (prompt removed,
        special tokens skipped).
    """
    # Send the inputs to the device the model is actually on. Choosing a
    # device from global availability (mps/cuda/cpu) can disagree with where
    # device_map="auto" placed the weights.
    device = next(model.parameters()).device

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    # Pass the full encoding so the attention mask reaches generate().
    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens; keep only what the model appended.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs["input_ids"], generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return response

# Base model identifier and the local directory where its files are cached.
model_name = "Qwen/Qwen3-1.7B"
model_dir = "./qwen3_model"

# Load the tokenizer and the base model weights with Transformers.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True, cache_dir=model_dir)
# `dtype` replaces the `torch_dtype` keyword, which this notebook's installed
# transformers release reports as deprecated.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", dtype=torch.bfloat16, cache_dir=model_dir)
# Attach the trained LoRA adapter.
# NOTE(review): the checkpoint directory is hard-coded; confirm it matches a
# checkpoint that actually exists under ./output/Qwen3-1.7B.
model = PeftModel.from_pretrained(model, model_id="./output/Qwen3-1.7B/checkpoint-1084")
test_texts = {
    'instruction': "你是一个医学专家，你需要根据用户的问题，给出带有思考的回答。",
    'input': "医生，我最近被诊断为糖尿病，听说碳水化合物的选择很重要，我应该选择什么样的碳水化合物呢？"
}


instruction = test_texts['instruction']
input_value = test_texts['input']

# System turn carries the instruction, user turn carries the question.
messages = [
    {"role": "system", "content": f"{instruction}"},
    {"role": "user", "content": f"{input_value}"}
]

response = predict(messages, model, tokenizer)
print(response)

Loading weights:   0%|          | 0/311 [00:00<?, ?it/s]



<think>嗯，用户问的是糖尿病患者应该选择什么样的碳水化合物。首先，我需要回忆一下糖尿病管理中的碳水化合物选择原则。记得之前学过，碳水化合物对血糖影响很大，所以需要分清简单和复杂碳水化合物的区别。

首先，简单碳水化合物，比如精制的糖类和高升糖指数的碳水化合物，比如白面包、甜点这些，应该避免。因为它们快速分解，血糖上升快，容易引起高血糖。而复杂碳水化合物，比如全谷物、豆类、水果里的纤维含量高，消化吸收较慢，所以升糖指数低，更适合糖尿病患者。

然后，用户提到自己被诊断为糖尿病，可能需要调整饮食结构。这时候应该建议低升糖指数的食物，比如燕麦、糙米、红薯这些。同时，控制总热量摄入，避免暴饮暴食，这也是管理血糖的重要部分。

另外，可能需要提醒用户注意碳水化合物的分配，比如在血糖控制良好的时候可以适量增加，但要避免在血糖高时摄入。还有，建议咨询营养师或医生，因为每个人的情况不同，比如胰岛素敏感度、其他并发症等，需要个性化调整。

有没有可能用户还有其他疑问？比如如何计算碳水化合物的摄入量，或者如何将这些食物融入日常饮食？但根据问题，用户主要问的是选择什么食物，所以重点放在食物种类上。可能还需要提到蛋白质和脂肪的摄入，但答案里已经提到要控制总热量，可能足够。需要确保回答准确且易于理解，避免专业术语过多，让用户能清楚明白。
</think> 
 您好，对于糖尿病患者来说，碳水化合物的选择确实非常重要。为了更好地控制血糖，建议您选择低升糖指数的碳水化合物，比如燕麦、糙米、红薯等。这些食物不仅升糖指数较低，而且富含纤维，有助于减缓碳水化合物的吸收速度。同时，控制总热量的摄入也是关键，避免暴饮暴食。如果有任何疑问，建议您咨询专业的营养师或糖尿病医生，他们可以根据您的具体情况给出更个性化的建议。


In [None]:
# Merge the adapter into the base model via PEFT's merge_and_unload(), then
# save the resulting model and the tokenizer side by side in one directory.
merged_dir = "merged_model/"
merged_model = model.merge_and_unload()
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

('merged_model/tokenizer_config.json',
 'merged_model/chat_template.jinja',
 'merged_model/tokenizer.json')

In [None]:
# Log in to the Hugging Face Hub (prompts for an access token).
from huggingface_hub import create_repo, login

login()

# exist_ok=True keeps this cell re-runnable: without it, create_repo raises
# once the repository already exists.
create_repo(
    "KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B",
    private=False,
    exist_ok=True,
)

RepoUrl('https://huggingface.co/KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B', endpoint='https://huggingface.co', repo_type='model', repo_id='KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B')

In [None]:
from huggingface_hub import upload_folder

# Upload the contents of the merged-model folder to the Hub repository.
merged_dir = "merged_model/"
hub_repo_id = "KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B"
upload_folder(folder_path=merged_dir, repo_id=hub_repo_id)



Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...rged_model/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

  ...d_model/model.safetensors:   1%|1         | 41.8MB / 4.06GB            

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B/commit/8221783acc687a6b91ea56678634c0ce05f69106', commit_message='Upload folder using huggingface_hub', commit_description='', oid='8221783acc687a6b91ea56678634c0ce05f69106', pr_url=None, repo_url=RepoUrl('https://huggingface.co/KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B', endpoint='https://huggingface.co', repo_type='model', repo_id='KevinXie0131/my_lora_finetuning_delicate_medical_r1_data_qwen3_1.7B'), pr_revision=None, pr_num=None)