<a href="https://colab.research.google.com/github/JoshuaFZ/-/blob/main/owon_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; later cells read the training data
# from, and write the trained models to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Remove any already-installed unsloth / unsloth_zoo packages, then install both
# from their GitHub repositories (uncached, upgrading) together with
# trl, peft, accelerate, bitsandbytes and datasets.
!pip uninstall -y unsloth unsloth_zoo
!pip install --no-cache-dir -U \
  git+https://github.com/unslothai/unsloth.git \
  git+https://github.com/unslothai/unsloth-zoo.git \
  trl peft accelerate bitsandbytes datasets


In [None]:
import torch
from unsloth import FastLanguageModel

# Model-loading settings. max_seq_length is reused by later cells
# (trainer setup and reloading the merged model).
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Base checkpoint to fine-tune.
model_name = "Qwen/Qwen3-0.6B"

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# LoRA configuration: rank/alpha of 16, no dropout, no bias terms,
# adapters attached to the listed projection modules.
lora_settings = dict(
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# Convert the loaded model into its LoRA (PEFT) form.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
# 现在调试用代码
import json
from datasets import load_dataset

# Standard Alpaca prompt template (uses the "### Response:" header).
# The three {} slots are filled positionally: instruction, input, response.
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token taken from the tokenizer; formatting_prompts_func
# appends it to every rendered training sample.
EOS_TOKEN = tokenizer.eos_token

def _drop_none_values(value):
    """Return *value* with every None-valued dict key removed, recursively."""
    if isinstance(value, dict):
        return {
            key: _drop_none_values(item)
            for key, item in value.items()
            if item is not None
        }
    if isinstance(value, (list, tuple)):
        return [_drop_none_values(item) for item in value]
    return value


def _serialize_output(output):
    """Render one target value as the text the model should learn to emit."""
    # Structured targets must be real JSON (double quotes); str() on a dict or
    # list would produce a Python repr with single quotes instead.
    if isinstance(output, (dict, list, tuple)):
        return json.dumps(_drop_none_values(output), ensure_ascii=False)
    return str(output)


def formatting_prompts_func(examples, prompt_template=None, eos_token=None):
    """Render a batch of instruction records into training strings.

    Args:
        examples: Batch mapping holding parallel "instruction", "input" and
            "output" sequences (the form passed by a batched ``map`` call).
        prompt_template: Format string with three positional ``{}`` slots
            (instruction, input, response). Defaults to the module-level
            ``alpaca_prompt``.
        eos_token: Terminator appended to each rendered sample. Defaults to
            the module-level ``EOS_TOKEN``.

    Returns:
        A dict ``{"text": [...]}`` with one rendered string per record.
        Dict/list outputs are serialized as JSON with None-valued keys
        removed at every nesting level; anything else goes through ``str()``.
    """
    template = alpaca_prompt if prompt_template is None else prompt_template
    terminator = EOS_TOKEN if eos_token is None else eos_token

    texts = [
        template.format(instruction, input_text, _serialize_output(output)) + terminator
        for instruction, input_text, output in zip(
            examples["instruction"], examples["input"], examples["output"]
        )
    ]
    return {"text": texts}

# Load the JSONL training set from Drive and add the rendered "text" column.
dataset = load_dataset("json", data_files="/content/drive/MyDrive/train.jsonl", split="train")
dataset = dataset.map(formatting_prompts_func, batched = True)

# Print one processed sample to check that the format is correct.
print("检查第一条训练数据格式：")
# Index by row first: dataset[0] reads a single record, whereas
# dataset["text"][0] reads the whole column just to show one item.
print(dataset[0]["text"])
# Make sure the output shows {"intent": ...} (double quotes), not {'intent': ...} (single quotes).

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Query bf16 support once; fp16 is used as the fallback when it is unavailable.
bf16_ok = is_bfloat16_supported()

# Optimisation hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Train for a fixed number of epochs (15) instead of capping with max_steps,
    # so the model reliably learns the output format.
    num_train_epochs=15,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=bf16_ok,
    fp16=not bf16_ok,
    logging_steps=1,
)

# Supervised fine-tuning over the pre-rendered "text" column, without packing.
# NOTE(review): newer TRL releases may expect dataset_text_field / max_seq_length /
# packing on an SFTConfig rather than on the trainer - confirm against the installed version.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Run training and keep the returned statistics.
trainer_stats = trainer.train()

In [None]:
# Prepare the fine-tuned model for generation.
FastLanguageModel.for_inference(model)

# The prompt must use exactly the same template as training.
inputs = tokenizer(
[
    alpaca_prompt.format(
        "你是一个工业级示波器指令解析引擎。你必须只输出 JSON，不得包含任何解释性文本。",
        "将触发源改为外部",
        "", # response slot left empty for the model to fill in
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated response is decoded and printed.
prompt_length = inputs["input_ids"].shape[1]
print(tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))

In [None]:
# Suggestion: keep a dedicated project folder on Drive, e.g. 'oscilloscope_project'.
import os
save_path = "/content/drive/MyDrive/oscilloscope_project/lora_model"

# exist_ok=True creates the directory tree when missing and is a no-op when it
# already exists, so no separate (race-prone) existence check is needed.
os.makedirs(save_path, exist_ok=True)

# Write the model and tokenizer files into the Drive folder.
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"✅ 模型已安全保存到 Google Drive: {save_path}")

In [None]:
# Step 1: make sure Google Drive is mounted.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Drive folder that receives the merged model. A dedicated folder is
# suggested so it is easy to download to an Ubuntu machine afterwards.
save_directory = "/content/drive/MyDrive/oscilloscope_project/qwen_merged_hf"

print(f"正在合并模型并保存至: {save_directory} ...")

# Step 3: merge and export in 16-bit form
# (noted by the author as the most compatible format for RKLLM conversion).
model.save_pretrained_merged(save_directory, tokenizer, save_method="merged_16bit")

print("✅ 模型合并完成！你现在可以在 Google Drive 网页端看到 qwen_merged_hf 文件夹。")

In [None]:
from google.colab import files

# Zip the merged-model folder from Drive into merged_model.zip in the current
# working directory, then hand the archive to files.download.
save_directory = "/content/drive/MyDrive/oscilloscope_project/qwen_merged_hf"
!zip -r merged_model.zip {save_directory}
files.download('merged_model.zip')

In [None]:
from unsloth import FastLanguageModel

# Folder on Drive that holds the merged model.
save_directory = "/content/drive/MyDrive/oscilloscope_project/qwen_merged_hf"
print(f"正在从 {save_directory} 加载合并后的模型...")

# A fully merged model could also be loaded with AutoModelForCausalLM.from_pretrained;
# FastLanguageModel (Unsloth's loader) is kept here for consistency with the
# rest of the notebook.
merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
    model_name=save_directory,      # directory containing the merged model
    max_seq_length=max_seq_length,  # value defined in the model-loading cell
    dtype=None,                     # let the loader pick the data type
    load_in_4bit=False,             # a merged model normally needs no 4-bit loading
)

print("✅ 合并模型加载完成！")

In [None]:
# Load the merged model for testing on CPU.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

save_directory = "/content/drive/MyDrive/oscilloscope_project/qwen_merged_hf"
print(f"正在从 {save_directory} 加载合并后的模型到 CPU...")

# A fully merged model loads with the plain AutoModelForCausalLM / AutoTokenizer
# classes; device_map="cpu" places it on the CPU explicitly, and float32 is the
# usual dtype there.
merged_model = AutoModelForCausalLM.from_pretrained(
    save_directory,
    torch_dtype=torch.float32,
    device_map="cpu",
)
merged_tokenizer = AutoTokenizer.from_pretrained(save_directory)

print("✅ 合并模型已成功加载到 CPU！")

In [None]:
# Test the merged model on CPU.
# FastLanguageModel.for_inference is not needed when running on CPU.

# The prompt template must be exactly the one used for training.
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

inputs = merged_tokenizer(
[
    alpaca_prompt.format(
        "你是一个工业级示波器指令解析引擎。你必须只输出 JSON，不得包含任何解释性文本。",
        "将触发源改为外部",
        "", # response slot left empty for the model to fill in
    )
], return_tensors = "pt").to("cpu") # keep the inputs on the CPU as well

outputs = merged_model.generate(**inputs, max_new_tokens = 128, use_cache = True)
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated response is decoded and printed.
prompt_length = inputs["input_ids"].shape[1]
print(merged_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))

In [None]:
# Test the merged model on GPU.
import torch
from unsloth import FastLanguageModel

# Folder on Drive that holds the merged model.
save_directory = "/content/drive/MyDrive/oscilloscope_project/qwen_merged_hf"
print(f"正在从 {save_directory} 加载合并后的模型到 GPU...")

# Load the merged model with FastLanguageModel.
# Make sure the Colab runtime type is set to GPU.
merged_model_gpu, merged_tokenizer_gpu = FastLanguageModel.from_pretrained(
    model_name = save_directory, # directory containing the merged model
    max_seq_length = 2048, # same value as used for training
    dtype = None, # let the loader pick the data type
    load_in_4bit = False, # a merged model normally needs no 4-bit loading
)

print("✅ 合并模型已成功加载到 GPU！")

# Prepare the reloaded model for generation.
FastLanguageModel.for_inference(merged_model_gpu)

# The prompt template must be exactly the one used for training.
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

inputs = merged_tokenizer_gpu(
[
    alpaca_prompt.format(
        "你是一个工业级示波器指令解析引擎。你必须只输出 JSON，不得包含任何解释性文本。",
        "将触发源改为外部",
        "", # response slot left empty for the model to fill in
    )
], return_tensors = "pt").to("cuda") # move the inputs to the GPU

outputs = merged_model_gpu.generate(**inputs, max_new_tokens = 128, use_cache = True)
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated response is decoded and printed.
prompt_length = inputs["input_ids"].shape[1]
print(merged_tokenizer_gpu.decode(outputs[0][prompt_length:], skip_special_tokens=True))