In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments

In [None]:
# Loading the Model
model_name = "Qwen/Qwen3-4B-Instruct-2507"
max_seq_length = 2048  # tune to the available GPU memory; 2048 is a fairly safe value
dtype = None           # None = pick fp16 / bf16 automatically
load_in_4bit = True    # 4-bit quantization, strongly recommended

# Gather the loader options in one place, then hand them over in a single call.
load_options = dict(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    local_files_only=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)


In [None]:
# Applying LoRA with PEFT
# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank (8 or 16 both work)
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # saves GPU memory
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

In [None]:
# Loading and formatting the Dataset
from datasets import load_dataset

# Prompt template with three positional `{}` slots, filled in order with the
# instruction, the input and the response (see `prompt.format(...)` in
# `format_prompt`). The text starts with a newline because the content begins
# on the line after the opening triple quote.
prompt = """
### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token string of the loaded tokenizer; it is appended to
# every formatted training example.
EOS_TOKEN = tokenizer.eos_token

def format_prompt(examples):
    """Render a batch of records into prompt-formatted training strings.

    Args:
        examples: Batch mapping with the keys "instruction", "input" and
            "output", each holding a sequence of values; rows are formed by
            zipping the three sequences together.

    Returns:
        dict: ``{"text": [...]}`` with one string per zipped row, built from
        the module-level ``prompt`` template followed by ``EOS_TOKEN``.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN must terminate every sample; per the original note, generation
    # would otherwise go on forever.
    rendered = [prompt.format(*row) + EOS_TOKEN for row in rows]
    return {"text": rendered}

# Read the JSON-lines training file, then add the rendered "text" column
# produced by format_prompt (applied batch-wise).
raw_dataset = load_dataset("json", data_files="train.jsonl")
dataset = raw_dataset.map(format_prompt, batched=True)

In [None]:
# Query bf16 support once: use bf16 when available, otherwise fall back to fp16.
bf16_supported = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="Qwen/qwen3-4b-12.26",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size = 2 * 4 = 8
    warmup_steps=50,
    max_steps=500,
    fp16=not bf16_supported,
    bf16=bf16_supported,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
)

In [None]:
import os

# Build the supervised fine-tuning trainer over the formatted "text" column
# of the training split.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset['train'],
    eval_dataset = None,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = False,
    args = training_args,
    # TRL's option for parallel dataset preprocessing is `dataset_num_proc`;
    # `preprocessing_num_workers` is not an SFTTrainer parameter. The worker
    # count is capped at the number of CPUs so a fixed 256 does not
    # oversubscribe smaller machines.
    dataset_num_proc = min(256, os.cpu_count() or 1),
)


In [None]:
# Start Training
# Runs the training loop of the `trainer` object. As the last expression in
# the cell, the value returned by train() is shown as the cell output.
trainer.train()

In [None]:
# Saving the Merged Model
merged_model_dir = "Qwen/Qwen3-4B-merged"

# Write the model and tokenizer to disk using the "merged_16bit" save method.
model.save_pretrained_merged(
    merged_model_dir,
    tokenizer,
    save_method="merged_16bit",
)

In [None]:
# Plotting the Training Loss
import matplotlib.pyplot as plt

# Keep only the log entries that carry a "loss" value; entries in
# log_history without that key are skipped. Filtering once keeps `steps`
# and `losses` aligned by construction.
loss_logs = [log for log in trainer.state.log_history if "loss" in log]
steps = [log["step"] for log in loss_logs]
losses = [log["loss"] for log in loss_logs]

# The label gives plt.legend() an entry to show; without any labelled artist
# the legend is empty and matplotlib emits a warning.
plt.plot(steps, losses, label="Training loss")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training Loss")
plt.legend()
# Pass True explicitly: a bare grid() call toggles the current grid state
# instead of always switching it on.
plt.grid(True)
plt.savefig("training_loss.png")
plt.show()