# Qwen3 4B Instruct - Redox Rust/Iron Finetune

This notebook adapts the Unsloth Qwen3 flow for the Redox dataset exports.

Before running, upload or mount files under:
- `/content/data/pilot/foundation_v1/unsloth/`

Expected files:
- `rust_train.jsonl`, `rust_val.jsonl`, `rust_test.jsonl`
- `iron_train.jsonl`, `iron_val.jsonl`, `iron_test.jsonl`


In [None]:
%%capture
# Dependency bootstrap for the notebook; %%capture silences this cell's output.
import os, re
# Concatenate all environment variable names and look for "COLAB_" to pick the install path.
if "COLAB_" not in "".join(os.environ.keys()):
    # No COLAB_* variable present: a plain install of unsloth.
    !pip install unsloth
else:
    # A COLAB_* variable is present: install the packages individually.
    import torch
    # Keep only the leading "major.minor" part of the installed torch version string.
    v = re.match(r'[\d]{1,}\.[\d]{1,}', str(torch.__version__)).group(0)
    # Map that torch version to an xformers pin; unlisted versions fall back to 0.0.34.
    xformers = 'xformers==' + {'2.10':'0.0.34','2.9':'0.0.33.post1','2.8':'0.0.32.post2'}.get(v, "0.0.34")
    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
    # --no-deps: these packages are installed without pulling in their own dependencies.
    !pip install --no-deps unsloth_zoo bitsandbytes accelerate {xformers} peft trl triton unsloth
# Executed on both paths: pin transformers and trl to fixed versions.
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2


In [None]:
from unsloth import FastLanguageModel
import torch

# Base checkpoint options: 4-bit loading is on, 8-bit loading and full
# fine-tuning are off, and the sequence length is capped at 2048.
base_model_options = dict(
    model_name="unsloth/Qwen3-4B-Instruct-2507",
    max_seq_length=2048,
    load_in_4bit=True,
    load_in_8bit=False,
    full_finetuning=False,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Module names handed to get_peft_model as LoRA targets.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with the LoRA settings below (rank 32, alpha 32,
# no dropout, no bias terms, fixed random_state).
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    r=32,
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)


In [None]:
import os

# ARM = "rust" or "iron"
ARM = "rust"
VALID_ARMS = ("rust", "iron")

# Default local path for uploaded files
BASE_DIR = "/content/data/pilot/foundation_v1/unsloth"

# If using Google Drive, uncomment:
# from google.colab import drive
# drive.mount('/content/drive')
# BASE_DIR = "/content/drive/MyDrive/redox/data/pilot/foundation_v1/unsloth"


def resolve_split_files(base_dir, arm):
    """Return the train/val/test JSONL paths for one dataset arm.

    Args:
        base_dir: Directory that holds the exported ``*.jsonl`` files.
        arm: Dataset arm; must be one of ``VALID_ARMS``.

    Returns:
        Dict with keys ``"train"``, ``"val"`` and ``"test"`` mapping to
        ``f"{base_dir}/{arm}_{split}.jsonl"``.

    Raises:
        ValueError: If ``arm`` is not a known arm. Failing here gives a clear
            message instead of a missing-file error in a later cell.
    """
    if arm not in VALID_ARMS:
        raise ValueError(f"ARM must be one of {VALID_ARMS}, got {arm!r}")
    return {split: f"{base_dir}/{arm}_{split}.jsonl" for split in ("train", "val", "test")}


arm_files = resolve_split_files(BASE_DIR, ARM)
train_file = arm_files["train"]
val_file   = arm_files["val"]
test_file  = arm_files["test"]

print('Using arm:', ARM)
print('Train:', train_file)
print('Val:', val_file)
print('Test:', test_file)

# Report every missing export at once so an incomplete upload or mount is
# visible here, before the dataset-loading cell is run.
missing_files = [path for path in arm_files.values() if not os.path.isfile(path)]
if missing_files:
    print('WARNING: missing dataset files:')
    for path in missing_files:
        print('  -', path)


In [None]:
from datasets import load_dataset
from unsloth.chat_templates import get_chat_template, standardize_data_formats

# Load the three JSONL exports as named splits of one dataset object.
data_files = {"train": train_file, "validation": val_file, "test": test_file}
dataset = load_dataset("json", data_files=data_files)

# Replace the tokenizer with the one returned for the "qwen3-instruct" template.
tokenizer = get_chat_template(tokenizer, chat_template="qwen3-instruct")

# Run every split through standardize_data_formats, in train/val/test order.
train_dataset, val_dataset, test_dataset = [
    standardize_data_formats(dataset[split_name])
    for split_name in ("train", "validation", "test")
]

def formatting_prompts_func(examples):
    """Render each conversation in a batch to a single string.

    Reads ``examples["conversations"]`` and passes every item to the
    module-level ``tokenizer.apply_chat_template`` with ``tokenize=False`` and
    ``add_generation_prompt=False``.

    Returns:
        ``{"text": [...]}`` with one rendered entry per conversation, in the
        same order as the input.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Add the rendered "text" column to every split with a batched map,
# processing train, then validation, then test.
train_dataset, val_dataset, test_dataset = [
    split.map(formatting_prompts_func, batched=True)
    for split in (train_dataset, val_dataset, test_dataset)
]

# Show the loaded dataset object and the first 400 characters of one rendered sample.
print(dataset)
first_rendered = train_dataset[0]["text"]
print(first_rendered[:400])


In [None]:
from trl import SFTTrainer, SFTConfig
from unsloth.chat_templates import train_on_responses_only

run_name = f"qwen3-4b-redox-{ARM}-seed3407"

# Training configuration, grouped by concern.
sft_config = SFTConfig(
    # Data and output location
    dataset_text_field="text",
    output_dir=f"./outputs/{run_name}",
    report_to="none",
    seed=3407,
    # Batching: 2 per device x 4 accumulation steps
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    # Optimiser and schedule
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    lr_scheduler_type="linear",
    warmup_steps=5,
    # Logging, evaluation and checkpointing cadence
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=50,
    save_total_limit=2,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=sft_config,
)

# Hand the trainer to train_on_responses_only together with the chat-template
# markers that open a user turn and an assistant turn.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|im_start|>user\n",
    response_part="<|im_start|>assistant\n",
)


In [None]:
# Bytes per GiB; dividing once by 1024**3 equals three successive divisions by 1024.
bytes_per_gib = 1024 ** 3

# Properties of CUDA device 0 and the peak reserved memory so far, both in GiB (3 decimals).
gpu_stats = torch.cuda.get_device_properties(0)
reserved_bytes = torch.cuda.max_memory_reserved()
start_gpu_memory = round(reserved_bytes / bytes_per_gib, 3)
max_memory = round(gpu_stats.total_memory / bytes_per_gib, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")


In [None]:
# Run training with the trainer built above and keep the returned result
# object so it can be inspected after the run.
train_result = trainer.train()
# Print the result object returned by trainer.train().
print(train_result)


In [None]:
# Derive the save location from the trainer's own output_dir rather than
# rebuilding the run name from the global ARM. If ARM was edited or the config
# cell was rerun after the trainer was built, the adapter still lands next to
# the checkpoints of the run that was actually trained.
save_dir = f"{trainer.args.output_dir}/final_adapter"

# Write the trained model and the tokenizer files into the same directory.
trainer.model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print('Saved adapter to', save_dir)
