In [2]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
from vllm import LLM, SamplingParams

import os

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


AttributeError: module 'torch._inductor' has no attribute 'config'

In [None]:
# Local directory passed as `cache_dir` when the MBPP dataset is loaded below;
# create it up front (no error if it already exists).
DATASETS_DIR = "./datasets"
os.makedirs(DATASETS_DIR, exist_ok=True)

In [None]:
# Load the "sanitized" configuration of MBPP, caching its files under DATASETS_DIR.
mbpp = load_dataset("google-research-datasets/mbpp", "sanitized", cache_dir=DATASETS_DIR)

In [None]:
# Model identifiers and folders.
#   MODEL_NAME       - model id (presumably the Hugging Face Hub repo id)
#   MODEL_PATH       - local folder the base model is loaded from
#   OUTPUT_MODEL_DIR - where the merged fine-tuned model is written
# Smaller alternative kept for reference:
#   "Qwen/Qwen3-0.6B" / "models/qwen3-0.6b" / "models/qwen3-0.6b-sft"
MODEL_NAME = "Qwen/Qwen3-4B-Instruct-2507"
MODEL_PATH = "models/qwen3-4B-instruct-2507"
OUTPUT_MODEL_DIR = f"{MODEL_PATH}-sft"

# Parent folder for both model directories.
os.makedirs("models", exist_ok=True)

In [None]:
# Load the base model and tokenizer in 4-bit.
# Prefer the local copy under MODEL_PATH; when that folder holds no
# config.json (e.g. a fresh checkout where only "models/" was created),
# fall back to MODEL_NAME so the cell survives Restart & Run All.
_has_local_model = os.path.isfile(os.path.join(MODEL_PATH, "config.json"))

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_PATH if _has_local_model else MODEL_NAME,
    max_seq_length = 2048,  # same value is passed to the trainer below
    dtype = None,           # per Unsloth docs, None means auto-detect
    load_in_4bit = True,
)

==((====))==  Unsloth 2026.1.3: Fast Qwen3 patching. Transformers: 4.57.3. vLLM: 0.13.0.
   \\   /|    NVIDIA GeForce RTX 4070 Ti SUPER. Num GPUs = 1. Max memory: 15.992 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# 2. Attach LoRA adapters.
# The target modules are the attention projections (q/k/v/o) and the MLP
# projections (gate/up/down).
# NOTE: this rebinds `model`; reload the base model before re-running the cell.
lora_settings = dict(
    r=64,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=64,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)

Unsloth 2026.1.3 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.


In [None]:
def extract_signature_from_mbpp_code(code: str) -> str:
    """Return the header of the first ``def`` in ``code`` without its colon.

    The header is read up to the colon that terminates it, i.e. the first
    ``:`` found outside brackets and string literals. This means that

    * one-line functions (``def f(x): return x``) do not leak their body,
    * headers wrapped over several lines are returned whole, with the
      stripped physical lines joined by single spaces,
    * a trailing ``# comment`` is dropped.

    Args:
        code: Python source text containing at least one line that starts
            with ``def `` (after stripping leading whitespace).

    Returns:
        The signature text, e.g. ``"def add(a, b)"``.

    Raises:
        ValueError: if no line starts with ``def ``.
    """
    lines = code.splitlines()
    for start, raw in enumerate(lines):
        if not raw.strip().startswith("def "):
            continue

        pieces = []
        depth = 0  # nesting level of (), [] and {} across header lines
        for part in lines[start:]:
            part = part.strip()
            quote = None      # quote char of the string literal we are inside
            escaped = False   # previous char inside a string was a backslash
            cut = len(part)
            found_colon = False
            for pos, ch in enumerate(part):
                if quote is not None:
                    if escaped:
                        escaped = False
                    elif ch == "\\":
                        escaped = True
                    elif ch == quote:
                        quote = None
                elif ch in "\"'":
                    quote = ch
                elif ch == "#":
                    # Rest of the physical line is a comment.
                    cut = pos
                    break
                elif ch in "([{":
                    depth += 1
                elif ch in ")]}":
                    depth -= 1
                elif ch == ":" and depth == 0:
                    # Colon outside brackets ends the header; colons used by
                    # annotations or defaults sit at depth > 0 or in strings.
                    cut = pos
                    found_colon = True
                    break
            piece = part[:cut].strip()
            if piece:
                pieces.append(piece)
            if found_colon:
                return " ".join(pieces)

        # Header never terminated (malformed code): keep the previous
        # best-effort result based on the ``def`` line alone.
        return raw.strip().rstrip(":")
    raise ValueError("No function signature found")


def build_mbpp_prompt(example: dict) -> dict:
    """Render one MBPP record as a complete chat transcript for fine-tuning.

    The transcript has three turns: a fixed system instruction, a user turn
    containing the task text and the function signature taken from the
    reference solution, and an assistant turn holding the reference solution
    inside a ```python fence. It is rendered with the notebook-level
    ``tokenizer`` chat template, without a generation prompt.

    Args:
        example: record providing the ``"prompt"`` and ``"code"`` keys.

    Returns:
        ``{"text": <rendered transcript>}``.

    Raises:
        ValueError: propagated from ``extract_signature_from_mbpp_code`` when
            the solution contains no ``def`` line.
    """
    task_description = example["prompt"]
    reference_code = example["code"]
    signature = extract_signature_from_mbpp_code(reference_code)

    system_text = " ".join((
        "You are an expert Python coding assistant.",
        "Given a problem description and function signature,",
        "implement the function body so that it passes all tests.",
    ))
    # The extracted signature has no trailing colon, so it is re-added here.
    user_text = (
        f"Problem:\n{task_description}\n\n"
        f"Use the following function signature:\n{signature}:\n\n"
        "Write the full Python function implementation. "
        "Do NOT change the function name or arguments. "
        "Return only Python code."
    )
    assistant_text = "```python\n" + reference_code + "\n```"

    turns = (
        ("system", system_text),
        ("user", user_text),
        ("assistant", assistant_text),
    )
    messages = [{"role": role, "content": content} for role, content in turns]

    rendered = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

In [None]:
# Apply build_mbpp_prompt to every split, adding a "text" column with the
# rendered chat transcript; the bare `dataset` expression displays the result.
dataset = mbpp.map(build_mbpp_prompt)
dataset

DatasetDict({
    train: Dataset({
        features: ['source_file', 'task_id', 'prompt', 'code', 'test_imports', 'test_list', 'text'],
        num_rows: 120
    })
    test: Dataset({
        features: ['source_file', 'task_id', 'prompt', 'code', 'test_imports', 'test_list', 'text'],
        num_rows: 257
    })
    validation: Dataset({
        features: ['source_file', 'task_id', 'prompt', 'code', 'test_imports', 'test_list', 'text'],
        num_rows: 43
    })
    prompt: Dataset({
        features: ['source_file', 'task_id', 'prompt', 'code', 'test_imports', 'test_list', 'text'],
        num_rows: 7
    })
})

In [None]:
# 4. Trainer: supervised fine-tuning on the "text" column of the train split.
# Use bf16 when the GPU supports it, otherwise fall back to fp16.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="checkpoints",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    max_steps=100,  # short run, just for a test
    learning_rate=2e-4,
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    dataset_text_field="text",
    max_seq_length=2048,
    packing=False,
    args=training_args,
)

In [None]:
# Run the fine-tuning loop configured in `trainer`.
print("–ù–∞—á–∏–Ω–∞–µ–º –æ–±—É—á–µ–Ω–∏–µ...")
trainer.train()

# 5. SAVING (the most important part for your script)
# Merge the weights and save them in a format that vLLM understands.
print(f"–°–æ—Ö—Ä–∞–Ω—è–µ–º –æ–±—ä–µ–¥–∏–Ω–µ–Ω–Ω—É—é –º–æ–¥–µ–ª—å –≤ {OUTPUT_MODEL_DIR}...")
model.save_pretrained_merged(OUTPUT_MODEL_DIR, tokenizer, save_method="merged_16bit")
# The tokenizer files are written to the same directory as the merged model.
tokenizer.save_pretrained(OUTPUT_MODEL_DIR)
print("–ì–æ—Ç–æ–≤–æ! –¢–µ–ø–µ—Ä—å –∑–∞–ø—É—Å–∫–∞–π —Å–≤–æ–π —Å–∫—Ä–∏–ø—Ç –∏–Ω—Ñ–µ—Ä–µ–Ω—Å–∞.")

The model is already on multiple devices. Skipping the move to device specified in `args`.


Начинаем обучение...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 120 | Num Epochs = 13 | Total steps = 100
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 132,120,576 of 4,154,588,672 (3.18% trained)


Step,Training Loss
10,1.165
20,0.397
30,0.296
40,0.1742
50,0.0954
60,0.0632
70,0.0455
80,0.0402
90,0.0354
100,0.0343


Сохраняем объединенную модель в models/qwen3-4B-instruct-2507-sft...
Detected local model directory: /home/pavel/projects/mouse-learning/cl-rl-course-work/models/qwen3-4B-instruct-2507
Found HuggingFace hub cache directory: /home/pavel/.cache/huggingface/hub


Unsloth: Preparing safetensor model files: 100%|██████████| 3/3 [00:00<00:00, 12098.95it/s]
Unsloth: Merging weights into 16bit: 100%|██████████| 3/3 [00:34<00:00, 11.38s/it]

Unsloth: Merge process complete. Saved to `/home/pavel/projects/mouse-learning/cl-rl-course-work/models/qwen3-4B-instruct-2507-sft`
Готово! Теперь запускай свой скрипт инференса.





In [None]:
def build_mbpp_prompt_test(example: dict) -> dict:
    """Render one MBPP record as an inference prompt (no assistant turn).

    The chat has a fixed system instruction and a user turn containing the
    task text and the function signature taken from the reference solution.
    It is rendered with the notebook-level ``tokenizer`` chat template with
    ``add_generation_prompt=True``, so the text ends where the model's reply
    should begin.

    Args:
        example: record providing the ``"prompt"`` and ``"code"`` keys; the
            code is used only to extract the signature.

    Returns:
        ``{"text": <rendered prompt>}``.

    Raises:
        ValueError: propagated from ``extract_signature_from_mbpp_code`` when
            the solution contains no ``def`` line.
    """
    task_description = example["prompt"]
    signature = extract_signature_from_mbpp_code(example["code"])

    system_text = " ".join((
        "You are an expert Python coding assistant.",
        "Given a problem description and function signature,",
        "implement the function body so that it passes all tests.",
    ))
    # The extracted signature has no trailing colon, so it is re-added here.
    user_text = (
        f"Problem:\n{task_description}\n\n"
        f"Use the following function signature:\n{signature}:\n\n"
        "Write the full Python function implementation. "
        "Do NOT change the function name or arguments. "
        "Return only Python code."
    )

    turns = (("system", system_text), ("user", user_text))
    messages = [{"role": role, "content": content} for role, content in turns]

    rendered = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return {"text": rendered}

In [None]:
# Render inference prompts for the test split and preview the first one.
# Requires `mbpp` from the dataset-loading cell to be defined in the kernel.
dataset_test = mbpp["test"].map(build_mbpp_prompt_test)
dataset_test[0]["text"]

NameError: name 'mbpp' is not defined

In [None]:
# Sanity check: generate a completion for the first test prompt.

# Switch the Unsloth model to inference mode before calling generate().
FastLanguageModel.for_inference(model)

model_inputs = tokenizer([dataset_test[0]["text"]], return_tensors="pt").to(model.device)

generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=250,
)

# The returned sequence starts with the prompt tokens; slice them off so only
# the newly generated tokens are decoded.
prompt_length = model_inputs.input_ids.shape[1]
output_ids = generated_ids[0][prompt_length:].tolist()

# skip_special_tokens keeps the end-of-turn marker (e.g. <|im_end|>) out of
# the printed code.
print(tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n"))

```python
def sort_matrix(M): 
    result = [j for i in M for j in i]
    result.sort()
    result = [result[i:i+len(M[0])] for i in range(0, len(result), len(M[0]))]
    return result
```<|im_end|>
