# SFT of Qwen3

## Libraries

In [1]:
import os
import torch
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer
import re
import random
from rich.console import Console
from rich.panel import Panel
from rich.rule import Rule

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

## Global variables

In [3]:
model_id = "Qwen/Qwen3-0.6B"
dataset_id = "AxelDlv00/EULAI"
output_dir = "./EULAI-Qwen3-Instruct"

## Dataset preparation

In [4]:
dataset = load_dataset(dataset_id)

def preprocess_function(example):
    return {
        "messages": [
            {"role": "user", "content": example["policy"]},
            {"role": "assistant", "content": example["summary"]}
        ]
    }

In [5]:
dataset = dataset.map(preprocess_function, remove_columns=dataset["train"].column_names)
# dataset["train"] = dataset["train"].shuffle(seed=42).select(range(500))
# dataset["test"] = dataset["test"].shuffle(seed=42).select(range(min(50, len(dataset["test"]))))

# Tokenizer

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

## Training preparation

In [7]:
training_args = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=2, 
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    num_train_epochs=3, 
    logging_steps=20,   
    save_strategy="epoch",
    eval_strategy="epoch", 
    lr_scheduler_type="cosine",
    warmup_ratio=0.03, 
    max_length=2048,   
    packing=True,
    
    dataset_kwargs={
        "add_special_tokens": False,  
    },
    
    bf16=torch.cuda.is_bf16_supported() if torch.cuda.is_available() else False,
    fp16=not (torch.cuda.is_bf16_supported() if torch.cuda.is_available() else False),
)

peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

trainer = SFTTrainer(
    model=model_id,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    args=training_args,
    processing_class=tokenizer,
)



/Data/AxelDlv/condaenvs/SJTU/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/Data/AxelDlv/condaenvs/SJTU/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
Padding-free training is enabled, but the attention implementation is not set to a supported flash attention variant. Padding-free training flattens batches into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn2, kernels-community/flash-attn3, kernels-community/vllm-flash-attn3. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation` in the model configuration to one of these supported options or verify that your attention mechanism can handle flattened sequences.
You are using packing, but the attention implementation is not set to a supported flash a

## Training

In [None]:
trainer.train()
trainer.save_model(output_dir)

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Epoch,Training Loss,Validation Loss


## Print results

In [None]:
def parse_generative_output(text: str):
    """Extrait les points clés du texte généré par le modèle."""
    results = []
    pattern = re.compile(r"^-\s*\[(BAD|GOOD|NEUTRAL|BLOCKER)\]\s*:\s*([^:]+):\s*(.+)$", re.MULTILINE)
    
    for match in pattern.finditer(text):
        results.append({
            "label": match.group(1).upper(),
            "title": match.group(2).strip(),
            "explanation": match.group(3).strip()
        })
    return results

def visualize_inference(trainer, tokenizer, dataset_id, num_samples=3):
    """
    Visualisation sur des exemples NON-PACKÉS.
    On recharge le dataset de test brut pour voir comment le modèle se comporte
    sur une vraie requête utilisateur unique.
    """
    console = Console()
    console.print(Rule("[bold magenta]Visualisation Inférence Finale Qwen3[/bold magenta]"))
    
    # On recharge le dataset de test "frais" pour ne pas avoir les versions "packées" du trainer
    try:
        raw_test_dataset = load_dataset(dataset_id, split="test")
    except:
        raw_test_dataset = load_dataset(dataset_id)["test"]
        
    # Sélection aléatoire sur TOUT le dataset de test
    indices = random.sample(range(len(raw_test_dataset)), min(num_samples, len(raw_test_dataset)))
    selected_items = [raw_test_dataset[i] for i in indices]
    
    model = trainer.model
    model.eval()
    
    for item in selected_items:
        policy_text = item['policy']
        
        messages = [{"role": "user", "content": policy_text}]
        
        try:
            # enable_thinking=False pour forcer le format strict
            inputs = tokenizer.apply_chat_template(
                messages, 
                tokenize=True, 
                add_generation_prompt=True, 
                return_tensors="pt",
                enable_thinking=False 
            ).to(model.device)
        except TypeError:
            inputs = tokenizer.apply_chat_template(
                messages, 
                tokenize=True, 
                add_generation_prompt=True, 
                return_tensors="pt"
            ).to(model.device)
        
        with torch.no_grad():
            outputs = model.generate(
                inputs, 
                max_new_tokens=512, 
                temperature=0.1, 
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )
        
        full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        points = parse_generative_output(full_output)
        
        console.print(f"\n[bold underline white on blue] FINAL MODEL TEST (ID: {item.get('id', 'N/A')[:6]}...) [/bold underline white on blue]")
        
        if not points:
             console.print(Panel(full_output, title="⚠️ Raw Output (Format non détecté)", border_style="red"))
        
        for p in points:
            label = p['label']
            title = p['title']
            expl = p['explanation']
            
            if label == "BLOCKER":
                color = "white on red" 
                icon = "⛔"
                border = "red"
            elif label == "BAD":
                color = "red"
                icon = "❌"
                border = "red"
            elif label == "GOOD":
                color = "green"
                icon = "✅"
                border = "green"
            else:
                color = "yellow"
                icon = "ℹ️"
                border = "yellow"
            
            p_content = f"[{color}][bold]{title}[/bold][/{color}]\n[white]{expl}[/white]"
            
            console.print(Panel(
                p_content,
                title=f"{icon} {label}",
                title_align="left",
                border_style=border,
                width=100
            ))

In [None]:
visualize_inference(trainer, tokenizer, dataset_id)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
