# 1. Setup

The pipeline proceeds as follows: set up the environment -> load the base model -> configure QLoRA -> prepare the dataset -> fine-tune the LLM -> save checkpoints -> run inference.

In [None]:
# Report the GPU(s) visible to this runtime before any model is loaded.
!nvidia-smi

In [None]:
!pip install -q -U wandb
!pip install -q -U bitsandbytes
!pip install -q -U datasets
!pip install -q -U transformers
!pip install -q -U accelerate
!pip install -q -U peft
!pip install -q -U huggingface_hub
!pip install -q -U torch
!pip install -q -U scikit-learn
!pip install -q -U tqdm

In [None]:
import os
import bitsandbytes as bnb
import torch

from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# Authenticate with Weights & Biases and the Hugging Face Hub.
#
# Credentials are taken from the environment (WANDB_API_KEY / HF_TOKEN) and are
# prompted for interactively when the variable is unset or empty. They must
# never be written into the notebook itself.
# SECURITY: the tokens that used to be hardcoded in this cell have been exposed
# to anyone with access to the notebook; revoke and regenerate them.
from getpass import getpass

import wandb
from huggingface_hub import login

wb_token = os.environ.get("WANDB_API_KEY") or getpass("Weights & Biases API key: ")
wandb.login(key=wb_token)
run = wandb.init(
    project='Finetuning Selection LLM',
    job_type="training",
    anonymous="allow"
)

API_KEY = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=API_KEY)

In [None]:
# Run configuration shared by the rest of the notebook.
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
SEED = 42
TRAIN_PATH = "selection_train.json"
# Also set in the import cell; repeated so this cell can be run on its own.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# QLoRA-style quantization: 4-bit NF4 weights with double quantization,
# computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized base model; device_map="auto" lets accelerate place it.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Use EOS as the pad token only when the tokenizer does not define one.
# The causal-LM data collator used later masks every position whose id equals
# pad_token_id out of the labels, so forcing pad == eos would also mask the
# end-of-turn token and the model would never be trained to stop generating.
# NOTE(review): the Qwen2.5 tokenizer is expected to ship its own pad token,
# in which case this fallback is a no-op -- confirm with tokenizer.pad_token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Trade compute for memory, then prepare the quantized model for k-bit training.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA adapter settings: rank-8 adapters (alpha 16, dropout 0.05, no bias
# training) attached to the modules named q_proj and v_proj.
lora_target_modules = ["q_proj", "v_proj"]

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the prepared base model so the adapters are attached to it.
model = get_peft_model(model, peft_config)

# 2. Dataset

In [None]:
# Read the JSON training file, then hold out 10% of the rows for evaluation.
# The split is shuffled with the notebook-wide SEED so it is reproducible.
full_train_set = load_dataset("json", data_files=TRAIN_PATH, split="train")
dataset = full_train_set.train_test_split(
    test_size=0.1,
    shuffle=True,
    seed=SEED,
)

In [None]:
# Display the train/test split produced above (row counts and column names).
dataset

In [None]:
# System message placed at the start of every training conversation built by
# preprocess(); it spells out the exact output format expected from the model.
QWEN25_SYSTEM_PROMPT = """You are the Selection module in the Selection-Inference framework.
When given input consisting of numbered sentences:
sent 0: …
sent 1: …
…

followed by a question, you must output **only** the selection in exactly this format:

sent <X>. We know that sent <Y> [and sent <Z>]. Therefore, <conclusion>.

Do **not** output any other text, explanation, or formatting."""

def preprocess(batch, max_length=512):
    """Turn a batch of (prompt, target) pairs into tokenized chat examples.

    Each pair is rendered with the tokenizer's chat template as a three-turn
    conversation: the system prompt, the user prompt followed by
    "\\nSelection:", and the target as the assistant turn.

    Args:
        batch: Mapping with parallel lists under "prompt" and "target", as
            passed by ``datasets.Dataset.map(..., batched=True)``.
        max_length: Length every sequence is padded or truncated to. Defaults
            to 512, the value previously hard-coded here.

    Returns:
        The tokenizer output for the rendered conversations, with an added
        "labels" entry that is a copy of "input_ids".

    Note:
        Sequences longer than ``max_length`` are truncated by the tokenizer,
        which can cut off the end of the assistant target.
    """
    # apply_chat_template joins the system, user and assistant turns into a
    # single string; tokenization happens in one batched call below.
    chats = [
        tokenizer.apply_chat_template(
            [
                {"role": "system", "content": QWEN25_SYSTEM_PROMPT},
                {"role": "user", "content": src.strip() + "\nSelection:"},
                {"role": "assistant", "content": tgt.strip()},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for src, tgt in zip(batch["prompt"], batch["target"])
    ]
    tok = tokenizer(
        chats,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    # Causal-LM objective: the labels start out as the input ids themselves.
    tok["labels"] = tok["input_ids"].copy()
    return tok

In [None]:
# Tokenize both splits in batches across 4 worker processes and drop the raw
# text columns once they have been consumed by preprocess().
raw_text_columns = ["prompt", "target"]
dataset = dataset.map(
    preprocess,
    batched=True,
    num_proc=4,
    remove_columns=raw_text_columns,
)

# Convenience handles for the Trainer.
train_ds, eval_ds = dataset["train"], dataset["test"]

# Causal-LM collation (mlm=False); batches are padded to a multiple of 8.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    pad_to_multiple_of=8,
)

# 3. Modeling

In [None]:
# Pick the mixed-precision mode from the hardware. The 4-bit base model is
# configured to compute in bfloat16, so train in bf16 whenever the GPU supports
# it and fall back to fp16 only on GPUs that do not.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # Saving model
    output_dir="./qwen_2.5-7b-instruct-selection-llm",
    overwrite_output_dir=True,
    seed=SEED,

    # Training & Evaluation
    do_train=True,
    do_eval=True,
    eval_strategy="epoch",           # run evaluation at the end of each epoch
    save_strategy="epoch",           # must match eval_strategy for load_best_model_at_end
    logging_strategy="epoch",
    save_total_limit=3,
    num_train_epochs=3,
    # NOTE(review): logging_steps appears to have no effect while
    # logging_strategy="epoch"; switch the strategy to "steps" to use it.
    logging_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,         # lower eval_loss is better

    # Batch & Gradient accumulation (effective batch of 4 * 4 = 16 per device)
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,

    # hyperparameters
    learning_rate=5e-5,
    weight_decay=0.01,
    # NOTE(review): confirm that 1000 warmup steps is well below the total
    # number of optimizer steps for this dataset size; otherwise the learning
    # rate never reaches its peak. warmup_ratio may be a safer choice.
    warmup_steps=1000,
    lr_scheduler_type="cosine",  # or "linear", "polynomial", "cosine_with_restarts", ...

    # ---- precision & performance ----
    optim="paged_adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    gradient_checkpointing=True,
    dataloader_num_workers=4,
    dataloader_drop_last=True,
)

# The generation KV cache is turned off for training with gradient checkpointing.
model.config.use_cache = False
# Make the embedding outputs require grad so gradients flow to the adapters.
model.enable_input_require_grads()

In [None]:
# Bring together the PEFT-wrapped model, the tokenized splits, the collator and
# the training arguments, then start fine-tuning.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
)

trainer.train()

In [None]:
# Save the fine-tuned model locally, then publish it to the Hugging Face Hub.
local_adapter_dir = "./qwen_2.5-selection-llm"
model.save_pretrained(local_adapter_dir)

# NOTE(review): the repo id spells "Qwwn2.5" -- confirm this is the intended
# repository name before pushing.
PEFT_MODEL = "Savoxism/InstructionTuning-Qwwn2.5-7B-Selection-LLM"
model.push_to_hub(PEFT_MODEL)

# 4. Inference