In [2]:
!pip install -q accelerate pynvml huggingface_hub hf_transfer "transformers==4.57.0" 
!pip install -q trl peft scikit-learn bitsandbytes pillow

Reason for being yanked: Error in the setup causing installation issues

In [None]:
import os
import json
import random
from typing import Any, Dict, List

import torch
from transformers import (
    Qwen3VLForConditionalGeneration,
    AutoProcessor,
    BitsAndBytesConfig,
)
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer
from PIL import Image


# Base configuration
SEED = 42
MODEL_ID = "Qwen/Qwen3-VL-8B-Instruct"
DATA_PATH = "tuning_data_final8_image_change.jsonl"
OUTPUT_DIR = "qwen3-vl-8b-kbo-scoreboard-qlora"

# Default to the CPU and switch to CUDA only when torch reports it as available.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
print(f"device = {device}")

# Utility functions
def set_seed(seed: int = 42):
    """Seed every random number generator this notebook can reach.

    Seeds Python's ``random`` module, NumPy's legacy global RNG (when NumPy is
    importable), and torch on the CPU and on all CUDA devices.

    Args:
        seed: Seed value applied to each generator.
    """
    random.seed(seed)
    try:
        # NumPy is not imported at the top of the notebook, so import it locally.
        import numpy as np
    except ImportError:
        pass
    else:
        # np.random.seed only accepts 0 <= seed < 2**32; wrap so that any int
        # accepted by random.seed / torch.manual_seed keeps working here too.
        np.random.seed(seed % (2 ** 32))
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def load_raw_jsonl(path: str) -> List[Dict[str, Any]]:
    """Read a JSON Lines file into a list of parsed records.

    Blank or whitespace-only lines are skipped. The file is decoded as
    ``utf-8-sig`` so a leading UTF-8 byte-order mark does not break parsing
    of the first record.

    Args:
        path: Path of the ``.jsonl`` file to read.

    Returns:
        The parsed records, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message names the file and the 1-based line number.
        OSError: If the file cannot be opened.
    """
    records: List[Dict[str, Any]] = []
    with open(path, "r", encoding="utf-8-sig") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Re-raise the same exception type, adding where the bad record is.
                raise json.JSONDecodeError(
                    f"{path}, file line {line_no}: {exc.msg}", exc.doc, exc.pos
                ) from exc
    return records

# Apply the notebook-wide seed via set_seed, then report that setup finished.
set_seed(SEED)
print("유틸/설정 로드 완료")


[INFO] device = cuda
[INFO] 유틸/설정 로드 완료


In [None]:
# Load data & convert to Qwen3-VL format
# NOTE(review): only the load is visible here; no format conversion happens in this cell,
# so the JSONL records are presumably already in the format the trainer expects — confirm.
raw_data = load_raw_jsonl(DATA_PATH)

# Dataset split logic
def train_eval_test_split(data, train_ratio=0.8, eval_ratio=0.1, seed=42):
    """Shuffle ``data`` and cut it into train / eval / test lists.

    The input is copied before shuffling, so ``data`` itself is not modified.
    Shuffling uses a private ``random.Random(seed)``, so the result depends
    only on ``data`` and ``seed``, not on the global RNG state.

    Args:
        data: Iterable of records to split.
        train_ratio: Fraction of records for the train split (size is truncated
            to an integer).
        eval_ratio: Fraction of records for the eval split (size is truncated
            to an integer).
        seed: Seed for the shuffle.

    Returns:
        A ``(train, eval, test)`` tuple of lists; ``test`` receives whatever is
        left after the train and eval splits.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Small tolerance so float sums such as 0.7 + 0.3 are not rejected spuriously.
    if train_ratio < 0 or eval_ratio < 0 or train_ratio + eval_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and eval_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio}, eval_ratio={eval_ratio}"
        )

    rng = random.Random(seed)
    shuffled = list(data)
    rng.shuffle(shuffled)

    n_total = len(shuffled)
    n_train = int(n_total * train_ratio)
    n_eval = int(n_total * eval_ratio)
    eval_end = n_train + n_eval
    return shuffled[:n_train], shuffled[n_train:eval_end], shuffled[eval_end:]

# Split the loaded records with the notebook seed (default 80% / 10% ratios; the rest is test).
# Only the train and eval sizes are printed; test_dataset is created but not reported here.
train_dataset, eval_dataset, test_dataset = train_eval_test_split(raw_data, seed=SEED)
print(f"[INFO] Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")

[INFO] Train: 263, Eval: 32


In [None]:
# Load the 4-bit QLoRA-quantized Qwen3-VL model / processor
# Compute dtype: float32 without CUDA; with CUDA, bfloat16 when the device's
# major compute capability is at least 8, otherwise float16.
compute_dtype = torch.float32
if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability()
    compute_dtype = torch.bfloat16 if major >= 8 else torch.float16

print(f"compute_dtype = {compute_dtype}")

# bitsandbytes settings: 4-bit loading, NF4 quant type, double quantization,
# with the compute dtype selected above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the base checkpoint with the quantization config applied, letting
# device_map="auto" decide placement and requesting the SDPA attention backend.
model = Qwen3VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    device_map="auto",
    attn_implementation="sdpa",
    dtype=compute_dtype,
    quantization_config=bnb_config,
)

# Per-image pixel budget passed to the processor.
# max_pixels is one full-HD frame; min_pixels is set higher than the default.
# NOTE(review): the 28 * 28 unit presumably comes from earlier Qwen-VL releases —
# confirm it is the right unit for Qwen3-VL and that its processor honours these kwargs.
max_pixels = 1920 * 1080
min_pixels = 256 * 28 * 28

processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    max_pixels=max_pixels,
    min_pixels=min_pixels,
)

# Padding / cache settings
if hasattr(model, "config"):
    # Disable the generation cache while training.
    model.config.use_cache = False

# Request the same non-reentrant checkpointing mode that the SFTConfig in this
# notebook asks for (use_reentrant=False), instead of the library default.
model.gradient_checkpointing_enable(
    gradient_checkpointing_kwargs={"use_reentrant": False}
)
if hasattr(model, "enable_input_require_grads"):
    model.enable_input_require_grads()

# Configure padding on the processor's tokenizer (when it exposes one):
# fall back to the EOS token if no pad token is defined, and pad on the right.
if hasattr(processor, "tokenizer"):
    tokenizer = processor.tokenizer
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

print("모델 / 프로세서 로드 완료")


[INFO] compute_dtype = torch.bfloat16


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

[INFO] 모델 / 프로세서 로드 완료


In [None]:
# LoRA config, training config, and SFTTrainer creation

# LoRA adapter hyper-parameters. Defined once: the notebook previously built an
# identical LoraConfig twice, with the second assignment shadowing the first.
peft_config = LoraConfig(
    r=64,                 # previously 16 -> 64: more capacity for learning visual detail
    lora_alpha=128,       # previously 32 -> 128: alpha is usually set to 2x the rank
    lora_dropout=0.05,    # 0.1 -> 0.05: the dataset is small, so do not drop too much
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],  # target all linear layers
    task_type="CAUSAL_LM",
    # Allow the token embeddings to be trained (better adaptation to the special output format)
    modules_to_save=["embed_tokens", "lm_head"],
)

# Supervised fine-tuning hyper-parameters.
training_args = SFTConfig(
    output_dir=OUTPUT_DIR,

    # An FHD image (the size of most of our data) takes roughly 2000+ tokens,
    # so 4096 leaves headroom for the accompanying text.
    max_length=4096,

    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 samples per optimizer step per device

    num_train_epochs=5,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",

    # Log, evaluate and checkpoint every 10 steps; keep only the latest checkpoint.
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=10,
    save_strategy="steps",
    save_steps=10,
    save_total_limit=1,

    # Mixed precision follows the compute dtype chosen when the model was loaded,
    # instead of hard-coding bf16 (compute_dtype can be float16 or float32).
    bf16=(compute_dtype == torch.bfloat16),
    fp16=(compute_dtype == torch.float16),
    max_grad_norm=1.0,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    remove_unused_columns=False,
    dataset_text_field="",
)

# Build the TRL trainer from the quantized model, the LoRA config, the training
# arguments and the train / eval splits; the processor acts as the processing class.
# NOTE(review): train_dataset / eval_dataset are plain Python lists as produced by
# train_eval_test_split — presumably SFTTrainer expects a datasets.Dataset; confirm.
trainer = SFTTrainer(
    model=model,
    processing_class=processor,
    peft_config=peft_config,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

print("SFTTrainer 생성 완료")




[INFO] SFTTrainer 생성 완료


In [10]:
# Run fine-tuning; the value returned by trainer.train() is kept in train_result.
print("파인튜닝 시작...")
train_result = trainer.train()
print("학습 완료")

# Save the adapter to OUTPUT_DIR
trainer.save_model(OUTPUT_DIR)
print(f"어댑터 저장 완료: {OUTPUT_DIR}")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 151645, 'bos_token_id': None, 'pad_token_id': 151643}.


파인튜닝 시작...


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
10,10.0647,5.006711,6.36007,548948.0,0.343032
20,1.4115,0.013466,0.027932,1067069.0,0.997461
30,0.0047,0.001989,0.003306,1615905.0,0.999281
40,0.0014,0.001015,0.001605,2134018.0,0.999554
50,0.001,0.000905,0.00135,2682972.0,0.999654
60,0.0008,0.000723,0.001022,3201346.0,0.999709
70,0.0005,0.00066,0.000913,3719357.0,0.999727
80,0.0006,0.000644,0.000874,4268145.0,0.999718


학습 완료
어댑터 저장 완료: qwen3-vl-8b-kbo-scoreboard-qlora


In [None]:
from huggingface_hub import login
from huggingface_hub import HfApi
HF_REPO_ID = "SeHee8546/qwen3-vl-8b-kbo-scoreboard-qlora-final-V2"
# Never hard-code the access token in the notebook: read it from the environment.
# When HF_TOKEN is unset this is None, and huggingface_hub falls back to the
# credentials cached by a previous login.
HF_TOKEN = os.environ.get("HF_TOKEN")


api = HfApi(token=HF_TOKEN)
# exist_ok=True makes the cell re-runnable when the repository already exists.
api.create_repo(repo_id=HF_REPO_ID, exist_ok=True)

# Upload everything that was saved under OUTPUT_DIR.
api.upload_folder(
    repo_id=HF_REPO_ID,
    folder_path=OUTPUT_DIR,
)

print(f"Hugging Face Hub 업로드 완료: {HF_REPO_ID}")


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

[INFO] Hugging Face Hub 업로드 완료: SeHee8546/qwen3-vl-8b-kbo-scoreboard-qlora-final-V2
