# 패키지 업데이트

In [None]:
# Upgrade (-U) trl and bitsandbytes via pip; -q keeps the installer output quiet.
!pip install -U trl bitsandbytes -q

# 환경설정

In [None]:
import os
# Expose only GPU 0 to CUDA and switch Weights & Biases to disabled mode.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "WANDB_MODE": "disabled",
    }
)

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset

# 모델 불러오기

In [None]:
model_name = "KORMo-Team/KORMo-sft"

# Quantization settings: 4-bit NF4 weights with double quantization and
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the causal LM with the quantization config applied; device_map="auto"
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)

# Load the tokenizer published under the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

print(f"✓ 모델 로드 완료: {model_name}")

# 모델 추론

In [None]:
# Korean arithmetic word problem reused as the user prompt by the inference cells.
# It asks how many pencils are left, and also mentions notebooks being taken.
test_prompt = "영희가 연필 12개를 가지고 있었는데 철수가 절반을 가져가고 영수가 공책 3개를 가져갔으면 영희에게 남은 연필의 갯수는 몇개인가요?"

In [None]:
# Inference with thinking enabled
print("[Think 추론]")
model.eval()

# Render the single user turn through the chat template as text, with the
# generation prompt appended and enable_thinking=True.
messages_think = [{"role": "user", "content": test_prompt}]
input_text_think = tokenizer.apply_chat_template(
    messages_think,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True,
)
inputs_think = tokenizer(input_text_think, return_tensors="pt")
inputs_think = inputs_think.to(model.device)

# Sampled decoding (temperature 0.7), at most 1024 new tokens; the EOS id is
# passed as the padding id for generation.
with torch.inference_mode():
    outputs_think = model.generate(
        **inputs_think,
        pad_token_id=tokenizer.eos_token_id,
        temperature=0.7,
        do_sample=True,
        max_new_tokens=1024,
    )

# Decode the first returned sequence with special tokens stripped.
response_think = tokenizer.decode(outputs_think[0], skip_special_tokens=True)
print(f"입력: {test_prompt}")
print(f"출력: {response_think}")

In [None]:
# Inference with thinking disabled
print("[Non-think 추론]")

# Same prompt as a single user turn, rendered with enable_thinking=False.
messages = [{"role": "user", "content": test_prompt}]
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)
inputs = tokenizer(input_text, return_tensors="pt")
inputs = inputs.to(model.device)

# Decoding without sampling (do_sample=False), at most 1024 new tokens.
with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
        max_new_tokens=1024,
    )

# Decode the first returned sequence with special tokens stripped.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"입력: {test_prompt}")
print(f"출력: {response}")



# KORMo SFT 데이터 일부 불러오기

In [None]:
# Load the train split of the "sft" configuration of the tutorial dataset.
dataset = load_dataset(
    'KORMo-Team/KORMo-tutorial-datasets',
    split='train',
    name='sft',
)

# Shuffle with a fixed seed, then keep the first 1000 rows.
dataset = dataset.shuffle(seed=42)
dataset = dataset.select(range(1000))

print(f"✓ 데이터셋 로드 완료: {len(dataset)}개 샘플")
print(f"✓ 데이터셋 컬럼: {dataset.column_names}")
print("\n샘플 예시:")
# Last expression of the cell: the conversation of row 105.
dataset[105]['conversation']

In [None]:
# Render one sample conversation through the chat template as plain text
# (no tokenization, no trailing generation prompt) and print it.
sample_conversation = dataset[105]['conversation']
templated_text = tokenizer.apply_chat_template(
    conversation=sample_conversation,
    add_generation_prompt=False,
    tokenize=False,
)
print(templated_text)

# Tokenize & Label 준비
- user의 instruction에는 label에 -100 처리를 통해 최적화에서 제외
- assistant turn만 학습하기 위함

In [None]:
from transformers import PreTrainedTokenizer

def _prepare_inputs(conversation, tokenizer):
    """Tokenize one conversation and build loss labels aligned with its tokens.

    Every turn except the last two is rendered through the chat template on
    its own; the last two turns are rendered together. Label positions set to
    -100 are the ones excluded from training.

    Args:
        conversation: List of message dicts. Each one is read through its
            ``"role"`` key and handed to ``tokenizer.apply_chat_template``.
        tokenizer: Object providing ``convert_tokens_to_ids`` and
            ``apply_chat_template(..., tokenize=True)`` returning a list of
            token ids.

    Returns:
        ``(input_ids, labels)``: two lists that always have the same length.

    Raises:
        ValueError: If the rendered final turns contain no ``<think>`` token.
    """
    ignore_index = -100
    # Number of leading tokens of a rendered assistant turn that stay masked.
    # NOTE(review): presumably the chat-template role header -- confirm
    # against the tokenizer's template.
    assistant_prefix_len = 4
    # Labels of the final turns start this many tokens after "<think>".
    think_offset = 2

    input_ids = []
    labels = []
    think_token_id = tokenizer.convert_tokens_to_ids("<think>")

    # Earlier turns of a multi-turn conversation.
    for turn in conversation[:-2]:
        turn_ids = tokenizer.apply_chat_template([turn], tokenize=True)
        # Start fully masked so every role (user, system, ...) yields labels
        # of the right length; only assistant turns get trainable positions.
        turn_labels = [ignore_index] * len(turn_ids)
        if turn['role'] == 'assistant':
            # Keep the prefix and the last token masked. Slice assignment
            # cannot change the length, even for very short turns.
            turn_labels[assistant_prefix_len:-1] = turn_ids[assistant_prefix_len:-1]

        input_ids += turn_ids
        labels += turn_labels

    final_ids = tokenizer.apply_chat_template(conversation[-2:], tokenize=True)
    try:
        think_position = final_ids.index(think_token_id)
    except ValueError as err:
        raise ValueError(
            "no <think> token found in the rendered final turns of the conversation"
        ) from err

    # Clamp so the mask can never be longer than the token list.
    label_start = min(think_position + think_offset, len(final_ids))
    input_ids += final_ids
    labels += [ignore_index] * label_start + final_ids[label_start:]

    return input_ids, labels


def prepare_inputs(examples, tokenizer):
    """Tokenize a batch of conversations for ``datasets.map(batched=True)``.

    Args:
        examples: Mapping with a ``"conversation"`` entry holding a list of
            conversations.
        tokenizer: Tokenizer forwarded to ``_prepare_inputs``.

    Returns:
        Dict with ``"input_ids"`` and ``"labels"``, each a list with one
        entry per conversation.
    """
    encoded = [
        _prepare_inputs(conversation, tokenizer)
        for conversation in examples['conversation']
    ]
    return {
        "input_ids": [ids for ids, _ in encoded],
        "labels": [lbls for _, lbls in encoded],
    }

# Apply prepare_inputs to the dataset in batches; fn_kwargs passes the tokenizer
# as a keyword argument. prepare_inputs returns "input_ids" and "labels".
dataset = dataset.map(prepare_inputs, batched=True, fn_kwargs={'tokenizer': tokenizer})

In [None]:
# Decode one tokenized sample back to text as a sanity check.
sample_input_ids = dataset[3]['input_ids']
print(tokenizer.decode(sample_input_ids))

In [None]:
# NOTE: "</think>" is deliberately NOT excluded from the labels, so that the
# model can learn where the reasoning section ends.

sample = dataset[3]

# One row per token: decoded text, token id, and label, in fixed-width columns.
print(f"{'Token':<15}{'input_ids':<15}{'label':<10}")
print("-"*35)
for ids, label in zip(sample['input_ids'], sample['labels']):
    token = tokenizer.decode(ids)
    # Escape newlines outside the f-string: reusing the enclosing quote type
    # and writing a backslash inside a replacement field is a SyntaxError
    # before Python 3.12.
    shown_token = token.replace("\n", "\\n")
    print(f"{shown_token:<15}{ids:<15}{label:<10}")

In [None]:
from dataclasses import dataclass
from torch.utils.data import DataLoader

K = 1024


@dataclass
class DataCollatorForSFT:
    """Pad a list of tokenized examples into one batch of tensors.

    Each instance must provide ``"input_ids"`` and ``"labels"`` as 1-D
    tensors. Both are truncated to ``max_length`` and padded to the longest
    sequence in the batch.

    Calling the collator returns a dict with ``input_ids`` and ``labels``
    tensors (batch dimension first).
    """

    # Tokenizer supplying ``pad_token_id`` for padding ``input_ids``. The
    # annotation is quoted so that defining the class does not require the
    # name to be importable at definition time.
    tokenizer: "PreTrainedTokenizer"
    # Sequences longer than this many tokens are truncated.
    max_length: int = 20 * K
    # Value written into padded label positions (-100 marks positions that
    # are excluded from training).
    label_pad_token_id: int = -100

    def __call__(self, instances):
        input_ids = [instance["input_ids"][: self.max_length] for instance in instances]
        input_ids = torch.nn.utils.rnn.pad_sequence(
            input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id
        )

        labels = [instance["labels"][: self.max_length] for instance in instances]
        # Pad labels with the ignore value directly. Padding with
        # pad_token_id and then masking every position equal to it would
        # also mask genuine target tokens that share that id (for example
        # when the EOS token doubles as the padding token).
        labels = torch.nn.utils.rnn.pad_sequence(
            labels, batch_first=True, padding_value=self.label_pad_token_id
        )

        return dict(
            input_ids=input_ids,
            labels=labels,
        )
    
# Instantiate the collator with the loaded tokenizer.
collator = DataCollatorForSFT(tokenizer=tokenizer)

# Switch the dataset to the 'torch' format, then batch four examples at a
# time through the collator.
dataset.set_format('torch')
data_loader = DataLoader(dataset, batch_size=4, collate_fn=collator)

In [None]:
# Pull the first collated batch from the DataLoader to inspect it.
next(iter(data_loader))

# 모델 학습

In [None]:
# LoRA configuration: rank 128, alpha 256, dropout 0.05, no bias training,
# applied to all linear layers of a causal LM.
peft_config = LoraConfig(
    r=128,
    lora_alpha=256,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
)

# Training arguments
# NOTE(review): `dataset` already carries hand-built `labels` (prompt tokens
# set to -100). Confirm that the installed TRL version keeps that column when
# packing=True; if it rebuilds labels from input_ids, the prompt masking is
# silently lost -- TODO verify.
training_args = SFTConfig(
    output_dir="./KORMo-sft-step-qlora-sft",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="adamw_bnb_8bit",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=5e-5,
    bf16=True,
    warmup_ratio=0.01,
    lr_scheduler_type="cosine",
    packing=True,
    # The string "none" disables every reporting integration explicitly.
    # Passing None does not mean "no reporting" on Transformers releases
    # whose default is "all".
    report_to="none",
)

# Build the SFT trainer; the LoRA adapter is attached through peft_config.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset,
    peft_config=peft_config,
    args=training_args,
)

In [None]:
# Start training
print("✓ 학습 시작...")
trainer.train()

# Save the trained model and the tokenizer side by side in the output directory.
print("\n✓ 학습 완료! 모델 저장 중...")
save_dir = training_args.output_dir
trainer.model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print(f"✓ 모델 저장 완료: {save_dir}")

In [None]:
# Reload the model from the training output directory and move it to the GPU.
model = AutoModelForCausalLM.from_pretrained(
    training_args.output_dir,
    trust_remote_code=True,
    dtype='auto',
).to('cuda')

# Run the same prompt twice without sampling: first with thinking disabled,
# then with thinking enabled.
for banner, thinking in (("[Non-think 추론]", False), ("\n[Think 추론]", True)):
    print(banner)
    messages = [{"role": "user", "content": test_prompt}]
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=thinking,
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=False,
            max_new_tokens=1024,
        )

    # Decode the first returned sequence with special tokens stripped.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"입력: {test_prompt}")
    print(f"출력: {response}")