In [1]:
!export TOKENIZERS_PARALLELISM=false
!rm -rf q_lora_korqa
!python -m pip install --upgrade pip
!pip install typing_extensions pydantic openai
!pip install datasets transformers peft trl bitsandbytes

[0m

In [None]:
from huggingface_hub import login
login(token='')

In [3]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training
import bitsandbytes as bnb
import torch.nn.functional as F

# base 모델 담는 객체
model_name = "google/gemma-3-4b-it"

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

# QLoRA 설정
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# 모델 로드 (GPU 0만 사용)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
    low_cpu_mem_usage=True
)

# 토크나이저 및 모델 로드 (모델 로드 시 4-bit 양자화 설정)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

base_model = prepare_model_for_kbit_training(model)

# LoRA 설정
lora_config = LoraConfig(
    r=16,  # 로우 랭크 어댑터의 차원
    lora_alpha=32,  # 스케일링 팩터
    lora_dropout=0.1,  # 드롭아웃 비율
    bias="none",  # 바이어스 처리 방식
    task_type=TaskType.CAUSAL_LM,
    target_modules=[  # Gemma 모델의 주요 선형 레이어 지정
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ]
)

# LoRA 적용
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 32,788,480 || all params: 4,332,867,952 || trainable%: 0.7567


---
### 학습용 데이터 로드

In [5]:
def preprocess_function(example):
    question, answer = example['question'], example['answer']
    # 프롬프트와 응답 결합
    input_text = f"### 질문:\n{question}\n\n### 답변:\n"
    # 토큰화
    tokenized = tokenizer(input_text, padding="max_length", truncation=True, max_length=512)
    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]
    # 라벨 생성: 프롬프트 부분은 -100, 응답 부분은 해당 토큰 ID
    prompt_len = len(tokenizer(input_text)["input_ids"]) - 1  # EOS 토큰 제외
    labels = [-100] * prompt_len + input_ids[prompt_len:]
    labels = labels[:512] + [-100] * (512 - len(labels))  # 패딩 부분은 -100
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

In [6]:
import json
from datasets import Dataset

# JSON 파일 로드
with open('./qa_data.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

with open('./qa_nega_data.json', 'r', encoding='utf-8') as f:
    dataset_nega = json.load(f)

dataset.extend(dataset_nega)

# Hugging Face Dataset으로 변환
dataset = Dataset.from_list(dataset)

# 전처리 + 텐서화
tokenized_dataset = dataset.map(preprocess_function, batched=False, remove_columns=['question', 'answer'])

# Train / Val 분리
split_dataset = tokenized_dataset.train_test_split(test_size=0.2)
train_dataset = split_dataset["train"]
val_dataset = split_dataset["test"]

Map:   0%|          | 0/2491 [00:00<?, ? examples/s]

In [7]:
print(tokenizer.decode(train_dataset[0]['input_ids'], skip_special_tokens=True))
print(tokenizer.decode(val_dataset[0]['input_ids'], skip_special_tokens=True))

### 질문:
홍콩익스프레스 항공에서 전자담배의 기내 반입 규정은 무엇인가요?

### 답변:

### 질문:
홍콩익스프레스 항공에서 배터리로 작동되는 전자담배의 반입 규정은 어떻게 되나요?

### 답변:



---

In [None]:
# 추가 설정
# model.enable_input_require_grads()
# model.gradient_checkpointing_enable()

# TrainingArguments 설정
training_args = TrainingArguments(
    output_dir='./q_lora_korqa',
    eval_strategy='epoch',
    save_strategy='epoch',
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    weight_decay=0.01,
    num_train_epochs=10,
    logging_dir='./logs',
    logging_steps=1,
    save_total_limit=3,
    bf16=True,
    fp16=False,
    push_to_hub=False,
    report_to='none',
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model,return_tensors="pt",label_pad_token_id=-100)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,1.6446,0.976602


