# Train LoRA

In [1]:
%pip install python-dotenv torch transformers datasets bitsandbytes accelerate peft -qU

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType

# ==== MPS 디바이스 설정 ====
def get_device():
    device = None
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        print("MPS 디바이스를 사용합니다.")
    else:
        device = torch.device("cpu")
        print("MPS를 사용할 수 없어 CPU를 사용합니다.")
    return device

# ==== 토크나이저 로드 ====
def get_tokenizer(model_path):
    print("🔄 Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        use_fast=True,
        padding_side="left",  # 배치 추론 대비 안전
        use_safetensors=True,
    )
    if tokenizer.pad_token is None:
        print("⚠️ pad_token이 없어서 eos_token으로 설정합니다.")
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id

    tokenizer.padding_side = "left"  
    return tokenizer

def get_model(model_path, dtype, option):
    print("🔄 Loading model...")
    return AutoModelForCausalLM.from_pretrained(
        model_path,
        dtype=dtype,
        low_cpu_mem_usage=True,
        use_safetensors=option["use_safetensors"],
    )

def set_model_to_device(model, device):
    print("🔄 Moving model to device...")
    model.to(device)
    model.eval()
    return model


In [3]:
LOCAL_MODEL_PATH = "../ai_models/gemma-3-270m"
DTYPE = torch.bfloat16
MODEL_OPTION = {"use_safetensors": True}
ADAPTER_FLAG = False
ADAPTER_PATH = ""

device = get_device()
tokenizer = get_tokenizer(LOCAL_MODEL_PATH)
model = get_model(LOCAL_MODEL_PATH, DTYPE, MODEL_OPTION)

lora_config = LoraConfig(
    r = 16,
    lora_alpha = 16,
    # target_modules = ["c_attn", "c_proj", "q_attn"], # GPT 계열
    target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"],
    modules_to_save=['embed_tokens', 'lm_head'],
    lora_dropout = 0.05,
    bias = "none",
    task_type = TaskType.CAUSAL_LM,
)

model = get_peft_model(model, lora_config)

model.print_trainable_parameters()

model.to("mps")
model.eval()

print("DONE.")

MPS 디바이스를 사용합니다.
🔄 Loading tokenizer...
🔄 Loading model...
'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


trainable params: 339,341,312 || all params: 607,439,488 || trainable%: 55.8642
DONE.


In [4]:
# jsonl 파일을 불러와 dataset 생성
dataset = Dataset.from_json("./ecommerce_data/ecommerce_finetune.jsonl")
print(dataset)

Dataset({
    features: ['input', 'output'],
    num_rows: 15
})


In [None]:
def format_train(train_data):
    return f"질문: {train_data['input']}\n답변: {train_data['output']}\n<END>"

def tokenize_func(train_data):
    return tokenizer(
        format_train(train_data),
        truncation=True,
        padding="max_length",
        max_length=128,   # 모델 context 크기에 맞게 조정
    )

tokenized_dataset = dataset.map(tokenize_func)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

eos_token_id=tokenizer.convert_tokens_to_ids("<END>")

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=50,
    logging_steps=15,
    save_strategy="no",
    fp16=False,
    report_to="none"
)

Map: 100%|██████████| 15/15 [00:00<00:00, 2796.95 examples/s]


In [6]:
import numpy as np

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    mask = labels != -100
    correct = (predictions == labels) & mask
    accuracy = correct.sum() / mask.sum()

    return {"accuracy": accuracy}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

print("training done.")

It is strongly recommended to train Gemma3 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


Step,Training Loss
15,2.7232
30,1.9316
45,1.2969
60,0.9426
75,0.7062
90,0.536
105,0.3963
120,0.2804
135,0.2035
150,0.1781


training done.


In [None]:
input_text = "질문:지난달 신규 가입 고객 수는?(SQL만 작성)"
inputs = tokenizer(input_text, return_tensors="pt")
device = model.device
inputs = {k: v.to(device) for k, v in inputs.items()}
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,
    temperature=0.0,
    eos_token_id=eos_token_id
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.


질문:지난달 신규 가입 고객 수는?(SQL만 작성)
답변: SELECT COUNT(*) FROM users WHERE signup_date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') AND signup_date < DATE_TRUNC('month', CURRENT_DATE);
문의 자격: SELECT COUNT(*) FROM users GROUP BY signup_date;
해


#### RAG

1. 파일 로드

2. 임베딩 :: vector DB 를 위한 데이터 변환 / 저장

3. vector DB 데이터 삽입 :: chroma 사용 -> in memory

In [15]:
# 파일 로드
rag_dataset = load_dataset("json", data_files="./ecommerce_data/ecommerce_schema_rag.jsonl")

print(rag_dataset)


# 임베딩 :: vector DB 저장하기 위해 데이터를 변환 / 저장
# vector DB 데이터 삽입 :: chroma 사용 -> in memory

DatasetDict({
    train: Dataset({
        features: ['id', 'text'],
        num_rows: 36
    })
})
