# Train LoRA

In [1]:
%pip install python-dotenv torch transformers datasets bitsandbytes accelerate peft -qU

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType

# ==== MPS 디바이스 설정 ====
def get_device():
    """Pick the torch device used by this notebook.

    Returns:
        torch.device: ``mps`` when the MPS backend reports itself available,
        otherwise ``cpu``. The choice is also printed.
    """
    # Guard clause: fall back to CPU when MPS is not available.
    if not torch.backends.mps.is_available():
        fallback = torch.device("cpu")
        print("MPS를 사용할 수 없어 CPU를 사용합니다.")
        return fallback

    selected = torch.device("mps")
    print("MPS 디바이스를 사용합니다.")
    return selected

# ==== 토크나이저 로드 ====
def get_tokenizer(model_path):
    """Load the fast tokenizer stored at ``model_path``.

    The tokenizer is configured for left padding, and a pad token is
    guaranteed to exist on return: when the checkpoint defines none, the EOS
    token is reused as the pad token.

    Args:
        model_path: Path or identifier passed to
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The loaded tokenizer.
    """
    print("🔄 Loading tokenizer...")
    # `use_safetensors` is a model-weights loading option and has no meaning
    # for a tokenizer, so it is intentionally not passed here.
    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        use_fast=True,
        padding_side="left",  # left padding keeps batched generation safe
    )
    if tokenizer.pad_token is None:
        print("⚠️ pad_token이 없어서 eos_token으로 설정합니다.")
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id

    return tokenizer

def get_model(model_path, dtype, option):
    """Load a causal language model from ``model_path``.

    Args:
        model_path: Path or identifier passed to
            ``AutoModelForCausalLM.from_pretrained``.
        dtype: Value forwarded as the ``dtype`` argument of the loader.
        option: Mapping that must contain the key ``"use_safetensors"``;
            its value is forwarded to the loader.

    Returns:
        The loaded model.
    """
    print("🔄 Loading model...")
    safetensors_flag = option["use_safetensors"]
    loaded_model = AutoModelForCausalLM.from_pretrained(
        model_path,
        dtype=dtype,
        use_safetensors=safetensors_flag,
        low_cpu_mem_usage=True,
    )
    return loaded_model

def set_model_to_device(model, device):
    """Move ``model`` to ``device`` and switch it to evaluation mode.

    Args:
        model: The model to move; the same object is returned.
        device: Target device passed to ``model.to``.

    Returns:
        The ``model`` argument, after the move and ``eval()`` call.
    """
    print("🔄 Moving model to device...")
    # For torch modules both calls return the module itself, so they chain.
    model.to(device).eval()
    return model


In [3]:
# ---- Base model configuration ----
LOCAL_MODEL_PATH = "../ai_models/gemma-3-270m"
DTYPE = torch.bfloat16
MODEL_OPTION = {"use_safetensors": True}
# NOTE(review): ADAPTER_FLAG / ADAPTER_PATH are not referenced by the code in
# this cell — presumably reserved for loading a saved adapter; confirm.
ADAPTER_FLAG = False
ADAPTER_PATH = ""

device = get_device()
tokenizer = get_tokenizer(LOCAL_MODEL_PATH)
# NOTE(review): transformers warns during training that Gemma3 models should
# be loaded with attn_implementation='eager'; get_model() does not expose that
# option here.
model = get_model(LOCAL_MODEL_PATH, DTYPE, MODEL_OPTION)

# ---- LoRA configuration ----
lora_config = LoraConfig(
    r = 16,
    lora_alpha = 16,
    # For GPT-family models the targets would be: ["c_attn", "c_proj", "q_attn"]
    target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"],
    # These modules are trained in full rather than through LoRA adapters,
    # presumably because the vocabulary is extended with "<END>" below.
    modules_to_save=['embed_tokens', 'lm_head'],
    lora_dropout = 0.05,
    bias = "none",
    task_type = TaskType.CAUSAL_LM,
)

# Register the custom end-of-answer marker and grow the embedding matrix to
# match the new vocabulary size before wrapping the model with PEFT.
special_tokens_dict = {'additional_special_tokens': ['<END>']}
tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))

model = get_peft_model(model, lora_config)

model.print_trainable_parameters()

# Use the device chosen by get_device() instead of a hard-coded "mps", so the
# CPU fallback that get_device() provides actually works.
model.to(device)
model.eval()

print("DONE.")

MPS 디바이스를 사용합니다.
🔄 Loading tokenizer...
🔄 Loading model...


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
trainable params: 339,343,872 || all params: 607,443,328 || trainable%: 55.8643
DONE.


In [None]:
# Build the fine-tuning dataset from the local JSONL file.
dataset = Dataset.from_json("./ecommerce_data/ecommerce_finetune.jsonl")
print(dataset)

# Load the "train" and "test" splits of the public text-to-SQL dataset.
train_dataset, test_dataset = (
    load_dataset("gretelai/synthetic_text_to_sql", split=split_name)
    for split_name in ("train", "test")
)
print(train_dataset, test_dataset, sep="\n")

Dataset({
    features: ['input', 'output'],
    num_rows: 15
})


ValueError: Unknown split "validation". Should be one of ['train', 'test'].

In [4]:
def format_train(train_data):
    """Render one training record as a question/answer prompt.

    Args:
        train_data: Mapping with the keys ``'input'`` (question text) and
            ``'output'`` (answer text).

    Returns:
        str: The question line, the answer line, and a final ``<END>`` marker
        line, joined by newlines.
    """
    question = train_data['input']
    answer = train_data['output']
    return f"질문: {question}\n답변: {answer}\n<END>"

def tokenize_func(train_data):
    """Tokenize one training record after formatting it with ``format_train``.

    The text is truncated and padded to a fixed length of 128 tokens using
    the notebook-level ``tokenizer``.

    Args:
        train_data: A single dataset record accepted by ``format_train``.

    Returns:
        The tokenizer's encoding for the formatted text.
    """
    prompt_text = format_train(train_data)
    encoding = tokenizer(
        prompt_text,
        max_length=128,  # adjust to fit the model's context size
        padding="max_length",
        truncation=True,
    )
    return encoding

# Id of the custom "<END>" marker token.
eos_token_id = tokenizer.convert_tokens_to_ids("<END>")

# Tokenize every record of the fine-tuning dataset.
tokenized_dataset = dataset.map(tokenize_func)

# Collator configured with mlm=False (no masked-language-model masking).
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# Training hyper-parameters; checkpoint saving and external reporting are off.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=50,
    per_device_train_batch_size=1,
    logging_steps=15,
    save_strategy="no",
    report_to="none",
    fp16=False,
)

In [11]:
import numpy as np

def compute_metrics(eval_pred):
    """Compute next-token accuracy for a causal language model.

    A causal LM's logits at position ``t`` predict the token at position
    ``t + 1``, while the labels handed to this function are not shifted.
    Predictions are therefore aligned with ``labels[..., 1:]`` before
    comparison. Positions labelled ``-100`` are ignored.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` has the vocabulary
            on its last axis; if it is a tuple, its first element is used.

    Returns:
        dict: ``{"accuracy": float}``. The accuracy is ``0.0`` when no label
        position is left to score, instead of dividing by zero.
    """
    logits, labels = eval_pred
    # Some models return several outputs; the logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]

    predictions = np.argmax(logits, axis=-1)
    labels = np.asarray(labels)

    # Align "prediction made at t" with "token observed at t + 1".
    shifted_predictions = predictions[..., :-1]
    shifted_labels = labels[..., 1:]

    mask = shifted_labels != -100
    scored = int(mask.sum())
    if scored == 0:
        return {"accuracy": 0.0}

    correct = int(((shifted_predictions == shifted_labels) & mask).sum())
    return {"accuracy": correct / scored}

# Fine-tune the PEFT-wrapped model on the tokenized examples.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_dataset,
    # NOTE(review): no eval_dataset is passed, so compute_metrics is
    # presumably never invoked during this run — confirm.
    compute_metrics=compute_metrics,
)

trainer.train()

print("training done.")

It is strongly recommended to train Gemma3 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


Step,Training Loss
15,3.0521
30,2.2612
45,1.6039
60,1.1948
75,0.9374
90,0.7615
105,0.6004
120,0.4533
135,0.2957
150,0.18


KeyboardInterrupt: 

In [9]:
import re

# Build the prompt in the same "question / answer" layout used for training.
input_text = "작년 신규 가입 고객 수는?"
format_input = f"질문: {input_text}\n답변:"
inputs = tokenizer(format_input, return_tensors="pt")
device = model.device
inputs = {k: v.to(device) for k, v in inputs.items()}

# generate() does not switch the module to eval mode by itself; make sure
# dropout (including the LoRA dropout) is disabled for inference.
model.eval()
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,       # sampling disabled
    num_beams=1,           # greedy search
    temperature=None,      # unset the sampling-only options
    top_p=None,
    top_k=None,
    eos_token_id=eos_token_id,  # stop at the custom "<END>" token
    # Pass the pad token explicitly so generate() does not fall back to the
    # EOS id (and warn about it).
    pad_token_id=tokenizer.pad_token_id,
)

result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Full Output")
print(result)

# Keep only the text that follows "답변:".
if "답변:" in result:
    result = result.split("답변:")[1].strip()

# Extract just the first "SELECT ... ;" statement, if there is one.
match = re.search(r"(SELECT[\s\S]*?;)", result, re.IGNORECASE)
if match:
    sql_query = match.group(1).strip()
else:
    sql_query = result.strip()

print("Generated SQL")
print(sql_query)

Setting `pad_token_id` to `eos_token_id`:262145 for open-end generation.


Full Output
질문: 작년 신규 가입 고객 수는?
답변: 2019년 12월 10일

질문: 2019년 12월 10일
답변: 2019년 12월 10일

질문: 2019년 12월
Generated SQL
2019년 12월 10일

질문: 2019년 12월 10일


In [10]:
# Free-form prompt (no question/answer template) as a sanity check.
input_text = "hi! how are you?"
format_input = f"{input_text}\n"
inputs = tokenizer(format_input, return_tensors="pt")
device = model.device
inputs = {k: v.to(device) for k, v in inputs.items()}

# generate() does not switch the module to eval mode by itself; make sure
# dropout (including the LoRA dropout) is disabled for inference.
model.eval()
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,       # sampling disabled
    num_beams=3,           # beam search with 3 beams (not greedy)
    temperature=None,      # unset the sampling-only options
    top_p=None,
    top_k=None,
    eos_token_id=eos_token_id,  # stop at the custom "<END>" token
    # Pass the pad token explicitly so generate() does not fall back to the
    # EOS id (and warn about it).
    pad_token_id=tokenizer.pad_token_id,
)

result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(result)

Setting `pad_token_id` to `eos_token_id`:262145 for open-end generation.


hi! how are you?
i hope you are doing well.
i hope you are having a good day.
i hope you are having a good day.
i hope you are having a good day.
i hope you are having a good day.
i hope you are having a good day.
i hope you are having a


#### RAG

1. 파일 로드

2. 임베딩 :: vector DB 를 위한 데이터 변환 / 저장

3. vector DB 데이터 삽입 :: chroma 사용 -> in memory

In [15]:
# Load the schema documents for RAG from the local JSONL file.
rag_dataset = load_dataset(
    "json",
    data_files="./ecommerce_data/ecommerce_schema_rag.jsonl",
)

print(rag_dataset)


# Next step: embedding — convert the data so it can be stored in a vector DB.
# Next step: insert the data into the vector DB — use Chroma (in memory).

DatasetDict({
    train: Dataset({
        features: ['id', 'text'],
        num_rows: 36
    })
})
