In [None]:
"""
코랩용 DPO (Direct Preference Optimization) 학습 스크립트
./finetuning_data의 cycle_00.csv 파일을 토대로 1 사이클 DPO 학습 이후
./checkpoints에 Trainer 등의 메타 데이터를 저장하고 이후 resume을 통해 추가 학습할 수 있도록 함.
adapter의 경우 /content/drive/Mydrive/멋사/adapters/에 저장
"""

In [1]:
import torch
torch.cuda.is_available()

True

In [2]:
!pip install datasets peft trl bitsandbytes accelerate
!pip install -U transformers
!pip show transformers

Collecting trl
  Downloading trl-0.26.2-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading trl-0.26.2-py3-none-any.whl (518 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m518.9/518.9 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl (59.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, trl
Successfully installed bitsandbytes-0.49.0 trl-0.26.2
Name: transformers
Version: 4.57.3
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: tran

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
print(os.getcwd())
print(os.listdir())

/content
['.config', 'drive', 'sample_data']


In [7]:
!git clone https://github.com/jjjh02/AmoRe_crm_generator.git
!cd AmoRe_crm_generator
!git checkout jinhyeok
!git branch
os.chdir("/content/AmoRe_crm_generator")
print(os.getcwd())

Cloning into 'AmoRe_crm_generator'...
remote: Enumerating objects: 41, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (35/35), done.[K
remote: Total 41 (delta 8), reused 33 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (41/41), 1.68 MiB | 17.89 MiB/s, done.
Resolving deltas: 100% (8/8), done.
Branch 'jinhyeok' set up to track remote branch 'jinhyeok' from 'origin'.
Switched to a new branch 'jinhyeok'
* [32mjinhyeok[m
  main[m
/content/AmoRe_crm_generator


In [8]:
import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)
from datasets import load_dataset
from peft import LoraConfig
from trl import DPOTrainer, DPOConfig

# 모델 및 경로 설정
MODEL_ID = "LGAI-EXAONE/EXAONE-4.0-1.2B"
CACHE_DIR = "./models"
OUTPUT_DIR = "./finetuning/checkpoints"

# 데이터셋 경로 설정
DATA_DIR = "./finetuning/finetuning_data"
CSV_FILE = os.path.join(DATA_DIR, "cycle_01.csv")

# 하이퍼파라미터 설정
PROMPT_LENGTH = 1024
MAX_SEQ_LENGTH = 1512


def load_dpo_dataset(csv_path: str):
    """CSV 파일에서 DPO 형식의 데이터셋을 로드합니다.

    CSV 형식: prompt, chosen, rejected 컬럼을 가져야 합니다.

    Args:
        csv_path: CSV 파일 경로

    Returns:
        train_dataset, eval_dataset: 학습 및 평가 데이터셋
    """
    # CSV 파일 로드
    dataset = load_dataset("csv", data_files=csv_path)
    dataset = dataset['train']

    # train/test split
    dataset = dataset.train_test_split(test_size=0.1, seed=42)

    return dataset['train'], dataset['test']


def main():
    """DPO 학습 메인 함수"""

    # 1. 토크나이저 로드
    print("토크나이저 로드 중...")
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        cache_dir=CACHE_DIR,
    )

    # pad_token 설정
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # 패딩 사이드 설정 (DPO 학습에 유리)
    tokenizer.padding_side = 'left'
    tokenizer.truncation_side = 'left'

    # 2. 데이터셋 로드
    print(f"데이터셋 로드 중: {CSV_FILE}")
    if not os.path.exists(CSV_FILE):
        raise FileNotFoundError(f"데이터셋 파일을 찾을 수 없습니다: {CSV_FILE}")

    train_dataset, eval_dataset = load_dpo_dataset(CSV_FILE)
    print(f"학습 데이터: {len(train_dataset)}개, 평가 데이터: {len(eval_dataset)}개")

    # 3. BitsAndBytesConfig 설정 (QLoRA)
    print("BitsAndBytesConfig 설정 중...")
    # bnb_config = BitsAndBytesConfig(
    #     load_in_4bit=True,
    #     bnb_4bit_use_double_quant=True,
    #     bnb_4bit_quant_type="nf4",
    #     bnb_4bit_compute_dtype=torch.bfloat16
    # )

    # 4. Flash Attention 설정
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
        attn_implementation = "flash_attention_2"
        torch_dtype = torch.bfloat16
    else:
        attn_implementation = "eager"
        torch_dtype = torch.float16

    # 5. 모델 로드
    print("모델 로드 중...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        use_cache=False,
        attn_implementation=attn_implementation,
        torch_dtype=torch_dtype,
        # quantization_config=bnb_config,
        cache_dir=CACHE_DIR,
    )

    # 6. PEFT (LoRA) 설정
    print("PEFT 설정 중...")
    peft_config = LoraConfig(
        lora_alpha=128,
        lora_dropout=0.05,
        r=256,
        bias="none",
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        task_type="CAUSAL_LM",
    )

    # 7. DPO 설정
    dpo_args = {
        "beta": 0.1,
        "loss_type": "sigmoid"
    }

    # 8. DPO Config 설정
    print("DPO Config 설정 중...")
    dpo_config = DPOConfig(
        output_dir=OUTPUT_DIR,
        num_train_epochs=4,
        per_device_train_batch_size=12,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=1,
        learning_rate=5e-5,
        max_grad_norm=0.3,
        warmup_ratio=0.1,
        lr_scheduler_type="cosine",
        logging_steps=1,
        save_steps=100,
        save_total_limit=20,
        eval_strategy="steps",
        eval_steps=30000,
        fp16=True,
        # beta=dpo_args["beta"],
        # loss_type=dpo_args["loss_type"],
        report_to="none",
    )

    # 9. DPOTrainer 초기화
    print("DPOTrainer 초기화 중...")
    trainer = DPOTrainer(
        model=model,
        ref_model=None,  # PEFT 사용 시 None으로 설정
        peft_config=peft_config,
        args=dpo_config,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        processing_class=tokenizer,
        # max_length=MAX_SEQ_LENGTH,
        # max_prompt_length=PROMPT_LENGTH,
        # beta=dpo_args["beta"],
        # loss_type=dpo_args["loss_type"],
    )

    # 10. 학습 시작
    print("학습 시작...")
    ckpt_dir = "AmoRe_crm_generator/finetuning/checkpoints"

    resume = None
    if os.path.isdir(ckpt_dir) and len(os.listdir(ckpt_dir)) > 0:
        resume = True

    trainer.train(resume_from_checkpoint=resume)

    # 11. 모델 저장
    print("모델 저장 중...")
    trainer.save_model("/content/drive/MyDrive/멋사/adapters")
    print(f"모델이 저장되었습니다: {OUTPUT_DIR}")


if __name__ == "__main__":
    main()


토크나이저 로드 중...
데이터셋 로드 중: ./finetuning/finetuning_data/cycle_01.csv


Generating train split: 0 examples [00:00, ? examples/s]

학습 데이터: 5개, 평가 데이터: 1개
BitsAndBytesConfig 설정 중...
모델 로드 중...


config.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.56G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

PEFT 설정 중...
DPO Config 설정 중...
DPOTrainer 초기화 중...


Extracting prompt in train dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Extracting prompt in eval dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/1 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


학습 시작...




Step,Training Loss,Validation Loss


config.json: 0.00B [00:00, ?B/s]

모델 저장 중...
모델이 저장되었습니다: ./finetuning/checkpoints


In [None]:
!find . -name "cycle_01.csv"
!ls
!cd AmoRe_crm_generator && ls

AmoRe_crm_generator  data  models  README.md  requirements.txt	src
models
