In [None]:
"""
코랩용 DPO (Direct Preference Optimization) 학습 스크립트
./finetuning_data_dpo의 cycle_01.csv 파일을 토대로 1 사이클 DPO 학습 이후
./checkpoints_dpo에 Trainer 등의 메타 데이터를 저장하고 이후 resume을 통해 추가 학습할 수 있도록 함.
adapter의 경우 /content/drive/Mydrive/멋사/adapters/에 저장
"""

In [1]:
# Report whether PyTorch can see a CUDA device in this runtime
# (the cell's last expression is displayed as its output).
import torch
torch.cuda.is_available()

True

In [None]:
# Install the libraries imported by the training cells (datasets, peft, trl)
# together with bitsandbytes and accelerate.
# NOTE(review): no versions are pinned, so re-running this cell later may pull
# different releases — consider pinning once a working combination is known.
!pip install datasets peft trl bitsandbytes accelerate
# Upgrade transformers separately, then print the installed package metadata.
!pip install -U transformers
!pip show transformers

In [2]:
# Mount Google Drive at /content/drive; the dataset and adapter output paths
# configured further down live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Show the current working directory and its entries before cloning the repo.
import os
print(os.getcwd())
print(os.listdir())

/content
['.config', 'drive', '.env', '.ipynb_checkpoints', 'sample_data']


In [4]:
# Fetch the project repository and switch to the `jinhyeok` working branch.
# The clone targets an absolute path and is skipped when the checkout already
# exists, so re-running the cell neither makes `git clone` fail nor creates a
# nested copy inside the existing checkout.
REPO_DIR = "/content/AmoRe_crm_generator"
if not os.path.isdir(os.path.join(REPO_DIR, ".git")):
    !git clone https://github.com/jjjh02/AmoRe_crm_generator.git {REPO_DIR}
%cd {REPO_DIR}
!git checkout jinhyeok
!git branch
# `%cd` has already moved the kernel; the explicit chdir keeps the working
# directory pinned to the absolute repo path that the relative paths used
# later in the notebook ("./models", "./finetuning/...") are resolved against.
os.chdir(REPO_DIR)
print(os.getcwd())

Cloning into 'AmoRe_crm_generator'...
remote: Enumerating objects: 291, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 291 (delta 9), reused 11 (delta 5), pack-reused 262 (from 1)[K
Receiving objects: 100% (291/291), 3.37 MiB | 6.91 MiB/s, done.
Resolving deltas: 100% (164/164), done.
/content/AmoRe_crm_generator
Branch 'jinhyeok' set up to track remote branch 'jinhyeok' from 'origin'.
Switched to a new branch 'jinhyeok'
* [32mjinhyeok[m
  main[m
/content/AmoRe_crm_generator


In [None]:
# Load variables from a `.env` file into the process environment via python-dotenv.
# The Hub upload cell later reads HUGGINGFACE_API_KEY with os.getenv.
from dotenv import load_dotenv
load_dotenv()

True

In [8]:
import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from trl import DPOTrainer, DPOConfig

# Model and path settings
MODEL_ID = "LGAI-EXAONE/EXAONE-4.0-1.2B"  # Hugging Face model id of the base model
CACHE_DIR = "./models"  # passed as cache_dir when loading the tokenizer and the model
OUTPUT_DIR = "./finetuning/checkpoints_dpo"  # passed to DPOConfig(output_dir=...)
OUTPUT_ADAPTER_DIR = "/content/drive/MyDrive/멋사/adapters_dpo_1_v2"  # target of trainer.save_model
# BASE_ADAPTER_PATH = "/content/drive/MyDrive/멋사/adapters_dpo_2"
NEW_ADAPTER_NAME = "dpo_adapter_v2"  # name of the LoRA adapter that gets trained

# Dataset path settings
DATA_DIR = "/content/drive/MyDrive/멋사/dataset_dpo"
JSON_FILE = os.path.join(DATA_DIR, "cycle_01_v2.json")

# Hyperparameter settings
# NOTE(review): PROMPT_LENGTH is not referenced anywhere else in the visible notebook.
PROMPT_LENGTH = 1024
# Only used to set tokenizer.model_max_length in the training cell.
MAX_SEQ_LENGTH = 1512


def load_dpo_dataset(json_path: str, test_size: float = 0.1, seed: int = 42):
    """Load a DPO preference dataset from a JSON file and split it into train/eval.

    Expected JSON layout::

        [
          { "prompt": "...", "chosen": "...", "rejected": "..." },
          ...
        ]

    Args:
        json_path: Path to the JSON file.
        test_size: Value forwarded to ``train_test_split`` that controls the
            size of the held-out split. Defaults to 0.1, the value that was
            previously hard-coded.
        seed: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to 42, the value that was previously
            hard-coded.

    Returns:
        A ``(train_dataset, eval_dataset)`` tuple.
    """
    # The whole file is read from the "train" split of the loaded dataset.
    full_dataset = load_dataset(
        "json",
        data_files=json_path,
    )["train"]

    # Carve out the evaluation split.
    splits = full_dataset.train_test_split(test_size=test_size, seed=seed)

    return splits["train"], splits["test"]


def _freeze_all_params(model):
    """Switch off ``requires_grad`` on every parameter yielded by ``model.named_parameters()``."""
    named = model.named_parameters()
    for _unused_name, tensor in named:
        tensor.requires_grad = False


def _enable_adapter_params(model, adapter_name):
    """Switch ``requires_grad`` on for parameters whose name contains ``.<adapter_name>.``.

    Parameters whose name does not contain that dotted segment are left untouched.
    """
    needle = f".{adapter_name}."
    for param_name, tensor in model.named_parameters():
        if needle not in param_name:
            continue
        tensor.requires_grad = True


In [None]:
"DPO 학습 메인 함수"

# 1. 토크나이저 로드
print("토크나이저 로드 중...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    cache_dir=CACHE_DIR,
)

# pad_token 설정
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 패딩 사이드 설정 (DPO 학습에 유리)
tokenizer.padding_side = 'left'
tokenizer.truncation_side = 'left'

# max_length 설정
tokenizer.model_max_length = MAX_SEQ_LENGTH

# 2. 데이터셋 로드
print(f"데이터셋 로드 중: {JSON_FILE}")
if not os.path.exists(JSON_FILE):
    raise FileNotFoundError(f"데이터셋 파일을 찾을 수 없습니다: {JSON_FILE}")

train_dataset, eval_dataset = load_dpo_dataset(JSON_FILE)
print(f"학습 데이터: {len(train_dataset)}개, 평가 데이터: {len(eval_dataset)}개")

# 3. Flash Attention 설정
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16

# 4. 모델 로드
print("모델 로드 중...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    use_cache=False,
    # attn_implementation=attn_implementation,
    torch_dtype=torch_dtype,
    cache_dir=CACHE_DIR,
)

# 5. PEFT (LoRA) 설정
print("PEFT 설정 중...")
peft_config = LoraConfig(
    lora_alpha=64,
    lora_dropout=0.05,
    r=64,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM"
)

# 6. 베이스 어댑터 로드 (학습하지 않음)
# print(f"베이스 어댑터 로드 중: {BASE_ADAPTER_PATH}")
# if not os.path.exists(BASE_ADAPTER_PATH):
#     raise FileNotFoundError(f"베이스 어댑터를 찾을 수 없습니다: {BASE_ADAPTER_PATH}")

# model = PeftModel.from_pretrained(
#     model,
#     BASE_ADAPTER_PATH,
#     is_trainable=False,
# )

# 7. 추가 어댑터 생성 및 활성화
print(f"추가 어댑터 생성: {NEW_ADAPTER_NAME}")
model.add_adapter(peft_config, NEW_ADAPTER_NAME)
model.set_adapter(NEW_ADAPTER_NAME)
_freeze_all_params(model)
_enable_adapter_params(model, NEW_ADAPTER_NAME)

# 8. DPO Config 설정
print("DPO Config 설정 중...")
dpo_config = DPOConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=3,
    learning_rate=5e-5,
    max_grad_norm=0.3,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    logging_steps=1,
    logging_first_step=True,
    logging_strategy="steps",
    log_level="info",
    disable_tqdm=False,
    save_steps=100,
    save_total_limit=20,
    eval_strategy="steps",
    eval_steps=10,
    # fp16=True,
    beta=0.1,
    loss_type="sigmoid",
    report_to="none"
)

# 9. DPOTrainer 초기화
print("DPOTrainer 초기화 중...")
trainer = DPOTrainer(
    model=model,
    ref_model=None,  # PEFT 사용 시 None으로 설정
    args=dpo_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
)

# 10. Start training
print("학습 시작...")

# Local import keeps this cell self-contained; transformers is already a
# dependency of the notebook.
from transformers.trainer_utils import get_last_checkpoint

# Look for checkpoints in OUTPUT_DIR, the directory handed to
# DPOConfig(output_dir=...). The previous hard-coded
# "AmoRe_crm_generator/finetuning/checkpoints_dpo" was resolved relative to a
# working directory that is already the repo root, so it pointed at a path
# that is different from OUTPUT_DIR and resume never triggered.
ckpt_dir = OUTPUT_DIR

# Resume only when a real checkpoint folder exists. Passing True for a
# directory that merely contains unrelated files makes Trainer.train raise
# because no valid checkpoint can be found, so pass the concrete checkpoint
# path (or None to start from scratch) instead.
resume = get_last_checkpoint(ckpt_dir) if os.path.isdir(ckpt_dir) else None

trainer.train(resume_from_checkpoint=resume)

# 11. Save the model
print("모델 저장 중...")
trainer.save_model(OUTPUT_ADAPTER_DIR)
print(f"모델이 저장되었습니다: {OUTPUT_ADAPTER_DIR}")



토크나이저 로드 중...


loading file vocab.json from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/vocab.json
loading file merges.txt from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/merges.txt
loading file tokenizer.json from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/special_tokens_map.json
loading file tokenizer_config.json from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/tokenizer_config.json
loading file chat_template.jinja from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/chat_template.jinja


데이터셋 로드 중: /content/drive/MyDrive/멋사/dataset_dpo/cycle_01_v2.json


loading configuration file config.json from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/config.json
Model config Exaone4Config {
  "architectures": [
    "Exaone4ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dtype": "bfloat16",
  "eos_token_id": 361,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_atten

학습 데이터: 1123개, 평가 데이터: 125개
모델 로드 중...


loading configuration file generation_config.json from cache at ./models/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 361,
  "pad_token_id": 0
}

Could not locate the custom_generate/generate.py inside LGAI-EXAONE/EXAONE-4.0-1.2B.


PEFT 설정 중...
추가 어댑터 생성: dpo_adapter_v2


PyTorch: setting up devices


DPO Config 설정 중...
DPOTrainer 초기화 중...


The model is already on multiple devices. Skipping the move to device specified in `args`.
Using auto half precision backend
The following columns in the Training set don't have a corresponding argument in `Exaone4ForCausalLM.forward` and have been ignored: reason_best, prompt, rejected_index, best_index, reason_rejected. If reason_best, prompt, rejected_index, best_index, reason_rejected are not expected by `Exaone4ForCausalLM.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 1,123
  Num Epochs = 4
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 12
  Gradient Accumulation steps = 3
  Total optimization steps = 376
  Number of trainable parameters = 60,948,480


학습 시작...


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
10,0.6,0.683299,-0.180104,-0.233855,0.5,0.053751,-684.730469,-741.759888,-3.709682,-3.63975
20,0.6083,0.73849,-1.46638,-1.695951,0.59375,0.229571,-697.59314,-756.380859,-3.727176,-3.657523
30,0.467,0.75593,-2.665429,-3.475004,0.648438,0.809575,-709.583618,-774.171326,-3.758888,-3.702438
40,1.0587,0.661129,-2.616832,-4.502807,0.734375,1.885975,-709.097778,-784.449341,-3.748129,-3.68154
50,0.7062,0.533892,-1.116477,-3.405584,0.78125,2.289107,-694.094177,-773.477112,-3.678073,-3.626885
60,0.2371,0.421431,1.435554,-0.884238,0.820312,2.319792,-668.573914,-748.263672,-3.469806,-3.425312
70,0.781,0.410736,-1.538668,-4.835542,0.851562,3.296874,-698.316101,-787.776733,-3.622673,-3.592692
80,0.5555,0.458349,1.179502,-1.581026,0.820312,2.760528,-671.134338,-755.231506,-3.56332,-3.526843
90,0.2278,0.573121,-1.233341,-4.48739,0.789062,3.254049,-695.262817,-784.295166,-3.634358,-3.584069
100,0.052,0.563404,-3.884204,-8.46355,0.796875,4.579345,-721.771423,-824.056763,-3.992111,-3.964693


The following columns in the Evaluation set don't have a corresponding argument in `Exaone4ForCausalLM.forward` and have been ignored: reason_best, prompt, rejected_index, best_index, reason_rejected. If reason_best, prompt, rejected_index, best_index, reason_rejected are not expected by `Exaone4ForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 125
  Batch size = 4
The following columns in the Evaluation set don't have a corresponding argument in `Exaone4ForCausalLM.forward` and have been ignored: reason_best, prompt, rejected_index, best_index, reason_rejected. If reason_best, prompt, rejected_index, best_index, reason_rejected are not expected by `Exaone4ForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 125
  Batch size = 4
The following columns in the Evaluation set don't have a corresponding argument in `Exaone4ForCausalLM.forward` and have been ignored: reason_best, 

config.json: 0.00B [00:00, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/config.json
Model config Exaone4Config {
  "architectures": [
    "Exaone4ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dtype": "bfloat16",
  "eos_token_id": 361,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attenti

모델 저장 중...


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--LGAI-EXAONE--EXAONE-4.0-1.2B/snapshots/3abf2810673c7c0778df64a73c2d52eab32d91c4/config.json
Model config Exaone4Config {
  "architectures": [
    "Exaone4ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dtype": "bfloat16",
  "eos_token_id": 361,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attenti

모델이 저장되었습니다: /content/drive/MyDrive/멋사/adapters_dpo_1_v2


In [5]:
# Install the Hugging Face Hub client used by the upload cell below.
# NOTE(review): unpinned, like the other install cell — consider pinning a version.
!pip install huggingface-hub



In [14]:
# Push to HuggingFace Hub
# Uploads the adapter folder written by trainer.save_model to a model repo.

import os

from dotenv import load_dotenv
from huggingface_hub import login, create_repo, upload_folder

# Single source of truth for the target repository, so the create and upload
# calls cannot drift apart (previously one used "crm-dpo-adapter" and the other
# "jinn33/crm-dpo-adapter").
HF_REPO_ID = "jinn33/crm-dpo-adapter"

# Make the cell self-contained: load_dotenv was imported here but never called,
# so the token lookup silently depended on an earlier cell having run.
# By default load_dotenv does not override variables that are already set.
load_dotenv()

# os.getenv returns None when HUGGINGFACE_API_KEY is unset.
# NOTE(review): with a None token, login presumably falls back to its
# interactive prompt/widget — confirm against the huggingface_hub docs.
login(os.getenv("HUGGINGFACE_API_KEY"))

# exist_ok=True makes re-running the cell safe when the repo already exists.
create_repo(
    repo_id=HF_REPO_ID,
    repo_type="model",
    private=False,
    exist_ok=True
)

# Last expression of the cell, so the returned commit info is displayed.
upload_folder(
    folder_path=OUTPUT_ADAPTER_DIR,
    repo_id=HF_REPO_ID,
    repo_type="model",
)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:   1%|          |  620kB /  122MB            

  ...po_1_v2/training_args.bin:   1%|1         |  76.0B / 6.76kB            

CommitInfo(commit_url='https://huggingface.co/jinn33/crm-dpo-adapter/commit/38e95322898190e4a5295f408a79a138ae55ca16', commit_message='Upload folder using huggingface_hub', commit_description='', oid='38e95322898190e4a5295f408a79a138ae55ca16', pr_url=None, repo_url=RepoUrl('https://huggingface.co/jinn33/crm-dpo-adapter', endpoint='https://huggingface.co', repo_type='model', repo_id='jinn33/crm-dpo-adapter'), pr_revision=None, pr_num=None)