In [None]:
# from huggingface_hub import login
# login()

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer

# --- 1. Configuration ---
# These three names are read by every later section (model/tokenizer loading,
# the output directory, dataset loading, and the final save), so they must be
# real assignments rather than commented-out lines.
base_model = "LGAI-EXAONE/EXAONE-4.0-32B"  # model id passed to from_pretrained
dataset_file = "HONGCHA_DATASET.json"  # JSON file passed to load_dataset
new_model_name = "lgai"  # used for the results dir and the saved output dir

# --- 2. Load the model and tokenizer ---
# In a normally configured environment the settings below should work as-is.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# use_cache is switched off for the training run.
model.config.use_cache = False
# NOTE(review): pretraining_tp looks like a Llama-style config field; confirm
# it is actually read by this model's config, otherwise it is a no-op.
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# Fall back to EOS as the padding token only when the tokenizer does not ship
# its own. Overwriting an existing pad token with EOS makes padding and
# end-of-sequence indistinguishable, which can hide EOS from the loss when pad
# positions are masked.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# --- 3. LoRA configuration ---
# Names of the sub-modules that receive LoRA adapters.
# NOTE(review): presumably the attention (q/k/v/o) and MLP (gate/up/down)
# projections of the base model — confirm the names match its layer names.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
)

# --- 4. Training arguments ---
training_args = TrainingArguments(
    # Where checkpoints go and how many are kept.
    output_dir=f"./{new_model_name}-results",
    save_strategy="steps",
    save_steps=50,
    save_total_limit=3,
    # Logging.
    logging_steps=10,
    report_to="tensorboard",
    # Run length and batching: batch size 1 with 4 accumulation steps gives
    # 4 samples per optimizer step on each device.
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # Optimizer and learning-rate schedule.
    optim="adamw_torch",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # NOTE(review): with the plain "constant" scheduler the warmup_ratio below
    # may have no effect; "constant_with_warmup" is the variant that applies
    # it — confirm which behaviour is intended.
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    # Precision.
    bf16=True,
)

# --- 5. SFTTrainer 설정 ---
def formatting_prompts_func(example):
    """Render one dataset record as a single prompt/response string.

    Args:
        example: A mapping with ``"instruction"`` and ``"output"`` entries.

    Returns:
        The text ``### User:`` + instruction, a blank line, then
        ``### Assistant:`` + output.

    Raises:
        KeyError: If a plain-dict ``example`` lacks either entry.
    """
    instruction = example["instruction"]
    answer = example["output"]
    segments = (
        f"### User:\n{instruction}",
        f"### Assistant:\n{answer}",
    )
    # The two turns are separated by exactly one empty line.
    return "\n\n".join(segments)

# Load the whole JSON file as the "train" split.
train_dataset = load_dataset("json", data_files=dataset_file, split="train")

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # Hand the tokenizer loaded above to the trainer so that its settings
    # (pad token, padding side, trust_remote_code loading) are the ones used
    # during training and match the tokenizer saved at the end of the script.
    # Without this the trainer has to load a tokenizer of its own.
    # NOTE(review): `processing_class` is the keyword in recent TRL releases;
    # older releases call it `tokenizer` — confirm against the installed TRL.
    processing_class=tokenizer,
    peft_config=peft_config,
    # Each record is turned into training text by this function.
    formatting_func=formatting_prompts_func,
    # max_seq_length=512,  # optional cap on sequence length to save VRAM
)

# --- 7. Start training ---
print("모델 학습을 시작합니다...")
# Explicitly do not resume from an existing checkpoint.
trainer.train(resume_from_checkpoint=False)

# --- 8. Save the final model ---
print("학습된 모델 어댑터를 저장합니다...")
# Write the trained model first, then the tokenizer, into the same directory.
# NOTE(review): trainer.model is presumably the PEFT-wrapped model, in which
# case only the adapter weights are written — confirm.
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(new_model_name)

print(f"학습 완료! 모델이 '{new_model_name}' 디렉토리에 저장되었습니다.")