In [1]:
# 환경 초기화 및 라이브러리 재설치
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 1) PyTorch 설치
!pip install torch torchvision torchaudio

# 2) Triton 설치
!pip install triton

# 3) LLM 학습 라이브러리 (버전 고정)
# Transformers 4.46 + PEFT 0.13 + Bitsandbytes 0.44
!pip install transformers==4.46.0 \
             peft==0.13.2 \
             accelerate==1.1.1 \
             bitsandbytes==0.44.1 \
             trl==0.12.0 \
             datasets==3.1.0 \
             huggingface_hub \
             pandas \
             scipy \
             flash-attn

# 설치 후 Kernel 재시작해야 함
print("설치 완료")

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
설치 완료


In [2]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from huggingface_hub import login

In [3]:
login(token=os.getenv("HF_TOKEN"))

MODEL_ID = "kakaocorp/kanana-nano-2.1b-instruct"     # 베이스 모델 ID (혹은 경로)
NEW_MODEL_NAME = "kanana-nano-2.1B-customer-emotional" # 저장할 모델명
DATA_FILE = "hana_rewritten.csv"

In [4]:
# 모델 및 토크나이저 로드
print(f"데이터 로드 중: {DATA_FILE}")
df = pd.read_csv(DATA_FILE)
dataset = Dataset.from_pandas(df)

print("모델 로드 중...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer.padding_side = 'right'
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

데이터 로드 중: hana_rewritten.csv
모델 로드 중...


In [5]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16
)
model.config.use_cache = False
model.config.pretraining_tp = 1

In [6]:
# LoRA 설정
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

# 학습 설정
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    gradient_checkpointing=True,
    dataloader_num_workers=8,
    optim="paged_adamw_32bit",
    
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="none"
)

In [7]:
# 데이터 포맷팅
def formatting_prompts_func(example):
    output_texts = []
    
    # 데이터셋의 각 행을 순회하며 처리
    for i in range(len(example['counselor_utterance'])):
        current_emotion = example['emotion'][i] if example['emotion'][i] else "일반"
        
        target_response = example['customer_utterance_rewritten'][i]
        if not target_response or str(target_response) == 'nan':
             target_response = example['customer_utterance'][i]

        if current_emotion == "일반":
            system_msg = "당신은 하나카드의 고객입니다. 상담원의 질문이나 안내에 대해 실제 고객처럼 자연스럽게 응답하세요."
        else:
            # 감정이 있을 경우, 해당 감정을 연기하도록 명시
            system_msg = f"당신은 하나카드의 고객입니다. 현재 당신의 감정 및 성격 상태는 '{current_emotion}'입니다. 이 페르소나에 맞춰 상담원에게 응답하세요."

        messages = [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": example['counselor_utterance'][i]},
            {"role": "assistant", "content": target_response}
        ]
        
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        output_texts.append(text)
        
    return output_texts

In [8]:
# 학습기 설정
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    formatting_func=formatting_prompts_func,
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
)

print("학습 시작...")
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/96886 [00:00<?, ? examples/s]

학습 시작...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Step,Training Loss
10,5.082
20,5.3647
30,4.9386
40,3.7296
50,2.3604
60,2.4891
70,1.9792
80,1.5586
90,1.1861
100,0.873


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enab

SafetensorError: Error while serializing: I/O error: Disk quota exceeded (os error 122)

In [10]:
# 마지막 체크포인트에서 학습 재개
trainer.train(resume_from_checkpoint=True)

  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after pa

Step,Training Loss
8910,0.8316
8920,0.9539
8930,0.7989
8940,0.6872
8950,0.5436
8960,0.8164
8970,0.9539
8980,0.8144
8990,0.7018
9000,0.5144


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


TrainOutput(global_step=9081, training_loss=0.015099419179354952, metrics={'train_runtime': 786.533, 'train_samples_per_second': 369.543, 'train_steps_per_second': 11.546, 'total_flos': 2.9773602669551616e+17, 'train_loss': 0.015099419179354952, 'epoch': 2.9997729290093513})

In [11]:
# 어댑터 저장
trainer.model.save_pretrained(NEW_MODEL_NAME)
tokenizer.save_pretrained(NEW_MODEL_NAME)

('kanana-nano-2.1B-customer-emotional/tokenizer_config.json',
 'kanana-nano-2.1B-customer-emotional/special_tokens_map.json',
 'kanana-nano-2.1B-customer-emotional/tokenizer.json')

In [12]:
print("메모리 정리 및 모델 병합 중...")
# del model, trainer
torch.cuda.empty_cache()

# 베이스 모델 재로드 (FP16)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# 병합
model_to_merge = PeftModel.from_pretrained(base_model, NEW_MODEL_NAME)
merged_model = model_to_merge.merge_and_unload()
print("병합 완료")

# 업로드
print(f"업로드 시작: {NEW_MODEL_NAME}")
merged_model.push_to_hub(NEW_MODEL_NAME, use_temp_dir=False, use_auth_token=True)
tokenizer.push_to_hub(NEW_MODEL_NAME, use_temp_dir=False, use_auth_token=True)

print("✅ 업로드 완료")

메모리 정리 및 모델 병합 중...
병합 완료
업로드 시작: kanana-nano-2.1B-customer-emotional




Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            



README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

✅ 업로드 완료
