In [None]:
# ✅ 1. 라이브러리 로드
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset
import pandas as pd
import torch
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# ✅ 2. 모델 설정 (한국어 특화 모델)
model_name = "beomi/KcBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# ✅ 3. 모델 로딩 (분류용)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2
).to("mps")

# ✅ 4. 한국어 이메일 데이터 불러오기 및 전처리
data = pd.read_csv("./spam_dataset_KO_balanced_5000.csv")

def clean_text(text):
    text = re.sub(r"[\\n\\r]", " ", text)
    text = re.sub(r"\\s+", " ", text)
    return text.strip()

data["text"] = data["text"].apply(clean_text)
data["label"] = data["label"].astype(int)

# ✅ 5. 훈련/검증 데이터 분할
train, test = train_test_split(data, test_size=0.2, random_state=42)
train_dataset = Dataset.from_pandas(train[["text", "label"]])
test_dataset = Dataset.from_pandas(test[["text", "label"]])

  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at beomi/KcBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# ✅ 6. 토큰화
def preprocess(example):
    tokenized = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=256,
    )
    tokenized["label"] = int(example["label"])
    return tokenized

tokenized_train_dataset = train_dataset.map(preprocess)
tokenized_test_dataset = test_dataset.map(preprocess)

# ✅ 7. LoRA 구성 (KcBERT 구조에 맞는 target_modules)
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS
)
model = get_peft_model(model, peft_config)

# ✅ 8. 평가 지표 함수
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary', zero_division=0)
    acc = accuracy_score(labels, preds)
    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }


Map: 100%|██████████| 4000/4000 [00:00<00:00, 7120.19 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 9754.94 examples/s]


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [3]:
# ✅ 9. 학습 설정
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,  # 🔼 더 오래 학습
    per_device_train_batch_size=8,  # 🔽 과적합 방지
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    warmup_ratio=0.1,  # 🔥 학습 초반엔 천천히 시작
    learning_rate=2e-5,  # 🔽 너무 크지 않게
    weight_decay=0.01,
    logging_steps=10,
    save_steps=100,
    save_total_limit=1,
    optim="adamw_torch",
    report_to="none",
    no_cuda=True
)


# ✅ 10. Trainer 정의 및 학습 실행
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.can_return_loss = True
trainer.train()

  1%|          | 10/1250 [00:56<1:46:39,  5.16s/it]

{'loss': 0.7479, 'grad_norm': 2.6148362159729004, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.04}


  2%|▏         | 20/1250 [01:45<1:41:06,  4.93s/it]

{'loss': 0.7369, 'grad_norm': 3.3786673545837402, 'learning_rate': 3.2000000000000003e-06, 'epoch': 0.08}


  2%|▏         | 30/1250 [02:34<1:39:39,  4.90s/it]

{'loss': 0.691, 'grad_norm': 2.227353811264038, 'learning_rate': 4.800000000000001e-06, 'epoch': 0.12}


  3%|▎         | 40/1250 [03:23<1:37:43,  4.85s/it]

{'loss': 0.7171, 'grad_norm': 7.57123327255249, 'learning_rate': 6.4000000000000006e-06, 'epoch': 0.16}


  4%|▍         | 50/1250 [04:12<1:36:52,  4.84s/it]

{'loss': 0.7015, 'grad_norm': 2.1808719635009766, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2}


  5%|▍         | 60/1250 [05:01<1:36:42,  4.88s/it]

{'loss': 0.6952, 'grad_norm': 2.8957293033599854, 'learning_rate': 9.600000000000001e-06, 'epoch': 0.24}


  6%|▌         | 70/1250 [05:52<1:39:28,  5.06s/it]

{'loss': 0.695, 'grad_norm': 3.0075488090515137, 'learning_rate': 1.1200000000000001e-05, 'epoch': 0.28}


  6%|▋         | 80/1250 [06:41<1:37:27,  5.00s/it]

{'loss': 0.672, 'grad_norm': 2.8624203205108643, 'learning_rate': 1.2800000000000001e-05, 'epoch': 0.32}


  7%|▋         | 90/1250 [07:33<1:41:57,  5.27s/it]

{'loss': 0.7408, 'grad_norm': 2.480095148086548, 'learning_rate': 1.4400000000000001e-05, 'epoch': 0.36}


  8%|▊         | 100/1250 [08:25<1:38:29,  5.14s/it]

{'loss': 0.6519, 'grad_norm': 2.8095083236694336, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4}


  9%|▉         | 110/1250 [09:19<1:39:11,  5.22s/it]

{'loss': 0.6495, 'grad_norm': 1.81438410282135, 'learning_rate': 1.76e-05, 'epoch': 0.44}


 10%|▉         | 120/1250 [10:09<1:35:30,  5.07s/it]

{'loss': 0.655, 'grad_norm': 2.4093048572540283, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.48}


 10%|█         | 130/1250 [11:00<1:35:32,  5.12s/it]

{'loss': 0.6342, 'grad_norm': 3.9277448654174805, 'learning_rate': 1.9911111111111112e-05, 'epoch': 0.52}


 11%|█         | 140/1250 [11:51<1:35:12,  5.15s/it]

{'loss': 0.6077, 'grad_norm': 3.6366124153137207, 'learning_rate': 1.9733333333333336e-05, 'epoch': 0.56}


 12%|█▏        | 150/1250 [12:44<1:39:00,  5.40s/it]

{'loss': 0.6032, 'grad_norm': 3.9959113597869873, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6}


 13%|█▎        | 160/1250 [13:35<1:32:30,  5.09s/it]

{'loss': 0.5812, 'grad_norm': 3.452687978744507, 'learning_rate': 1.9377777777777778e-05, 'epoch': 0.64}


 14%|█▎        | 170/1250 [14:27<1:31:41,  5.09s/it]

{'loss': 0.555, 'grad_norm': 2.3758063316345215, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.68}


 14%|█▍        | 180/1250 [15:19<1:28:50,  4.98s/it]

{'loss': 0.5045, 'grad_norm': 4.4314351081848145, 'learning_rate': 1.9022222222222223e-05, 'epoch': 0.72}


 15%|█▌        | 190/1250 [16:04<1:18:40,  4.45s/it]

{'loss': 0.4874, 'grad_norm': 2.8382375240325928, 'learning_rate': 1.8844444444444444e-05, 'epoch': 0.76}


 16%|█▌        | 200/1250 [16:48<1:17:11,  4.41s/it]

{'loss': 0.4754, 'grad_norm': 3.9850876331329346, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8}


 17%|█▋        | 210/1250 [17:33<1:16:36,  4.42s/it]

{'loss': 0.4313, 'grad_norm': 4.927398204803467, 'learning_rate': 1.848888888888889e-05, 'epoch': 0.84}


 18%|█▊        | 220/1250 [18:21<1:23:32,  4.87s/it]

{'loss': 0.3992, 'grad_norm': 1.8841770887374878, 'learning_rate': 1.8311111111111114e-05, 'epoch': 0.88}


 18%|█▊        | 230/1250 [19:11<1:24:40,  4.98s/it]

{'loss': 0.3915, 'grad_norm': 2.003798723220825, 'learning_rate': 1.8133333333333335e-05, 'epoch': 0.92}


 19%|█▉        | 240/1250 [19:58<1:18:55,  4.69s/it]

{'loss': 0.3681, 'grad_norm': 2.098982095718384, 'learning_rate': 1.7955555555555556e-05, 'epoch': 0.96}


 20%|██        | 250/1250 [20:45<1:18:03,  4.68s/it]

{'loss': 0.3066, 'grad_norm': 2.0043017864227295, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0}


 21%|██        | 260/1250 [21:32<1:18:32,  4.76s/it]

{'loss': 0.3024, 'grad_norm': 2.1909515857696533, 'learning_rate': 1.76e-05, 'epoch': 1.04}


 22%|██▏       | 270/1250 [22:19<1:15:26,  4.62s/it]

{'loss': 0.2854, 'grad_norm': 1.9224392175674438, 'learning_rate': 1.7422222222222222e-05, 'epoch': 1.08}


 22%|██▏       | 280/1250 [23:06<1:14:32,  4.61s/it]

{'loss': 0.2373, 'grad_norm': 1.5901676416397095, 'learning_rate': 1.7244444444444446e-05, 'epoch': 1.12}


 23%|██▎       | 290/1250 [23:53<1:16:01,  4.75s/it]

{'loss': 0.2093, 'grad_norm': 1.9504425525665283, 'learning_rate': 1.706666666666667e-05, 'epoch': 1.16}


 24%|██▍       | 300/1250 [24:40<1:13:48,  4.66s/it]

{'loss': 0.164, 'grad_norm': 1.3034923076629639, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2}


 25%|██▍       | 310/1250 [25:26<1:10:38,  4.51s/it]

{'loss': 0.1648, 'grad_norm': 1.6017906665802002, 'learning_rate': 1.6711111111111112e-05, 'epoch': 1.24}


 26%|██▌       | 320/1250 [26:12<1:12:02,  4.65s/it]

{'loss': 0.1347, 'grad_norm': 1.0816043615341187, 'learning_rate': 1.6533333333333333e-05, 'epoch': 1.28}


 26%|██▋       | 330/1250 [27:01<1:13:37,  4.80s/it]

{'loss': 0.1074, 'grad_norm': 1.0817192792892456, 'learning_rate': 1.6355555555555557e-05, 'epoch': 1.32}


 27%|██▋       | 340/1250 [27:49<1:12:16,  4.77s/it]

{'loss': 0.1028, 'grad_norm': 0.8549413084983826, 'learning_rate': 1.617777777777778e-05, 'epoch': 1.36}


 28%|██▊       | 350/1250 [28:35<1:06:49,  4.46s/it]

{'loss': 0.0813, 'grad_norm': 0.8436170816421509, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4}


 29%|██▉       | 360/1250 [29:19<1:05:29,  4.42s/it]

{'loss': 0.0655, 'grad_norm': 3.921375036239624, 'learning_rate': 1.5822222222222224e-05, 'epoch': 1.44}


 30%|██▉       | 370/1250 [30:03<1:05:09,  4.44s/it]

{'loss': 0.0782, 'grad_norm': 1.3592687845230103, 'learning_rate': 1.5644444444444448e-05, 'epoch': 1.48}


 30%|███       | 380/1250 [30:48<1:04:16,  4.43s/it]

{'loss': 0.0545, 'grad_norm': 0.6756714582443237, 'learning_rate': 1.546666666666667e-05, 'epoch': 1.52}


 31%|███       | 390/1250 [31:32<1:03:16,  4.42s/it]

{'loss': 0.0433, 'grad_norm': 0.9854825735092163, 'learning_rate': 1.528888888888889e-05, 'epoch': 1.56}


 32%|███▏      | 400/1250 [32:16<1:00:43,  4.29s/it]

{'loss': 0.04, 'grad_norm': 0.5539579391479492, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6}


 33%|███▎      | 410/1250 [33:00<1:00:20,  4.31s/it]

{'loss': 0.0318, 'grad_norm': 1.045880913734436, 'learning_rate': 1.4933333333333335e-05, 'epoch': 1.64}


 34%|███▎      | 420/1250 [33:43<59:19,  4.29s/it]  

{'loss': 0.0307, 'grad_norm': 0.2680871784687042, 'learning_rate': 1.4755555555555556e-05, 'epoch': 1.68}


 34%|███▍      | 430/1250 [34:26<58:46,  4.30s/it]

{'loss': 0.0343, 'grad_norm': 0.6090499758720398, 'learning_rate': 1.457777777777778e-05, 'epoch': 1.72}


 35%|███▌      | 440/1250 [35:09<58:13,  4.31s/it]

{'loss': 0.0236, 'grad_norm': 0.4275103509426117, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.76}


 36%|███▌      | 450/1250 [35:52<57:12,  4.29s/it]

{'loss': 0.0199, 'grad_norm': 0.33544912934303284, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8}


 37%|███▋      | 460/1250 [36:35<56:33,  4.30s/it]

{'loss': 0.0319, 'grad_norm': 0.38476091623306274, 'learning_rate': 1.4044444444444445e-05, 'epoch': 1.84}


 38%|███▊      | 470/1250 [37:23<59:49,  4.60s/it]  

{'loss': 0.0164, 'grad_norm': 0.3413844704627991, 'learning_rate': 1.3866666666666669e-05, 'epoch': 1.88}


 38%|███▊      | 480/1250 [38:07<56:19,  4.39s/it]

{'loss': 0.0188, 'grad_norm': 0.21778716146945953, 'learning_rate': 1.368888888888889e-05, 'epoch': 1.92}


 39%|███▉      | 490/1250 [38:50<54:31,  4.30s/it]

{'loss': 0.0143, 'grad_norm': 0.5422659516334534, 'learning_rate': 1.3511111111111112e-05, 'epoch': 1.96}


 40%|████      | 500/1250 [39:33<53:22,  4.27s/it]

{'loss': 0.0119, 'grad_norm': 0.13796599209308624, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0}


 41%|████      | 510/1250 [40:18<53:35,  4.35s/it]

{'loss': 0.0144, 'grad_norm': 0.27645283937454224, 'learning_rate': 1.3155555555555558e-05, 'epoch': 2.04}


 42%|████▏     | 520/1250 [41:02<55:00,  4.52s/it]

{'loss': 0.0111, 'grad_norm': 0.23461005091667175, 'learning_rate': 1.2977777777777779e-05, 'epoch': 2.08}


 42%|████▏     | 530/1250 [41:46<53:26,  4.45s/it]

{'loss': 0.0127, 'grad_norm': 0.40052467584609985, 'learning_rate': 1.2800000000000001e-05, 'epoch': 2.12}


 43%|████▎     | 540/1250 [42:31<52:38,  4.45s/it]

{'loss': 0.0117, 'grad_norm': 0.18863248825073242, 'learning_rate': 1.2622222222222222e-05, 'epoch': 2.16}


 44%|████▍     | 550/1250 [43:15<51:47,  4.44s/it]

{'loss': 0.0104, 'grad_norm': 0.1189626157283783, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2}


 45%|████▍     | 560/1250 [44:00<51:20,  4.46s/it]

{'loss': 0.0089, 'grad_norm': 0.10219530761241913, 'learning_rate': 1.2266666666666667e-05, 'epoch': 2.24}


 46%|████▌     | 570/1250 [44:43<49:39,  4.38s/it]

{'loss': 0.0074, 'grad_norm': 0.22874921560287476, 'learning_rate': 1.208888888888889e-05, 'epoch': 2.28}


 46%|████▋     | 580/1250 [45:27<49:07,  4.40s/it]

{'loss': 0.0087, 'grad_norm': 0.1434171497821808, 'learning_rate': 1.191111111111111e-05, 'epoch': 2.32}


 47%|████▋     | 590/1250 [46:11<48:43,  4.43s/it]

{'loss': 0.0082, 'grad_norm': 1.1344538927078247, 'learning_rate': 1.1733333333333335e-05, 'epoch': 2.36}


 48%|████▊     | 600/1250 [46:55<47:50,  4.42s/it]

{'loss': 0.007, 'grad_norm': 0.3599469065666199, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4}


 49%|████▉     | 610/1250 [47:41<47:00,  4.41s/it]

{'loss': 0.0093, 'grad_norm': 0.5003973245620728, 'learning_rate': 1.1377777777777779e-05, 'epoch': 2.44}


 50%|████▉     | 620/1250 [48:25<46:19,  4.41s/it]

{'loss': 0.0073, 'grad_norm': 0.1572941243648529, 'learning_rate': 1.1200000000000001e-05, 'epoch': 2.48}


 50%|█████     | 630/1250 [49:09<45:17,  4.38s/it]

{'loss': 0.0074, 'grad_norm': 0.4018140435218811, 'learning_rate': 1.1022222222222224e-05, 'epoch': 2.52}


 51%|█████     | 640/1250 [49:52<44:09,  4.34s/it]

{'loss': 0.0055, 'grad_norm': 0.10701844841241837, 'learning_rate': 1.0844444444444446e-05, 'epoch': 2.56}


 52%|█████▏    | 650/1250 [50:36<43:46,  4.38s/it]

{'loss': 0.0066, 'grad_norm': 0.15768612921237946, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6}


 53%|█████▎    | 660/1250 [51:20<42:49,  4.35s/it]

{'loss': 0.0047, 'grad_norm': 0.09021932631731033, 'learning_rate': 1.048888888888889e-05, 'epoch': 2.64}


 54%|█████▎    | 670/1250 [52:04<42:29,  4.40s/it]

{'loss': 0.0057, 'grad_norm': 0.12290999293327332, 'learning_rate': 1.0311111111111113e-05, 'epoch': 2.68}


 54%|█████▍    | 680/1250 [52:48<41:53,  4.41s/it]

{'loss': 0.0047, 'grad_norm': 0.07561959326267242, 'learning_rate': 1.0133333333333335e-05, 'epoch': 2.72}


 55%|█████▌    | 690/1250 [53:32<40:50,  4.38s/it]

{'loss': 0.0051, 'grad_norm': 0.07654371112585068, 'learning_rate': 9.955555555555556e-06, 'epoch': 2.76}


 56%|█████▌    | 700/1250 [54:16<40:04,  4.37s/it]

{'loss': 0.0047, 'grad_norm': 0.07336466759443283, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8}


 57%|█████▋    | 710/1250 [55:01<39:32,  4.39s/it]

{'loss': 0.0047, 'grad_norm': 0.06595899909734726, 'learning_rate': 9.600000000000001e-06, 'epoch': 2.84}


 58%|█████▊    | 720/1250 [55:45<38:41,  4.38s/it]

{'loss': 0.0062, 'grad_norm': 1.932833194732666, 'learning_rate': 9.422222222222222e-06, 'epoch': 2.88}


 58%|█████▊    | 730/1250 [56:29<38:03,  4.39s/it]

{'loss': 0.0051, 'grad_norm': 0.0812935158610344, 'learning_rate': 9.244444444444445e-06, 'epoch': 2.92}


 59%|█████▉    | 740/1250 [57:13<37:25,  4.40s/it]

{'loss': 0.0062, 'grad_norm': 0.0735601931810379, 'learning_rate': 9.066666666666667e-06, 'epoch': 2.96}


 60%|██████    | 750/1250 [57:57<36:39,  4.40s/it]

{'loss': 0.0038, 'grad_norm': 0.07333066314458847, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0}


 61%|██████    | 760/1250 [58:41<35:51,  4.39s/it]

{'loss': 0.0072, 'grad_norm': 0.07890888303518295, 'learning_rate': 8.711111111111111e-06, 'epoch': 3.04}


 62%|██████▏   | 770/1250 [59:26<35:11,  4.40s/it]

{'loss': 0.0054, 'grad_norm': 0.07448534667491913, 'learning_rate': 8.533333333333335e-06, 'epoch': 3.08}


 62%|██████▏   | 780/1250 [1:00:10<34:50,  4.45s/it]

{'loss': 0.004, 'grad_norm': 0.06736911088228226, 'learning_rate': 8.355555555555556e-06, 'epoch': 3.12}


 63%|██████▎   | 790/1250 [1:00:54<33:57,  4.43s/it]

{'loss': 0.0042, 'grad_norm': 0.19422754645347595, 'learning_rate': 8.177777777777779e-06, 'epoch': 3.16}


 64%|██████▍   | 800/1250 [1:01:38<32:57,  4.40s/it]

{'loss': 0.0044, 'grad_norm': 0.08282898366451263, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2}


 65%|██████▍   | 810/1250 [1:02:23<32:15,  4.40s/it]

{'loss': 0.0042, 'grad_norm': 0.11339191347360611, 'learning_rate': 7.822222222222224e-06, 'epoch': 3.24}


 66%|██████▌   | 820/1250 [1:03:07<31:14,  4.36s/it]

{'loss': 0.0039, 'grad_norm': 0.057229071855545044, 'learning_rate': 7.644444444444445e-06, 'epoch': 3.28}


 66%|██████▋   | 830/1250 [1:03:51<30:34,  4.37s/it]

{'loss': 0.0038, 'grad_norm': 0.09096317738294601, 'learning_rate': 7.4666666666666675e-06, 'epoch': 3.32}


 67%|██████▋   | 840/1250 [1:04:35<30:08,  4.41s/it]

{'loss': 0.0044, 'grad_norm': 0.08236171305179596, 'learning_rate': 7.28888888888889e-06, 'epoch': 3.36}


 68%|██████▊   | 850/1250 [1:05:19<29:07,  4.37s/it]

{'loss': 0.0091, 'grad_norm': 0.09486071020364761, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4}


 69%|██████▉   | 860/1250 [1:06:04<28:38,  4.41s/it]

{'loss': 0.0038, 'grad_norm': 0.06199045851826668, 'learning_rate': 6.9333333333333344e-06, 'epoch': 3.44}


 70%|██████▉   | 870/1250 [1:06:48<28:06,  4.44s/it]

{'loss': 0.0037, 'grad_norm': 0.0689253956079483, 'learning_rate': 6.755555555555556e-06, 'epoch': 3.48}


 70%|███████   | 880/1250 [1:07:32<27:11,  4.41s/it]

{'loss': 0.0038, 'grad_norm': 0.04435558244585991, 'learning_rate': 6.577777777777779e-06, 'epoch': 3.52}


 71%|███████   | 890/1250 [1:08:16<26:11,  4.36s/it]

{'loss': 0.003, 'grad_norm': 0.052557572722435, 'learning_rate': 6.4000000000000006e-06, 'epoch': 3.56}


 72%|███████▏  | 900/1250 [1:09:00<25:30,  4.37s/it]

{'loss': 0.0032, 'grad_norm': 0.052447717636823654, 'learning_rate': 6.222222222222223e-06, 'epoch': 3.6}


 73%|███████▎  | 910/1250 [1:09:44<24:26,  4.31s/it]

{'loss': 0.0031, 'grad_norm': 0.053408022969961166, 'learning_rate': 6.044444444444445e-06, 'epoch': 3.64}


 74%|███████▎  | 920/1250 [1:10:28<23:55,  4.35s/it]

{'loss': 0.0039, 'grad_norm': 0.07898295670747757, 'learning_rate': 5.8666666666666675e-06, 'epoch': 3.68}


 74%|███████▍  | 930/1250 [1:11:11<23:13,  4.35s/it]

{'loss': 0.003, 'grad_norm': 0.06664544343948364, 'learning_rate': 5.688888888888889e-06, 'epoch': 3.72}


 75%|███████▌  | 940/1250 [1:11:56<22:57,  4.44s/it]

{'loss': 0.0029, 'grad_norm': 0.16480952501296997, 'learning_rate': 5.511111111111112e-06, 'epoch': 3.76}


 76%|███████▌  | 950/1250 [1:12:40<21:56,  4.39s/it]

{'loss': 0.0045, 'grad_norm': 0.1705099195241928, 'learning_rate': 5.333333333333334e-06, 'epoch': 3.8}


 77%|███████▋  | 960/1250 [1:13:24<21:02,  4.35s/it]

{'loss': 0.003, 'grad_norm': 0.11668325960636139, 'learning_rate': 5.155555555555556e-06, 'epoch': 3.84}


 78%|███████▊  | 970/1250 [1:14:07<20:20,  4.36s/it]

{'loss': 0.0034, 'grad_norm': 0.11756392568349838, 'learning_rate': 4.977777777777778e-06, 'epoch': 3.88}


 78%|███████▊  | 980/1250 [1:14:51<19:43,  4.38s/it]

{'loss': 0.0026, 'grad_norm': 0.054268401116132736, 'learning_rate': 4.800000000000001e-06, 'epoch': 3.92}


 79%|███████▉  | 990/1250 [1:15:35<18:50,  4.35s/it]

{'loss': 0.0032, 'grad_norm': 0.0723799616098404, 'learning_rate': 4.622222222222222e-06, 'epoch': 3.96}


 80%|████████  | 1000/1250 [1:16:19<18:22,  4.41s/it]

{'loss': 0.003, 'grad_norm': 0.05771502107381821, 'learning_rate': 4.444444444444444e-06, 'epoch': 4.0}


 81%|████████  | 1010/1250 [1:17:05<17:44,  4.43s/it]

{'loss': 0.0028, 'grad_norm': 0.05224691331386566, 'learning_rate': 4.266666666666668e-06, 'epoch': 4.04}


 82%|████████▏ | 1020/1250 [1:17:49<16:53,  4.41s/it]

{'loss': 0.0029, 'grad_norm': 0.05972812697291374, 'learning_rate': 4.088888888888889e-06, 'epoch': 4.08}


 82%|████████▏ | 1030/1250 [1:18:33<16:11,  4.42s/it]

{'loss': 0.0027, 'grad_norm': 0.051542334258556366, 'learning_rate': 3.911111111111112e-06, 'epoch': 4.12}


 83%|████████▎ | 1040/1250 [1:19:17<15:28,  4.42s/it]

{'loss': 0.0029, 'grad_norm': 0.0396014004945755, 'learning_rate': 3.7333333333333337e-06, 'epoch': 4.16}


 84%|████████▍ | 1050/1250 [1:20:02<14:44,  4.42s/it]

{'loss': 0.0028, 'grad_norm': 0.08541306853294373, 'learning_rate': 3.555555555555556e-06, 'epoch': 4.2}


 85%|████████▍ | 1060/1250 [1:20:46<14:07,  4.46s/it]

{'loss': 0.0028, 'grad_norm': 0.054861921817064285, 'learning_rate': 3.377777777777778e-06, 'epoch': 4.24}


 86%|████████▌ | 1070/1250 [1:21:31<13:16,  4.42s/it]

{'loss': 0.0026, 'grad_norm': 0.04861239343881607, 'learning_rate': 3.2000000000000003e-06, 'epoch': 4.28}


 86%|████████▋ | 1080/1250 [1:22:15<12:29,  4.41s/it]

{'loss': 0.0028, 'grad_norm': 0.05291736498475075, 'learning_rate': 3.0222222222222225e-06, 'epoch': 4.32}


 87%|████████▋ | 1090/1250 [1:23:00<11:47,  4.42s/it]

{'loss': 0.0038, 'grad_norm': 0.04374388977885246, 'learning_rate': 2.8444444444444446e-06, 'epoch': 4.36}


 88%|████████▊ | 1100/1250 [1:23:44<11:02,  4.42s/it]

{'loss': 0.003, 'grad_norm': 0.07500649243593216, 'learning_rate': 2.666666666666667e-06, 'epoch': 4.4}


 89%|████████▉ | 1110/1250 [1:24:29<10:14,  4.39s/it]

{'loss': 0.0025, 'grad_norm': 0.3636963367462158, 'learning_rate': 2.488888888888889e-06, 'epoch': 4.44}


 90%|████████▉ | 1120/1250 [1:25:13<09:33,  4.41s/it]

{'loss': 0.0053, 'grad_norm': 0.041582949459552765, 'learning_rate': 2.311111111111111e-06, 'epoch': 4.48}


 90%|█████████ | 1130/1250 [1:25:57<08:42,  4.35s/it]

{'loss': 0.0028, 'grad_norm': 0.045919597148895264, 'learning_rate': 2.133333333333334e-06, 'epoch': 4.52}


 91%|█████████ | 1140/1250 [1:26:41<08:01,  4.38s/it]

{'loss': 0.0027, 'grad_norm': 0.03611411154270172, 'learning_rate': 1.955555555555556e-06, 'epoch': 4.56}


 92%|█████████▏| 1150/1250 [1:27:25<07:22,  4.42s/it]

{'loss': 0.0026, 'grad_norm': 0.050768062472343445, 'learning_rate': 1.777777777777778e-06, 'epoch': 4.6}


 93%|█████████▎| 1160/1250 [1:28:10<06:37,  4.41s/it]

{'loss': 0.0032, 'grad_norm': 0.22135625779628754, 'learning_rate': 1.6000000000000001e-06, 'epoch': 4.64}


 94%|█████████▎| 1170/1250 [1:28:54<05:58,  4.48s/it]

{'loss': 0.0026, 'grad_norm': 0.05276409164071083, 'learning_rate': 1.4222222222222223e-06, 'epoch': 4.68}


 94%|█████████▍| 1180/1250 [1:29:39<05:06,  4.38s/it]

{'loss': 0.0025, 'grad_norm': 0.06415767967700958, 'learning_rate': 1.2444444444444445e-06, 'epoch': 4.72}


 95%|█████████▌| 1190/1250 [1:30:23<04:29,  4.49s/it]

{'loss': 0.0023, 'grad_norm': 0.05687741935253143, 'learning_rate': 1.066666666666667e-06, 'epoch': 4.76}


 96%|█████████▌| 1200/1250 [1:31:09<03:53,  4.68s/it]

{'loss': 0.0027, 'grad_norm': 0.058499425649642944, 'learning_rate': 8.88888888888889e-07, 'epoch': 4.8}


 97%|█████████▋| 1210/1250 [1:31:58<03:09,  4.73s/it]

{'loss': 0.0029, 'grad_norm': 0.05367050692439079, 'learning_rate': 7.111111111111112e-07, 'epoch': 4.84}


 98%|█████████▊| 1220/1250 [1:32:45<02:21,  4.73s/it]

{'loss': 0.0028, 'grad_norm': 0.03949056565761566, 'learning_rate': 5.333333333333335e-07, 'epoch': 4.88}


 98%|█████████▊| 1230/1250 [1:33:33<01:35,  4.76s/it]

{'loss': 0.0026, 'grad_norm': 0.048191510140895844, 'learning_rate': 3.555555555555556e-07, 'epoch': 4.92}


 99%|█████████▉| 1240/1250 [1:34:21<00:48,  4.80s/it]

{'loss': 0.0026, 'grad_norm': 0.062128663063049316, 'learning_rate': 1.777777777777778e-07, 'epoch': 4.96}


100%|██████████| 1250/1250 [1:35:08<00:00,  4.57s/it]

{'loss': 0.0027, 'grad_norm': 0.059130970388650894, 'learning_rate': 0.0, 'epoch': 5.0}
{'train_runtime': 5708.8426, 'train_samples_per_second': 3.503, 'train_steps_per_second': 0.219, 'train_loss': 0.13890601873099803, 'epoch': 5.0}





TrainOutput(global_step=1250, training_loss=0.13890601873099803, metrics={'train_runtime': 5708.8426, 'train_samples_per_second': 3.503, 'train_steps_per_second': 0.219, 'train_loss': 0.13890601873099803, 'epoch': 5.0})

In [5]:
# ✅ 11. 성능 평가 및 저장
eval_result = trainer.evaluate()
print("\\n📊 평가 지표:")
for key, value in eval_result.items():
    print(f"{key}: {value:.4f}")

trainer.model.save_pretrained("./KO_results")
tokenizer.save_pretrained("./KO_results")

100%|██████████| 250/250 [01:46<00:00,  2.34it/s]


\n📊 평가 지표:
eval_loss: 0.0016
eval_accuracy: 1.0000
eval_f1: 1.0000
eval_precision: 1.0000
eval_recall: 1.0000
eval_runtime: 107.5325
eval_samples_per_second: 9.3000
eval_steps_per_second: 2.3250
epoch: 5.0000




('./KO_results/tokenizer_config.json',
 './KO_results/special_tokens_map.json',
 './KO_results/vocab.txt',
 './KO_results/added_tokens.json',
 './KO_results/tokenizer.json')