In [1]:
# KoBERT 및 필요한 라이브러리 설치
!pip install torch torchvision torchaudio
!pip install transformers
!pip install kobert-transformers
!pip install sentencepiece
!pip install google-generativeai
!pip install psutil
!pip install GPUtil

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import json
import time
import psutil
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import gc
from typing import Dict, List, Tuple

print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("GPU:", torch.cuda.get_device_name(0))

Collecting kobert-transformers
  Downloading kobert_transformers-0.6.0-py3-none-any.whl.metadata (7.3 kB)
Downloading kobert_transformers-0.6.0-py3-none-any.whl (12 kB)
Installing collected packages: kobert-transformers
Successfully installed kobert-transformers-0.6.0
Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25l[?25hdone
  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7392 sha256=a6217ed49dc03e07edb104da28d796b6de82c1357f2609fbe2bcef32497784f1
  Stored in directory: /root/.cache/pip/wheels/92/a8/b7/d8a067c31a74de9ca252bbe53dea5f896faabd25d55f541037
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0
PyTorch version: 2.8.0+cu126
CUDA available: True
CUDA version: 12.6
GPU: Tesla T4


In [2]:
from google.colab import files
import io

# 파일 업로드
print("CSV 파일들을 업로드해주세요...")
uploaded = files.upload()

# 업로드된 파일 확인
print("\n업로드된 파일:")
for filename in uploaded.keys():
    print(f"  - {filename}")

CSV 파일들을 업로드해주세요...


Saving normal_gemini.csv to normal_gemini.csv
Saving normal_gpt.csv to normal_gpt.csv
Saving open_source_combined.csv to open_source_combined.csv
Saving specific_gemini.csv to specific_gemini.csv
Saving specific_gpt.csv to specific_gpt.csv

업로드된 파일:
  - normal_gemini.csv
  - normal_gpt.csv
  - open_source_combined.csv
  - specific_gemini.csv
  - specific_gpt.csv


In [3]:
# 데이터 로드 및 전처리
def load_and_preprocess_data():
    """
    5개의 CSV 파일을 로드하고 전처리
    loss weight를 각 샘플에 할당
    """
    data_files = {
        'normal_gemini.csv': 'gemini',
        'normal_gpt.csv': 'gpt',
        'specific_gemini.csv': 'gemini',
        'specific_gpt.csv': 'gpt',
        'open_source_combined.csv': 'opensource'
    }

    # weight 매핑
    weight_map = {
        'opensource': 1.3,
        'gemini': 1.3,
        'gpt': 0.6
    }

    all_data = []
    stats = {}

    for filename, source in data_files.items():
        if filename in uploaded.keys():
            # CSV 로드
            df = pd.read_csv(io.BytesIO(uploaded[filename]))

            # 필수 컬럼 확인
            if 'dialogue' not in df.columns or 'label' not in df.columns:
                print(f"⚠️  {filename}: 필수 컬럼(dialogue, label)이 없습니다.")
                continue

            # 결측치 제거
            before_len = len(df)
            df = df.dropna(subset=['dialogue', 'label'])
            after_len = len(df)

            # weight 할당
            df['weight'] = weight_map[source]
            df['source'] = source

            all_data.append(df)

            # 통계
            stats[filename] = {
                'total': before_len,
                'after_cleaning': after_len,
                'removed': before_len - after_len,
                'label_0': (df['label'] == 0).sum(),
                'label_1': (df['label'] == 1).sum(),
                'weight': weight_map[source]
            }

            print(f"✓ {filename}")
            print(f"   원본: {before_len}개 → 정제 후: {after_len}개 (제거: {before_len - after_len}개)")
            print(f"   Label 0 (정상): {stats[filename]['label_0']}개")
            print(f"   Label 1 (피싱): {stats[filename]['label_1']}개")
            print(f"   Loss Weight: {weight_map[source]}")
            print()

    # 전체 데이터 합치기
    combined_df = pd.concat(all_data, ignore_index=True)

    print("="*60)
    print(f"📊 전체 데이터 통계")
    print(f"   총 샘플 수: {len(combined_df)}개")
    print(f"   정상 대화 (0): {(combined_df['label'] == 0).sum()}개")
    print(f"   피싱 대화 (1): {(combined_df['label'] == 1).sum()}개")
    print(f"   소스별 분포:")
    for source in ['opensource', 'gemini', 'gpt']:
        count = (combined_df['source'] == source).sum()
        print(f"      {source}: {count}개 (weight: {weight_map[source]})")
    print("="*60)

    return combined_df, stats

# 데이터 로드
df_combined, data_stats = load_and_preprocess_data()

# 샘플 데이터 확인
print("\n샘플 데이터 (첫 3개):")
print(df_combined.head(3))

✓ normal_gemini.csv
   원본: 1615개 → 정제 후: 1615개 (제거: 0개)
   Label 0 (정상): 1058개
   Label 1 (피싱): 557개
   Loss Weight: 1.3

✓ normal_gpt.csv
   원본: 1599개 → 정제 후: 1598개 (제거: 1개)
   Label 0 (정상): 1057개
   Label 1 (피싱): 541개
   Loss Weight: 0.6

✓ specific_gemini.csv
   원본: 1615개 → 정제 후: 1615개 (제거: 0개)
   Label 0 (정상): 1115개
   Label 1 (피싱): 500개
   Loss Weight: 1.3

✓ specific_gpt.csv
   원본: 1498개 → 정제 후: 1497개 (제거: 1개)
   Label 0 (정상): 998개
   Label 1 (피싱): 499개
   Loss Weight: 0.6

✓ open_source_combined.csv
   원본: 4978개 → 정제 후: 4978개 (제거: 0개)
   Label 0 (정상): 2828개
   Label 1 (피싱): 2150개
   Loss Weight: 1.3

📊 전체 데이터 통계
   총 샘플 수: 11303개
   정상 대화 (0): 7056개
   피싱 대화 (1): 4247개
   소스별 분포:
      opensource: 4978개 (weight: 1.3)
      gemini: 3230개 (weight: 1.3)
      gpt: 3095개 (weight: 0.6)

샘플 데이터 (첫 3개):
                                            dialogue  label  weight  source
0  아예 신경화 고객님이죠.\n네.\n고객님 네이버 꼬마 손님 주소를 다시 바꿨습니다....    1.0     1.3  gemini
1  여행 좋아하신다고 하셨는데 여행 다니시면서 가장 맛있던

In [4]:
# Train/Test 분할 (stratified split)
from sklearn.model_selection import train_test_split

# stratified split으로 label 비율 유지
train_df, test_df = train_test_split(
    df_combined,
    test_size=0.2,
    random_state=42,
    stratify=df_combined['label']
)

print("📊 데이터 분할 완료")
print(f"   Train: {len(train_df)}개 (Label 0: {(train_df['label']==0).sum()}, Label 1: {(train_df['label']==1).sum()})")
print(f"   Test:  {len(test_df)}개 (Label 0: {(test_df['label']==0).sum()}, Label 1: {(test_df['label']==1).sum()})")

# PyTorch Dataset 클래스
class VoicePhishingDataset(Dataset):
    """보이스피싱 탐지 데이터셋"""

    def __init__(self, dataframe, tokenizer=None, max_length=128):
        """
        Args:
            dataframe: dialogue, label, weight 컬럼을 가진 DataFrame
            tokenizer: BERT tokenizer (None이면 LSTM용)
            max_length: 최대 시퀀스 길이
        """
        self.data = dataframe.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        text = str(row['dialogue'])
        label = int(row['label'])
        weight = float(row['weight'])

        if self.tokenizer is not None:
            # BERT용 토크나이징
            encoding = self.tokenizer(
                text,
                add_special_tokens=True,
                max_length=self.max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            )

            return {
                'input_ids': encoding['input_ids'].flatten(),
                'attention_mask': encoding['attention_mask'].flatten(),
                'label': torch.tensor(label, dtype=torch.long),
                'weight': torch.tensor(weight, dtype=torch.float)
            }
        else:
            # LSTM용 - 텍스트 그대로 반환 (나중에 vocabulary로 변환)
            return {
                'text': text,
                'label': torch.tensor(label, dtype=torch.long),
                'weight': torch.tensor(weight, dtype=torch.float)
            }

print("\n✓ Dataset 클래스 정의 완료")

📊 데이터 분할 완료
   Train: 9042개 (Label 0: 5645, Label 1: 3397)
   Test:  2261개 (Label 0: 1411, Label 1: 850)

✓ Dataset 클래스 정의 완료


In [5]:
from kobert_transformers import get_tokenizer

# KoBERT 토크나이저 로드
print("KoBERT 토크나이저 로딩 중...")
kobert_tokenizer = get_tokenizer()
print("✓ KoBERT 토크나이저 로드 완료")

# 샘플 토크나이징 테스트
sample_text = train_df.iloc[0]['dialogue']
tokens = kobert_tokenizer(
    sample_text,
    max_length=128,
    padding='max_length',
    truncation=True,
    return_tensors='pt'
)

print(f"\n샘플 텍스트 길이: {len(sample_text)} characters")
print(f"토큰 수: {(tokens['attention_mask'][0] == 1).sum().item()} tokens")
print(f"Input IDs shape: {tokens['input_ids'].shape}")

KoBERT 토크나이저 로딩 중...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/263 [00:00<?, ?B/s]

tokenizer_78b3253a26.model:   0%|          | 0.00/371k [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

✓ KoBERT 토크나이저 로드 완료

샘플 텍스트 길이: 209 characters
토큰 수: 112 tokens
Input IDs shape: torch.Size([1, 128])


In [6]:
from transformers import ElectraTokenizer, ElectraModel

# 한국어 ELECTRA-Small 로드
print("경량 LM 모델(ELECTRA-Small) 로딩 중...")

electra_model_name = "monologg/koelectra-small-v3-discriminator"
electra_tokenizer = ElectraTokenizer.from_pretrained(electra_model_name)

print("✓ ELECTRA-Small 토크나이저 로드 완료")

# 모델 크기 비교를 위한 정보
print("\n📊 사전 학습 모델 크기 비교:")
print("   - ELECTRA-Small: ~50MB")
print("   - KoBERT: ~350MB")
print("   - LSTM: ~10-50MB (embedding 크기에 따라)")

경량 LM 모델(ELECTRA-Small) 로딩 중...


tokenizer_config.json:   0%|          | 0.00/61.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

✓ ELECTRA-Small 토크나이저 로드 완료

📊 사전 학습 모델 크기 비교:
   - ELECTRA-Small: ~50MB
   - KoBERT: ~350MB
   - LSTM: ~10-50MB (embedding 크기에 따라)


In [7]:
# 각 모델별 Dataset 생성
print("Dataset 생성 중...\n")

# 1. KoBERT용
train_dataset_kobert = VoicePhishingDataset(
    train_df,
    tokenizer=kobert_tokenizer,
    max_length=128
)
test_dataset_kobert = VoicePhishingDataset(
    test_df,
    tokenizer=kobert_tokenizer,
    max_length=128
)
print(f"✓ KoBERT Dataset: Train {len(train_dataset_kobert)}, Test {len(test_dataset_kobert)}")

# 2. ELECTRA용
train_dataset_electra = VoicePhishingDataset(
    train_df,
    tokenizer=electra_tokenizer,
    max_length=128
)
test_dataset_electra = VoicePhishingDataset(
    test_df,
    tokenizer=electra_tokenizer,
    max_length=128
)
print(f"✓ ELECTRA Dataset: Train {len(train_dataset_electra)}, Test {len(test_dataset_electra)}")

# 3. LSTM용 (토크나이저 없음)
train_dataset_lstm = VoicePhishingDataset(
    train_df,
    tokenizer=None,  # LSTM은 자체 vocabulary 사용
    max_length=128
)
test_dataset_lstm = VoicePhishingDataset(
    test_df,
    tokenizer=None,
    max_length=128
)
print(f"✓ LSTM Dataset: Train {len(train_dataset_lstm)}, Test {len(test_dataset_lstm)}")

Dataset 생성 중...

✓ KoBERT Dataset: Train 9042, Test 2261
✓ ELECTRA Dataset: Train 9042, Test 2261
✓ LSTM Dataset: Train 9042, Test 2261


In [8]:
# DistilKoBERT는 공식 배포가 없어서, 대안으로 진행합니다
# 옵션 1: DistilBERT multilingual (한국어 지원)
# 옵션 2: KoBERT를 직접 distillation (시간 많이 소요)

print("경량화 모델 체크 중...\n")

# DistilBERT multilingual 사용 (한국어 포함)
from transformers import DistilBertTokenizer, DistilBertModel

distilbert_model_name = "distilbert-base-multilingual-cased"
distilbert_tokenizer = DistilBertTokenizer.from_pretrained(distilbert_model_name)

print("✓ DistilBERT-Multilingual 로드 완료 (~270MB)")

# Dataset 생성
train_dataset_distilbert = VoicePhishingDataset(
    train_df,
    tokenizer=distilbert_tokenizer,
    max_length=128
)
test_dataset_distilbert = VoicePhishingDataset(
    test_df,
    tokenizer=distilbert_tokenizer,
    max_length=128
)
print(f"✓ DistilBERT Dataset: Train {len(train_dataset_distilbert)}, Test {len(test_dataset_distilbert)}")

print("\n📊 최종 비교 모델:")
print("   1. LSTM (~10-50MB)")
print("   2. ELECTRA-Small (~50MB)")
print("   3. DistilBERT-Multilingual (~270MB)")
print("   4. KoBERT (~350MB)")
print("   5. Gemini API (서버 통신)")

경량화 모델 체크 중...



tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

✓ DistilBERT-Multilingual 로드 완료 (~270MB)
✓ DistilBERT Dataset: Train 9042, Test 2261

📊 최종 비교 모델:
   1. LSTM (~10-50MB)
   2. ELECTRA-Small (~50MB)
   3. DistilBERT-Multilingual (~270MB)
   4. KoBERT (~350MB)
   5. Gemini API (서버 통신)


In [9]:
import os
import psutil
import gc

class ResourceMonitor:
    """리소스 사용량 모니터링"""

    @staticmethod
    def get_model_size(model, name="Model"):
        """모델 파일 크기 측정 (MB)"""
        # 임시로 저장해서 크기 측정
        temp_path = f"temp_{name}.pt"
        torch.save(model.state_dict(), temp_path)
        size_mb = os.path.getsize(temp_path) / (1024 * 1024)
        os.remove(temp_path)
        return size_mb

    @staticmethod
    def get_memory_usage():
        """현재 메모리 사용량 (MB)"""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / (1024 * 1024)

    @staticmethod
    def get_gpu_memory():
        """GPU 메모리 사용량 (MB)"""
        if torch.cuda.is_available():
            return torch.cuda.memory_allocated() / (1024 * 1024)
        return 0

    @staticmethod
    def clear_memory():
        """메모리 정리"""
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

print("✓ ResourceMonitor 클래스 정의 완료")

✓ ResourceMonitor 클래스 정의 완료


In [10]:
from kobert_transformers import get_kobert_model

class KoBERTClassifier(nn.Module):
    """KoBERT 기반 보이스피싱 분류 모델"""

    def __init__(self, num_classes=2, dropout=0.1):
        super(KoBERTClassifier, self).__init__()

        # KoBERT 모델 로드
        self.kobert = get_kobert_model()
        self.dropout = nn.Dropout(dropout)

        # Classification head
        self.classifier = nn.Linear(768, num_classes)  # BERT hidden size = 768

    def forward(self, input_ids, attention_mask):
        # KoBERT encoding
        outputs = self.kobert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        # [CLS] token의 hidden state 사용
        pooled_output = outputs[1]  # pooler_output
        pooled_output = self.dropout(pooled_output)

        # Classification
        logits = self.classifier(pooled_output)
        return logits

# 모델 초기화
print("KoBERT 모델 초기화 중...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_kobert = KoBERTClassifier(num_classes=2, dropout=0.1).to(device)

# 모델 크기 측정
model_size = ResourceMonitor.get_model_size(model_kobert, "kobert")
param_count = sum(p.numel() for p in model_kobert.parameters())
trainable_params = sum(p.numel() for p in model_kobert.parameters() if p.requires_grad)

print(f"\n{'='*60}")
print(f"📊 KoBERT 모델 정보")
print(f"{'='*60}")
print(f"   모델 크기: {model_size:.2f} MB")
print(f"   총 파라미터: {param_count:,}")
print(f"   학습 가능 파라미터: {trainable_params:,}")
print(f"   Device: {device}")
print(f"{'='*60}")

KoBERT 모델 초기화 중...


config.json:   0%|          | 0.00/426 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]


📊 KoBERT 모델 정보
   모델 크기: 351.75 MB
   총 파라미터: 92,188,418
   학습 가능 파라미터: 92,188,418
   Device: cuda


In [13]:
from tqdm import tqdm

def train_model(model, train_loader, criterion, optimizer, device, epoch):
    """1 epoch 학습"""
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}")

    for batch in progress_bar:
        # 데이터 준비
        if 'input_ids' in batch:  # BERT 계열
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            weights = batch['weight'].to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
        else:  # LSTM
            inputs = batch['inputs'].to(device)
            labels = batch['label'].to(device)
            weights = batch['weight'].to(device)

            optimizer.zero_grad()
            outputs = model(inputs)

        # Loss 계산 (weighted)
        loss = criterion(outputs, labels)
        loss = (loss * weights).mean()  # weight 적용

        # Backward pass
        loss.backward()
        optimizer.step()

        # 통계
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Progress bar 업데이트
        progress_bar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })

    avg_loss = total_loss / len(train_loader)
    accuracy = 100. * correct / total

    return avg_loss, accuracy

def evaluate_model(model, test_loader, criterion, device):
    """모델 평가"""
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    # 메모리 측정
    mem_before = ResourceMonitor.get_gpu_memory()

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            # 데이터 준비
            if 'input_ids' in batch:  # BERT 계열
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)

                outputs = model(input_ids, attention_mask)
            else:  # LSTM
                inputs = batch['inputs'].to(device)
                labels = batch['label'].to(device)

                outputs = model(inputs)

            loss = criterion(outputs, labels)

            total_loss += loss.mean().item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    mem_after = ResourceMonitor.get_gpu_memory()

    avg_loss = total_loss / len(test_loader)
    accuracy = 100. * correct / total

    # 정밀도, 재현율, F1
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='binary'
    )

    return {
        'loss': avg_loss,
        'accuracy': accuracy,
        'precision': precision * 100,
        'recall': recall * 100,
        'f1': f1 * 100,
        'memory_usage': mem_after - mem_before
    }

print("✓ 학습/평가 함수 정의 완료")

✓ 학습/평가 함수 정의 완료


In [14]:
from torch.utils.data import DataLoader

# 하이퍼파라미터 설정
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
NUM_EPOCHS = 3
WEIGHT_DECAY = 0.01

# DataLoader 생성
train_loader_kobert = DataLoader(
    train_dataset_kobert,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

test_loader_kobert = DataLoader(
    test_dataset_kobert,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2
)

# Loss function (reduction='none'으로 개별 weight 적용 가능)
criterion = nn.CrossEntropyLoss(reduction='none')

# Optimizer
optimizer = optim.AdamW(
    model_kobert.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

print(f"{'='*60}")
print(f"🚀 KoBERT 학습 시작")
print(f"{'='*60}")
print(f"   Batch Size: {BATCH_SIZE}")
print(f"   Learning Rate: {LEARNING_RATE}")
print(f"   Epochs: {NUM_EPOCHS}")
print(f"   Train Batches: {len(train_loader_kobert)}")
print(f"   Test Batches: {len(test_loader_kobert)}")
print(f"{'='*60}\n")

# 학습 시작
kobert_results = {
    'train_loss': [],
    'train_acc': [],
    'test_metrics': []
}

start_time = time.time()

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"\n{'='*60}")
    print(f"Epoch {epoch}/{NUM_EPOCHS}")
    print(f"{'='*60}")

    # 학습
    train_loss, train_acc = train_model(
        model_kobert,
        train_loader_kobert,
        criterion,
        optimizer,
        device,
        epoch
    )

    kobert_results['train_loss'].append(train_loss)
    kobert_results['train_acc'].append(train_acc)

    print(f"\n   Train Loss: {train_loss:.4f}")
    print(f"   Train Accuracy: {train_acc:.2f}%")

    # 평가
    print(f"\n   테스트 세트 평가 중...")
    test_metrics = evaluate_model(
        model_kobert,
        test_loader_kobert,
        criterion,
        device
    )

    kobert_results['test_metrics'].append(test_metrics)

    print(f"\n   📊 Test Results:")
    print(f"      Loss: {test_metrics['loss']:.4f}")
    print(f"      Accuracy: {test_metrics['accuracy']:.2f}%")
    print(f"      Precision: {test_metrics['precision']:.2f}%")
    print(f"      Recall: {test_metrics['recall']:.2f}%")
    print(f"      F1 Score: {test_metrics['f1']:.2f}%")
    print(f"      GPU Memory: {test_metrics['memory_usage']:.2f} MB")

training_time = time.time() - start_time

print(f"\n{'='*60}")
print(f"✅ KoBERT 학습 완료!")
print(f"{'='*60}")
print(f"   총 학습 시간: {training_time:.2f}초 ({training_time/60:.2f}분)")
print(f"{'='*60}\n")

🚀 KoBERT 학습 시작
   Batch Size: 16
   Learning Rate: 2e-05
   Epochs: 3
   Train Batches: 566
   Test Batches: 142


Epoch 1/3


Epoch 1: 100%|██████████| 566/566 [03:18<00:00,  2.86it/s, loss=0.0035, acc=96.87%]



   Train Loss: 0.1017
   Train Accuracy: 96.87%

   테스트 세트 평가 중...


Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.39it/s]



   📊 Test Results:
      Loss: 0.0138
      Accuracy: 99.60%
      Precision: 99.76%
      Recall: 99.18%
      F1 Score: 99.47%
      GPU Memory: 0.01 MB

Epoch 2/3


Epoch 2: 100%|██████████| 566/566 [03:18<00:00,  2.85it/s, loss=0.0015, acc=99.18%]



   Train Loss: 0.0313
   Train Accuracy: 99.18%

   테스트 세트 평가 중...


Evaluating: 100%|██████████| 142/142 [00:17<00:00,  8.28it/s]



   📊 Test Results:
      Loss: 0.0531
      Accuracy: 98.41%
      Precision: 100.00%
      Recall: 95.76%
      F1 Score: 97.84%
      GPU Memory: 0.01 MB

Epoch 3/3


Epoch 3: 100%|██████████| 566/566 [03:18<00:00,  2.85it/s, loss=0.0008, acc=99.69%]



   Train Loss: 0.0121
   Train Accuracy: 99.69%

   테스트 세트 평가 중...


Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.42it/s]


   📊 Test Results:
      Loss: 0.0242
      Accuracy: 99.38%
      Precision: 98.38%
      Recall: 100.00%
      F1 Score: 99.18%
      GPU Memory: 0.01 MB

✅ KoBERT 학습 완료!
   총 학습 시간: 646.09초 (10.77분)






In [15]:
import time

def measure_inference_performance(model, test_loader, device, num_samples=100):
    """추론 성능 측정"""
    model.eval()

    # 메모리 정리
    ResourceMonitor.clear_memory()

    inference_times = []
    mem_usages = []

    with torch.no_grad():
        for i, batch in enumerate(test_loader):
            if i >= num_samples // BATCH_SIZE:
                break

            # 메모리 측정 (전)
            mem_before = ResourceMonitor.get_gpu_memory()

            # 추론 시간 측정
            start_time = time.time()

            if 'input_ids' in batch:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                outputs = model(input_ids, attention_mask)
            else:
                inputs = batch['inputs'].to(device)
                outputs = model(inputs)

            # GPU 연산 완료 대기
            if torch.cuda.is_available():
                torch.cuda.synchronize()

            inference_time = (time.time() - start_time) * 1000  # ms

            # 메모리 측정 (후)
            mem_after = ResourceMonitor.get_gpu_memory()

            inference_times.append(inference_time)
            mem_usages.append(mem_after - mem_before)

    # 샘플당 평균
    avg_time_per_batch = np.mean(inference_times)
    avg_time_per_sample = avg_time_per_batch / BATCH_SIZE
    avg_memory = np.mean(mem_usages)

    return {
        'avg_latency_ms': avg_time_per_sample,
        'avg_memory_mb': avg_memory,
        'throughput': 1000 / avg_time_per_sample  # samples/sec
    }

# KoBERT 추론 성능 측정
print("🔍 KoBERT 추론 성능 측정 중...\n")

kobert_inference = measure_inference_performance(
    model_kobert,
    test_loader_kobert,
    device,
    num_samples=100
)

print(f"{'='*60}")
print(f"📊 KoBERT 최종 성능 리포트")
print(f"{'='*60}")
print(f"🎯 정확도 지표 (Best Epoch):")
best_epoch_idx = np.argmax([m['accuracy'] for m in kobert_results['test_metrics']])
best_metrics = kobert_results['test_metrics'][best_epoch_idx]
print(f"   Accuracy: {best_metrics['accuracy']:.2f}%")
print(f"   Precision: {best_metrics['precision']:.2f}%")
print(f"   Recall: {best_metrics['recall']:.2f}%")
print(f"   F1 Score: {best_metrics['f1']:.2f}%")
print(f"\n💾 모델 크기:")
print(f"   파일 크기: {model_size:.2f} MB")
print(f"   파라미터 수: {param_count:,}")
print(f"\n⚡ 추론 성능:")
print(f"   평균 Latency: {kobert_inference['avg_latency_ms']:.2f} ms/sample")
print(f"   처리량: {kobert_inference['throughput']:.2f} samples/sec")
print(f"   추론 메모리: {kobert_inference['avg_memory_mb']:.2f} MB")
print(f"\n⏱️ 학습 시간:")
print(f"   총 시간: {training_time:.2f}초 ({training_time/60:.2f}분)")
print(f"   Epoch당 평균: {training_time/NUM_EPOCHS:.2f}초")
print(f"{'='*60}\n")

# 결과 저장
kobert_final_results = {
    'model_name': 'KoBERT',
    'model_size_mb': model_size,
    'parameters': param_count,
    'best_accuracy': best_metrics['accuracy'],
    'best_f1': best_metrics['f1'],
    'inference_latency_ms': kobert_inference['avg_latency_ms'],
    'inference_memory_mb': kobert_inference['avg_memory_mb'],
    'training_time_sec': training_time,
    'device': 'on-device' if model_size < 100 else 'on-device (large)'
}

print("✅ KoBERT 측정 완료!")
print("\n다음: DistilBERT (경량화 모델) 학습 시작")

🔍 KoBERT 추론 성능 측정 중...

📊 KoBERT 최종 성능 리포트
🎯 정확도 지표 (Best Epoch):
   Accuracy: 99.60%
   Precision: 99.76%
   Recall: 99.18%
   F1 Score: 99.47%

💾 모델 크기:
   파일 크기: 351.75 MB
   파라미터 수: 92,188,418

⚡ 추론 성능:
   평균 Latency: 6.64 ms/sample
   처리량: 150.62 samples/sec
   추론 메모리: 0.01 MB

⏱️ 학습 시간:
   총 시간: 646.09초 (10.77분)
   Epoch당 평균: 215.36초

✅ KoBERT 측정 완료!

다음: DistilBERT (경량화 모델) 학습 시작


In [16]:
from transformers import DistilBertModel

class DistilBERTClassifier(nn.Module):
    """DistilBERT 기반 보이스피싱 분류 모델"""

    def __init__(self, num_classes=2, dropout=0.1):
        super(DistilBERTClassifier, self).__init__()

        # DistilBERT 모델 로드
        self.distilbert = DistilBertModel.from_pretrained(
            "distilbert-base-multilingual-cased"
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.distilbert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        # [CLS] token 사용
        pooled_output = outputs.last_hidden_state[:, 0]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# 모델 초기화
print("🚀 DistilBERT 모델 학습 시작\n")
ResourceMonitor.clear_memory()

model_distilbert = DistilBERTClassifier(num_classes=2, dropout=0.1).to(device)
distilbert_size = ResourceMonitor.get_model_size(model_distilbert, "distilbert")
distilbert_params = sum(p.numel() for p in model_distilbert.parameters())

print(f"📊 DistilBERT: {distilbert_size:.2f} MB, {distilbert_params:,} params\n")

# DataLoader
train_loader_distilbert = DataLoader(train_dataset_distilbert, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader_distilbert = DataLoader(test_dataset_distilbert, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Optimizer
optimizer_distilbert = optim.AdamW(model_distilbert.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# 학습
distilbert_results = {'train_loss': [], 'train_acc': [], 'test_metrics': []}
start_time = time.time()

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"Epoch {epoch}/{NUM_EPOCHS}")
    train_loss, train_acc = train_model(model_distilbert, train_loader_distilbert, criterion, optimizer_distilbert, device, epoch)
    distilbert_results['train_loss'].append(train_loss)
    distilbert_results['train_acc'].append(train_acc)

    test_metrics = evaluate_model(model_distilbert, test_loader_distilbert, criterion, device)
    distilbert_results['test_metrics'].append(test_metrics)
    print(f"   Acc: {test_metrics['accuracy']:.2f}% | F1: {test_metrics['f1']:.2f}%\n")

distilbert_training_time = time.time() - start_time

# 추론 성능 측정
distilbert_inference = measure_inference_performance(model_distilbert, test_loader_distilbert, device, num_samples=100)

distilbert_best_idx = np.argmax([m['accuracy'] for m in distilbert_results['test_metrics']])
distilbert_best = distilbert_results['test_metrics'][distilbert_best_idx]

distilbert_final_results = {
    'model_name': 'DistilBERT',
    'model_size_mb': distilbert_size,
    'parameters': distilbert_params,
    'best_accuracy': distilbert_best['accuracy'],
    'best_f1': distilbert_best['f1'],
    'inference_latency_ms': distilbert_inference['avg_latency_ms'],
    'inference_memory_mb': distilbert_inference['avg_memory_mb'],
    'training_time_sec': distilbert_training_time,
    'device': 'on-device'
}

print(f"✅ DistilBERT 완료: Acc {distilbert_best['accuracy']:.2f}% | {distilbert_size:.2f}MB | {distilbert_inference['avg_latency_ms']:.2f}ms\n")
print("="*60 + "\n")

🚀 DistilBERT 모델 학습 시작



model.safetensors:   0%|          | 0.00/542M [00:00<?, ?B/s]

📊 DistilBERT: 514.02 MB, 134,735,618 params

Epoch 1/3


Epoch 1: 100%|██████████| 566/566 [01:58<00:00,  4.79it/s, loss=0.0371, acc=90.37%]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.66it/s]


   Acc: 98.98% | F1: 98.64%

Epoch 2/3


Epoch 2: 100%|██████████| 566/566 [01:58<00:00,  4.78it/s, loss=0.0022, acc=98.52%]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.68it/s]


   Acc: 98.76% | F1: 98.35%

Epoch 3/3


Epoch 3: 100%|██████████| 566/566 [01:58<00:00,  4.79it/s, loss=0.0003, acc=99.20%]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.39it/s]


   Acc: 99.07% | F1: 98.78%

✅ DistilBERT 완료: Acc 99.07% | 514.02MB | 2.94ms




In [17]:
class ELECTRAClassifier(nn.Module):
    """ELECTRA-Small 기반 보이스피싱 분류 모델"""

    def __init__(self, num_classes=2, dropout=0.1):
        super(ELECTRAClassifier, self).__init__()

        # ELECTRA 모델 로드
        self.electra = ElectraModel.from_pretrained(
            "monologg/koelectra-small-v3-discriminator"
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(256, num_classes)  # ELECTRA-small hidden = 256

    def forward(self, input_ids, attention_mask):
        outputs = self.electra(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        pooled_output = outputs.last_hidden_state[:, 0]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# 모델 초기화
print("🚀 ELECTRA-Small 모델 학습 시작\n")
ResourceMonitor.clear_memory()

model_electra = ELECTRAClassifier(num_classes=2, dropout=0.1).to(device)
electra_size = ResourceMonitor.get_model_size(model_electra, "electra")
electra_params = sum(p.numel() for p in model_electra.parameters())

print(f"📊 ELECTRA-Small: {electra_size:.2f} MB, {electra_params:,} params\n")

# DataLoader
train_loader_electra = DataLoader(train_dataset_electra, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader_electra = DataLoader(test_dataset_electra, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Optimizer
optimizer_electra = optim.AdamW(model_electra.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# 학습
electra_results = {'train_loss': [], 'train_acc': [], 'test_metrics': []}
start_time = time.time()

for epoch in range(1, NUM_EPOCHS + 1):
    print(f"Epoch {epoch}/{NUM_EPOCHS}")
    train_loss, train_acc = train_model(model_electra, train_loader_electra, criterion, optimizer_electra, device, epoch)
    electra_results['train_loss'].append(train_loss)
    electra_results['train_acc'].append(train_acc)

    test_metrics = evaluate_model(model_electra, test_loader_electra, criterion, device)
    electra_results['test_metrics'].append(test_metrics)
    print(f"   Acc: {test_metrics['accuracy']:.2f}% | F1: {test_metrics['f1']:.2f}%\n")

electra_training_time = time.time() - start_time

# 추론 성능 측정
electra_inference = measure_inference_performance(model_electra, test_loader_electra, device, num_samples=100)

electra_best_idx = np.argmax([m['accuracy'] for m in electra_results['test_metrics']])
electra_best = electra_results['test_metrics'][electra_best_idx]

electra_final_results = {
    'model_name': 'ELECTRA-Small',
    'model_size_mb': electra_size,
    'parameters': electra_params,
    'best_accuracy': electra_best['accuracy'],
    'best_f1': electra_best['f1'],
    'inference_latency_ms': electra_inference['avg_latency_ms'],
    'inference_memory_mb': electra_inference['avg_memory_mb'],
    'training_time_sec': electra_training_time,
    'device': 'on-device'
}

print(f"✅ ELECTRA-Small 완료: Acc {electra_best['accuracy']:.2f}% | {electra_size:.2f}MB | {electra_inference['avg_latency_ms']:.2f}ms\n")
print("="*60 + "\n")

🚀 ELECTRA-Small 모델 학습 시작



pytorch_model.bin:   0%|          | 0.00/56.6M [00:00<?, ?B/s]

📊 ELECTRA-Small: 53.70 MB, 14,056,706 params

Epoch 1/3


Epoch 1:   0%|          | 1/566 [00:00<03:35,  2.62it/s, loss=0.7466, acc=50.00%]

model.safetensors:   0%|          | 0.00/56.5M [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 566/566 [01:05<00:00,  8.67it/s, loss=0.0110, acc=92.47%]
Evaluating: 100%|██████████| 142/142 [00:14<00:00,  9.58it/s]


   Acc: 99.34% | F1: 99.12%

Epoch 2/3


Epoch 2: 100%|██████████| 566/566 [01:04<00:00,  8.75it/s, loss=0.0062, acc=99.20%]
Evaluating: 100%|██████████| 142/142 [00:14<00:00,  9.93it/s]


   Acc: 99.56% | F1: 99.42%

Epoch 3/3


Epoch 3: 100%|██████████| 566/566 [01:04<00:00,  8.74it/s, loss=0.0020, acc=99.59%]
Evaluating: 100%|██████████| 142/142 [00:15<00:00,  9.35it/s]


   Acc: 98.45% | F1: 97.90%

✅ ELECTRA-Small 완료: Acc 99.56% | 53.70MB | 1.86ms




In [18]:
from collections import Counter
import re

class Vocabulary:
    """LSTM용 단어 사전"""

    def __init__(self, max_vocab_size=10000):
        self.word2idx = {'<PAD>': 0, '<UNK>': 1}
        self.idx2word = {0: '<PAD>', 1: '<UNK>'}
        self.max_vocab_size = max_vocab_size

    def build_vocab(self, texts):
        """텍스트로부터 vocabulary 구축"""
        word_counts = Counter()

        for text in texts:
            # 간단한 토크나이징 (공백 기준)
            words = text.lower().split()
            word_counts.update(words)

        # 빈도수 상위 단어들만 선택
        most_common = word_counts.most_common(self.max_vocab_size - 2)  # PAD, UNK 제외

        for idx, (word, _) in enumerate(most_common, start=2):
            self.word2idx[word] = idx
            self.idx2word[idx] = word

        return len(self.word2idx)

    def text_to_indices(self, text, max_length=128):
        """텍스트를 인덱스 시퀀스로 변환"""
        words = text.lower().split()[:max_length]
        indices = [self.word2idx.get(word, 1) for word in words]  # UNK = 1

        # Padding
        if len(indices) < max_length:
            indices += [0] * (max_length - len(indices))

        return indices

# Vocabulary 구축
print("📚 LSTM용 Vocabulary 구축 중...\n")
vocab = Vocabulary(max_vocab_size=10000)
vocab_size = vocab.build_vocab(train_df['dialogue'].values)
print(f"✓ Vocabulary 크기: {vocab_size}\n")

# LSTM용 Dataset 재정의
class LSTMDataset(Dataset):
    """LSTM용 데이터셋"""

    def __init__(self, dataframe, vocabulary, max_length=128):
        self.data = dataframe.reset_index(drop=True)
        self.vocab = vocabulary
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        text = str(row['dialogue'])
        label = int(row['label'])
        weight = float(row['weight'])

        # 텍스트를 인덱스로 변환
        indices = self.vocab.text_to_indices(text, self.max_length)

        return {
            'inputs': torch.tensor(indices, dtype=torch.long),
            'label': torch.tensor(label, dtype=torch.long),
            'weight': torch.tensor(weight, dtype=torch.float)
        }

# LSTM용 Dataset 생성
train_dataset_lstm_new = LSTMDataset(train_df, vocab, max_length=128)
test_dataset_lstm_new = LSTMDataset(test_df, vocab, max_length=128)

print(f"✓ LSTM Dataset: Train {len(train_dataset_lstm_new)}, Test {len(test_dataset_lstm_new)}\n")

📚 LSTM용 Vocabulary 구축 중...

✓ Vocabulary 크기: 10000

✓ LSTM Dataset: Train 9042, Test 2261



In [22]:
class LSTMClassifier(nn.Module):
    """LSTM 기반 보이스피싱 분류 모델"""

    def __init__(self, vocab_size, embedding_dim=128, hidden_dim=128, num_layers=2, num_classes=2, dropout=0.3):
        super(LSTMClassifier, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=True
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # *2 for bidirectional

    def forward(self, inputs):
        # inputs: [batch_size, seq_length]
        embedded = self.embedding(inputs)  # [batch_size, seq_length, embedding_dim]

        # LSTM
        lstm_out, (hidden, cell) = self.lstm(embedded)

        # 마지막 hidden state (forward + backward)
        hidden_concat = torch.cat((hidden[-2], hidden[-1]), dim=1)
        hidden_concat = self.dropout(hidden_concat)

        # Classification
        logits = self.fc(hidden_concat)
        return logits

# 모델 초기화
print("🚀 LSTM 모델 학습 시작\n")
ResourceMonitor.clear_memory()

model_lstm = LSTMClassifier(
    vocab_size=vocab_size,
    embedding_dim=128,
    hidden_dim=128,
    num_layers=2,
    num_classes=2,
    dropout=0.3
).to(device)

lstm_size = ResourceMonitor.get_model_size(model_lstm, "lstm")
lstm_params = sum(p.numel() for p in model_lstm.parameters())

print(f"📊 LSTM: {lstm_size:.2f} MB, {lstm_params:,} params\n")

# DataLoader
train_loader_lstm = DataLoader(train_dataset_lstm_new, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader_lstm = DataLoader(test_dataset_lstm_new, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Optimizer
optimizer_lstm = optim.AdamW(model_lstm.parameters(), lr=0.001, weight_decay=WEIGHT_DECAY)

# 학습
lstm_results = {'train_loss': [], 'train_acc': [], 'test_metrics': []}
start_time = time.time()

for epoch in range(1, 6):
    print(f"Epoch {epoch}/{5}")
    train_loss, train_acc = train_model(model_lstm, train_loader_lstm, criterion, optimizer_lstm, device, epoch)
    lstm_results['train_loss'].append(train_loss)
    lstm_results['train_acc'].append(train_acc)

    test_metrics = evaluate_model(model_lstm, test_loader_lstm, criterion, device)
    lstm_results['test_metrics'].append(test_metrics)
    print(f"   Acc: {test_metrics['accuracy']:.2f}% | F1: {test_metrics['f1']:.2f}%\n")

lstm_training_time = time.time() - start_time

# 추론 성능 측정
lstm_inference = measure_inference_performance(model_lstm, test_loader_lstm, device, num_samples=100)

lstm_best_idx = np.argmax([m['accuracy'] for m in lstm_results['test_metrics']])
lstm_best = lstm_results['test_metrics'][lstm_best_idx]

lstm_final_results = {
    'model_name': 'LSTM',
    'model_size_mb': lstm_size,
    'parameters': lstm_params,
    'best_accuracy': lstm_best['accuracy'],
    'best_f1': lstm_best['f1'],
    'inference_latency_ms': lstm_inference['avg_latency_ms'],
    'inference_memory_mb': lstm_inference['avg_memory_mb'],
    'training_time_sec': lstm_training_time,
    'device': 'on-device'
}

print(f"✅ LSTM 완료: Acc {lstm_best['accuracy']:.2f}% | {lstm_size:.2f}MB | {lstm_inference['avg_latency_ms']:.2f}ms\n")
print("="*60 + "\n")

🚀 LSTM 모델 학습 시작

📊 LSTM: 7.40 MB, 1,939,970 params

Epoch 1/5


Epoch 1: 100%|██████████| 566/566 [00:09<00:00, 62.61it/s, loss=0.0532, acc=89.29%]
Evaluating: 100%|██████████| 142/142 [00:01<00:00, 132.22it/s]


   Acc: 93.37% | F1: 91.38%

Epoch 2/5


Epoch 2: 100%|██████████| 566/566 [00:05<00:00, 98.14it/s, loss=0.0099, acc=96.81%] 
Evaluating: 100%|██████████| 142/142 [00:00<00:00, 153.05it/s]


   Acc: 97.35% | F1: 96.48%

Epoch 3/5


Epoch 3: 100%|██████████| 566/566 [00:06<00:00, 84.93it/s, loss=0.0001, acc=99.13%]
Evaluating: 100%|██████████| 142/142 [00:00<00:00, 156.20it/s]


   Acc: 97.26% | F1: 96.45%

Epoch 4/5


Epoch 4: 100%|██████████| 566/566 [00:05<00:00, 96.39it/s, loss=0.0001, acc=99.65%] 
Evaluating: 100%|██████████| 142/142 [00:00<00:00, 153.83it/s]


   Acc: 98.45% | F1: 97.93%

Epoch 5/5


Epoch 5: 100%|██████████| 566/566 [00:06<00:00, 86.19it/s, loss=0.0000, acc=99.89%] 
Evaluating: 100%|██████████| 142/142 [00:00<00:00, 153.18it/s]


   Acc: 98.81% | F1: 98.41%

✅ LSTM 완료: Acc 98.81% | 7.40MB | 0.17ms




In [23]:
# 전체 결과 수집
all_results = [
    kobert_final_results,
    distilbert_final_results,
    electra_final_results,
    lstm_final_results
]

# DataFrame으로 변환
results_df = pd.DataFrame(all_results)

print("="*80)
print("📊 전체 모델 비교 결과")
print("="*80)
print(results_df.to_string(index=False))
print("="*80)

# 비교 지표
print("\n🎯 정확도 순위:")
sorted_by_acc = results_df.sort_values('best_accuracy', ascending=False)
for i, row in enumerate(sorted_by_acc.itertuples(), 1):
    print(f"   {i}. {row.model_name}: {row.best_accuracy:.2f}% (F1: {row.best_f1:.2f}%)")

print("\n💾 모델 크기 순위 (작은 순):")
sorted_by_size = results_df.sort_values('model_size_mb')
for i, row in enumerate(sorted_by_size.itertuples(), 1):
    print(f"   {i}. {row.model_name}: {row.model_size_mb:.2f} MB")

print("\n⚡ 추론 속도 순위 (빠른 순):")
sorted_by_latency = results_df.sort_values('inference_latency_ms')
for i, row in enumerate(sorted_by_latency.itertuples(), 1):
    print(f"   {i}. {row.model_name}: {row.inference_latency_ms:.2f} ms/sample")

print("\n🔋 추론 메모리 순위 (적은 순):")
sorted_by_memory = results_df.sort_values('inference_memory_mb')
for i, row in enumerate(sorted_by_memory.itertuples(), 1):
    print(f"   {i}. {row.model_name}: {row.inference_memory_mb:.2f} MB")

print("\n⏱️ 학습 시간 순위 (빠른 순):")
sorted_by_train_time = results_df.sort_values('training_time_sec')
for i, row in enumerate(sorted_by_train_time.itertuples(), 1):
    print(f"   {i}. {row.model_name}: {row.training_time_sec:.2f}초 ({row.training_time_sec/60:.1f}분)")

# 성능/크기 효율성
print("\n🏆 성능/크기 효율성 (Accuracy per MB):")
results_df['efficiency'] = results_df['best_accuracy'] / results_df['model_size_mb']
sorted_by_efficiency = results_df.sort_values('efficiency', ascending=False)
for i, row in enumerate(sorted_by_efficiency.itertuples(), 1):
    print(f"   {i}. {row.model_name}: {row.efficiency:.2f} (Acc: {row.best_accuracy:.2f}%, Size: {row.model_size_mb:.2f}MB)")

print("\n" + "="*80)

📊 전체 모델 비교 결과
   model_name  model_size_mb  parameters  best_accuracy   best_f1  inference_latency_ms  inference_memory_mb  training_time_sec            device
       KoBERT     351.750710    92188418      99.601946 99.469027              6.639076             0.005290         646.087392 on-device (large)
   DistilBERT     514.015765   134735618      99.071207 98.778360              2.938377             0.005290         404.387163         on-device
ELECTRA-Small      53.702446    14056706      99.557718 99.415205              1.861426             0.005290         239.024529         on-device
         LSTM       7.404160     1939970      98.805838 98.408957              0.171234             0.002686          38.749381         on-device

🎯 정확도 순위:
   1. KoBERT: 99.60% (F1: 99.47%)
   2. ELECTRA-Small: 99.56% (F1: 99.42%)
   3. DistilBERT: 99.07% (F1: 98.78%)
   4. LSTM: 98.81% (F1: 98.41%)

💾 모델 크기 순위 (작은 순):
   1. LSTM: 7.40 MB
   2. ELECTRA-Small: 53.70 MB
   3. KoBERT: 351.75 MB
   4. 