# 📄 Document type classification baseline code with WandB Integration



In [50]:

# =============================================================================
# 0. Prepare Environments & Install Libraries
# =============================================================================

# 필요한 라이브러리를 설치합니다.
!pip install -r ../requirements.txt

[0m

In [51]:
# =============================================================================
# 1. Import Libraries & Define Functions
# =============================================================================

import os
import time
import random
import copy

import optuna, math
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import autocast, GradScaler  # Mixed Precision용

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# WandB 관련 import 추가
import wandb
from datetime import datetime


In [52]:
# =============================================================================
# 1-1. WandB Login and Configuration
# =============================================================================
# Team usage guide
#
# 1. Create a WandB account: https://wandb.ai/signup
# 2. When this cell shows a login prompt, enter your personal API key.
# 3. Change EXPERIMENT_NAME to something unique per member, for example:
#    - "member1-baseline"
#    - "member2-augmentation-test"
#    - "member3-hyperparameter-tuning"
# 4. Team dashboard URL: [add your project URL here]
#
# Cautions:
# - Never hard-code an API key in the notebook.
# - Change only EXPERIMENT_NAME; leave PROJECT_NAME untouched.
# - Log in with your own account so experiments are attributed correctly.

# Log in to WandB (each member runs this with their own credentials).
# Only ordinary errors fall back to an interactive login; a bare ``except``
# would also swallow KeyboardInterrupt/SystemExit and re-prompt after the
# user tried to cancel.
try:
    if wandb.api.api_key is None:
        print("WandB에 로그인이 필요합니다.")
        wandb.login()
    else:
        print(f"WandB 로그인 상태: {wandb.api.viewer()['username']}")
except Exception:
    print("WandB 로그인을 진행합니다...")
    wandb.login()

# Project settings (the part each member edits).
PROJECT_NAME = "document-classification-team-CV"  # identical for every team member
ENTITY = None  # None -> each member's personal account
EXPERIMENT_NAME = "efficientnet-b3-baseline"  # change per member (e.g. "member1-hyperopt", "member2-augmentation")

print(f"프로젝트: {PROJECT_NAME}")
print(f"실험명: {EXPERIMENT_NAME}")
print("팀원들은 EXPERIMENT_NAME을 각자 다르게 변경해주세요!")

WandB 로그인 상태: kimsunmin0227
프로젝트: document-classification-team-CV
실험명: efficientnet-b3-baseline
팀원들은 EXPERIMENT_NAME을 각자 다르게 변경해주세요!


In [None]:
# =============================================================================
# 3. Seed & basic augmentations (Mixup)
# =============================================================================

# Fix the seed of every random number generator the notebook relies on.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)

# Python, NumPy and PyTorch (CPU, current GPU, all GPUs) are seeded in the
# same order as before, just driven from one loop.
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

# NOTE(review): cuDNN autotuning is left enabled; confirm that this is
# acceptable if run-to-run reproducibility matters for the experiment.
torch.backends.cudnn.benchmark = True


In [None]:

# =============================================================================
# 4. Dataset Class
# =============================================================================

class ImageDataset(Dataset):
    """Image classification dataset backed by a CSV file or a DataFrame.

    Each row is expected to hold exactly two columns: the image file name
    and the integer class label.

    Args:
        data: Path to a CSV file, or a DataFrame with (file name, target) rows.
        path: Directory that contains the image files.
        transform: Callable invoked as ``transform(image=ndarray)['image']``.
            When ``problem_class_transform`` is used, this is expected to
            return a normalized ``(C, H, W)`` tensor (Normalize + ToTensorV2).
        problem_class_transform: Optional extra augmentation, invoked as
            ``problem_class_transform(image=uint8 HWC ndarray)['image']``.
        problem_classes: Labels that receive the extra augmentation.
            Pass ``None`` to apply it to every sample.
    """

    # Statistics used to undo / redo normalization around the extra
    # augmentation; they mirror the values hard-coded in the original code.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self, data, path, transform=None, problem_class_transform=None,
                 problem_classes=(3, 7, 14)):
        # Read the CSV if a path was given; otherwise use the DataFrame as is.
        if isinstance(data, str):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values  # DataFrame -> numpy array of rows
        self.path = path
        self.transform = transform
        self.problem_class_transform = problem_class_transform
        self.problem_classes = (
            None if problem_classes is None
            else frozenset(int(c) for c in problem_classes)
        )

    def __len__(self):
        return len(self.df)

    def _is_problem_class(self, target):
        """Return True when ``target`` should receive the extra augmentation."""
        if self.problem_classes is None:
            return True
        return int(target) in self.problem_classes

    def _apply_extra_augmentation(self, img):
        """Apply ``problem_class_transform`` to a normalized (C, H, W) tensor.

        The tensor is de-normalized to a uint8 HWC array, augmented, and
        normalized again. Everything stays in float32 so the result has the
        same dtype as samples that skip this step.
        """
        mean = np.asarray(self._MEAN, dtype=np.float32)
        std = np.asarray(self._STD, dtype=np.float32)

        # Tensor (C, H, W) -> de-normalized uint8 image (H, W, C).
        img_np = img.permute(1, 2, 0).cpu().numpy()
        img_np = (img_np * std + mean) * 255
        # Round before casting: a plain astype() truncates, which darkens
        # pixels that land just below an integer after the float round trip.
        img_np = np.clip(np.rint(img_np), 0, 255).astype(np.uint8)

        # The augmentation returns a numpy array in (H, W, C) layout.
        extra_aug = self.problem_class_transform(image=img_np)['image']

        # Back to a (C, H, W) float32 tensor in [0, 1].
        extra_aug = torch.from_numpy(np.ascontiguousarray(extra_aug))
        extra_aug = extra_aug.permute(2, 0, 1).float() / 255.0

        # Re-normalize with float32 statistics; float64 statistics would
        # promote the whole tensor to float64.
        mean_t = torch.tensor(self._MEAN, dtype=torch.float32).view(3, 1, 1)
        std_t = torch.tensor(self._STD, dtype=torch.float32).view(3, 1, 1)
        return ((extra_aug - mean_t) / std_t).contiguous()

    def __getitem__(self, idx):
        name, target = self.df[idx]
        # Close the file handle promptly and force 3 channels so grayscale or
        # RGBA files do not break the 3-channel normalization downstream.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert('RGB'))

        # Base transform.
        if self.transform:
            img = self.transform(image=img)['image']

        # Extra augmentation only for the configured problem classes.
        if self.problem_class_transform and self._is_problem_class(target):
            img = self._apply_extra_augmentation(img)

        return img, target

In [None]:
 
# Cutout (Random Erasing)
def random_erasing(image, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)):
    """Zero out one random rectangle of a batch, in place.

    Args:
        image: Tensor indexed as (N, C, H, W); modified in place.
        p: Probability of attempting the erase.
        scale: (min, max) fraction of the H*W area to erase.
        ratio: (min, max) value of the sampled aspect factor.

    Returns:
        The same ``image`` object. It is returned untouched when the erase is
        skipped or when the sampled rectangle does not fit inside the image.
    """
    if random.random() > p:
        return image

    height, width = image.shape[2], image.shape[3]

    # Sample the rectangle: area first, then the aspect factor.
    erase_area = random.uniform(scale[0], scale[1]) * (height * width)
    aspect = random.uniform(ratio[0], ratio[1])
    patch_h = int(round(math.sqrt(erase_area * aspect)))
    patch_w = int(round(math.sqrt(erase_area / aspect)))

    # A rectangle as large as the image (or larger) is simply dropped.
    if patch_h >= height or patch_w >= width:
        return image

    left = random.randint(0, width - patch_w)
    top = random.randint(0, height - patch_h)
    # The same region is blanked for every sample and channel.
    image[:, :, top:top + patch_h, left:left + patch_w] = 0.0
    return image

# Random crop followed by a resize back to the input resolution
def random_crop(image, crop_size=0.8):
    """Crop one random window from a batch and resize it to the original size.

    Args:
        image: Tensor indexed as (N, C, H, W).
        crop_size: Side-length fraction kept by the crop window.

    Returns:
        A bilinearly resized tensor with the input's H and W, or ``image``
        itself when the window would not be strictly smaller than the input.
    """
    height, width = image.shape[2], image.shape[3]
    win_h, win_w = int(height * crop_size), int(width * crop_size)

    if not (win_h < height and win_w < width):
        return image

    left = random.randint(0, width - win_w)
    top = random.randint(0, height - win_h)
    window = image[:, :, top:top + win_h, left:left + win_w]

    # Scale the window back up so every batch keeps the same spatial size.
    return torch.nn.functional.interpolate(
        window, size=(height, width), mode='bilinear', align_corners=False
    )

# Mixup
def mixup_data(x, y, alpha=1.0):
    """Blend each sample of a batch with a randomly chosen partner.

    Args:
        x: Input batch; the first dimension is the batch dimension.
        y: Targets aligned with ``x``.
        alpha: Beta-distribution parameter. ``alpha <= 0`` disables mixing
            (``lam`` is fixed to 1).

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y`` and
        ``y_b`` is ``y[perm]``.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    batch_size = x.size(0)
    # Follow the input's device instead of forcing CUDA, so the function
    # also works on CPU-only machines and on non-default GPUs.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def train_one_epoch(loader, model, optimizer, loss_fn, device, epoch=None, fold=None, scaler=None):
    """Train ``model`` for one epoch with AMP and one random batch-level augmentation.

    For every batch exactly one of mixup, cutout (random erasing) or random
    crop is chosen with equal probability.

    Args:
        loader: Iterable of ``(image, targets)`` batches.
        model: Network to train.
        optimizer: Optimizer that owns the model parameters.
        loss_fn: Callable ``loss_fn(logits, targets)``.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used for labels and the logged step.
        fold: Fold identifier, used as a prefix for WandB keys.
        scaler: Optional ``GradScaler`` to reuse across epochs so its loss
            scale is not reset every epoch. A new one is created when omitted.

    Returns:
        dict with ``train_loss`` (mean batch loss), ``train_acc`` and
        ``train_f1`` (macro F1), all measured against the unmixed targets.
    """
    if scaler is None:
        scaler = GradScaler()
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    # ``epoch`` is zero-based, so test against None rather than truthiness;
    # otherwise the first epoch is displayed as '?'.
    epoch_label = epoch + 1 if epoch is not None else '?'
    pbar = tqdm(loader, desc=f"Training Epoch {epoch_label}")

    for batch_count, (image, targets) in enumerate(pbar):
        image = image.to(device)
        targets = targets.to(device)

        # random.choices normalizes the weights, so each augmentation is
        # picked with probability 1/3 and there is no "no augmentation" case.
        aug_type = random.choices(['mixup', 'cutout', 'random_crop'], weights=[0.3, 0.3, 0.3])[0]
        mixup_applied = aug_type == 'mixup'
        cutout_applied = aug_type == 'cutout'
        random_crop_applied = aug_type == 'random_crop'

        inputs = image
        if mixup_applied:
            inputs, y_a, y_b, lam = mixup_data(image, targets, alpha=1.0)
        elif cutout_applied:
            # random_erasing itself only erases with probability p.
            inputs = random_erasing(image, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3))
        elif random_crop_applied:
            inputs = random_crop(image, crop_size=0.8)

        with autocast():
            preds = model(inputs)

        # The loss is computed in float32 for numerical stability.
        logits = preds.float()
        if mixup_applied:
            loss = lam * loss_fn(logits, y_a) + (1 - lam) * loss_fn(logits, y_b)
        else:
            loss = loss_fn(logits, targets)

        model.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        # Gradients are still multiplied by the loss scale here; unscale them
        # first so the clipping threshold applies to the true gradient norm.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        train_loss += batch_loss
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        # Detailed per-batch logging (every 100 batches).
        if batch_count % 100 == 0 and wandb.run is not None:
            step = epoch * len(loader) + batch_count if epoch is not None else batch_count
            wandb.log({
                f"fold_{fold}/train_batch_loss": batch_loss,
                f"fold_{fold}/mixup_applied": int(mixup_applied),
                f"fold_{fold}/cutout_applied": int(cutout_applied),
                f"fold_{fold}/random_crop_applied": int(random_crop_applied),
                f"fold_{fold}/batch_step": step
            })

        pbar.set_description(f"Loss: {batch_loss:.4f}, Mixup: {mixup_applied}, Cutout: {cutout_applied}, RandomCrop: {random_crop_applied}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

def validate_one_epoch(loader, model, loss_fn, device, epoch=None, fold=None, log_confusion=False,
                       num_classes=17, target_classes=(3, 7, 14)):
    """Evaluate ``model`` on ``loader`` and report overall and per-class metrics.

    Args:
        loader: Iterable of ``(image, targets)`` batches.
        model: Network to evaluate (switched to eval mode).
        loss_fn: Callable ``loss_fn(logits, targets)``.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used only for the progress-bar label.
        fold: Fold identifier, used as a prefix for WandB keys.
        log_confusion: When True, also log a confusion matrix and the F1
            score of every class to WandB.
        num_classes: Number of classes; labels are assumed to be
            ``0 .. num_classes - 1``.
        target_classes: "Problem" classes whose F1 scores are tracked
            separately.

    Returns:
        dict with ``val_loss``, ``val_acc``, ``val_f1`` (macro),
        ``problem_class_f1`` (``{"class_<id>_f1": score}``) and
        ``avg_problem_f1``.
    """
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []

    # ``epoch`` is zero-based, so compare against None; a truthiness test
    # would label the first epoch as '?'.
    epoch_label = epoch + 1 if epoch is not None else '?'

    with torch.no_grad():
        pbar = tqdm(loader, desc=f"Validating Epoch {epoch_label}")
        for image, targets in pbar:
            image = image.to(device)
            targets = targets.to(device)

            preds = model(image)
            loss = loss_fn(preds, targets)

            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())

            pbar.set_description(f"Val Loss: {loss.item():.4f}")

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')

    # Per-class F1 over the full label set; classes absent from this split
    # score 0 instead of raising.
    class_f1_scores = f1_score(
        targets_list, preds_list, average=None,
        labels=list(range(num_classes)), zero_division=0,
    )

    # Track the problem classes separately.
    problem_class_f1 = {}
    for cls in target_classes:
        if cls < len(class_f1_scores):
            cls_f1 = class_f1_scores[cls]
            problem_class_f1[f"class_{cls}_f1"] = cls_f1
            print(f"  Class {cls} F1: {cls_f1:.4f}")

    avg_problem_f1 = np.mean(list(problem_class_f1.values())) if problem_class_f1 else 0.0
    print(f"  Problem Classes Avg F1: {avg_problem_f1:.4f}")

    # WandB logging of the summary metrics and the problem-class scores.
    if wandb.run is not None:
        log_dict = {
            f"fold_{fold}/val_loss": val_loss,
            f"fold_{fold}/val_acc": val_acc,
            f"fold_{fold}/val_f1": val_f1,
            f"fold_{fold}/problem_classes_avg_f1": avg_problem_f1,
        }
        for key, score in problem_class_f1.items():
            log_dict[f"fold_{fold}/{key}"] = score

        wandb.log(log_dict)

    # Confusion matrix and full per-class F1 (requested on the last epoch).
    if log_confusion and wandb.run is not None:
        try:
            confusion_log = {
                f"fold_{fold}/confusion_matrix": wandb.plot.confusion_matrix(
                    probs=None,
                    y_true=targets_list,
                    preds=preds_list,
                    class_names=[f"Class_{i}" for i in range(num_classes)]
                )
            }
            # One log call for all classes: every wandb.log call advances the
            # run's step, so logging each class separately scattered the
            # values over many steps.
            for i, class_f1 in enumerate(class_f1_scores):
                confusion_log[f"fold_{fold}/all_class_{i}_f1"] = class_f1
            wandb.log(confusion_log)

        except Exception as e:
            print(f" Confusion matrix 로깅 실패: {e}")

    ret = {
        "val_loss": val_loss,
        "val_acc": val_acc,
        "val_f1": val_f1,
        "problem_class_f1": problem_class_f1,  # per problem class F1
        "avg_problem_f1": avg_problem_f1,      # mean F1 over the problem classes
    }

    return ret

In [None]:
# =============================================================================
# 6. Hyper-parameters with WandB Config
# =============================================================================

# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f" Using device: {device}")

# data config
data_path = '../data/'

# model config
model_name = 'efficientnet_b3' # 'resnet50' 'efficientnet-b0', ...

# training config
img_size = 384
LR = 5e-4
EPOCHS = 50
BATCH_SIZE = 64
# NOTE(review): 30 workers is tuned for one specific machine; confirm it
# does not exceed the CPU count where this notebook runs.
num_workers = 30

# K-Fold config
N_FOLDS = 5  # 5-fold cross validation

# Configuration dictionary recorded with every WandB run. It is metadata
# only, so the values must mirror what the training code really uses.
config = {
    # Model config
    "model_name": model_name,
    "img_size": img_size,
    "num_classes": 17,
    "architecture": "EfficientNet-B3",

    # Training config
    "lr": LR,
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "num_workers": num_workers,
    "device": str(device),

    # K-Fold config
    "n_folds": N_FOLDS,
    "seed": SEED,
    "cv_strategy": "StratifiedKFold",

    # Augmentation & Training techniques
    "mixup_alpha": 1.0,
    # NOTE(review): train_one_epoch picks mixup with weight 0.3 out of three
    # equal weights, i.e. an effective probability of 1/3.
    "mixup_prob": 0.3,
    # Matches the label_smoothing passed to nn.CrossEntropyLoss in the fold
    # loop (0.15); the previously logged 0.2 did not match the training code.
    "label_smoothing": 0.15,
    "gradient_clipping": 1.0,
    "mixed_precision": True,

    # Optimizer & Scheduler
    "optimizer": "Adam",
    "scheduler": "CosineAnnealingLR",

    # Data
    "data_path": data_path,
    "train_transforms": "Advanced",
    "test_transforms": "Basic",
}

print(" 하이퍼파라미터 설정 완료!")
print(f" 모델: {model_name}")
print(f" 이미지 크기: {img_size}x{img_size}")
print(f" 배치 크기: {BATCH_SIZE}")
print(f" 학습률: {LR}")
print(f" 에폭: {EPOCHS}")


 Using device: cuda
 하이퍼파라미터 설정 완료!
 모델: efficientnet_b3
 이미지 크기: 384x384
 배치 크기: 64
 학습률: 0.0005
 에폭: 50


In [None]:

# =============================================================================
# 7. Optuna Hyperparameter Tuning (선택적)
# =============================================================================

USE_OPTUNA = False  # set to True to run the hyper-parameter search

if USE_OPTUNA:
    print("🔍 Optuna 하이퍼파라미터 튜닝 시작...")

    def objective(trial):
        """Optuna objective: sample lr / batch size and return a score to maximize.

        NOTE: the evaluation is still a placeholder that returns a random
        number, so the "best" parameters chosen by the study are arbitrary
        until a real cross-validated score is returned here.
        """
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

        # Record every trial as its own WandB run.
        optuna_run = wandb.init(
            project=PROJECT_NAME,
            entity=ENTITY,
            name=f"optuna-trial-{trial.number}",
            config={**config, "lr": lr, "batch_size": batch_size},
            tags=["optuna", "hyperparameter-tuning"],
            group="optuna-study",
            job_type="hyperparameter-optimization",
            reinit=True
        )

        try:
            # TODO: replace with a quick (e.g. 3-fold) cross-validated F1 score.
            return np.random.random()  # placeholder
        finally:
            # Always close the trial's run, even if the evaluation raises.
            optuna_run.finish()

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=10)

    # Apply the best parameters found by the study.
    best_params = study.best_params
    LR = best_params.get('lr', LR)
    BATCH_SIZE = best_params.get('batch_size', BATCH_SIZE)
    config.update(best_params)
    print(f"🎯 Optuna 최적 파라미터: {best_params}")
else:
    print("⏭️ Optuna 튜닝 건너뛰기 (USE_OPTUNA=False)")

SyntaxError: invalid syntax (2370869770.py, line 6)

In [None]:
# =============================================================================
# 8. Data Transforms
# =============================================================================

# Training-time augmentation pipeline.
# NOTE(review): several keyword names used below (e.g. value, var_limit,
# scale_min/scale_max, quality_lower/quality_upper, max_holes) may have been
# renamed in newer albumentations releases — confirm against the version
# pinned in requirements.txt.
trn_transform = A.Compose([
    # Aspect-ratio-preserving resize, then pad to a square img_size canvas
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
    
    # Document-oriented rotations (fixed 90/180/270 degrees) plus a small-angle rotation
    A.OneOf([
        A.Rotate(limit=[90,90], p=1.0),
        A.Rotate(limit=[180,180], p=1.0),
        A.Rotate(limit=[270,270], p=1.0),
        A.Rotate(limit=(-15, 15), p=1.0),  # small-angle rotation
    ], p=0.7),
    
    # Geometric distortions (one of four, applied 60% of the time)
    A.OneOf([
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=5, p=1.0),
        A.ElasticTransform(alpha=50, sigma=5, p=1.0),
        A.GridDistortion(num_steps=5, distort_limit=0.2, p=1.0),
        A.OpticalDistortion(distort_limit=0.2, shift_limit=0.1, p=1.0),
    ], p=0.6),
    
    # Colour and lighting changes
    A.OneOf([
        A.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3, hue=0.1, p=1.0),
        A.RandomBrightnessContrast(brightness_limit=0.4, contrast_limit=0.4, p=1.0),
        A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=1.0),
        A.RandomGamma(gamma_limit=(70, 130), p=1.0),
    ], p=0.9),
    
    # Blur
    A.OneOf([
        A.MotionBlur(blur_limit=(5, 15), p=1.0),
        A.GaussianBlur(blur_limit=(3, 15), p=1.0),
        A.MedianBlur(blur_limit=7, p=1.0),
        A.Blur(blur_limit=7, p=1.0),
    ], p=0.8),
    
    # Noise
    A.OneOf([
        A.GaussNoise(var_limit=(10.0, 150.0), p=1.0),
        A.ISONoise(color_shift=(0.01, 0.08), intensity=(0.1, 0.8), p=1.0),
        A.MultiplicativeNoise(multiplier=(0.9, 1.1), p=1.0),
    ], p=0.8),
    
    # Document quality simulation (scan / photocopy artefacts)
    A.OneOf([
        A.Downscale(scale_min=0.7, scale_max=0.9, p=1.0),
        A.ImageCompression(quality_lower=60, quality_upper=95, p=1.0),
        A.Posterize(num_bits=6, p=1.0),
    ], p=0.5),
    
    # Pixel-level transforms
    A.OneOf([
        A.ChannelShuffle(p=1.0),
        A.InvertImg(p=1.0),
        A.Solarize(threshold=128, p=1.0),
        A.Equalize(p=1.0),
    ], p=0.3),
    
    # Spatial flips / transpose
    A.OneOf([
        A.HorizontalFlip(p=1.0),
        A.VerticalFlip(p=1.0),  # may be useful for documents as well
        A.Transpose(p=1.0),
    ], p=0.6),
    
    # Region removal (Cutout family)
    A.OneOf([
        A.CoarseDropout(max_holes=8, max_height=32, max_width=32, 
                       min_holes=1, min_height=8, min_width=8, 
                       fill_value=0, p=1.0),
        A.GridDropout(ratio=0.3, unit_size_min=8, unit_size_max=32, 
                     holes_number_x=5, holes_number_y=5, p=1.0),
    ], p=0.4),
    
    # Final normalization and conversion to a tensor
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# Extra augmentation intended for the problem classes (3, 7, 14).
# It has no Normalize/ToTensorV2 step: ImageDataset feeds it a de-normalized
# uint8 image and re-normalizes the result itself.
problem_class_extra_transform = A.Compose([
    # Additional rotation
    A.OneOf([
        A.Rotate(limit=(-20, 20), p=1.0),  # wider range
        A.Rotate(limit=(-10, 10), p=1.0),
    ], p=0.5),  # extra rotation with 50% probability
    
    # Additional colour variation
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=1.0),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=1.0),
    ], p=0.4),
    
    # Additional scale variation
    A.OneOf([
        A.RandomScale(scale_limit=0.1, p=1.0),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=3, p=1.0),
    ], p=0.3),

    # Restore the img_size canvas (important: RandomScale changes the image size)
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
])

# Deterministic transform for validation / test images: resize, pad, normalize
tst_transform = A.Compose([
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

print("✅ 데이터 변환 설정 완료!")

✅ Optuna objective function 정의 완료!


In [None]:
# wandb.finish()
# =============================================================================
# WandB MailboxClosedError 해결을 위한 정리 및 재시작
# =============================================================================

import wandb
import time
import os

# 1. Finish any WandB run that is still active in this kernel
try:
    if wandb.run is not None:
        print("기존 WandB 런 종료 중...")
        wandb.finish()
        time.sleep(2)  # give the run a moment to shut down
except Exception as e:
    # Best effort: a failure here is reported but does not stop the cell
    print(f"기존 런 종료 중 에러 (무시 가능): {e}")

# 2. Tear down WandB's internal state
try:
    # Reset WandB's internal state
    wandb.teardown()
    time.sleep(1)
except Exception as e:
    print(f"WandB teardown 중 에러 (무시 가능): {e}")

# 3. Reset the start-method environment variable (optional)
os.environ['WANDB_START_METHOD'] = 'thread'  # intended to avoid process conflicts

# 4. WandB initialization with retries
def safe_wandb_init(project, name, config, *, max_retries=3, retry_delay=5, **kwargs):
    """Start a WandB run, retrying on failure instead of raising.

    Args:
        project: WandB project name.
        name: Run name.
        config: Configuration dictionary stored with the run.
        max_retries: Maximum number of initialization attempts.
        retry_delay: Seconds to wait between attempts.
        **kwargs: Forwarded unchanged to ``wandb.init``.

    Returns:
        The run returned by ``wandb.init``, or ``None`` when every attempt
        failed, so the caller can continue without WandB.
    """
    for attempt in range(1, max_retries + 1):
        try:
            print(f"WandB 초기화 시도 {attempt}/{max_retries}...")

            # Finish a run that is still active before starting a new one.
            if wandb.run is not None:
                wandb.finish()
                time.sleep(1)

            run = wandb.init(
                project=project,
                name=name,
                config=config,
                **kwargs
            )

            print(f"✅ WandB 초기화 성공!")
            return run

        except Exception as e:
            print(f"❌ 시도 {attempt} 실패: {e}")

        if attempt == max_retries:
            break

        print(f"⏳ {retry_delay}초 후 재시도...")
        time.sleep(retry_delay)

        # Reset WandB before the next attempt. A teardown failure is reported
        # rather than silently swallowed, and it never aborts the retry loop.
        try:
            wandb.teardown()
            time.sleep(1)
        except Exception as teardown_err:
            print(f"WandB teardown 중 에러 (무시 가능): {teardown_err}")

    print("❌ 모든 재시도 실패. WandB 없이 진행합니다.")
    return None

# 5. Start the main experiment run through the retrying helper
print("🔧 WandB 연결 문제 해결 중...")

# Collect the run options first, then hand them to safe_wandb_init in one go.
_main_run_options = dict(
    project=PROJECT_NAME,
    name=f"{EXPERIMENT_NAME}-{datetime.now().strftime('%m%d-%H%M')}",
    config=config,
    entity=ENTITY,
    tags=["k-fold-cv", "ensemble", model_name, "baseline", "main-experiment"],
    group="k-fold-experiment",
    job_type="cross-validation",
    notes=f"{N_FOLDS}-Fold Cross Validation with {model_name}",
)
main_run = safe_wandb_init(**_main_run_options)

# safe_wandb_init returns None when WandB could not be reached.
if main_run is None:
    print("\n⚠️ WandB 없이 실험을 진행합니다.")
    print("결과는 콘솔과 로컬 파일로만 저장됩니다.")
else:
    print(f"\n🚀 WandB 실험 시작!")
    print(f"📊 대시보드: {main_run.url}")
    print(f"📋 실험명: {main_run.name}")

[I 2025-09-06 14:59:38,760] A new study created in memory with name: no-name-1e736f5e-ec0e-4b52-b144-6521f8ae3086


🚀 Optuna 하이퍼파라미터 튜닝 시작!


  0%|          | 0/20 [00:00<?, ?it/s]


🔍 Trial 0: Testing hyperparameters...
   LR: 0.000133
   Batch Size: 16
   Epochs: 29
   Image Size: 448
   Focal Alpha: 1.749
   Focal Gamma: 1.425
   📊 Fold 1/5 시작...


Loss: 4.5391, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:16<00:00,  4.65it/s]
Val Loss: 5.5611: 100%|██████████| 20/20 [00:02<00:00,  7.11it/s]
Loss: 4.6797, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s]
Val Loss: 5.2895: 100%|██████████| 20/20 [00:02<00:00,  9.61it/s]
Loss: 3.4219, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.76it/s]
Val Loss: 5.2376: 100%|██████████| 20/20 [00:02<00:00,  9.14it/s]
Loss: 3.4141, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s]
Val Loss: 4.1323: 100%|██████████| 20/20 [00:02<00:00,  9.47it/s]
Loss: 4.2188, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 79/79 [00:13<00:00,  5.78it/s]
Val Loss: 3.8510: 100%|██████████| 20/20 [00:02<00:00,  9.47it/s]
Loss: 3.5293, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.73it/s]
Val Loss: 3.5639

In [None]:
# =============================================================================
# 9. Load Data & Start K-Fold Cross Validation with WandB
# =============================================================================

# Load the full training table
train_df = pd.read_csv("../data/train.csv")
print(f"학습 데이터: {len(train_df)}개 샘플")

# Class distribution, indexed by class label in ascending order
class_counts = train_df['target'].value_counts().sort_index()
print(f" 클래스 분포: {dict(class_counts)}")

# K-Fold splitter
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# Containers for the K-Fold results
fold_results = []
fold_models = []  # best model of each fold

# Start the main WandB experiment run
main_run = wandb.init(
    project=PROJECT_NAME,
    entity=ENTITY,
    name=f"{EXPERIMENT_NAME}-{datetime.now().strftime('%m%d-%H%M')}",
    config=config,
    tags=["k-fold-cv", "ensemble", model_name, "baseline", "main-experiment"],
    group="k-fold-experiment",
    job_type="cross-validation",
    notes=f"{N_FOLDS}-Fold Cross Validation with {model_name}",
    reinit=True,
)

print(f"\n🚀 WandB 실험 시작!")
print(f"📊 대시보드: {main_run.url}")
print(f"📋 실험명: {main_run.name}")

# Log dataset information
wandb.log({
    "dataset/total_samples": len(train_df),
    "dataset/num_classes": 17,
    "dataset/samples_per_fold": len(train_df) // N_FOLDS,
})

# Class distribution chart. Label each bar with the real class id taken from
# the index; enumerate() positions would mislabel the bars as soon as a
# class id is missing from the data.
class_dist_data = [
    [f"Class_{label}", int(count)] for label, count in class_counts.items()
]
wandb.log({
    "dataset/class_distribution": wandb.plot.bar(
        wandb.Table(data=class_dist_data, columns=["Class", "Count"]),
        "Class", "Count",
        title="Training Data Class Distribution"
    )
})

print(f"\n{'='*60}")
print(f"🎯 {N_FOLDS}-FOLD CROSS VALIDATION 시작")
print(f"{'='*60}")


✅ 데이터 변환 설정 완료!


In [None]:

# =============================================================================
# 10. K-Fold Cross Validation Loop with WandB
# =============================================================================

for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['target'])):
    print(f"\n{'='*50}")
    print(f" FOLD {fold + 1}/{N_FOLDS}")
    print(f"{'='*50}")
    
    # 각 fold별 child run 생성
    fold_run = wandb.init(
        project=PROJECT_NAME,
        entity=ENTITY,
        name=f"fold-{fold+1}-{model_name}-{datetime.now().strftime('%H%M')}",
        config=config,
        tags=["fold", f"fold-{fold+1}", model_name, "child-run"],
        group="k-fold-experiment",
        job_type=f"fold-{fold+1}",
        reinit=True  # 새로운 run 시작 허용
    )
    
    print(f"📊 Fold {fold+1} Dashboard: {fold_run.url}")
    
    # 현재 fold의 train/validation 데이터 분할
    train_fold_df = train_df.iloc[train_idx].reset_index(drop=True)
    val_fold_df = train_df.iloc[val_idx].reset_index(drop=True)
    
    # 데이터 분할 정보 로깅
    wandb.log({
        "fold_info/fold_number": fold + 1,
        "fold_info/train_samples": len(train_fold_df),
        "fold_info/val_samples": len(val_fold_df),
        "fold_info/train_ratio": len(train_fold_df) / len(train_df),
        "fold_info/val_ratio": len(val_fold_df) / len(train_df)
    })
    
    # 현재 fold의 Dataset 생성
    trn_dataset = ImageDataset(
        train_fold_df,
        "../data/train/",
        transform=trn_transform,
        problem_class_transform=problem_class_extra_transform
    )
    
    val_dataset = ImageDataset(
        val_fold_df,
        "../data/train/",
        transform=tst_transform  # 검증에는 증강 적용 안함
    )
    
    # 현재 fold의 DataLoader 생성
    trn_loader = DataLoader(
        trn_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    
    print(f"Train samples: {len(trn_dataset)}, Validation samples: {len(val_dataset)}")
    
    # 모델 초기화 (각 fold마다 새로운 모델)
    model = timm.create_model(
        model_name,
        pretrained=True,
        num_classes=17
    ).to(device)
    
    class_weights = torch.FloatTensor([
    4 if i in [3, 7, 14] else 1.0 for i in range(17)]).to(device)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.15)
    optimizer = Adam(model.parameters(), lr=LR)
    
    # Learning Rate Scheduler 추가
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    
    # 현재 fold의 최고 성능 추적
    best_val_f1 = 0.0
    best_model = None
    patience = 0
    max_patience = 7
    
    print(f" 모델 학습 시작 - Fold {fold+1}")
    
    # =============================================================================
    # 11. Training Loop for Current Fold
    # =============================================================================
    
    for epoch in range(EPOCHS):
        print(f"\n📈 Epoch {epoch+1}/{EPOCHS}")
        
        # Training
        train_ret = train_one_epoch(
            trn_loader, model, optimizer, loss_fn, device, 
            epoch=epoch, fold=fold+1
        )
        
        # Validation
        val_ret = validate_one_epoch(
            val_loader, model, loss_fn, device, 
            epoch=epoch, fold=fold+1,
            log_confusion=(epoch == EPOCHS-1)  # 마지막 epoch에만 confusion matrix
        )
        
        # Learning rate 로깅
        current_lr = optimizer.param_groups[0]['lr']
        
        # 문제 클래스 성능 개선 여부 확인
        problem_classes_improved = ""
        if 'avg_problem_f1' in val_ret:
            if val_ret['avg_problem_f1'] > best_val_f1 * 0.95:  # 전체 성능의 95% 이상이면 양호
                problem_classes_improved = "✅ Problem classes performing well"
            else:
                problem_classes_improved = "⚠️ Problem classes need attention"


        # WandB에 metrics 로깅
        log_data = {
            "epoch": epoch + 1,
            "fold": fold + 1,
            "train/loss": train_ret['train_loss'],
            "train/accuracy": train_ret['train_acc'], 
            "train/f1": train_ret['train_f1'],
            "val/loss": val_ret['val_loss'],
            "val/accuracy": val_ret['val_acc'],
            "val/f1": val_ret['val_f1'],
            "learning_rate": current_lr,
            "optimizer/lr": current_lr
        }
        
        # GPU 메모리 사용량 로깅
        if torch.cuda.is_available():
            gpu_memory_used = torch.cuda.memory_allocated(0) / 1e9
            gpu_memory_total = torch.cuda.get_device_properties(0).total_memory / 1e9
            log_data.update({
                "system/gpu_memory_used_gb": gpu_memory_used,
                "system/gpu_memory_total_gb": gpu_memory_total,
                "system/gpu_utilization_pct": (gpu_memory_used / gpu_memory_total) * 100
            })

        if 'avg_problem_f1' in val_ret:
            log_data["val/problem_classes_avg_f1"] = val_ret['avg_problem_f1']
        
        # 개별 문제 클래스 성능
        for cls in [3, 7, 14]:
            if f"class_{cls}_f1" in val_ret['problem_class_f1']:
                log_data[f"val/class_{cls}_f1"] = val_ret['problem_class_f1'][f"class_{cls}_f1"]
    
        
        wandb.log(log_data)
        
        # Scheduler step
        scheduler.step()
        
        print(f" Epoch {epoch+1:2d} | "
              f"Train Loss: {train_ret['train_loss']:.4f} | "
              f"Train F1: {train_ret['train_f1']:.4f} | "
              f"Val Loss: {val_ret['val_loss']:.4f} | "
              f"Val F1: {val_ret['val_f1']:.4f} | "
              f"LR: {current_lr:.2e}")
        
        # 문제 클래스 성능 별도 출력
        if 'avg_problem_f1' in val_ret:
            print(f"         Problem Classes (3,7,14) Avg F1: {val_ret['avg_problem_f1']:.4f} | {problem_classes_improved}")

        # 최고 성능 모델 저장 (전체 F1과 문제 클래스 성능 모두 고려)
        current_val_f1 = val_ret['val_f1']
        problem_class_bonus = 0
    
        # 문제 클래스 성능이 좋으면 보너스 점수 부여
        if 'avg_problem_f1' in val_ret and val_ret['avg_problem_f1'] > 0.90:
            problem_class_bonus = 0.001  # 작은 보너스로 동점일 때 문제 클래스 성능 우선
        
        adjusted_f1 = current_val_f1 + problem_class_bonus
        
        if adjusted_f1 > best_val_f1:
            best_val_f1 = current_val_f1  # 실제 F1 스코어로 저장
            best_model = copy.deepcopy(model.state_dict())
            patience = 0
            
            # 최고 성능 모델 아티팩트로 저장
            model_path = f'best_model_fold_{fold+1}.pth'
            torch.save(best_model, model_path)
            wandb.save(model_path, policy="now")
            
            # 새로운 최고 성능 로깅
            best_performance_log = {
                f"best_performance/epoch": epoch + 1,
                f"best_performance/val_f1": best_val_f1,
                f"best_performance/val_acc": val_ret['val_acc'],
                f"best_performance/val_loss": val_ret['val_loss'],
            }
            
            # 문제 클래스 성능도 최고 성능에 포함
            if 'avg_problem_f1' in val_ret:
                best_performance_log[f"best_performance/problem_classes_f1"] = val_ret['avg_problem_f1']
            
            wandb.log(best_performance_log)
            
            improvement_msg = f"🎉 새로운 최고 성능! F1: {best_val_f1:.4f}"
            if 'avg_problem_f1' in val_ret:
                improvement_msg += f" (Problem Classes: {val_ret['avg_problem_f1']:.4f})"
            print(improvement_msg)
        else:
            patience += 1
            
        # Early stopping
        if patience >= max_patience and epoch > EPOCHS // 2:
            print(f"⏸️ Early stopping at epoch {epoch+1} (patience: {patience})")
            wandb.log({"early_stopping/epoch": epoch + 1})
            break
    
    # =============================================================================
    # 12. Fold Results Summary
    # =============================================================================
    
    # 현재 fold 결과 저장
    fold_result = {
        'fold': fold + 1,
        'best_val_f1': best_val_f1,
        'final_train_f1': train_ret['train_f1'],
        'train_samples': len(trn_dataset),
        'val_samples': len(val_dataset),
        'epochs_trained': epoch + 1,
        'early_stopped': patience >= max_patience
    }
    
    fold_results.append(fold_result)
    fold_models.append(best_model)
    
    # Fold 최종 요약 로깅
    wandb.log({
        "fold_summary/best_val_f1": best_val_f1,
        "fold_summary/final_train_f1": train_ret['train_f1'],
        "fold_summary/epochs_trained": epoch + 1,
        "fold_summary/improvement": best_val_f1 - val_ret['val_f1'],
        "fold_summary/early_stopped": patience >= max_patience
    })
    
    print(f"\n Fold {fold + 1} 완료!")
    print(f" 최고 Validation F1: {best_val_f1:.4f}")
    print(f" 학습된 에폭: {epoch + 1}/{EPOCHS}")
    
    # Fold run 종료
    wandb.finish()
    
    # 메모리 정리
    del model, optimizer, scheduler, trn_loader, val_loader
    torch.cuda.empty_cache()


학습 데이터: 1570개 샘플
 클래스 분포: {0: 100, 1: 46, 2: 100, 3: 100, 4: 100, 5: 100, 6: 100, 7: 100, 8: 100, 9: 100, 10: 100, 11: 100, 12: 100, 13: 74, 14: 50, 15: 100, 16: 100}



🚀 WandB 실험 시작!
📊 대시보드: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/pzrkpkjc
📋 실험명: efficientnet-b3-baseline-0906-1441

🎯 5-FOLD CROSS VALIDATION 시작


In [None]:
# =============================================================================
# 13. K-Fold Cross Validation Results Summary
# =============================================================================

print(f"\n{'='*60}")
print(" K-FOLD CROSS VALIDATION 최종 결과")
print(f"{'='*60}")

# Without at least one finished fold every statistic below is undefined
# (np.mean -> nan, max() -> ValueError), so stop before opening a WandB run.
if not fold_results:
    raise RuntimeError("fold_results가 비어 있습니다. K-Fold 학습 셀을 먼저 끝까지 실행하세요.")

# Per-fold values reused by the WandB payload and the console report.
val_f1_scores = [result['best_val_f1'] for result in fold_results]
epochs_per_fold = [result['epochs_trained'] for result in fold_results]
mean_f1 = np.mean(val_f1_scores)
std_f1 = np.std(val_f1_scores)
best_fold_f1 = max(val_f1_scores)
worst_fold_f1 = min(val_f1_scores)


def _init_summary_run():
    """Open a dedicated WandB run that holds the cross-validation summary."""
    return wandb.init(
        project=PROJECT_NAME,
        # Timestamp is evaluated at call time so a retry gets a fresh name.
        name=f"SUMMARY-{EXPERIMENT_NAME}-{datetime.now().strftime('%m%d-%H%M')}",
        config=config,
        tags=["summary", "cv-results", model_name],
        group="k-fold-experiment",
        job_type="summary",
        reinit=True
    )


try:
    # wandb.run points at the currently active run (None when there is none).
    if wandb.run is None:
        print(" 활성화된 run이 없어 새로운 summary run을 생성합니다.")
        active_run = _init_summary_run()
    else:
        print(" 기존 run을 사용합니다.")
        active_run = wandb.run

except Exception as e:
    print(f" Run 상태 확인 중 에러: {e}")
    # Retry once with a fresh run.
    active_run = _init_summary_run()

# One table row per fold.
fold_table = wandb.Table(columns=[
    "Fold", "Best_Val_F1", "Final_Train_F1", "Train_Samples",
    "Val_Samples", "Epochs_Trained", "Early_Stopped"
])

for result in fold_results:
    fold_table.add_data(
        result['fold'],
        result['best_val_f1'],
        result['final_train_f1'],
        result['train_samples'],
        result['val_samples'],
        result['epochs_trained'],
        result['early_stopped']
    )

# Best-effort logging: a WandB failure must not hide the console report below.
try:
    active_run.log({
        "cv_results/mean_f1": mean_f1,
        "cv_results/std_f1": std_f1,
        "cv_results/best_fold_f1": best_fold_f1,
        "cv_results/worst_fold_f1": worst_fold_f1,
        "cv_results/f1_range": best_fold_f1 - worst_fold_f1,
        "cv_results/fold_results_table": fold_table,
        "cv_results/n_folds": N_FOLDS,
        "cv_results/total_epochs": sum(epochs_per_fold),
        "cv_results/avg_epochs_per_fold": np.mean(epochs_per_fold),
        "cv_results/early_stopped_folds": sum(r['early_stopped'] for r in fold_results)
    })

    # Bar chart of the best validation F1 per fold.
    fold_performance_data = [[f"Fold {i+1}", score] for i, score in enumerate(val_f1_scores)]
    active_run.log({
        "cv_results/fold_performance_chart": wandb.plot.bar(
            wandb.Table(data=fold_performance_data, columns=["Fold", "F1_Score"]),
            "Fold", "F1_Score",
            title="K-Fold Cross Validation Performance"
        )
    })

    print(" CV 결과 로깅 완료!")

except Exception as e:
    print(f" WandB 로깅 중 에러: {e}")
    print(" 결과를 콘솔에 출력합니다:")

# The console report is printed whether or not WandB logging succeeded.
for result in fold_results:
    status = " Early Stopped" if result['early_stopped'] else " Completed"
    print(f"Fold {result['fold']}: {result['best_val_f1']:.4f} "
          f"({result['epochs_trained']} epochs) {status}")

print(f"\n 평균 CV F1: {mean_f1:.4f} ± {std_f1:.4f}")
print(f" 최고 Fold: {best_fold_f1:.4f}")
print(f" 최악 Fold: {worst_fold_f1:.4f}")
print(f" 성능 범위: {best_fold_f1 - worst_fold_f1:.4f}")



 FOLD 1/5


0,1
dataset/num_classes,▁
dataset/samples_per_fold,▁
dataset/total_samples,▁

0,1
dataset/num_classes,17
dataset/samples_per_fold,314
dataset/total_samples,1570


📊 Fold 1 Dashboard: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/2tjtce0l
Train samples: 1256, Validation samples: 314
 모델 학습 시작 - Fold 1

📈 Epoch 1/29


Loss: 6.0430, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:20<00:00,  3.95it/s]
Val Loss: 6.6661: 100%|██████████| 20/20 [00:02<00:00,  7.15it/s]


 Epoch  1 | Train Loss: 5.5644 | Train F1: 0.0499 | Val Loss: 5.3148 | Val F1: 0.0848 | LR: 1.33e-04
🎉 새로운 최고 성능! F1: 0.0848

📈 Epoch 2/29


Loss: 4.7266, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 79/79 [00:14<00:00,  5.63it/s]
Val Loss: 6.4441: 100%|██████████| 20/20 [00:02<00:00,  9.99it/s]


 Epoch  2 | Train Loss: 5.3612 | Train F1: 0.0506 | Val Loss: 4.8685 | Val F1: 0.1196 | LR: 1.33e-04
🎉 새로운 최고 성능! F1: 0.1196

📈 Epoch 3/29


Loss: 3.8223, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s]
Val Loss: 6.2088: 100%|██████████| 20/20 [00:01<00:00, 10.04it/s]


 Epoch  3 | Train Loss: 5.0298 | Train F1: 0.0837 | Val Loss: 4.8058 | Val F1: 0.1404 | LR: 1.31e-04
🎉 새로운 최고 성능! F1: 0.1404

📈 Epoch 4/29


Loss: 4.5508, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.64it/s]
Val Loss: 5.8374: 100%|██████████| 20/20 [00:01<00:00, 10.61it/s]


 Epoch  4 | Train Loss: 4.8693 | Train F1: 0.0790 | Val Loss: 4.6522 | Val F1: 0.1316 | LR: 1.29e-04

📈 Epoch 5/29


Loss: 3.6504, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s]
Val Loss: 5.1404: 100%|██████████| 20/20 [00:01<00:00, 10.01it/s]


 Epoch  5 | Train Loss: 4.5866 | Train F1: 0.1068 | Val Loss: 4.3914 | Val F1: 0.1477 | LR: 1.27e-04
🎉 새로운 최고 성능! F1: 0.1477

📈 Epoch 6/29


Loss: 4.1328, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.70it/s]
Val Loss: 5.1456: 100%|██████████| 20/20 [00:02<00:00,  9.49it/s]


 Epoch  6 | Train Loss: 4.4928 | Train F1: 0.1131 | Val Loss: 4.3188 | Val F1: 0.1324 | LR: 1.24e-04

📈 Epoch 7/29


Loss: 4.0547, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s]
Val Loss: 4.6002: 100%|██████████| 20/20 [00:02<00:00, 10.00it/s]


 Epoch  7 | Train Loss: 4.3326 | Train F1: 0.1251 | Val Loss: 4.0417 | Val F1: 0.1553 | LR: 1.20e-04
🎉 새로운 최고 성능! F1: 0.1553

📈 Epoch 8/29


Loss: 3.8828, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 79/79 [00:13<00:00,  5.67it/s]
Val Loss: 4.5807: 100%|██████████| 20/20 [00:02<00:00,  9.93it/s]


 Epoch  8 | Train Loss: 4.2297 | Train F1: 0.1406 | Val Loss: 4.2853 | Val F1: 0.1144 | LR: 1.15e-04

📈 Epoch 9/29


Loss: 4.4297, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 79/79 [00:13<00:00,  5.70it/s]
Val Loss: 4.1792: 100%|██████████| 20/20 [00:02<00:00,  9.86it/s]


 Epoch  9 | Train Loss: 4.2773 | Train F1: 0.1368 | Val Loss: 3.7489 | Val F1: 0.1984 | LR: 1.10e-04
🎉 새로운 최고 성능! F1: 0.1984

📈 Epoch 10/29


Loss: 4.8477, Mixup: True, Cutout: False, RandomCrop: False:   4%|▍         | 3/79 [00:06<02:50,  2.25s/it]
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised exception.
socket.send() raised excepti

KeyboardInterrupt: 

socket.send() raised exception.
socket.send() raised exception.


socket.send() raised exception.


In [None]:

# =============================================================================
# 14. Ensemble Models Preparation
# =============================================================================

# Rebuild one network per fold and load that fold's best checkpoint into it.
ensemble_models = []
print(f"\n🔧 앙상블 모델 준비 중...")

for i, state_dict in enumerate(fold_models):
    # pretrained=False: load_state_dict (strict by default) overwrites every
    # parameter and buffer right below, so fetching the pretrained weights
    # first would only cost a download and an extra initialisation.
    fold_model = timm.create_model(model_name, pretrained=False, num_classes=17).to(device)
    fold_model.load_state_dict(state_dict)
    fold_model.eval()
    ensemble_models.append(fold_model)
    print(f"Fold {i+1} 모델 로드 완료")

print(f" 총 {len(ensemble_models)}개 모델로 앙상블 구성")

# Record the ensemble setup in the active WandB run, or on the console when
# no run is active. Logging is best-effort and never aborts the cell.
try:
    if wandb.run is not None:
        wandb.run.log({
            "ensemble/num_models": len(ensemble_models),
            "ensemble/model_architecture": model_name,
            "ensemble/ensemble_type": "simple_average"
        })
    else:
        print("📊 앙상블 정보:")
        print(f"  - 모델 개수: {len(ensemble_models)}")
        print(f"  - 아키텍처: {model_name}")
        print(f"  - 앙상블 타입: simple_average")
except Exception as e:
    print(f"⚠️ 앙상블 정보 로깅 실패: {e}")


In [None]:


# =============================================================================
# 15. TTA (Test Time Augmentation) Setup
# =============================================================================

# Temperature Scaling 클래스 정의
class TemperatureScaling(nn.Module):
    """Scale logits by dividing them by a single learnable temperature.

    The temperature is held in a one-element ``nn.Parameter`` initialised to
    the ``temperature`` argument.
    """

    def __init__(self, temperature=1.5):
        super().__init__()
        initial_value = temperature * torch.ones(1)
        self.temperature = nn.Parameter(initial_value)

    def forward(self, logits):
        """Return ``logits`` divided element-wise by the temperature."""
        scaled_logits = torch.div(logits, self.temperature)
        return scaled_logits

print(f"\n TTA (Test Time Augmentation) 설정...")


def _build_tta_pipeline(extra_step=None):
    """Compose resize -> pad -> [extra_step] -> normalize -> tensor."""
    steps = [
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
    ]
    if extra_step is not None:
        steps.append(extra_step)
    steps.append(A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
    steps.append(ToTensorV2())
    return A.Compose(steps)


def _print_tta_settings(show_kinds):
    """Print the TTA configuration; `show_kinds` adds the transform-name line."""
    print("📊 TTA 설정 정보:")
    print(f"  - 변형 개수: {len(essential_tta_transforms)}")
    if show_kinds:
        print(f"  - 변형 종류: original, rot_90, rot_180, rot_270, brightness")
    print(f"  - 배치 크기: 64")


# Essential TTA views: every pipeline shares the same resize/pad/normalize
# steps and differs only in the single step inserted after padding.
essential_tta_transforms = [
    # Original image
    _build_tta_pipeline(),
    # Fixed rotations (limit=[a, a] pins the angle, p=1.0 always applies it)
    _build_tta_pipeline(A.Rotate(limit=[90, 90], p=1.0)),
    _build_tta_pipeline(A.Rotate(limit=[180, 180], p=1.0)),
    _build_tta_pipeline(A.Rotate(limit=[-90, -90], p=1.0)),
    # Fixed brightness/contrast boost
    _build_tta_pipeline(
        A.RandomBrightnessContrast(brightness_limit=[0.3, 0.3], contrast_limit=[0.3, 0.3], p=1.0)
    ),
]

print(f"TTA 변환 {len(essential_tta_transforms)}개 준비 완료")

# Record the TTA setup in the active WandB run; fall back to the console when
# there is no run or when logging fails.
try:
    current_run = wandb.run
    if current_run is None:
        _print_tta_settings(show_kinds=True)
    else:
        current_run.log({
            "tta/num_transforms": len(essential_tta_transforms),
            "tta/transforms_used": ["original", "rot_90", "rot_180", "rot_270", "brightness"],
            "tta/batch_size": 64  # batch size used for TTA
        })
except Exception as e:
    print(f"⚠️ TTA 설정 로깅 실패: {e}")
    _print_tta_settings(show_kinds=False)

In [None]:
# =============================================================================
# 16. TTA Dataset and DataLoader
# =============================================================================

class TTAImageDataset(Dataset):
    """Dataset that returns every TTA view of an image in one item.

    Args:
        data: Path to a CSV file (``str`` or path-like) or a DataFrame. Each
            row is unpacked as ``(file name, target)``.
        path: Directory that contains the image files.
        transforms: List of callables invoked as ``transform(image=img)`` and
            indexed with ``['image']``; one output is produced per callable.
    """

    def __init__(self, data, path, transforms):
        # Accept path-like objects as well as plain strings for the CSV path.
        if isinstance(data, (str, os.PathLike)):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values
        self.path = path
        self.transforms = transforms  # list of transforms, one per TTA view

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(list of transformed images, target)`` for row ``idx``."""
        name, target = self.df[idx]
        # The context manager closes the file handle once the pixels are copied.
        # convert("RGB") is a no-op for RGB files and keeps grayscale, palette
        # or RGBA images compatible with the 3-channel Normalize step used by
        # the TTA pipelines in this notebook.
        with Image.open(os.path.join(self.path, name)) as pil_img:
            img = np.array(pil_img.convert("RGB"))

        # Apply every transform to the same source image.
        augmented_images = [transform(image=img)['image'] for transform in self.transforms]

        return augmented_images, target

# Test-set dataset: the sample submission CSV supplies the file names and the
# TTA pipelines supply the views produced for every image.
tta_dataset = TTAImageDataset(
    data="../data/sample_submission.csv",
    path="../data/test/",
    transforms=essential_tta_transforms,
)

# Loader settings for TTA. Each item carries one tensor per TTA view, so a
# batch holds several times the data of a single-view batch.
tta_loader_options = dict(
    batch_size=64,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)
tta_loader = DataLoader(tta_dataset, **tta_loader_options)

print(f" TTA Dataset: {len(tta_dataset)}개 테스트 샘플")


 K-FOLD CROSS VALIDATION 최종 결과
 활성화된 run이 없어 새로운 summary run을 생성합니다.


 CV 결과 로깅 완료!
Fold 1: 0.9143 (28 epochs)  Early Stopped
Fold 2: 0.9087 (27 epochs)  Early Stopped
Fold 3: 0.9319 (36 epochs)  Early Stopped
Fold 4: 0.9083 (33 epochs)  Early Stopped
Fold 5: 0.9321 (49 epochs)  Early Stopped

 평균 CV F1: 0.9191 ± 0.0108
 최고 Fold: 0.9321
 최악 Fold: 0.9083
 성능 범위: 0.0238


In [None]:

# =============================================================================
# 17. Ensemble + TTA Inference with WandB Logging
# =============================================================================

def ensemble_tta_inference_with_logging(models, loader, transforms, confidence_threshold=0.9):
    """Average softmax outputs over all models and TTA views, with WandB logging.

    For every batch, each TTA view is passed through each model. The logits
    are divided by the default ``TemperatureScaling`` temperature, softmaxed,
    and averaged with equal weight. The predicted class is the argmax of the
    averaged probabilities and the confidence is their maximum.

    WandB logging is best-effort: it is skipped when no run is active, and a
    logging error is printed instead of aborting inference.

    Args:
        models: Non-empty sequence of models to ensemble.
        loader: Iterable yielding ``(images_list, _)`` where ``images_list``
            holds one image batch per TTA view.
        transforms: TTA transform list. Only its length is used here, for the
            console banner; the views themselves come from ``loader``.
        confidence_threshold: Confidences at or above this value count as
            "high confidence" in the logged statistics.

    Returns:
        ``(all_predictions, all_confidences)``: two lists with one entry per
        sample, in loader order.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError("models must contain at least one model")

    all_predictions = []
    all_confidences = []

    # Captured once; None means "no active run", in which case nothing is logged.
    run = wandb.run

    def _safe_log(payload):
        """Log to the active run without letting a failure abort inference."""
        if run is None:
            return
        try:
            run.log(payload)
        except Exception as e:
            print(f"WandB 로깅 중 에러: {e}")

    # Per-batch confidence statistics, uploaded once at the end.
    tta_progress = wandb.Table(columns=["Batch", "Avg_Confidence", "Low_Conf_Count", "High_Conf_Count"])

    # Temperature scaling with the class default temperature.
    temp_scaling = TemperatureScaling().to(device)

    # Switching to eval mode once is enough; nothing below changes the mode.
    for model in models:
        model.eval()

    print(f"앙상블 TTA 추론 시작...")
    print(f"{len(models)}개 모델 × {len(transforms)}개 TTA 변형 = {len(models) * len(transforms)}개 예측 평균")

    start_time = time.time()

    with torch.no_grad():
        for batch_idx, (images_list, _) in enumerate(tqdm(loader, desc="Ensemble TTA")):
            batch_size = images_list[0].size(0)
            num_members = len(models) * len(images_list)

            # Allocated from the first prediction so the class count follows
            # the models instead of being hard-coded.
            ensemble_probs = None

            for images in images_list:
                # Move each view to the device once and reuse it for all models.
                images = images.to(device)
                for model in models:
                    probs = torch.softmax(temp_scaling(model(images)), dim=1) / num_members
                    ensemble_probs = probs if ensemble_probs is None else ensemble_probs + probs

            # Confidence is the top averaged probability of each sample.
            max_probs = torch.max(ensemble_probs, dim=1)[0]
            batch_confidences = max_probs.cpu().numpy()
            all_confidences.extend(batch_confidences)

            final_preds = torch.argmax(ensemble_probs, dim=1)
            all_predictions.extend(final_preds.cpu().numpy())

            # Per-batch confidence breakdown.
            high_conf_count = np.sum(batch_confidences >= confidence_threshold)
            low_conf_count = batch_size - high_conf_count
            avg_confidence = np.mean(batch_confidences)

            tta_progress.add_data(batch_idx, avg_confidence, low_conf_count, high_conf_count)

            # Detailed progress every 20 batches.
            if batch_idx % 20 == 0:
                elapsed_time = time.time() - start_time
                estimated_total = elapsed_time * len(loader) / (batch_idx + 1)
                remaining_time = estimated_total - elapsed_time

                _safe_log({
                    "tta_progress/batch": batch_idx,
                    "tta_progress/avg_confidence": avg_confidence,
                    "tta_progress/high_confidence_ratio": high_conf_count / batch_size,
                    "tta_progress/low_confidence_count": low_conf_count,
                    "tta_progress/elapsed_time_min": elapsed_time / 60,
                    "tta_progress/estimated_remaining_min": remaining_time / 60,
                    # Exact count, also correct when the last batch is short.
                    "tta_progress/samples_processed": len(all_predictions),
                })

    total_time = time.time() - start_time

    # Nothing to summarise (and nothing to divide by) for an empty loader.
    if not all_predictions:
        print("⚠️ 추론할 샘플이 없습니다.")
        return all_predictions, all_confidences

    # Final statistics over all samples.
    confidence_values = np.asarray(all_confidences)
    final_avg_confidence = np.mean(confidence_values)
    confidence_std = np.std(confidence_values)
    high_conf_samples = np.sum(confidence_values >= confidence_threshold)

    if run is not None:
        _safe_log({
            "tta_results/total_time_min": total_time / 60,
            "tta_results/samples_per_second": len(all_predictions) / total_time,
            "tta_results/final_avg_confidence": final_avg_confidence,
            "tta_results/confidence_std": confidence_std,
            "tta_results/high_confidence_samples": high_conf_samples,
            "tta_results/high_confidence_ratio": high_conf_samples / len(all_predictions),
            "tta_results/total_predictions": len(all_predictions),
            "tta_results/confidence_histogram": wandb.Histogram(all_confidences),
            "tta_results/progress_table": tta_progress
        })

    print(f"\n 앙상블 TTA 추론 완료!")
    print(f"총 소요시간: {total_time/60:.1f}분")
    print(f" 평균 신뢰도: {final_avg_confidence:.4f} ± {confidence_std:.4f}")
    print(f" 고신뢰도 샘플: {high_conf_samples}/{len(all_predictions)} ({high_conf_samples/len(all_predictions)*100:.1f}%)")

    return all_predictions, all_confidences

# Run the final ensemble + TTA pass over the test loader.
separator = '=' * 60
print(f"\n{separator}")
print(" 최종 추론 - 앙상블 + TTA")
print(separator)

tta_predictions, confidences = ensemble_tta_inference_with_logging(
    ensemble_models,
    tta_loader,
    essential_tta_transforms,
    confidence_threshold=0.9,
)



🔧 앙상블 모델 준비 중...
Fold 1 모델 로드 완료
Fold 2 모델 로드 완료
Fold 3 모델 로드 완료
Fold 4 모델 로드 완료
Fold 5 모델 로드 완료
 총 5개 모델로 앙상블 구성


In [None]:
# =============================================================================
# 18. Final Results and Submission
# =============================================================================

print(f"\n 최종 결과 정리 중...")

# Build the submission frame from the rows the TTA dataset iterated over, then
# overwrite the placeholder targets with the ensemble predictions.
tta_pred_df = pd.DataFrame(tta_dataset.df, columns=['ID', 'target'])
tta_pred_df['target'] = tta_predictions

# The submission must keep the row order of the sample submission file.
# An explicit raise is used because `assert` is stripped under `python -O`.
sample_submission_df = pd.read_csv("../data/sample_submission.csv")
if not (sample_submission_df['ID'] == tta_pred_df['ID']).all():
    raise ValueError("ID 순서 불일치!")

# Predicted-class distribution.
pred_distribution = tta_pred_df['target'].value_counts().sort_index()
pred_table = wandb.Table(columns=["Class", "Count", "Percentage"])

print(f"\n📊 예측 결과 분포:")
for class_id in range(17):
    count = pred_distribution.get(class_id, 0)
    percentage = count / len(tta_pred_df) * 100
    pred_table.add_data(class_id, count, percentage)
    print(f"Class {class_id:2d}: {count:4d} ({percentage:5.1f}%)")

# Confidence breakdown. The array is built once and reused for every bin.
confidence_values = np.asarray(confidences)
confidence_bins = [0.5, 0.7, 0.8, 0.9, 0.95, 1.0]
last_bin = len(confidence_bins) - 1
confidence_analysis = {}
for i, threshold in enumerate(confidence_bins):
    if i == 0:
        # NOTE(review): this first entry is cumulative (everything >= 0.5),
        # unlike the range bins below; kept as-is so the logged key keeps its
        # meaning — confirm that is intended.
        in_bin = confidence_values >= threshold
    else:
        in_bin = confidence_values >= confidence_bins[i-1]
        if i < last_bin:
            in_bin &= confidence_values < threshold
        # The last bin is closed on the right: a confidence of exactly 1.0
        # belongs to it instead of being dropped by a strict `< 1.0` test.
    confidence_analysis[f"conf_{threshold}"] = int(np.sum(in_bin))

# Log the final statistics (best-effort).
try:
    if wandb.run is not None:
        wandb.run.log({
            "final_results/total_predictions": len(tta_predictions),
            "final_results/unique_classes_predicted": len(np.unique(tta_predictions)),
            "final_results/prediction_distribution_table": pred_table,
            "final_results/avg_confidence": np.mean(confidence_values),
            "final_results/median_confidence": np.median(confidence_values),
            "final_results/min_confidence": np.min(confidence_values),
            "final_results/max_confidence": np.max(confidence_values),
            "final_results/confidence_distribution": wandb.Histogram(confidences),
            **confidence_analysis
        })
        print("최종 결과 WandB 로깅 완료!")
    else:
        print("활성화된 run이 없어 로깅을 건너뜁니다.")
except Exception as e:
    print(f"WandB 로깅 중 에러: {e}")

# The console summary is always printed.
print(f"총 예측 수: {len(tta_predictions)}")
print(f"예측된 클래스 수: {len(np.unique(tta_predictions))}")
print(f"평균 신뢰도: {np.mean(confidence_values):.4f}")
print(f"신뢰도 범위: {np.min(confidence_values):.4f} ~ {np.max(confidence_values):.4f}")


# Bar chart of the prediction distribution (best-effort).
try:
    if wandb.run is not None:
        pred_dist_data = [[f"Class_{i}", pred_distribution.get(i, 0)] for i in range(17)]
        wandb.run.log({
            "final_results/prediction_distribution_chart": wandb.plot.bar(
                wandb.Table(data=pred_dist_data, columns=["Class", "Count"]),
                "Class", "Count",
                title="Final Prediction Distribution"
            )
        })
        print("예측 분포 차트 로깅 완료!")
    else:
        print("차트 로깅을 건너뜁니다.")
except Exception as e:
    print(f"차트 로깅 중 에러: {e}")

# Save the submission file. The directory is created first so a fresh checkout
# does not lose the predictions to a missing ../output folder.
output_path = "../output/choice4.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
tta_pred_df.to_csv(output_path, index=False)

# Upload the submission file as a WandB artifact (best-effort). The artifact
# is only built when a run exists to attach it to.
try:
    if wandb.run is not None:
        artifact = wandb.Artifact(
            name="final_predictions",
            type="predictions",
            description=f"Final ensemble predictions with {N_FOLDS}-fold CV + TTA"
        )
        artifact.add_file(output_path)
        wandb.run.log_artifact(artifact)
        print("예측 결과 아티팩트 로깅 완료!")
    else:
        print("활성화된 run이 없어 아티팩트 로깅을 건너뜁니다.")
except Exception as e:
    print(f"아티팩트 로깅 중 에러: {e}")


print(f"\n 최종 결과 저장 완료!")
print(f" 파일 위치: {output_path}")
print(f" 총 예측 수: {len(tta_predictions)}")


 TTA (Test Time Augmentation) 설정...
TTA 변환 5개 준비 완료


In [None]:
# =============================================================================
# 19. Experiment Summary and Cleanup
# =============================================================================

# Build the experiment summary. Numpy scalars are converted to plain Python
# numbers so the nested dict serialises the same way everywhere.
confidence_values = np.asarray(confidences)
avg_prediction_confidence = float(np.mean(confidence_values))

experiment_summary = {
    "experiment_name": main_run.name,
    "model_architecture": model_name,
    "image_size": img_size,
    "cv_strategy": f"{N_FOLDS}-Fold StratifiedKFold",
    "cv_mean_f1": float(mean_f1),
    "cv_std_f1": float(std_f1),
    "cv_best_fold": float(max(val_f1_scores)),
    "ensemble_models": len(ensemble_models),
    "tta_transforms": len(essential_tta_transforms),
    "total_training_time_min": sum(r['epochs_trained'] for r in fold_results) * 2,  # rough estimate
    "avg_prediction_confidence": avg_prediction_confidence,
    "high_confidence_predictions": int(np.sum(confidence_values >= 0.9)),
    "experiment_tags": ["baseline", "efficientnet-b3", "k-fold-cv", "tta", "ensemble"]
}

# Log the experiment summary (best-effort).
try:
    if wandb.run is not None:
        wandb.run.log({"experiment_summary": experiment_summary})
        print("실험 요약 로깅 완료!")
    else:
        print("활성화된 run이 없어 실험 요약 로깅을 건너뜁니다.")
except Exception as e:
    print(f"실험 요약 로깅 중 에러: {e}")


# Final status update (best-effort).
try:
    if wandb.run is not None:
        wandb.run.log({
            "status": "completed",
            "completion_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "total_runtime_hours": 0  # placeholder: the run start time is not read here
        })
        print("최종 상태 업데이트 완료!")
    else:
        print("활성화된 run이 없어 상태 업데이트를 건너뜁니다.")
except Exception as e:
    print(f"상태 업데이트 중 에러: {e}")

print(f"\n실험 완료 시간: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

print(f"\n{'='*60}")
print("실험 완료!")
print(f"{'='*60}")

print(f" K-Fold CV 결과: {mean_f1:.4f} ± {std_f1:.4f}")
print(f" 최고 성능 Fold: {max(val_f1_scores):.4f}")
print(f" 앙상블 모델: {len(ensemble_models)}개")
print(f" TTA 변형: {len(essential_tta_transforms)}개")
print(f" 평균 예측 신뢰도: {avg_prediction_confidence:.4f}")
print(f" WandB 대시보드: {main_run.url}")

# Show a sample of the predictions.
print(f"\n 예측 결과 샘플:")
print(tta_pred_df.head(10))

# Close the main run.
main_run.finish()

# NOTE(review): main_run is created in an earlier cell. The cells above log to
# wandb.run, which may be a different run (e.g. the CV summary run opened after
# training). Close that one too so it is not left open; this is skipped when
# main_run was the active run.
if wandb.run is not None:
    wandb.finish()

print(f"\n 모든 작업 완료!")
print(f" 결과 파일: {output_path}")
print(f" WandB에서 전체 실험 결과를 확인하세요!")

# Release the ensemble models and cached GPU memory.
del ensemble_models
torch.cuda.empty_cache()

 TTA Dataset: 3140개 테스트 샘플
