In [None]:
# EfficientNet-B5 문서 이미지 분류 모델
# Train 데이터: 1570장 (깨끗한 이미지)
# Test 데이터: 3140장 (회전, 플립, 노이즈가 심한 이미지)
# 클래스: 17개 (불균형 데이터)

import os
import random
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import wandb
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [None]:
# ================================
# 4. 데이터셋 클래스
# ================================
class DocumentDataset(Dataset):
    """Dataset that loads document images listed in a DataFrame.

    Each row of ``df`` must provide an ``'ID'`` entry (the image file name,
    joined onto ``img_dir``) and a ``'target'`` entry (returned as the label).

    Args:
        df: DataFrame with one row per sample.
        img_dir: Directory that contains the image files named in ``df['ID']``.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``'image'`` entry. ``None`` (the
            default) returns the raw RGB array unchanged.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image as a
            NumPy array, or whatever ``transform`` produced under its
            ``'image'`` key, and ``label`` is the row's ``'target'`` value.
        """
        # Positional lookup, so a DataFrame whose index was not reset
        # (e.g. a fold subset) still works.
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['ID'])

        # Use a context manager so the underlying file handle is released
        # deterministically instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = np.array(img.convert('RGB'))

        # Compare against None explicitly so that a transform object which
        # happens to be falsy (e.g. a container type defining __len__) is
        # still applied.
        if self.transform is not None:
            image = self.transform(image=image)['image']

        return image, row['target']

In [None]:
# ================================
# 5. 강력한 데이터 증강 (Train)
# ================================
def get_train_transform(img_size):
    """Build the heavy augmentation pipeline used for training images.

    The image is first resized to ``img_size`` x ``img_size``; then noise/blur,
    rotation/flip, colour, geometric, dropout and motion-blur augmentations
    are applied in that order, followed by normalization and tensor
    conversion.

    Args:
        img_size: Target height and width passed to ``A.Resize``.

    Returns:
        The ``A.Compose`` pipeline.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    # Noise or blur (mirrors the characteristics of the test data).
    noise_or_blur = A.OneOf(
        [
            A.GaussNoise(var_limit=(10.0, 800.0), p=0.75),
            A.GaussianBlur(blur_limit=(1, 7), p=0.5),
        ],
        p=0.75,
    )

    # Rotation and flips (mirrors the characteristics of the test data).
    orientation_steps = [
        A.RandomRotate90(p=0.5),
        A.HorizontalFlip(p=0.75),
        A.Rotate(limit=30, p=0.75),
        A.Transpose(p=0.5),
    ]

    # Colour and brightness changes.
    color_steps = [
        A.ColorJitter(
            brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5
        ),
        A.HueSaturationValue(
            hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5
        ),
        A.RandomBrightnessContrast(
            brightness_limit=0.2, contrast_limit=0.2, p=0.5
        ),
        A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
    ]

    # Geometric distortions.
    geometric_steps = [
        A.ShiftScaleRotate(
            shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.25
        ),
        A.ElasticTransform(alpha=1, sigma=30, alpha_affine=30, p=0.5),
        A.OpticalDistortion(p=0.5),
    ]

    # Order matters: the steps run in exactly this sequence.
    pipeline = [
        A.Resize(height=img_size, width=img_size),
        noise_or_blur,
        *orientation_steps,
        *color_steps,
        *geometric_steps,
        # Dropout of rectangular regions.
        A.CoarseDropout(max_holes=6, max_height=32, max_width=32, p=0.5),
        # Motion blur.
        A.MotionBlur(blur_limit=5, p=0.5),
        # Normalization, then conversion to a tensor.
        A.Normalize(mean=channel_mean, std=channel_std),
        ToTensorV2(),
    ]
    return A.Compose(pipeline)

In [None]:
# ================================
# 6. 검증/테스트 증강 (최소한의 변형)
# ================================
def get_valid_transform(img_size):
    """Build the minimal preprocessing pipeline for validation/test images.

    Only resizing, normalization and tensor conversion are configured; none
    of the training augmentations are included.

    Args:
        img_size: Target height and width passed to ``A.Resize``.

    Returns:
        The ``A.Compose`` pipeline.
    """
    steps = [
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
    return A.Compose(steps)


In [None]:
# ================================
# 6. 검증/테스트 증강 (최소한의 변형)
# ================================
def get_valid_transform(img_size):
    """Build the minimal preprocessing pipeline for validation/test images.

    NOTE: this cell re-defines ``get_valid_transform`` with the same steps as
    the definition in the preceding cell; the later definition is the one
    in effect once both cells have run.

    Args:
        img_size: Target height and width passed to ``A.Resize``.

    Returns:
        The ``A.Compose`` pipeline (resize, normalize, convert to tensor).
    """
    resize = A.Resize(height=img_size, width=img_size)
    normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor = ToTensorV2()
    return A.Compose([resize, normalize, to_tensor])
