# **📄 Document type classification baseline code**
> 문서 타입 분류 대회에 오신 여러분 환영합니다! 🎉     
> 아래 baseline에서는 ResNet 모델을 로드하여, 모델을 학습 및 예측 파일 생성하는 프로세스에 대해 알아보겠습니다.

## Contents
- Prepare Environments
- Import Library & Define Functions
- Hyper-parameters
- Load Data
- Train Model
- Inference & Save File


## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

In [1]:
# 필요한 라이브러리를 설치합니다.
!pip install timm
!pip install matplotlib
!pip install seaborn
!pip install optuna

[0m

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [None]:
import os
import time
import random
import copy

import optuna, math
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import autocast, GradScaler  # Mixed Precision용

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold

def mixup_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).cuda()
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


from collections import Counter
import warnings
warnings.filterwarnings('ignore')

In [3]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [4]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, data, path, transform=None):
        # CSV 파일이면 읽고, DataFrame이면 그대로 사용
        if isinstance(data, str):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values  # DataFrame을 numpy array로 변환
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [5]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, model, optimizer, loss_fn, device):
    scaler = GradScaler()  # Mixed Precision용
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)
        
        # Cutmix/Mixup 적용 (30% 확률)
        if random.random() < 0.3:
            mixed_x, y_a, y_b, lam = mixup_data(image, targets, alpha=1.0)
            with autocast(): preds = model(mixed_x)
            loss = lam * loss_fn(preds, y_a) + (1 - lam) * loss_fn(preds, y_b)
        else:
            with autocast(): preds = model(image)
            loss = loss_fn(preds, targets)

        model.zero_grad(set_to_none=True)

        scaler.scale(loss).backward()  # Mixed Precision용
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer); scaler.update()  # Mixed Precision용

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

In [6]:
# validation을 위한 함수 추가
def validate_one_epoch(loader, model, loss_fn, device):
    """
    한 에폭 검증을 수행하는 함수
    - model.eval()로 모델을 평가 모드로 전환
    - torch.no_grad()로 gradient 계산 비활성화하여 메모리 절약
    - 검증 데이터에 대한 loss, accuracy, f1 score 계산
    """
    model.eval()  # 모델을 평가 모드로 전환 (dropout, batchnorm 비활성화)
    val_loss = 0
    preds_list = []
    targets_list = []
    
    with torch.no_grad():  # gradient 계산 비활성화로 메모리 절약
        pbar = tqdm(loader, desc="Validating")
        for image, targets in pbar:
            image = image.to(device)
            targets = targets.to(device)
            
            preds = model(image)  # 모델 예측
            loss = loss_fn(preds, targets)  # 손실 계산
            
            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())  # 예측 클래스 저장
            targets_list.extend(targets.detach().cpu().numpy())  # 실제 클래스 저장
            
            pbar.set_description(f"Val Loss: {loss.item():.4f}")
    
    val_loss /= len(loader)  # 평균 손실 계산
    val_acc = accuracy_score(targets_list, preds_list)  # 정확도 계산
    val_f1 = f1_score(targets_list, preds_list, average='macro')  # Macro F1 계산 (대회 평가지표)
    
    ret = {
        "val_loss": val_loss,
        "val_acc": val_acc,
        "val_f1": val_f1,
    }
    
    return ret

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [7]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = '../data/'

# model config
model_name = 'efficientnet_b3' # 'resnet50' 'efficientnet-b0', ...

# training config
img_size = 384
LR = 5e-4
EPOCHS = 10
BATCH_SIZE = 32
num_workers = 30

In [8]:
# Optuna를 사용한 하이퍼파라미터 튜닝 (선택적 실행)
USE_OPTUNA = False  # True로 바꾸면 튜닝 실행

if USE_OPTUNA:
    def objective(trial):
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
        
        # 간단한 3-fold CV로 빠른 평가
        skf_simple = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
        fold_scores = []
        
        for fold, (train_idx, val_idx) in enumerate(skf_simple.split(train_df, train_df['target'])):
            # 모델 생성
            model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
            optimizer = Adam(model.parameters(), lr=lr)
            loss_fn = nn.CrossEntropyLoss()
            
            # 간단한 2 epoch 학습
            for epoch in range(2):
                train_ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device)
            
            val_ret = validate_one_epoch(val_loader, model, loss_fn, device)
            fold_scores.append(val_ret['val_f1'])
        
        return np.mean(fold_scores)
    
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=10)
    
    # 최적 파라미터 적용
    LR = study.best_params['lr']
    BATCH_SIZE = study.best_params['batch_size']
    print(f"Best params: {study.best_params}")

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

In [9]:
# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    # 비율 보존 리사이징 (핵심 개선)
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
    
    # 문서 특화 회전 (정확한 90도 배수)
    A.OneOf([
        A.Rotate(limit=[90,90], p=1.0),
        A.Rotate(limit=[180,180], p=1.0),
        A.Rotate(limit=[270,270], p=1.0),
    ], p=0.6),
    
    # 테스트 특화 강화 증강
    A.OneOf([
        A.MotionBlur(blur_limit=7, p=1.0),
        A.GaussianBlur(blur_limit=7, p=1.0),
    ], p=0.9),
    
    A.RandomBrightnessContrast(
        brightness_limit=0.3, 
        contrast_limit=0.3, 
        p=0.8
    ),
    A.GaussNoise(var_limit=(30.0, 100.0), p=0.7),
    A.HorizontalFlip(p=0.5),
    
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [10]:
# Optuna 튜닝 (선택적 실행)
USE_OPTUNA = False  # True로 바꾸면 튜닝 실행

if USE_OPTUNA:
    # 위의 objective 함수와 study 코드
    pass

# K-Fold 적용

In [11]:
# K-Fold 설정
N_FOLDS = 5  # 5-fold로 설정 (데이터가 적으므로)
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

# 클래스별 최소 샘플 보장 확인
# for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['target'])):
#     assert len(np.unique(train_df.iloc[val_idx]['target'])) == 17

# 전체 학습 데이터 로드
train_df = pd.read_csv("../data/train.csv")

# K-Fold 결과를 저장할 리스트
fold_results = []
fold_models = []  # 각 fold의 최고 성능 모델을 저장

print(f"Starting {N_FOLDS}-Fold Cross Validation...")

# LR = best_params['lr']
# BATCH_SIZE = best_params['batch_size']

# K-Fold Cross Validation 시작
for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['target'])):
    print(f"\n{'='*50}")
    print(f"FOLD {fold + 1}/{N_FOLDS}")
    print(f"{'='*50}")
    
    current_model = model_name
    
    # 현재 fold의 train/validation 데이터 분할
    train_fold_df = train_df.iloc[train_idx].reset_index(drop=True)
    val_fold_df = train_df.iloc[val_idx].reset_index(drop=True)
    
    # 현재 fold의 Dataset 생성
    trn_dataset = ImageDataset(
        train_fold_df,
        "../data/train/",
        transform=trn_transform
    )
    
    val_dataset = ImageDataset(
        val_fold_df,
        "../data/train/",
        transform=tst_transform  # 검증에는 증강 적용 안함
    )
    
    # 현재 fold의 DataLoader 생성
    trn_loader = DataLoader(
        trn_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    
    print(f"Train samples: {len(trn_dataset)}, Validation samples: {len(val_dataset)}")
    
    # 모델 초기화 (각 fold마다 새로운 모델)
    model = timm.create_model(
        current_model,
        pretrained=True,
        num_classes=17
    ).to(device)
    
    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)  # Label Smoothing 적용
    optimizer = Adam(model.parameters(), lr=LR)
    
    # Learning Rate Scheduler 추가
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    
    # 현재 fold의 최고 성능 추적
    best_val_f1 = 0.0
    best_model = None
    
    # 현재 fold 학습
    for epoch in range(EPOCHS):
        # Training
        train_ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device)
        
        # Validation
        val_ret = validate_one_epoch(val_loader, model, loss_fn, device)
        
        # Scheduler step 추가
        scheduler.step()
        
        print(f"Epoch {epoch+1:2d} | "
              f"Train Loss: {train_ret['train_loss']:.4f} | "
              f"Train F1: {train_ret['train_f1']:.4f} | "
              f"Val Loss: {val_ret['val_loss']:.4f} | "
              f"Val F1: {val_ret['val_f1']:.4f}")
        
        # 최고 성능 모델 저장
        if val_ret['val_f1'] > best_val_f1:
            best_val_f1 = val_ret['val_f1']
            best_model = copy.deepcopy(model.state_dict())
    
    # 현재 fold 결과 저장
    fold_results.append({
        'fold': fold + 1,
        'best_val_f1': best_val_f1,
        'train_samples': len(trn_dataset),
        'val_samples': len(val_dataset)
    })
    
    fold_models.append(best_model)
    
    print(f"Fold {fold + 1} Best Validation F1: {best_val_f1:.4f}")

# K-Fold 결과 요약
print(f"\n{'='*60}")
print("K-FOLD CROSS VALIDATION RESULTS")
print(f"{'='*60}")

val_f1_scores = [result['best_val_f1'] for result in fold_results]
mean_f1 = np.mean(val_f1_scores)
std_f1 = np.std(val_f1_scores)

for result in fold_results:
    print(f"Fold {result['fold']}: {result['best_val_f1']:.4f}")

print(f"\nMean CV F1: {mean_f1:.4f} ± {std_f1:.4f}")
print(f"Best single fold: {max(val_f1_scores):.4f}")

Starting 5-Fold Cross Validation...

FOLD 1/5
Train samples: 1256, Validation samples: 314


Loss: 1.5469: 100%|██████████| 40/40 [00:34<00:00,  1.16it/s]
Val Loss: 1.6831: 100%|██████████| 10/10 [00:05<00:00,  1.69it/s]


Epoch  1 | Train Loss: 2.2281 | Train F1: 0.4274 | Val Loss: 1.5607 | Val F1: 0.7654


Loss: 2.3125: 100%|██████████| 40/40 [00:15<00:00,  2.57it/s]
Val Loss: 1.3776: 100%|██████████| 10/10 [00:03<00:00,  2.71it/s]


Epoch  2 | Train Loss: 1.7285 | Train F1: 0.6058 | Val Loss: 1.3403 | Val F1: 0.8549


Loss: 1.6650: 100%|██████████| 40/40 [00:14<00:00,  2.67it/s]
Val Loss: 1.2994: 100%|██████████| 10/10 [00:03<00:00,  2.64it/s]


Epoch  3 | Train Loss: 1.5229 | Train F1: 0.7223 | Val Loss: 1.2749 | Val F1: 0.8773


Loss: 1.5918: 100%|██████████| 40/40 [00:16<00:00,  2.45it/s]
Val Loss: 1.3369: 100%|██████████| 10/10 [00:03<00:00,  2.77it/s]


Epoch  4 | Train Loss: 1.4246 | Train F1: 0.7466 | Val Loss: 1.2967 | Val F1: 0.8697


Loss: 1.3496: 100%|██████████| 40/40 [00:15<00:00,  2.60it/s]
Val Loss: 1.2454: 100%|██████████| 10/10 [00:03<00:00,  2.62it/s]


Epoch  5 | Train Loss: 1.4004 | Train F1: 0.7617 | Val Loss: 1.2226 | Val F1: 0.8990


Loss: 1.2988: 100%|██████████| 40/40 [00:15<00:00,  2.52it/s]
Val Loss: 1.2343: 100%|██████████| 10/10 [00:03<00:00,  2.70it/s]


Epoch  6 | Train Loss: 1.3674 | Train F1: 0.7956 | Val Loss: 1.1938 | Val F1: 0.9157


Loss: 1.1855: 100%|██████████| 40/40 [00:15<00:00,  2.62it/s]
Val Loss: 1.2197: 100%|██████████| 10/10 [00:03<00:00,  2.57it/s]


Epoch  7 | Train Loss: 1.3581 | Train F1: 0.8145 | Val Loss: 1.1828 | Val F1: 0.9222


Loss: 1.1924: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.2188: 100%|██████████| 10/10 [00:03<00:00,  2.57it/s]


Epoch  8 | Train Loss: 1.3048 | Train F1: 0.8013 | Val Loss: 1.1743 | Val F1: 0.9279


Loss: 1.3018: 100%|██████████| 40/40 [00:15<00:00,  2.61it/s]
Val Loss: 1.2276: 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]


Epoch  9 | Train Loss: 1.2630 | Train F1: 0.7782 | Val Loss: 1.1690 | Val F1: 0.9264


Loss: 1.1299: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.2222: 100%|██████████| 10/10 [00:03<00:00,  2.63it/s]


Epoch 10 | Train Loss: 1.2157 | Train F1: 0.8804 | Val Loss: 1.1689 | Val F1: 0.9295
Fold 1 Best Validation F1: 0.9295

FOLD 2/5
Train samples: 1256, Validation samples: 314


Loss: 1.3828: 100%|██████████| 40/40 [00:16<00:00,  2.42it/s]
Val Loss: 1.5338: 100%|██████████| 10/10 [00:04<00:00,  2.49it/s]


Epoch  1 | Train Loss: 2.4043 | Train F1: 0.3535 | Val Loss: 1.6411 | Val F1: 0.6883


Loss: 1.9434: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.4140: 100%|██████████| 10/10 [00:04<00:00,  2.38it/s]


Epoch  2 | Train Loss: 1.7125 | Train F1: 0.6463 | Val Loss: 1.3792 | Val F1: 0.8270


Loss: 1.4248: 100%|██████████| 40/40 [00:15<00:00,  2.57it/s]
Val Loss: 1.3452: 100%|██████████| 10/10 [00:04<00:00,  2.43it/s]


Epoch  3 | Train Loss: 1.5677 | Train F1: 0.6541 | Val Loss: 1.3094 | Val F1: 0.8586


Loss: 1.2461: 100%|██████████| 40/40 [00:16<00:00,  2.49it/s]
Val Loss: 1.2181: 100%|██████████| 10/10 [00:03<00:00,  2.62it/s]


Epoch  4 | Train Loss: 1.4748 | Train F1: 0.7667 | Val Loss: 1.2255 | Val F1: 0.9186


Loss: 1.3906: 100%|██████████| 40/40 [00:16<00:00,  2.46it/s]
Val Loss: 1.2280: 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]


Epoch  5 | Train Loss: 1.3490 | Train F1: 0.7797 | Val Loss: 1.2121 | Val F1: 0.8993


Loss: 1.9297: 100%|██████████| 40/40 [00:16<00:00,  2.48it/s]
Val Loss: 1.1197: 100%|██████████| 10/10 [00:03<00:00,  2.55it/s]


Epoch  6 | Train Loss: 1.4455 | Train F1: 0.6873 | Val Loss: 1.2019 | Val F1: 0.9465


Loss: 1.7070: 100%|██████████| 40/40 [00:15<00:00,  2.59it/s]
Val Loss: 1.1253: 100%|██████████| 10/10 [00:03<00:00,  2.57it/s]


Epoch  7 | Train Loss: 1.2848 | Train F1: 0.8409 | Val Loss: 1.1794 | Val F1: 0.9265


Loss: 1.5264: 100%|██████████| 40/40 [00:16<00:00,  2.44it/s]
Val Loss: 1.1353: 100%|██████████| 10/10 [00:04<00:00,  2.43it/s]


Epoch  8 | Train Loss: 1.3306 | Train F1: 0.7962 | Val Loss: 1.1679 | Val F1: 0.9416


Loss: 1.1553: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.1460: 100%|██████████| 10/10 [00:04<00:00,  2.49it/s]


Epoch  9 | Train Loss: 1.2267 | Train F1: 0.8724 | Val Loss: 1.1652 | Val F1: 0.9466


Loss: 1.2070: 100%|██████████| 40/40 [00:16<00:00,  2.49it/s]
Val Loss: 1.1390: 100%|██████████| 10/10 [00:04<00:00,  2.43it/s]


Epoch 10 | Train Loss: 1.2475 | Train F1: 0.8755 | Val Loss: 1.1636 | Val F1: 0.9410
Fold 2 Best Validation F1: 0.9466

FOLD 3/5
Train samples: 1256, Validation samples: 314


Loss: 1.7158: 100%|██████████| 40/40 [00:16<00:00,  2.45it/s]
Val Loss: 1.3697: 100%|██████████| 10/10 [00:04<00:00,  2.43it/s]


Epoch  1 | Train Loss: 2.2169 | Train F1: 0.4587 | Val Loss: 1.4751 | Val F1: 0.7866


Loss: 1.3184: 100%|██████████| 40/40 [00:15<00:00,  2.50it/s]
Val Loss: 1.3262: 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]


Epoch  2 | Train Loss: 1.6199 | Train F1: 0.6752 | Val Loss: 1.3642 | Val F1: 0.8068


Loss: 1.1855: 100%|██████████| 40/40 [00:16<00:00,  2.47it/s]
Val Loss: 1.2290: 100%|██████████| 10/10 [00:04<00:00,  2.38it/s]


Epoch  3 | Train Loss: 1.4862 | Train F1: 0.7202 | Val Loss: 1.2628 | Val F1: 0.8893


Loss: 1.3555: 100%|██████████| 40/40 [00:15<00:00,  2.51it/s]
Val Loss: 1.2027: 100%|██████████| 10/10 [00:04<00:00,  2.38it/s]


Epoch  4 | Train Loss: 1.3786 | Train F1: 0.8487 | Val Loss: 1.2526 | Val F1: 0.8921


Loss: 1.3379: 100%|██████████| 40/40 [00:15<00:00,  2.50it/s]
Val Loss: 1.1709: 100%|██████████| 10/10 [00:04<00:00,  2.38it/s]


Epoch  5 | Train Loss: 1.4067 | Train F1: 0.8060 | Val Loss: 1.2165 | Val F1: 0.9178


Loss: 1.4619: 100%|██████████| 40/40 [00:16<00:00,  2.42it/s]
Val Loss: 1.2056: 100%|██████████| 10/10 [00:04<00:00,  2.29it/s]


Epoch  6 | Train Loss: 1.3974 | Train F1: 0.7497 | Val Loss: 1.2067 | Val F1: 0.9087


Loss: 1.4297: 100%|██████████| 40/40 [00:15<00:00,  2.55it/s]
Val Loss: 1.2003: 100%|██████████| 10/10 [00:03<00:00,  2.52it/s]


Epoch  7 | Train Loss: 1.3134 | Train F1: 0.8346 | Val Loss: 1.1921 | Val F1: 0.9048


Loss: 1.1016: 100%|██████████| 40/40 [00:16<00:00,  2.47it/s]
Val Loss: 1.1678: 100%|██████████| 10/10 [00:03<00:00,  2.50it/s]


Epoch  8 | Train Loss: 1.2778 | Train F1: 0.8174 | Val Loss: 1.1841 | Val F1: 0.9060


Loss: 2.1602: 100%|██████████| 40/40 [00:16<00:00,  2.47it/s]
Val Loss: 1.1956: 100%|██████████| 10/10 [00:03<00:00,  2.50it/s]


Epoch  9 | Train Loss: 1.3796 | Train F1: 0.7964 | Val Loss: 1.2019 | Val F1: 0.9027


Loss: 1.1660: 100%|██████████| 40/40 [00:16<00:00,  2.46it/s]
Val Loss: 1.1815: 100%|██████████| 10/10 [00:04<00:00,  2.27it/s]


Epoch 10 | Train Loss: 1.3556 | Train F1: 0.7886 | Val Loss: 1.1859 | Val F1: 0.9089
Fold 3 Best Validation F1: 0.9178

FOLD 4/5
Train samples: 1256, Validation samples: 314


Loss: 2.2520: 100%|██████████| 40/40 [00:17<00:00,  2.35it/s]
Val Loss: 1.5740: 100%|██████████| 10/10 [00:04<00:00,  2.44it/s]


Epoch  1 | Train Loss: 2.3267 | Train F1: 0.3879 | Val Loss: 1.6144 | Val F1: 0.7407


Loss: 1.3486: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.2607: 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]


Epoch  2 | Train Loss: 1.5735 | Train F1: 0.7120 | Val Loss: 1.4060 | Val F1: 0.8252


Loss: 1.3809: 100%|██████████| 40/40 [00:16<00:00,  2.42it/s]
Val Loss: 1.1722: 100%|██████████| 10/10 [00:04<00:00,  2.45it/s]


Epoch  3 | Train Loss: 1.4667 | Train F1: 0.7786 | Val Loss: 1.2762 | Val F1: 0.8837


Loss: 1.7158: 100%|██████████| 40/40 [00:16<00:00,  2.42it/s]
Val Loss: 1.2001: 100%|██████████| 10/10 [00:04<00:00,  2.39it/s]


Epoch  4 | Train Loss: 1.4159 | Train F1: 0.8379 | Val Loss: 1.2767 | Val F1: 0.8872


Loss: 1.9590: 100%|██████████| 40/40 [00:15<00:00,  2.60it/s]
Val Loss: 1.1814: 100%|██████████| 10/10 [00:04<00:00,  2.45it/s]


Epoch  5 | Train Loss: 1.4674 | Train F1: 0.6411 | Val Loss: 1.2267 | Val F1: 0.9112


Loss: 1.1816: 100%|██████████| 40/40 [00:15<00:00,  2.58it/s]
Val Loss: 1.1401: 100%|██████████| 10/10 [00:04<00:00,  2.39it/s]


Epoch  6 | Train Loss: 1.3383 | Train F1: 0.7906 | Val Loss: 1.2097 | Val F1: 0.9056


Loss: 1.2471: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.1213: 100%|██████████| 10/10 [00:04<00:00,  2.33it/s]


Epoch  7 | Train Loss: 1.3360 | Train F1: 0.8606 | Val Loss: 1.1953 | Val F1: 0.8982


Loss: 1.0547: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.1073: 100%|██████████| 10/10 [00:04<00:00,  2.44it/s]


Epoch  8 | Train Loss: 1.2069 | Train F1: 0.8853 | Val Loss: 1.1922 | Val F1: 0.9029


Loss: 1.2480: 100%|██████████| 40/40 [00:16<00:00,  2.37it/s]
Val Loss: 1.1125: 100%|██████████| 10/10 [00:04<00:00,  2.38it/s]


Epoch  9 | Train Loss: 1.3070 | Train F1: 0.8092 | Val Loss: 1.1875 | Val F1: 0.8907


Loss: 1.7588: 100%|██████████| 40/40 [00:15<00:00,  2.51it/s]
Val Loss: 1.1094: 100%|██████████| 10/10 [00:04<00:00,  2.39it/s]


Epoch 10 | Train Loss: 1.2820 | Train F1: 0.8177 | Val Loss: 1.1895 | Val F1: 0.8940
Fold 4 Best Validation F1: 0.9112

FOLD 5/5
Train samples: 1256, Validation samples: 314


Loss: 1.4150: 100%|██████████| 40/40 [00:16<00:00,  2.47it/s]
Val Loss: 1.4376: 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]


Epoch  1 | Train Loss: 2.1993 | Train F1: 0.4716 | Val Loss: 1.4808 | Val F1: 0.7621


Loss: 2.1602: 100%|██████████| 40/40 [00:16<00:00,  2.50it/s]
Val Loss: 1.4130: 100%|██████████| 10/10 [00:04<00:00,  2.45it/s]


Epoch  2 | Train Loss: 1.5823 | Train F1: 0.7301 | Val Loss: 1.3658 | Val F1: 0.8358


Loss: 1.3086: 100%|██████████| 40/40 [00:15<00:00,  2.53it/s]
Val Loss: 1.2221: 100%|██████████| 10/10 [00:04<00:00,  2.43it/s]


Epoch  3 | Train Loss: 1.4333 | Train F1: 0.7779 | Val Loss: 1.2970 | Val F1: 0.8682


Loss: 1.2568: 100%|██████████| 40/40 [00:15<00:00,  2.57it/s]
Val Loss: 1.2431: 100%|██████████| 10/10 [00:03<00:00,  2.50it/s]


Epoch  4 | Train Loss: 1.4377 | Train F1: 0.7373 | Val Loss: 1.2587 | Val F1: 0.8761


Loss: 1.2354: 100%|██████████| 40/40 [00:15<00:00,  2.50it/s]
Val Loss: 1.1887: 100%|██████████| 10/10 [00:04<00:00,  2.44it/s]


Epoch  5 | Train Loss: 1.4295 | Train F1: 0.7145 | Val Loss: 1.2503 | Val F1: 0.8976


Loss: 1.3115: 100%|██████████| 40/40 [00:16<00:00,  2.43it/s]
Val Loss: 1.1851: 100%|██████████| 10/10 [00:04<00:00,  2.45it/s]


Epoch  6 | Train Loss: 1.3089 | Train F1: 0.8216 | Val Loss: 1.2393 | Val F1: 0.8859


Loss: 1.2480: 100%|██████████| 40/40 [00:16<00:00,  2.43it/s]
Val Loss: 1.1497: 100%|██████████| 10/10 [00:03<00:00,  2.50it/s]


Epoch  7 | Train Loss: 1.3759 | Train F1: 0.7273 | Val Loss: 1.2314 | Val F1: 0.8859


Loss: 1.0781: 100%|██████████| 40/40 [00:15<00:00,  2.52it/s]
Val Loss: 1.1246: 100%|██████████| 10/10 [00:03<00:00,  2.51it/s]


Epoch  8 | Train Loss: 1.3341 | Train F1: 0.8036 | Val Loss: 1.2122 | Val F1: 0.8977


Loss: 1.1240: 100%|██████████| 40/40 [00:16<00:00,  2.48it/s]
Val Loss: 1.1368: 100%|██████████| 10/10 [00:04<00:00,  2.44it/s]


Epoch  9 | Train Loss: 1.2364 | Train F1: 0.9079 | Val Loss: 1.2124 | Val F1: 0.8942


Loss: 1.0654: 100%|██████████| 40/40 [00:16<00:00,  2.44it/s]
Val Loss: 1.1368: 100%|██████████| 10/10 [00:03<00:00,  2.56it/s]

Epoch 10 | Train Loss: 1.3375 | Train F1: 0.8043 | Val Loss: 1.2080 | Val F1: 0.9068
Fold 5 Best Validation F1: 0.9068

K-FOLD CROSS VALIDATION RESULTS
Fold 1: 0.9295
Fold 2: 0.9466
Fold 3: 0.9178
Fold 4: 0.9112
Fold 5: 0.9068

Mean CV F1: 0.9224 ± 0.0143
Best single fold: 0.9466





In [12]:
# print(f"\nMean CV F1: {mean_f1:.4f} ± {std_f1:.4f}")
# print(f"Best single fold: {max(val_f1_scores):.4f}")

# # 여기에 추가
# # K-Fold 완료 후 최고 성능 모델 로드
# best_fold_idx = np.argmax(val_f1_scores)
# model = timm.create_model(
#     model_name,
#     pretrained=True,
#     num_classes=17
# ).to(device)
# model.load_state_dict(fold_models[best_fold_idx])
# print(f"Using best model from Fold {best_fold_idx + 1} (F1: {max(val_f1_scores):.4f}) for TTA inference")

In [13]:
# 5-Fold 앙상블 모델 준비
ensemble_models = []
for i, state_dict in enumerate(fold_models):
    fold_model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    fold_model.load_state_dict(state_dict)
    fold_model.eval()
    ensemble_models.append(fold_model)
print(f"Using ensemble of all {len(ensemble_models)} fold models for inference")

Using ensemble of all 5 fold models for inference


## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [14]:
# # load model
# model = timm.create_model(
#     model_name,
#     pretrained=True,
#     num_classes=17
# ).to(device)
# loss_fn = nn.CrossEntropyLoss()
# optimizer = Adam(model.parameters(), lr=LR)

In [15]:
# for epoch in range(EPOCHS):
#     ret = train_one_epoch(trn_loader, model, optimizer, loss_fn, device=device)
#     ret['epoch'] = epoch

#     log = ""
#     for k, v in ret.items():
#       log += f"{k}: {v:.4f}\n"
#     print(log)

# 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [16]:
# preds_list = []

# model.eval()
# for image, _ in tqdm(tst_loader):
#     image = image.to(device)

#     with torch.no_grad():
#         preds = model(image)
#     preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

In [17]:
# pred_df = pd.DataFrame(tst_dataset.df, columns=['ID', 'target'])
# pred_df['target'] = preds_list

In [18]:
# sample_submission_df = pd.read_csv("../data/sample_submission.csv")
# assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [19]:
# pred_df.to_csv("pred.csv", index=False)

In [20]:
# Temperature Scaling 클래스 정의
class TemperatureScaling(nn.Module):
    def __init__(self):
        super().__init__()
        self.temperature = nn.Parameter(torch.ones(1) * 1.5)
    
    def forward(self, logits):
        return logits / self.temperature

In [21]:
essential_tta_transforms = [
    # 원본
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    # 90도 회전들
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[90, 90], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[180, 180], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[-90, -90], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    # 밝기 개선
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.RandomBrightnessContrast(brightness_limit=[0.3, 0.3], contrast_limit=[0.3, 0.3], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
]

In [22]:
# TTA 추론을 위한 Dataset 클래스
class TTAImageDataset(Dataset):
    def __init__(self, data, path, transforms):
        if isinstance(data, str):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values
        self.path = path
        self.transforms = transforms  # 여러 transform을 리스트로 받음

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        
        # 모든 transform을 적용한 결과를 리스트로 반환
        augmented_images = []
        for transform in self.transforms:
            aug_img = transform(image=img)['image']
            augmented_images.append(aug_img)
        
        return augmented_images, target

In [23]:
# TTA Dataset 생성
tta_dataset = TTAImageDataset(
    "../data/sample_submission.csv",
    "../data/test/",
    essential_tta_transforms
)

# TTA DataLoader (배치 크기를 줄여서 메모리 절약)
tta_loader = DataLoader(
    tta_dataset,
    batch_size=64,  # TTA는 메모리를 많이 사용하므로 배치 크기 줄임
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

print(f"TTA Dataset size: {len(tta_dataset)}")

TTA Dataset size: 3140


In [24]:
# def adaptive_tta_inference(model, loader, transforms, confidence_threshold=0.9):
#     """
#     신뢰도 기반 적응적 TTA 추론
#     - 신뢰도가 높으면 원본만 사용하여 속도 향상
#     - 저신뢰도 이미지만 모든 변형 적용
#     """
    
#     all_predictions = []
#     total_batches = len(loader)
#     early_stop_count = 0  # 조기 중단된 이미지 수
        
#     model.eval()
#     temp_scaling = TemperatureScaling().to(device)
        
#     for batch_idx, (images_list, _) in enumerate(tqdm(loader, desc="Adaptive TTA")):
#         batch_predictions = []
        
#         with torch.no_grad():
#             # 1단계: 원본 이미지로 먼저 예측
#             original_images = images_list[0].to(device)  # 첫 번째가 원본
#             original_preds = model(original_images)
#             original_preds = temp_scaling(original_preds)  # 온도 조절 적용
#             original_probs = torch.softmax(original_preds, dim=1)
            
#             # 각 이미지의 최대 신뢰도 계산
#             max_confidences = torch.max(original_probs, dim=1)[0]
#             high_conf_mask = max_confidences > confidence_threshold
            
#             # 고신뢰도 이미지들은 원본 예측만 사용
#             batch_size = original_images.size(0)
#             final_probs = original_probs.clone()
            
#             # 저신뢰도 이미지들만 추가 변형 적용
#             low_conf_indices = (~high_conf_mask).nonzero().squeeze(1)
            
#             if len(low_conf_indices) > 0:
#                 # 저신뢰도 이미지들에 대해서만 모든 변형 적용
#                 all_aug_probs = [original_probs]
                
#                 for aug_idx in range(1, len(images_list)):  # 원본 제외한 나머지 변형들
#                     aug_images = images_list[aug_idx].to(device)
#                     aug_preds = model(aug_images)
#                     aug_probs = torch.softmax(aug_preds, dim=1)
#                     all_aug_probs.append(aug_probs)
                
#                 # 저신뢰도 이미지들에 대해서만 평균 계산
#                 avg_probs = torch.mean(torch.stack(all_aug_probs), dim=0)
#                 final_probs[low_conf_indices] = avg_probs[low_conf_indices]
            
#             # 최종 예측
#             final_preds = torch.argmax(final_probs, dim=1)
#             all_predictions.extend(final_preds.cpu().numpy())
            
#             # 통계 업데이트
#             early_stop_count += high_conf_mask.sum().item()
    
#     print(f"\\nAdaptive TTA completed:")
#     print(f"- High confidence images (early stop): {early_stop_count}/{len(all_predictions)} ({early_stop_count/len(all_predictions)*100:.1f}%)")
#     print(f"- Low confidence images (full TTA): {len(all_predictions)-early_stop_count}/{len(all_predictions)} ({(len(all_predictions)-early_stop_count)/len(all_predictions)*100:.1f}%)")
#     print(f"- Expected speedup: {1 + (early_stop_count/len(all_predictions)*4):.1f}x")
    
#     return all_predictions

# # 적응적 TTA 실행
# print("Starting Adaptive TTA inference...")
# tta_predictions = adaptive_tta_inference(
#     model=model, 
#     loader=tta_loader, 
#     transforms=essential_tta_transforms,
#     confidence_threshold=0.9  # 90% 신뢰도 이상이면 조기 중단
# )

# print(f"Total predictions: {len(tta_predictions)}")

In [25]:
def ensemble_tta_inference(models, loader, transforms, confidence_threshold=0.9):
    """5-Fold 모델 앙상블 + TTA 추론"""
    all_predictions = []
    
    for batch_idx, (images_list, _) in enumerate(tqdm(loader, desc="Ensemble TTA")):
        batch_size = images_list[0].size(0)
        ensemble_probs = torch.zeros(batch_size, 17).to(device)
        
        # 각 fold 모델별 예측
        for model in models:
            with torch.no_grad():
                # 각 TTA 변형별 예측
                for images in images_list:
                    images = images.to(device)
                    preds = model(images)
                    probs = torch.softmax(preds, dim=1)
                    ensemble_probs += probs / (len(models) * len(images_list))
        
        final_preds = torch.argmax(ensemble_probs, dim=1)
        all_predictions.extend(final_preds.cpu().numpy())
    
    return all_predictions

In [26]:
# 앙상블 TTA 실행
print("Starting Ensemble TTA inference...")
tta_predictions = ensemble_tta_inference(
    models=ensemble_models, 
    loader=tta_loader, 
    transforms=essential_tta_transforms,
    confidence_threshold=0.9
)

Starting Ensemble TTA inference...


Ensemble TTA: 100%|██████████| 50/50 [03:26<00:00,  4.12s/it]


In [27]:
# TTA 결과로 submission 파일 생성
tta_pred_df = pd.DataFrame(tta_dataset.df, columns=['ID', 'target'])
tta_pred_df['target'] = tta_predictions

In [28]:
# 기존 submission과 동일한 순서인지 확인
sample_submission_df = pd.read_csv("../data/sample_submission.csv")
assert (sample_submission_df['ID'] == tta_pred_df['ID']).all()

In [29]:
# TTA 결과 저장
tta_pred_df.to_csv("../output/choice.csv", index=False)
print("TTA predictions saved to 'pred_tta.csv'")

print("TTA Prediction sample:")

TTA predictions saved to 'pred_tta.csv'
TTA Prediction sample:


In [30]:
tta_pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,6
4,00901f504008d884.jpg,2
