In [1]:
%pip install timm
%pip install albumentations augraphy torch opencv-python
%pip install --upgrade numpy pandas scikit-learn

[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.


In [14]:
import os
import cv2
import timm
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
import augraphy as aug
import albumentations as A
from datetime import datetime
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score

import torch
import torch.nn as nn
import torch.optim as optim


In [15]:
def set_seed(seed=42):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including the CUDA seeding calls), exports ``PYTHONHASHSEED``, and
    switches cuDNN to deterministic mode with benchmarking turned off.

    Args:
        seed: Integer seed handed to every generator. Defaults to 42.
    """
    # Environment values must be strings, hence the explicit conversion.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Every seeding entry point takes the same integer, so apply them uniformly.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Request deterministic cuDNN behaviour and disable its benchmark mode.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [None]:
# --- Paths ---
TEST_IMG_DIR = "datasets_fin/test/"
SUBMISSION_CSV = "datasets_fin/sample_submission.csv"
MODEL_DIR = "model/"  # folder where the .pth files are stored

# --- Model and data settings ---
model_name = "convnext_base"
IMG_SIZE = 384
N_CLASSES = 17
N_SPLITS = 5
BATCH_SIZE = 4

# --- Runtime ---
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
set_seed(42)

In [17]:
# --- Transform definitions (for TTA) ---
def get_tta_transforms(img_size):
    """
    Return the list of Albumentations pipelines used for test-time augmentation.

    Horizontal flips and similar transforms are deliberately left out because
    the inputs are documents.

    Args:
        img_size: Side length passed to ``A.Resize`` as both height and width.

    Returns:
        A list of six ``A.Compose`` pipelines, in this order: plain
        resize/normalize, rotation with limit (5, 5), rotation with limit
        (-5, -5), slight perspective warp, brightness/contrast adjustment,
        light Gaussian blur. Every pipeline ends with the same
        Resize -> Normalize -> ToTensorV2 steps.
    """
    def build(*leading):
        # Each pipeline gets its own instances of the shared closing steps.
        return A.Compose([
            *leading,
            A.Resize(img_size, img_size),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2(),
        ])

    # Original image (base Resize + Normalize only).
    identity = build()
    # Slight rotation 1: both ends of the limit are 5.
    rotate_pos = build(
        A.Rotate(limit=(5, 5), border_mode=cv2.BORDER_CONSTANT, value=0, p=1)
    )
    # Slight rotation 2: both ends of the limit are -5.
    rotate_neg = build(
        A.Rotate(limit=(-5, -5), border_mode=cv2.BORDER_CONSTANT, value=0, p=1)
    )
    # Slight perspective distortion.
    perspective = build(
        A.Perspective(scale=(0.01, 0.02), pad_mode=cv2.BORDER_CONSTANT, p=1)
    )
    # Brightness / contrast adjustment.
    brightness_contrast = build(
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=1)
    )
    # Light Gaussian blur.
    blur = build(A.GaussianBlur(blur_limit=(3, 3), p=1))

    return [identity, rotate_pos, rotate_neg, perspective, brightness_contrast, blur]

In [19]:
# --- Test Dataset (for TTA) ---
class TestDataset(Dataset):
    """Dataset that yields every TTA variant of a single test image.

    Args:
        df: DataFrame with an 'ID' column holding image file names.
        img_dir: Directory the file names are joined onto.
        tta_transforms: Iterable of callables invoked as
            ``transform(image=...)`` whose result is indexed with 'image'.
    """

    def __init__(self, df, img_dir, tta_transforms):
        self.tta_transforms = tta_transforms
        self.img_dir = img_dir

        # Items are looked up by position, so only the raw ID values are kept;
        # the DataFrame's index plays no part.
        self.image_ids = df['ID'].values
        self.length = len(self.image_ids)

    def __len__(self):
        """Return the number of image IDs."""
        return self.length

    def __getitem__(self, idx):
        """Load image ``idx`` and return a list with one result per TTA transform.

        Raises:
            FileNotFoundError: If ``cv2.imread`` returns None for the path.
        """
        img_path = os.path.join(self.img_dir, self.image_ids[idx])

        bgr = cv2.imread(img_path)
        if bgr is None:
            raise FileNotFoundError(f"이미지 파일 없음: {img_path} (idx: {idx})")

        # Convert the loaded BGR image to RGB before applying the transforms.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        tta_images = []
        for transform in self.tta_transforms:
            tta_images.append(transform(image=rgb)['image'])
        return tta_images

In [21]:
# 1. Load the submission template; its 'ID' column lists the test images.
sub_df = pd.read_csv(SUBMISSION_CSV)

# 2. Build the TTA transforms, the dataset and the loader.
tta_transforms = get_tta_transforms(IMG_SIZE)
test_dataset = TestDataset(sub_df, TEST_IMG_DIR, tta_transforms)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # predictions are written back to sub_df by row position
    num_workers=4
)


def _predict_fold_with_tta(model, loader, desc):
    """Return the TTA-averaged softmax probabilities of ``model`` over ``loader``.

    Args:
        model: Network already moved to DEVICE and switched to eval mode.
        loader: DataLoader whose batches are lists of image tensors, one list
            entry per TTA transform.
        desc: Label shown on the tqdm progress bar.

    Returns:
        NumPy array with one row per sample and N_CLASSES columns.
    """
    batch_averages = []
    with torch.no_grad():
        for tta_images_batch in tqdm(loader, desc=desc):
            n_samples = tta_images_batch[0].size(0)
            batch_probs_sum = torch.zeros(n_samples, N_CLASSES, device=DEVICE)

            # One forward pass per TTA variant; probabilities are summed as-is.
            for img_batch in tta_images_batch:
                outputs = model(img_batch.to(DEVICE))
                batch_probs_sum += outputs.softmax(dim=1)

            # Divide by the number of TTA variants to get the mean probability.
            avg_tta_probs = batch_probs_sum / len(tta_images_batch)
            batch_averages.append(avg_tta_probs.cpu().numpy())

    return np.concatenate(batch_averages, axis=0)


# 3. Accumulator for the summed per-fold probabilities,
#    shape (len(test_dataset), N_CLASSES).
final_preds_accumulator = np.zeros((len(test_dataset), N_CLASSES))

# 4. K-Fold model loop (soft voting).
model_paths = [os.path.join(MODEL_DIR, f"best_conv_model_fold_{fold+1}.pth") for fold in range(N_SPLITS)]
n_loaded_folds = 0  # folds that actually contributed to the accumulator

for fold, model_path in enumerate(model_paths):
    print(f"--- FOLD {fold+1}/{N_SPLITS} 추론 시작 (TTA 적용) ---")

    # 4.1 Build the architecture (must match the one used for training).
    # pretrained=False: the checkpoint loaded below overwrites every weight
    # (load_state_dict is strict by default), so fetching ImageNet weights
    # first would only add a download per fold.
    model = timm.create_model(
        model_name,
        pretrained=False,
        num_classes=N_CLASSES
    ).to(DEVICE)

    # 4.2 Load the fold's weights; a fold that cannot be loaded is skipped.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    try:
        model.load_state_dict(torch.load(model_path, map_location=DEVICE))
        print(f"모델 로드 성공: {model_path}")
    except Exception as e:
        print(f"!!! 모델 로드 실패: {model_path} !!!")
        print(e)
        continue

    model.eval()

    # 4.3 Add this fold's TTA-averaged probabilities to the running sum.
    final_preds_accumulator += _predict_fold_with_tta(
        model, test_loader, f"Fold {fold+1} Inference"
    )
    n_loaded_folds += 1

# 5. Average over the folds that were actually used. Dividing by N_SPLITS
#    would be wrong when a fold was skipped, and with zero folds the all-zero
#    accumulator would silently turn into an all-class-0 submission.
if n_loaded_folds == 0:
    raise RuntimeError(f"No fold checkpoint could be loaded from {MODEL_DIR!r}; nothing to predict.")
if n_loaded_folds < N_SPLITS:
    print(f"WARNING: only {n_loaded_folds}/{N_SPLITS} folds were used for the ensemble.")
avg_final_preds = final_preds_accumulator / n_loaded_folds

# 6. The class index with the highest mean probability is the final label.
final_labels = np.argmax(avg_final_preds, axis=1)

# 7. Write the submission file, creating the output folder if it is missing.
sub_df['target'] = final_labels
now = datetime.now().strftime("%Y%m%d_%H%M%S")
submission_filename = f'submission/submission_{now}.csv'
os.makedirs(os.path.dirname(submission_filename), exist_ok=True)
sub_df.to_csv(submission_filename, index=False)

print("--- 추론 완료 ---")
print(f"{submission_filename} 파일이 생성되었습니다.")
print(sub_df.head())

--- FOLD 1/5 추론 시작 (TTA 적용) ---
모델 로드 성공: model/best_conv_model_fold_1.pth


Fold 1 Inference: 100%|██████████| 785/785 [02:39<00:00,  4.91it/s]


--- FOLD 2/5 추론 시작 (TTA 적용) ---
모델 로드 성공: model/best_conv_model_fold_2.pth


Fold 2 Inference: 100%|██████████| 785/785 [02:39<00:00,  4.93it/s]


--- FOLD 3/5 추론 시작 (TTA 적용) ---
모델 로드 성공: model/best_conv_model_fold_3.pth


Fold 3 Inference: 100%|██████████| 785/785 [02:38<00:00,  4.94it/s]


--- FOLD 4/5 추론 시작 (TTA 적용) ---
모델 로드 성공: model/best_conv_model_fold_4.pth


Fold 4 Inference: 100%|██████████| 785/785 [02:39<00:00,  4.92it/s]


--- FOLD 5/5 추론 시작 (TTA 적용) ---
모델 로드 성공: model/best_conv_model_fold_5.pth


Fold 5 Inference: 100%|██████████| 785/785 [02:38<00:00,  4.94it/s]

--- 추론 완료 ---
submission/submission_20251112_032603.csv 파일이 생성되었습니다.
                     ID  target
0  0008fdb22ddce0ce.jpg       2
1  00091bffdffd83de.jpg       6
2  00396fbc1f6cc21d.jpg       5
3  00471f8038d9c4b6.jpg       6
4  00901f504008d884.jpg       2



