In [1]:
# ============================================================
# üß† Document Classification (ConvNeXt-Tiny + Weighted TTA)
# ============================================================

import os
import time
import random
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split



In [2]:

# ============================================================
# ‚öôÔ∏è Environment & Reproducibility
# ============================================================
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üî• Using GPU: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "‚ö†Ô∏è Using CPU")



üî• Using GPU: NVIDIA GeForce RTX 3090


In [3]:
# üß© Dataset
# ============================================================
class ImageDataset(Dataset):
    def __init__(self, csv_or_df, path, transform=None):
        if isinstance(csv_or_df, str):
            self.df = pd.read_csv(csv_or_df).values
        else:
            self.df = csv_or_df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)).convert("RGB"))
        if self.transform:
            img = self.transform(image=img)["image"]
        return img, target



In [4]:
# ============================================================

# ============================================================
# üß™ Data Config
# ============================================================
data_path = "../../../data/raw/"
img_size = 384
BATCH_SIZE = 64
num_workers = 8
pin_memory = True
persistent_workers = True
prefetch_factor = 4

# Augmentation
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.02, scale_limit=0.05, rotate_limit=3, p=0.3),
    A.OneOf([
        A.MotionBlur(blur_limit=3, p=0.5),
        A.GaussNoise(var_limit=(5, 30), p=0.5),
    ], p=0.2),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.2),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])



  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(5, 30), p=0.5),


In [5]:
# ============================================================
# üìÇ Split & Loader
# ============================================================
full_df = pd.read_csv(f"{data_path}/train.csv")
trn_df, val_df = train_test_split(full_df, test_size=0.2, stratify=full_df["target"], random_state=42)

trn_dataset = ImageDataset(trn_df, f"{data_path}/train/", transform=trn_transform)
val_dataset = ImageDataset(val_df, f"{data_path}/train/", transform=tst_transform)
tst_dataset = ImageDataset(f"{data_path}/sample_submission.csv", f"{data_path}/test/", transform=tst_transform)

trn_loader = DataLoader(trn_dataset, batch_size=BATCH_SIZE, shuffle=True,
                        num_workers=num_workers, pin_memory=pin_memory,
                        persistent_workers=persistent_workers, prefetch_factor=prefetch_factor, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=num_workers, pin_memory=pin_memory,
                        persistent_workers=persistent_workers, prefetch_factor=prefetch_factor)
tst_loader = DataLoader(tst_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=num_workers, pin_memory=pin_memory,
                        persistent_workers=persistent_workers, prefetch_factor=prefetch_factor)



In [6]:
# ============================================================
# üß† Model / Loss / Optimizer / Scheduler
# ============================================================
model_name = "convnext_tiny.fb_in22k_ft_in1k"
model = timm.create_model(model_name, pretrained=True, num_classes=17, in_chans=3, drop_path_rate=0.1).to(device)

loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15, eta_min=1e-5)
scaler = torch.cuda.amp.GradScaler()


# ============================================================
# üöÄ Train / Validate
# ============================================================
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler=None):
    model.train()
    train_loss, preds_list, targets_list = 0.0, [], []
    pbar = tqdm(loader, desc="Training", leave=False)

    for images, targets in pbar:
        images, targets = images.to(device, non_blocking=True), targets.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            preds = model(images)
            loss = loss_fn(preds, targets)

        if scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())
        pbar.set_postfix(loss=f"{loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average="macro")
    return {"train_loss": train_loss, "train_acc": train_acc, "train_f1": train_f1}


@torch.no_grad()
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss, preds_list, targets_list = 0.0, [], []
    pbar = tqdm(loader, desc="Validating", leave=False)

    for images, targets in pbar:
        images, targets = images.to(device, non_blocking=True), targets.to(device, non_blocking=True)
        with torch.amp.autocast("cuda"):
            preds = model(images)
            loss = loss_fn(preds, targets)
        val_loss += loss.item()
        preds_list.extend(preds.argmax(1).cpu().numpy())
        targets_list.extend(targets.cpu().numpy())
        pbar.set_postfix(loss=f"{loss.item():.4f}")

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average="macro")
    return {"val_loss": val_loss, "val_acc": val_acc, "val_f1": val_f1}



  scaler = torch.cuda.amp.GradScaler()


In [7]:
# ============================================================
# üèÅ Training Loop
# ============================================================
best_f1 = 0.0
EPOCHS = 15

for epoch in range(EPOCHS):
    train_metrics = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, scaler)
    val_metrics = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"\n[Epoch {epoch+1}/{EPOCHS}]")
    print(f"Train | Loss: {train_metrics['train_loss']:.4f} | Acc: {train_metrics['train_acc']:.4f} | F1: {train_metrics['train_f1']:.4f}")
    print(f"Valid | Loss: {val_metrics['val_loss']:.4f} | Acc: {val_metrics['val_acc']:.4f} | F1: {val_metrics['val_f1']:.4f}")

    if val_metrics["val_f1"] > best_f1:
        best_f1 = val_metrics["val_f1"]
        print(f"‚úÖ Best model updated! (F1: {best_f1:.4f})")

  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 1/15]
Train | Loss: 2.1130 | Acc: 0.3997 | F1: 0.3651
Valid | Loss: 1.2714 | Acc: 0.6783 | F1: 0.6052
‚úÖ Best model updated! (F1: 0.6052)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 2/15]
Train | Loss: 1.2797 | Acc: 0.7204 | F1: 0.6893
Valid | Loss: 0.9651 | Acc: 0.8535 | F1: 0.8110
‚úÖ Best model updated! (F1: 0.8110)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 3/15]
Train | Loss: 1.0418 | Acc: 0.8166 | F1: 0.7980
Valid | Loss: 0.8638 | Acc: 0.8694 | F1: 0.8301
‚úÖ Best model updated! (F1: 0.8301)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 4/15]
Train | Loss: 0.8851 | Acc: 0.8783 | F1: 0.8634
Valid | Loss: 0.7969 | Acc: 0.9140 | F1: 0.9083
‚úÖ Best model updated! (F1: 0.9083)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 5/15]
Train | Loss: 0.8108 | Acc: 0.9079 | F1: 0.8972
Valid | Loss: 0.7334 | Acc: 0.9427 | F1: 0.9341
‚úÖ Best model updated! (F1: 0.9341)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 6/15]
Train | Loss: 0.7586 | Acc: 0.9293 | F1: 0.9238
Valid | Loss: 0.7361 | Acc: 0.9331 | F1: 0.9282


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 7/15]
Train | Loss: 0.7277 | Acc: 0.9433 | F1: 0.9416
Valid | Loss: 0.7129 | Acc: 0.9299 | F1: 0.9228


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 8/15]
Train | Loss: 0.6854 | Acc: 0.9572 | F1: 0.9557
Valid | Loss: 0.6936 | Acc: 0.9650 | F1: 0.9634
‚úÖ Best model updated! (F1: 0.9634)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 9/15]
Train | Loss: 0.6902 | Acc: 0.9556 | F1: 0.9519
Valid | Loss: 0.6987 | Acc: 0.9427 | F1: 0.9380


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 10/15]
Train | Loss: 0.6717 | Acc: 0.9663 | F1: 0.9655
Valid | Loss: 0.6885 | Acc: 0.9490 | F1: 0.9458


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 11/15]
Train | Loss: 0.6523 | Acc: 0.9762 | F1: 0.9762
Valid | Loss: 0.6883 | Acc: 0.9522 | F1: 0.9492


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 12/15]
Train | Loss: 0.6382 | Acc: 0.9811 | F1: 0.9805
Valid | Loss: 0.6659 | Acc: 0.9586 | F1: 0.9563


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 13/15]
Train | Loss: 0.6321 | Acc: 0.9844 | F1: 0.9839
Valid | Loss: 0.6655 | Acc: 0.9713 | F1: 0.9701
‚úÖ Best model updated! (F1: 0.9701)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 14/15]
Train | Loss: 0.6225 | Acc: 0.9901 | F1: 0.9900
Valid | Loss: 0.6685 | Acc: 0.9618 | F1: 0.9588


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 15/15]
Train | Loss: 0.6182 | Acc: 0.9877 | F1: 0.9868
Valid | Loss: 0.6634 | Acc: 0.9713 | F1: 0.9688




In [8]:
# ============================================================
# üéØ Random-Light TTA Inference (‚Üí pred_5.csv)
# ============================================================
import random

tta_rounds = 5
all_preds = []

print(f"\nüöÄ Running Random-Light TTA ({tta_rounds} rounds)\n")

for i in range(tta_rounds):
    # Îß§ ÎùºÏö¥ÎìúÎßàÎã§ ÏÇ¥Ïßù Îã§Î•∏ augmentation Ï†ÅÏö©
    tta_transform = A.Compose([
        A.Resize(height=img_size, width=img_size),
        A.HorizontalFlip(p=random.uniform(0.3, 0.7)),
        A.RandomBrightnessContrast(
            brightness_limit=0.05,
            contrast_limit=0.05,
            p=0.3
        ),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225)
        ),
        ToTensorV2(),
    ])

    tta_dataset = ImageDataset(
        f"{data_path}/sample_submission.csv",
        f"{data_path}/test/",
        transform=tta_transform
    )

    tta_loader = DataLoader(
        tta_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )

    preds_list = []
    for images, _ in tqdm(tta_loader, desc=f"TTA {i+1}/{tta_rounds}"):
        images = images.to(device)
        with torch.no_grad(), torch.amp.autocast("cuda"):
            preds = model(images)
        preds_list.append(preds.softmax(1).cpu().numpy())

    all_preds.append(np.concatenate(preds_list, axis=0))

# üîπ ÌèâÍ∑† ÏïôÏÉÅÎ∏î (equal weights)
final_preds = np.mean(np.stack(all_preds), axis=0).argmax(axis=1)

# üîπ DataFrame ÏÉùÏÑ±
tta_df = pd.DataFrame(tst_dataset.df, columns=["ID", "target"])
tta_df["target"] = final_preds

# üîπ Ï†ÄÏû•
tta_df.to_csv("pred_5.csv", index=False)
print("‚úÖ Random-Light TTA complete! Saved to pred_5.csv")



üöÄ Running Random-Light TTA (5 rounds)



TTA 1/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:04<00:00, 10.17it/s]
TTA 2/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:04<00:00, 11.07it/s]
TTA 3/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

‚úÖ Random-Light TTA complete! Saved to pred_5.csv



