In [1]:
# ============================================================
# üß† Document Classification (ConvNeXt-Tiny + Weighted TTA)
# ============================================================

import os
import time
import random
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split



In [2]:

# ============================================================
# ‚öôÔ∏è Environment & Reproducibility
# ============================================================
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üî• Using GPU: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "‚ö†Ô∏è Using CPU")



üî• Using GPU: NVIDIA GeForce RTX 3090


In [3]:
# üß© Dataset
# ============================================================
class ImageDataset(Dataset):
    def __init__(self, csv_or_df, path, transform=None):
        if isinstance(csv_or_df, str):
            self.df = pd.read_csv(csv_or_df).values
        else:
            self.df = csv_or_df.values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)).convert("RGB"))
        if self.transform:
            img = self.transform(image=img)["image"]
        return img, target



In [4]:
# ============================================================

# ============================================================
# üß™ Data Config
# ============================================================
data_path = "../../../data/raw/"
img_size = 384
BATCH_SIZE = 64
num_workers = 8
pin_memory = True
persistent_workers = True
prefetch_factor = 4

# Augmentation
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.02, scale_limit=0.05, rotate_limit=3, p=0.3),
    A.OneOf([
        A.MotionBlur(blur_limit=3, p=0.5),
        A.GaussNoise(var_limit=(5, 30), p=0.5),
    ], p=0.2),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.2),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])



  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(5, 30), p=0.5),


In [5]:
# ============================================================
# üìÇ Split & Loader
# ============================================================
full_df = pd.read_csv(f"{data_path}/train.csv")
trn_df, val_df = train_test_split(full_df, test_size=0.2, stratify=full_df["target"], random_state=42)

trn_dataset = ImageDataset(trn_df, f"{data_path}/train/", transform=trn_transform)
val_dataset = ImageDataset(val_df, f"{data_path}/train/", transform=tst_transform)
tst_dataset = ImageDataset(f"{data_path}/sample_submission.csv", f"{data_path}/test/", transform=tst_transform)

trn_loader = DataLoader(trn_dataset, batch_size=BATCH_SIZE, shuffle=True,
                        num_workers=num_workers, pin_memory=pin_memory,
                        persistent_workers=persistent_workers, prefetch_factor=prefetch_factor, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=num_workers, pin_memory=pin_memory,
                        persistent_workers=persistent_workers, prefetch_factor=prefetch_factor)
tst_loader = DataLoader(tst_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=num_workers, pin_memory=pin_memory,
                        persistent_workers=persistent_workers, prefetch_factor=prefetch_factor)



In [6]:
# ============================================================
# üß† Model / Loss / Optimizer / Scheduler
# ============================================================
model_name = "convnext_tiny.fb_in22k_ft_in1k"
model = timm.create_model(model_name, pretrained=True, num_classes=17, in_chans=3, drop_path_rate=0.1).to(device)

loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15, eta_min=1e-5)
scaler = torch.cuda.amp.GradScaler()


# ============================================================
# üöÄ Train / Validate
# ============================================================
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler=None):
    model.train()
    train_loss, preds_list, targets_list = 0.0, [], []
    pbar = tqdm(loader, desc="Training", leave=False)

    for images, targets in pbar:
        images, targets = images.to(device, non_blocking=True), targets.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            preds = model(images)
            loss = loss_fn(preds, targets)

        if scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())
        pbar.set_postfix(loss=f"{loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average="macro")
    return {"train_loss": train_loss, "train_acc": train_acc, "train_f1": train_f1}


@torch.no_grad()
def validate(loader, model, loss_fn, device):
    model.eval()
    val_loss, preds_list, targets_list = 0.0, [], []
    pbar = tqdm(loader, desc="Validating", leave=False)

    for images, targets in pbar:
        images, targets = images.to(device, non_blocking=True), targets.to(device, non_blocking=True)
        with torch.amp.autocast("cuda"):
            preds = model(images)
            loss = loss_fn(preds, targets)
        val_loss += loss.item()
        preds_list.extend(preds.argmax(1).cpu().numpy())
        targets_list.extend(targets.cpu().numpy())
        pbar.set_postfix(loss=f"{loss.item():.4f}")

    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average="macro")
    return {"val_loss": val_loss, "val_acc": val_acc, "val_f1": val_f1}



  scaler = torch.cuda.amp.GradScaler()


In [7]:
# ============================================================
# üèÅ Training Loop
# ============================================================
best_f1 = 0.0
EPOCHS = 15

for epoch in range(EPOCHS):
    train_metrics = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, scaler)
    val_metrics = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"\n[Epoch {epoch+1}/{EPOCHS}]")
    print(f"Train | Loss: {train_metrics['train_loss']:.4f} | Acc: {train_metrics['train_acc']:.4f} | F1: {train_metrics['train_f1']:.4f}")
    print(f"Valid | Loss: {val_metrics['val_loss']:.4f} | Acc: {val_metrics['val_acc']:.4f} | F1: {val_metrics['val_f1']:.4f}")

    if val_metrics["val_f1"] > best_f1:
        best_f1 = val_metrics["val_f1"]
        print(f"‚úÖ Best model updated! (F1: {best_f1:.4f})")

  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 1/15]
Train | Loss: 2.2244 | Acc: 0.3651 | F1: 0.3213
Valid | Loss: 1.3363 | Acc: 0.6975 | F1: 0.6121
‚úÖ Best model updated! (F1: 0.6121)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 2/15]
Train | Loss: 1.2434 | Acc: 0.7549 | F1: 0.7246
Valid | Loss: 0.9843 | Acc: 0.8376 | F1: 0.8037
‚úÖ Best model updated! (F1: 0.8037)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 3/15]
Train | Loss: 1.0226 | Acc: 0.8207 | F1: 0.7921
Valid | Loss: 0.8534 | Acc: 0.8822 | F1: 0.8312
‚úÖ Best model updated! (F1: 0.8312)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 4/15]
Train | Loss: 0.9075 | Acc: 0.8750 | F1: 0.8532
Valid | Loss: 0.8555 | Acc: 0.8567 | F1: 0.8308


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 5/15]
Train | Loss: 0.8249 | Acc: 0.9054 | F1: 0.8894
Valid | Loss: 0.7469 | Acc: 0.9363 | F1: 0.9328
‚úÖ Best model updated! (F1: 0.9328)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 6/15]
Train | Loss: 0.7610 | Acc: 0.9219 | F1: 0.9089
Valid | Loss: 0.7231 | Acc: 0.9459 | F1: 0.9430
‚úÖ Best model updated! (F1: 0.9430)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 7/15]
Train | Loss: 0.7219 | Acc: 0.9498 | F1: 0.9478
Valid | Loss: 0.7104 | Acc: 0.9427 | F1: 0.9340


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 8/15]
Train | Loss: 0.6953 | Acc: 0.9564 | F1: 0.9539
Valid | Loss: 0.6859 | Acc: 0.9554 | F1: 0.9499
‚úÖ Best model updated! (F1: 0.9499)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 9/15]
Train | Loss: 0.6644 | Acc: 0.9712 | F1: 0.9704
Valid | Loss: 0.6761 | Acc: 0.9618 | F1: 0.9604
‚úÖ Best model updated! (F1: 0.9604)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 10/15]
Train | Loss: 0.6468 | Acc: 0.9794 | F1: 0.9789
Valid | Loss: 0.6768 | Acc: 0.9650 | F1: 0.9616
‚úÖ Best model updated! (F1: 0.9616)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 11/15]
Train | Loss: 0.6415 | Acc: 0.9844 | F1: 0.9827
Valid | Loss: 0.6730 | Acc: 0.9650 | F1: 0.9621
‚úÖ Best model updated! (F1: 0.9621)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 12/15]
Train | Loss: 0.6341 | Acc: 0.9836 | F1: 0.9826
Valid | Loss: 0.6750 | Acc: 0.9618 | F1: 0.9569


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 13/15]
Train | Loss: 0.6294 | Acc: 0.9877 | F1: 0.9859
Valid | Loss: 0.6767 | Acc: 0.9650 | F1: 0.9616


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 14/15]
Train | Loss: 0.6155 | Acc: 0.9910 | F1: 0.9904
Valid | Loss: 0.6733 | Acc: 0.9618 | F1: 0.9569


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                               


[Epoch 15/15]
Train | Loss: 0.6168 | Acc: 0.9893 | F1: 0.9884
Valid | Loss: 0.6759 | Acc: 0.9650 | F1: 0.9621




In [8]:




# ============================================================
# üéØ Weighted TTA Inference (no file reload)
# ============================================================
model.eval()
tta_rounds = 5
weights = np.linspace(1, 0.6, tta_rounds)
weights /= weights.sum()

print(f"üöÄ Running Weighted TTA ({tta_rounds} rounds, weights={weights})")

all_preds = []
for tta_idx in range(tta_rounds):
    preds_list = []
    for images, _ in tqdm(tst_loader, desc=f"TTA {tta_idx+1}/{tta_rounds}"):
        images = images.to(device)
        with torch.no_grad(), torch.amp.autocast("cuda"):
            preds = model(images)
        preds_list.append(preds.softmax(1).cpu().numpy())
    all_preds.append(np.concatenate(preds_list, axis=0))

weighted_preds = np.average(np.stack(all_preds), axis=0, weights=weights)
final_preds = np.argmax(weighted_preds, axis=1)

pred_df = pd.DataFrame(tst_dataset.df, columns=["ID", "target"])
pred_df["target"] = final_preds

sample_submission_df = pd.read_csv(f"{data_path}/sample_submission.csv")
assert (sample_submission_df["ID"] == pred_df["ID"]).all(), "‚ö†Ô∏è ID mismatch!"

pred_df.to_csv("pred_4.csv", index=False)
print("üéØ Weighted TTA Inference complete! Saved to pred_4.csv")


üöÄ Running Weighted TTA (5 rounds, weights=[0.25  0.225 0.2   0.175 0.15 ])


TTA 1/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:05<00:00,  9.76it/s]
TTA 2/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:04<00:00, 11.27it/s]
TTA 3/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

üéØ Weighted TTA Inference complete! Saved to pred_4.csv



