In [1]:
# train_swin_tiny_stage0_5.py
import os
import time
import random
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import glob



# ------------------------
# ‚öôÔ∏è Device & Seed
# ------------------------
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üî• Using {device} ({torch.cuda.get_device_name(0) if device.type=='cuda' else 'CPU'})")


# ------------------------
# üìÅ Paths
# ------------------------
DATA_DIR = "/data/ephemeral/home/data/processed/stage0_5_train/"
META_PATH = "/data/ephemeral/home/data/meta_stage0_5_train.csv"
TRAIN_CSV = "/data/ephemeral/home/data/raw/train.csv"
TEST_PATH = "/data/ephemeral/home/data/processed/stage0_5_test/"   # ‚úÖ testÎèÑ Ï†ÑÏ≤òÎ¶¨ Î≤ÑÏ†Ñ ÏÇ¨Ïö©
SUB_PATH = "/data/ephemeral/home/data/raw/sample_submission.csv"


üî• Using cuda (NVIDIA GeForce RTX 3090)


In [2]:
# ------------------------
# üß© Dataset
# ------------------------
class ImageDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row["filepath"]
        img = np.array(Image.open(img_path).convert("RGB"))
        target = int(row["target"])
        if self.transform:
            img = self.transform(image=img)["image"]
        return img, target


# ------------------------
# üß† Config
# ------------------------
model_name = "swin_tiny_patch4_window7_224"
IMG_SIZE = 224
NUM_CLASSES = 17
LR = 5e-4
EPOCHS = 25
BATCH_SIZE = 64
num_workers = 8


# ------------------------
# üß© Transform
# ------------------------
trn_transform = A.Compose([
    A.Resize(height=IMG_SIZE, width=IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.02, scale_limit=0.05, rotate_limit=3, p=0.3),
    A.OneOf([
        A.MotionBlur(blur_limit=3, p=0.5),
        A.GaussNoise(var_limit=(5, 30), p=0.5),
    ], p=0.2),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.2),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

tst_transform = A.Compose([
    A.Resize(height=IMG_SIZE, width=IMG_SIZE),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])


  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(5, 30), p=0.5),


In [3]:
# ------------------------
# üìà Split (meta_stage0_5.csv + train.csv merge)
# ------------------------
meta = pd.read_csv(META_PATH)
train_csv = pd.read_csv(TRAIN_CSV)

# ‚úÖ group prefix Ï†úÍ±∞ ÌõÑ Î≥ëÌï©
meta["basename"] = meta["filepath"].apply(lambda x: os.path.basename(x).split("_", 1)[-1])
train_csv["basename"] = train_csv["ID"].apply(lambda x: os.path.basename(x))

meta_joined = pd.merge(meta, train_csv[["basename", "target"]], on="basename", how="left")

print("‚úÖ meta_stage0_5 Î≥ëÌï© ÏôÑÎ£å:", meta_joined.shape)
print("NaN target Í∞úÏàò:", meta_joined["target"].isna().sum())

# ‚úÖ NaN Ï†úÍ±∞
meta_joined = meta_joined.dropna(subset=["target"]).reset_index(drop=True)

# Split
trn_df, val_df = train_test_split(
    meta_joined, test_size=0.2, stratify=meta_joined["target"], random_state=SEED
)
print("‚úÖ Split ÏôÑÎ£å:", trn_df.shape, val_df.shape)


‚úÖ meta_stage0_5 Î≥ëÌï© ÏôÑÎ£å: (1570, 4)
NaN target Í∞úÏàò: 0
‚úÖ Split ÏôÑÎ£å: (1256, 4) (314, 4)


In [4]:

# ------------------------
# üß± Dataset & Loader
# ------------------------
trn_dataset = ImageDataset(trn_df, transform=trn_transform)
val_dataset = ImageDataset(val_df, transform=tst_transform)

trn_loader = DataLoader(trn_dataset, batch_size=BATCH_SIZE, shuffle=True,
                        num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=num_workers, pin_memory=True)


# ------------------------
# üß† Model
# ------------------------
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=NUM_CLASSES,
    in_chans=3,
    drop_path_rate=0.1
).to(device)

loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-5)
scaler = torch.cuda.amp.GradScaler()



  scaler = torch.cuda.amp.GradScaler()


In [5]:
# ------------------------
# üßÆ Train / Validate
# ------------------------
def train_one_epoch(loader, model, optimizer, loss_fn, device, scaler=None):
    model.train()
    total_loss, preds_list, targets_list = 0, [], []
    for images, targets in tqdm(loader, desc="Training", leave=False):
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            preds = model(images)
            loss = loss_fn(preds, targets)
        if scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        total_loss += loss.item()
        preds_list.extend(preds.argmax(1).detach().cpu().numpy())
        targets_list.extend(targets.cpu().numpy())
    return {
        "train_loss": total_loss / len(loader),
        "train_acc": accuracy_score(targets_list, preds_list),
        "train_f1": f1_score(targets_list, preds_list, average="macro"),
    }


@torch.no_grad()
def validate(loader, model, loss_fn, device):
    model.eval()
    total_loss, preds_list, targets_list = 0, [], []
    for images, targets in tqdm(loader, desc="Validating", leave=False):
        images, targets = images.to(device), targets.to(device)
        with torch.amp.autocast("cuda"):
            preds = model(images)
            loss = loss_fn(preds, targets)
        total_loss += loss.item()
        preds_list.extend(preds.argmax(1).cpu().numpy())
        targets_list.extend(targets.cpu().numpy())
    return {
        "val_loss": total_loss / len(loader),
        "val_acc": accuracy_score(targets_list, preds_list),
        "val_f1": f1_score(targets_list, preds_list, average="macro"),
    }



In [6]:
# ------------------------
# üöÄ Train Loop
# ------------------------
best_f1 = 0.0
for epoch in range(EPOCHS):
    train_metrics = train_one_epoch(trn_loader, model, optimizer, loss_fn, device, scaler)
    val_metrics = validate(val_loader, model, loss_fn, device)
    scheduler.step()

    print(f"\n[Epoch {epoch+1}/{EPOCHS}]")
    print(f"Train | Loss: {train_metrics['train_loss']:.4f} | "
          f"Acc: {train_metrics['train_acc']:.4f} | "
          f"F1: {train_metrics['train_f1']:.4f}")
    print(f"Valid | Loss: {val_metrics['val_loss']:.4f} | "
          f"Acc: {val_metrics['val_acc']:.4f} | "
          f"F1: {val_metrics['val_f1']:.4f}")

    if val_metrics["val_f1"] > best_f1:
        best_f1 = val_metrics["val_f1"]
        torch.save(model.state_dict(), f"./best_swin_tiny_f1_{best_f1:.4f}.pt")
        print(f"‚úÖ Best model saved! (F1: {best_f1:.4f})")



  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 1/25]
Train | Loss: 1.9253 | Acc: 0.4514 | F1: 0.4033
Valid | Loss: 1.1568 | Acc: 0.7580 | F1: 0.7035
‚úÖ Best model saved! (F1: 0.7035)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 2/25]
Train | Loss: 1.1669 | Acc: 0.7818 | F1: 0.7526
Valid | Loss: 0.9262 | Acc: 0.8471 | F1: 0.8256
‚úÖ Best model saved! (F1: 0.8256)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 3/25]
Train | Loss: 0.9215 | Acc: 0.8479 | F1: 0.8281
Valid | Loss: 0.8399 | Acc: 0.8822 | F1: 0.8625
‚úÖ Best model saved! (F1: 0.8625)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 4/25]
Train | Loss: 0.8316 | Acc: 0.9013 | F1: 0.8950
Valid | Loss: 0.8395 | Acc: 0.8885 | F1: 0.8753
‚úÖ Best model saved! (F1: 0.8753)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 5/25]
Train | Loss: 0.7794 | Acc: 0.9204 | F1: 0.9152
Valid | Loss: 0.8222 | Acc: 0.9013 | F1: 0.8811
‚úÖ Best model saved! (F1: 0.8811)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 6/25]
Train | Loss: 0.7589 | Acc: 0.9212 | F1: 0.9128
Valid | Loss: 0.7954 | Acc: 0.9076 | F1: 0.8946
‚úÖ Best model saved! (F1: 0.8946)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 7/25]
Train | Loss: 0.7252 | Acc: 0.9363 | F1: 0.9311
Valid | Loss: 0.8008 | Acc: 0.9045 | F1: 0.8996
‚úÖ Best model saved! (F1: 0.8996)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 8/25]
Train | Loss: 0.7091 | Acc: 0.9490 | F1: 0.9459
Valid | Loss: 0.8175 | Acc: 0.9013 | F1: 0.8920


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 9/25]
Train | Loss: 0.7153 | Acc: 0.9475 | F1: 0.9436
Valid | Loss: 0.7729 | Acc: 0.9108 | F1: 0.9038
‚úÖ Best model saved! (F1: 0.9038)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 10/25]
Train | Loss: 0.6871 | Acc: 0.9546 | F1: 0.9519
Valid | Loss: 0.7389 | Acc: 0.9363 | F1: 0.9281
‚úÖ Best model saved! (F1: 0.9281)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 11/25]
Train | Loss: 0.6599 | Acc: 0.9721 | F1: 0.9708
Valid | Loss: 0.7923 | Acc: 0.9204 | F1: 0.9117


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 12/25]
Train | Loss: 0.6818 | Acc: 0.9618 | F1: 0.9568
Valid | Loss: 0.7623 | Acc: 0.9172 | F1: 0.9091


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 13/25]
Train | Loss: 0.6518 | Acc: 0.9769 | F1: 0.9740
Valid | Loss: 0.7513 | Acc: 0.9236 | F1: 0.9236


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 14/25]
Train | Loss: 0.6419 | Acc: 0.9761 | F1: 0.9760
Valid | Loss: 0.7377 | Acc: 0.9331 | F1: 0.9303
‚úÖ Best model saved! (F1: 0.9303)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 15/25]
Train | Loss: 0.6242 | Acc: 0.9817 | F1: 0.9816
Valid | Loss: 0.7627 | Acc: 0.9299 | F1: 0.9260


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 16/25]
Train | Loss: 0.6086 | Acc: 0.9873 | F1: 0.9860
Valid | Loss: 0.7693 | Acc: 0.9331 | F1: 0.9267


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 17/25]
Train | Loss: 0.6051 | Acc: 0.9904 | F1: 0.9904
Valid | Loss: 0.7672 | Acc: 0.9299 | F1: 0.9234


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 18/25]
Train | Loss: 0.6041 | Acc: 0.9920 | F1: 0.9927
Valid | Loss: 0.7845 | Acc: 0.9236 | F1: 0.9178


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 19/25]
Train | Loss: 0.5941 | Acc: 0.9952 | F1: 0.9945
Valid | Loss: 0.7870 | Acc: 0.9236 | F1: 0.9146


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 20/25]
Train | Loss: 0.6029 | Acc: 0.9889 | F1: 0.9881
Valid | Loss: 0.7469 | Acc: 0.9363 | F1: 0.9317
‚úÖ Best model saved! (F1: 0.9317)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 21/25]
Train | Loss: 0.5964 | Acc: 0.9944 | F1: 0.9941
Valid | Loss: 0.7563 | Acc: 0.9363 | F1: 0.9282


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 22/25]
Train | Loss: 0.5935 | Acc: 0.9936 | F1: 0.9936
Valid | Loss: 0.7294 | Acc: 0.9459 | F1: 0.9414
‚úÖ Best model saved! (F1: 0.9414)


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 23/25]
Train | Loss: 0.5929 | Acc: 0.9960 | F1: 0.9956
Valid | Loss: 0.7423 | Acc: 0.9363 | F1: 0.9319


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 24/25]
Train | Loss: 0.5899 | Acc: 0.9976 | F1: 0.9974
Valid | Loss: 0.7452 | Acc: 0.9363 | F1: 0.9328


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                                                


[Epoch 25/25]
Train | Loss: 0.5916 | Acc: 0.9952 | F1: 0.9951
Valid | Loss: 0.7354 | Acc: 0.9395 | F1: 0.9353




In [7]:
# ------------------------
# üßæ Inference
# ------------------------
print("\nüöÄ Inference ÏãúÏûë...")
model.load_state_dict(torch.load(f"./best_swin_tiny_f1_{best_f1:.4f}.pt"))
model.eval()

# ÌÖåÏä§Ìä∏ÏÖã Î°úÎìú
sub = pd.read_csv(SUB_PATH)
preds_list = []

# ‚úÖ Ï†ÑÏ≤òÎ¶¨Îêú ÌÖåÏä§Ìä∏ Í≤ΩÎ°úÎ°ú Î≥ÄÍ≤Ω
TEST_PATH = "/data/ephemeral/home/data/processed/stage0_5_test/"

for name in tqdm(sub["ID"], desc="Inference"):
    # ‚úÖ group Ìè¥Îçî ÎÇ¥Î∂ÄÎèÑ ÌÉêÏÉâ
    matches = glob.glob(os.path.join(TEST_PATH, "**", name), recursive=True)
    if not matches:
        raise FileNotFoundError(f"ÌÖåÏä§Ìä∏ Ïù¥ÎØ∏ÏßÄ ÏóÜÏùå: {name}")
    img_path = matches[0]

    # ‚úÖ Ï†ÑÏ≤òÎ¶¨Îêú Ïù¥ÎØ∏ÏßÄ Î°úÎìú
    img = np.array(Image.open(img_path).convert("RGB"))
    img = tst_transform(image=img)["image"].unsqueeze(0).to(device)

    with torch.no_grad():
        preds = model(img)

    preds_list.append(preds.argmax(1).item())

sub["target"] = preds_list
sub.to_csv("v1_swin_tiny_split.csv", index=False)
print("üéØ Inference complete! Saved to v1_swin_tiny_split.csv")



üöÄ Inference ÏãúÏûë...


Inference: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3140/3140 [00:45<00:00, 68.50it/s]

üéØ Inference complete! Saved to v1_swin_tiny_split.csv



