In [None]:
!pip install gdown
!gdown --id 1AsslIqApC-3Lg8K1CU7BtIpUPcmVlUF9

In [1]:
import pandas as pd

df = pd.read_csv('/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv')
print(df.head())

                                   uuid         style
0  d5f389b5-2cce-4450-8a3f-b66ba8404a2e   Ink scenery
1  8e122d11-8216-4af1-b059-dbf8507b9eb6   Ink scenery
2  cca68598-f417-4c4f-b259-ab2e13aa0b7a         pixel
3  d147e63b-499f-476b-aabb-96df5edfdc76  oil painting
4  007df8a8-4ef0-404a-8c69-a63016512da8   Ink scenery


In [None]:
import os, copy, random, warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# ============================
# 1) CONFIG & SEED
# ============================
CFG = {
    "image_dir": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/image",
    "batch_size": 32,
    "epochs_stage1": 5,
    "epochs_stage2": 25,
    "patience": 6,
    "lr": 3e-4,
    "weight_decay": 1e-4,
    "label_smoothing": 0.1,
    "ema_decay": 0.999,
    "seed": 42,
    "unk_threshold": 0.4  # ถ้า max prob < threshold => UNK
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_amp = (device.type == "cuda")

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); 
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG["seed"])
ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ============================
# 2) Dataset
# ============================
class ArtDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.loc[idx]
        img_path = row['path']
        label = row['label']
        # บางไฟล์อาจมีปัญหา ให้ retry/open แบบ robust
        with Image.open(img_path) as im:
            image = im.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# ============================
# 3) Prepare Data
# ============================
train_csv = "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv"
df = pd.read_csv(train_csv)
df['path'] = df['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

label_to_idx = {label: idx for idx, label in enumerate(sorted(df['style'].unique()))}
idx_to_label = {v: k for k, v in label_to_idx.items()}
df['label'] = df['style'].map(label_to_idx).astype(int)

train_df, val_df = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=CFG["seed"]
)

# Augs
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

# DataLoader ที่ปลอดภัยกับ Kaggle/CPU-only
num_workers = min(4, os.cpu_count() or 1)
use_persistent = (num_workers > 0) and (device.type == "cuda")

train_loader = DataLoader(
    ArtDataset(train_df, train_transform),
    batch_size=CFG["batch_size"], shuffle=True, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)
val_loader = DataLoader(
    ArtDataset(val_df, val_transform),
    batch_size=CFG["batch_size"], shuffle=False, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)

# ============================
# 4) Model
# ============================
num_classes_orig = len(label_to_idx)
num_classes = num_classes_orig + 1  # เพิ่ม UNK เป็นคลาสท้าย
model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=CFG["label_smoothing"])
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

optimizer = optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=CFG["epochs_stage1"]+CFG["epochs_stage2"]
)

# EMA model
ema_model = copy.deepcopy(model)
for p in ema_model.parameters():
    p.requires_grad = False

@torch.no_grad()
def update_ema(model, ema_model, decay):
    msd, emsd = model.state_dict(), ema_model.state_dict()
    for k in msd.keys():
        emsd[k] = emsd[k] * decay + msd[k] * (1 - decay)

# ============================
# 5) Training Functions
# ============================
def run_epoch(loader, train_mode=True):
    model.train(train_mode)
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in tqdm(loader, leave=False, desc="Train" if train_mode else "Val"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)

        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        if train_mode:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            update_ema(model, ema_model, CFG["ema_decay"])

        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    avg_loss = running_loss / max(1, len(loader))
    acc = 100.0 * correct / max(1, total)
    return avg_loss, acc

def set_backbone_freeze(freeze: bool):
    if freeze:
        for param in model.features.parameters():
            param.requires_grad = False
    else:
        for param in model.parameters():
            param.requires_grad = True

def validate_on_val():
    # ใช้ model ปัจจุบัน (ไม่ใช่ EMA) เพื่อให้เทียบผลต่อเนื่องกับ training
    model.eval()
    loss, acc = run_epoch(val_loader, train_mode=False)
    return loss, acc

def train_model(stage_epochs, freeze_backbone=True):
    global best_acc, patience_counter, best_model_wts

    set_backbone_freeze(freeze_backbone)

    for epoch in range(stage_epochs):
        print(f"\nEpoch {epoch+1}/{stage_epochs} | {'Freeze' if freeze_backbone else 'Unfreeze'} Backbone")
        train_loss, train_acc = run_epoch(train_loader, True)
        val_loss, val_acc = validate_on_val()

        scheduler.step()
        print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.2f}% || Val Loss {val_loss:.4f} | Acc {val_acc:.2f}%")

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "best_model.pth")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= CFG["patience"]:
                print("Early stopping.")
                return

# ============================
# 6) Training
# ============================
best_acc, patience_counter = 0.0, 0
best_model_wts = copy.deepcopy(model.state_dict())

train_model(CFG["epochs_stage1"], freeze_backbone=True)
train_model(CFG["epochs_stage2"], freeze_backbone=False)

# โหลดน้ำหนักที่ดีที่สุดกลับเข้า model และซิงก์ให้ ema_model ด้วย
model.load_state_dict(best_model_wts)
ema_model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), "last_model.pth")
print(f"✅ Best Val Acc: {best_acc:.2f}% | saved best_model.pth & last_model.pth")

# ============================
# 7) Inference + TTA + UNK
# ============================
# เพิ่ม variety ของ TTA อีกเล็กน้อย
tta_transforms = [
    # base
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    # hflip
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    # rotation
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    # center crop style
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    # jitter/perspective เล็กน้อย
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ColorJitter(0.05, 0.05, 0.05, 0.02),
        transforms.RandomPerspective(distortion_scale=0.2, p=0.3),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
]

# ใช้ label_to_idx จาก training + เพิ่ม UNK
idx_to_label = {v: k for k, v in label_to_idx.items()}
idx_to_label[num_classes-1] = "UNK"

submission = pd.read_csv("/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/sample_submission.csv")
submission['path'] = submission['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

ema_model.eval()
all_preds = []

with torch.no_grad():
    for i in tqdm(range(0, len(submission), CFG["batch_size"]), desc="Predicting"):
        batch_paths = submission['path'].iloc[i:i+CFG["batch_size"]].tolist()

        # preload images once
        imgs_loaded = []
        for pth in batch_paths:
            with Image.open(pth) as im:
                imgs_loaded.append(im.convert("RGB"))

        batch_sum = None
        for tform in tta_transforms:
            imgs = torch.stack([tform(img) for img in imgs_loaded]).to(device, non_blocking=True)
            outputs = ema_model(imgs)
            probs = torch.softmax(outputs, dim=1)
            batch_sum = probs if batch_sum is None else (batch_sum + probs)

        avg_probs = batch_sum / len(tta_transforms)
        max_probs, argmax = avg_probs.max(1)

        # threshold → UNK
        final_idx = [
            a.item() if mp.item() >= CFG["unk_threshold"] else (num_classes - 1)
            for a, mp in zip(argmax, max_probs)
        ]
        all_preds.extend([idx_to_label[j] for j in final_idx])

submission['style'] = all_preds
out_path = "submission_TTA_EMA_UNK_final.csv"
submission.to_csv(out_path, index=False)
print(f"🎯 Saved to {out_path}")

0.80964

In [None]:
import os, copy, random, warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# ============================
# 1) CONFIG & SEED
# ============================
CFG = {
    "image_dir": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/image",
    "batch_size": 32,
    "epochs_stage1": 5,
    "epochs_stage2": 25,
    "patience": 6,
    "lr": 3e-4,
    "weight_decay": 1e-4,
    "label_smoothing": 0.1,
    "ema_decay": 0.999,
    "seed": 42,
    "unk_threshold": 0.4,
    "unk_margin": 0.1  # ส่วนต่าง top1-top2 อย่างน้อย
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_amp = (device.type == "cuda")

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); 
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG["seed"])
ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ============================
# 2) Dataset
# ============================
class ArtDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.loc[idx]
        img_path = row['path']
        label = row['label']
        with Image.open(img_path) as im:
            image = im.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# ============================
# 3) Prepare Data
# ============================
train_csv = "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv"
df = pd.read_csv(train_csv)
df['path'] = df['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

label_to_idx = {label: idx for idx, label in enumerate(sorted(df['style'].unique()))}
idx_to_label = {v: k for k, v in label_to_idx.items()}
df['label'] = df['style'].map(label_to_idx).astype(int)

train_df, val_df = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=CFG["seed"]
)

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

num_workers = min(4, os.cpu_count() or 1)
use_persistent = (num_workers > 0) and (device.type == "cuda")

train_loader = DataLoader(
    ArtDataset(train_df, train_transform),
    batch_size=CFG["batch_size"], shuffle=True, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)
val_loader = DataLoader(
    ArtDataset(val_df, val_transform),
    batch_size=CFG["batch_size"], shuffle=False, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)

# ============================
# 4) Model
# ============================
num_classes_orig = len(label_to_idx)
num_classes = num_classes_orig + 1  # เพิ่ม UNK
model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=CFG["label_smoothing"])
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

optimizer = optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=CFG["epochs_stage1"]+CFG["epochs_stage2"]
)

# EMA model
ema_model = copy.deepcopy(model)
for p in ema_model.parameters():
    p.requires_grad = False

@torch.no_grad()
def update_ema(model, ema_model, decay):
    msd = model.state_dict()
    emsd = ema_model.state_dict()
    for k, v in msd.items():
        if v.dtype.is_floating_point:
            emsd[k].mul_(decay).add_(v, alpha=1 - decay)
        else:
            emsd[k].copy_(v)

# ============================
# 5) Training Functions
# ============================
def freeze_backbone(model, freeze=True):
    for name, param in model.named_parameters():
        if "classifier" not in name:
            param.requires_grad = not freeze
        else:
            param.requires_grad = True

def train_one_epoch(loader):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(loader, leave=False, desc="Train"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast("cuda", enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        update_ema(model, ema_model, CFG["ema_decay"])
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(1, len(loader)), 100.0 * correct / max(1, total)

@torch.no_grad()
def evaluate(loader, eval_model):
    eval_model.eval()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(loader, leave=False, desc="Val"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = eval_model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(1, len(loader)), 100.0 * correct / max(1, total)

def train_model(stage_epochs, freeze_back=True):
    global best_acc, patience_counter, best_model_wts
    freeze_backbone(model, freeze_back)
    for epoch in range(stage_epochs):
        print(f"\nEpoch {epoch+1}/{stage_epochs} | {'Freeze' if freeze_back else 'Unfreeze'} Backbone")
        train_loss, train_acc = train_one_epoch(train_loader)
        val_loss, val_acc = evaluate(val_loader, model)
        scheduler.step()
        print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.2f}% || Val Loss {val_loss:.4f} | Acc {val_acc:.2f}%")
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "best_model.pth")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= CFG["patience"]:
                print("Early stopping.")
                return

# เปิด cuDNN benchmark เพื่อเร่งความเร็วถ้า input size คงที่
if device.type == "cuda":
    torch.backends.cudnn.benchmark = True

# ============================
# 6) Training
# ============================
best_acc, patience_counter = 0.0, 0
best_model_wts = copy.deepcopy(model.state_dict())

train_model(CFG["epochs_stage1"], freeze_back=True)
train_model(CFG["epochs_stage2"], freeze_back=False)

model.load_state_dict(best_model_wts)
ema_model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), "last_model.pth")
print(f"✅ Best Val Acc: {best_acc:.2f}% | saved best_model.pth & last_model.pth")

# ============================
# 7) Inference + TTA + UNK
# ============================
tta_transforms = [
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ColorJitter(0.05, 0.05, 0.05, 0.02),
        transforms.RandomPerspective(distortion_scale=0.2, p=0.3),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ]),
]

idx_to_label = {v: k for k, v in label_to_idx.items()}
idx_to_label[num_classes-1] = "UNK"

submission = pd.read_csv("/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/sample_submission.csv")
submission['path'] = submission['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

ema_model.eval()
all_preds = []

with torch.no_grad():
    for i in tqdm(range(0, len(submission), CFG["batch_size"]), desc="Predicting"):
        batch_paths = submission['path'].iloc[i:i+CFG["batch_size"]].tolist()
        imgs_loaded = []
        for pth in batch_paths:
            with Image.open(pth) as im:
                imgs_loaded.append(im.convert("RGB"))
        batch_sum = None
        for tform in tta_transforms:
            imgs = torch.stack([tform(img) for img in imgs_loaded]).to(device, non_blocking=True)
            outputs = ema_model(imgs)
            probs = torch.softmax(outputs, dim=1)
            batch_sum = probs if batch_sum is None else (batch_sum + probs)
        avg_probs = batch_sum / len(tta_transforms)
        top2_probs, top2_idxs = avg_probs.topk(2, dim=1)
        final_idx = [
            t1.item() if (t1_prob.item() >= CFG["unk_threshold"] 
                          and (t1_prob - t2_prob).item() >= CFG["unk_margin"]) 
            else num_classes - 1
            for t1_prob, t2_prob, t1 in zip(top2_probs[:,0], top2_probs[:,1], top2_idxs[:,0])
        ]
        all_preds.extend([idx_to_label[j] for j in final_idx])

submission['style'] = all_preds
out_path = "submission_TTA_EMA_UNK_final.csv"
submission.to_csv(out_path, index=False)
print(f"🎯 Saved to {out_path}")

.2

0.85329

In [None]:
import os, copy, random, warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.optim.swa_utils import AveragedModel, SWALR

# ============================
# 1) CONFIG & SEED
# ============================
CFG = {
    "image_dir": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/image",
    "batch_size": 32,
    "epochs_stage1": 5,
    "epochs_stage2": 25,
    "patience": 6,
    "lr": 3e-4,
    "weight_decay": 1e-4,
    "label_smoothing": 0.1,
    "ema_decay": 0.999,
    "seed": 42,
    "unk_threshold": 0.4,
    "unk_margin": 0.1,
    "temperature": 1.5,
    "mixup_alpha": 0.4
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_amp = (device.type == "cuda")

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG["seed"])
ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ============================
# 2) Dataset
# ============================
class ArtDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.loc[idx]
        img_path = row['path']
        label = row['label']
        with Image.open(img_path) as im:
            image = im.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# ============================
# 3) Prepare Data
# ============================
train_csv = "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv"
df = pd.read_csv(train_csv)
df['path'] = df['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

label_to_idx = {label: idx for idx, label in enumerate(sorted(df['style'].unique()))}
idx_to_label = {v: k for k, v in label_to_idx.items()}
df['label'] = df['style'].map(label_to_idx).astype(int)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=CFG["seed"])

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

num_workers = min(4, os.cpu_count() or 1)
use_persistent = (num_workers > 0) and (device.type == "cuda")

train_loader = DataLoader(
    ArtDataset(train_df, train_transform),
    batch_size=CFG["batch_size"], shuffle=True, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)
val_loader = DataLoader(
    ArtDataset(val_df, val_transform),
    batch_size=CFG["batch_size"], shuffle=False, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)

# ============================
# 4) Model
# ============================
num_classes_orig = len(label_to_idx)
num_classes = num_classes_orig + 1  # เพิ่ม UNK
model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=CFG["label_smoothing"])
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
optimizer = optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG["epochs_stage1"]+CFG["epochs_stage2"])

ema_model = copy.deepcopy(model)
for p in ema_model.parameters():
    p.requires_grad = False

@torch.no_grad()
def update_ema(model, ema_model, decay):
    msd = model.state_dict()
    emsd = ema_model.state_dict()
    for k, v in msd.items():
        if v.dtype.is_floating_point:
            emsd[k].mul_(decay).add_(v, alpha=1 - decay)
        else:
            emsd[k].copy_(v)

# ============================
# 5) Mixup + CutMix
# ============================
def mixup_data(x, y, alpha=1.0):
    if alpha <= 0: return x, y, y, 1.0
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

def rand_bbox(size, lam):
    W, H = size[2], size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w, cut_h = int(W*cut_rat), int(H*cut_rat)
    cx, cy = np.random.randint(W), np.random.randint(H)
    bbx1, bby1 = np.clip(cx-cut_w//2, 0, W), np.clip(cy-cut_h//2, 0, H)
    bbx2, bby2 = np.clip(cx+cut_w//2, 0, W), np.clip(cy+cut_h//2, 0, H)
    return bbx1, bby1, bbx2, bby2

def cutmix_data(x, y, alpha=1.0):
    if alpha <= 0: return x, y, y, 1.0
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    x[:, :, bbx1:bbx2, bby1:bby2] = x[index, :, bbx1:bbx2, bby1:bby2]
    y_a, y_b = y, y[index]
    lam = 1 - ((bbx2-bbx1)*(bby2-bby1)/(x.size(-1)*x.size(-2)))
    return x, y_a, y_b, lam

def cutmix_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

# ============================
# 6) Training Functions
# ============================
def freeze_backbone(model, freeze=True):
    for name, param in model.named_parameters():
        if "classifier" not in name:
            param.requires_grad = not freeze
        else:
            param.requires_grad = True

def train_one_epoch(loader, use_cutmix=False):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(loader, leave=False, desc="Train"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        if use_cutmix:
            images, y_a, y_b, lam = cutmix_data(images, labels, CFG["mixup_alpha"])
            criterion_fn = cutmix_criterion
        else:
            images, y_a, y_b, lam = mixup_data(images, labels, CFG["mixup_alpha"])
            criterion_fn = mixup_criterion
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion_fn(criterion, outputs, y_a, y_b, lam)
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        scaler.step(optimizer)
        scaler.update()
        update_ema(model, ema_model, CFG["ema_decay"])
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(1, len(loader)), 100.0 * correct / max(1, total)

@torch.no_grad()
def evaluate(loader, eval_model):
    eval_model.eval()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(loader, leave=False, desc="Val"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = eval_model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(1, len(loader)), 100.0 * correct / max(1, total)

# ============================
# 7) Training Loop + SWA
# ============================
best_acc, patience_counter = 0.0, 0
best_model_wts = copy.deepcopy(model.state_dict())
swa_model = AveragedModel(model)
swa_start = CFG["epochs_stage1"]
swa_scheduler = SWALR(optimizer, swa_lr=1e-5)

def train_model(stage_epochs, freeze_back=True, use_cutmix=False):
    global best_acc, patience_counter, best_model_wts
    freeze_backbone(model, freeze_back)
    for epoch in range(stage_epochs):
        print(f"\nEpoch {epoch+1}/{stage_epochs} | {'Freeze' if freeze_back else 'Unfreeze'} Backbone")
        train_loss, train_acc = train_one_epoch(train_loader, use_cutmix=use_cutmix)
        val_loss, val_acc = evaluate(val_loader, model)

        if not freeze_back and epoch >= swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            scheduler.step()

        print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.2f}% || Val Loss {val_loss:.4f} | Acc {val_acc:.2f}%")
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "best_model.pth")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= CFG["patience"]:
                print("Early stopping.")
                return
            elif patience_counter == 3:
                for g in optimizer.param_groups:
                    g['lr'] *= 0.2
                print(f"🔻 Reduce LR to {optimizer.param_groups[0]['lr']}")

if device.type == "cuda":
    torch.backends.cudnn.benchmark = True

# ============================
# 8) Run Training
# ============================
train_model(CFG["epochs_stage1"], freeze_back=True, use_cutmix=False)
train_model(CFG["epochs_stage2"], freeze_back=False, use_cutmix=True)

torch.optim.swa_utils.update_bn(train_loader, swa_model, device=device)
torch.save(swa_model.module.state_dict(), "swa_model.pth")
model.load_state_dict(best_model_wts)
ema_model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), "last_model.pth")
print(f"✅ Best Val Acc: {best_acc:.2f}% | saved best_model.pth, last_model.pth & swa_model.pth")

# ============================
# 9) Optimized Inference + TTA + UNK
# ============================
tta_transforms = [
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
    ]),
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.ToTensor(),
        transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
    ]),
]

idx_to_label[num_classes-1] = "UNK"
submission = pd.read_csv("/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/sample_submission.csv")
submission['path'] = submission['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

ema_model.eval()
all_preds = []

with torch.no_grad():
    for i in tqdm(range(0, len(submission), CFG["batch_size"]), desc="Predicting"):
        batch_paths = submission['path'].iloc[i:i+CFG["batch_size"]].tolist()
        imgs_loaded = [Image.open(p).convert("RGB") for p in batch_paths]
        batch_sum = None
        for tform in tta_transforms:
            imgs = torch.stack([tform(img) for img in imgs_loaded]).to(device)
            outputs = ema_model(imgs)
            probs = torch.softmax(outputs / CFG["temperature"], dim=1)
            batch_sum = probs if batch_sum is None else batch_sum + probs
        avg_probs = batch_sum / len(tta_transforms)
        top2_probs, top2_idxs = avg_probs.topk(2, dim=1)
        final_idx = [
            t1.item() if (t1_prob.item() >= CFG["unk_threshold"] 
                          and (t1_prob - t2_prob).item() >= CFG["unk_margin"]) 
            else num_classes-1
            for t1_prob, t2_prob, t1 in zip(top2_probs[:,0], top2_probs[:,1], top2_idxs[:,0])
        ]
        all_preds.extend([idx_to_label[j] for j in final_idx])

submission['style'] = all_preds
out_path = "submission_TTA_EMA_UNK_optimized.csv"
submission.to_csv(out_path, index=False)
print(f"🎯 Saved to {out_path}")


In [None]:
# ============================
# 1) IMPORTS
# ============================
import os, copy, random, warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.optim.swa_utils import AveragedModel, SWALR

# ============================
# 2) CONFIG & SEED
# ============================
CFG = {
    "image_dir": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/image",
    "batch_size": 32,
    "epochs_stage1": 5,
    "epochs_stage2": 25,
    "patience": 6,
    "lr": 3e-4,
    "weight_decay": 1e-4,
    "label_smoothing": 0.1,
    "ema_decay": 0.999,
    "seed": 42,
    "unk_threshold": 0.4,
    "unk_margin": 0.1,
    "temperature": 1.5,
    "mixup_alpha": 0.4
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_amp = (device.type == "cuda")

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG["seed"])
ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ============================
# 3) DATASET
# ============================
class ArtDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.loc[idx]
        img_path = row['path']
        label = row['label']
        with Image.open(img_path) as im:
            image = im.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# ============================
# 4) PREPARE DATA
# ============================
train_csv = "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv"
df = pd.read_csv(train_csv)
df['path'] = df['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

label_to_idx = {label: idx for idx, label in enumerate(sorted(df['style'].unique()))}
idx_to_label = {v: k for k, v in label_to_idx.items()}
df['label'] = df['style'].map(label_to_idx).astype(int)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=CFG["seed"])

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

num_workers = os.cpu_count()  # ใช้เต็มที่
use_persistent = (num_workers > 0) and (device.type == "cuda")

train_loader = DataLoader(
    ArtDataset(train_df, train_transform),
    batch_size=CFG["batch_size"], shuffle=True, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)
val_loader = DataLoader(
    ArtDataset(val_df, val_transform),
    batch_size=CFG["batch_size"], shuffle=False, num_workers=num_workers,
    pin_memory=(device.type=="cuda"), persistent_workers=use_persistent
)

# ============================
# 5) MODEL
# ============================
num_classes_orig = len(label_to_idx)
num_classes = num_classes_orig + 1  # เพิ่ม UNK
model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=CFG["label_smoothing"])
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
optimizer = optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG["epochs_stage1"]+CFG["epochs_stage2"])

ema_model = copy.deepcopy(model)
for p in ema_model.parameters():
    p.requires_grad = False

@torch.no_grad()
def update_ema(model, ema_model, decay):
    msd = model.state_dict()
    emsd = ema_model.state_dict()
    for k, v in msd.items():
        if v.dtype.is_floating_point:
            emsd[k].mul_(decay).add_(v.detach(), alpha=1 - decay)
        else:
            emsd[k].copy_(v)

# ============================
# 6) MIXUP + CUTMIX
# ============================
def mixup_data(x, y, alpha=1.0):
    if alpha <= 0: return x, y, y, 1.0
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

def rand_bbox(size, lam):
    W, H = size[3], size[2]
    cut_rat = np.sqrt(1. - lam)
    cut_w, cut_h = int(W*cut_rat), int(H*cut_rat)
    cx, cy = np.random.randint(W), np.random.randint(H)
    bbx1, bby1 = np.clip(cx-cut_w//2, 0, W), np.clip(cy-cut_h//2, 0, H)
    bbx2, bby2 = np.clip(cx+cut_w//2, 0, W), np.clip(cy+cut_h//2, 0, H)
    return bbx1, bby1, bbx2, bby2

def cutmix_data(x, y, alpha=1.0):
    if alpha <= 0: return x, y, y, 1.0
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]
    y_a, y_b = y, y[index]
    lam = 1 - ((bbx2-bbx1)*(bby2-bby1)/(x.size(-1)*x.size(-2)))
    return x, y_a, y_b, lam

def cutmix_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

# ============================
# 7) TRAINING
# ============================
def freeze_backbone(model, freeze=True):
    for name, param in model.named_parameters():
        if "classifier" not in name:
            param.requires_grad = not freeze
        else:
            param.requires_grad = True

def train_one_epoch(loader, use_cutmix=False):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(loader, leave=False, desc="Train"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        if use_cutmix:
            images, y_a, y_b, lam = cutmix_data(images, labels, CFG["mixup_alpha"])
            criterion_fn = cutmix_criterion
        else:
            images, y_a, y_b, lam = mixup_data(images, labels, CFG["mixup_alpha"])
            criterion_fn = mixup_criterion

        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion_fn(criterion, outputs, y_a, y_b, lam)
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
        scaler.step(optimizer)
        scaler.update()
        update_ema(model, ema_model, CFG["ema_decay"])
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(1, len(loader)), 100.0 * correct / max(1, total)

@torch.no_grad()
def evaluate(loader, eval_model):
    eval_model.eval()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in tqdm(loader, leave=False, desc="Val"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = eval_model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(1, len(loader)), 100.0 * correct / max(1, total)

best_acc, patience_counter = 0.0, 0
best_model_wts = copy.deepcopy(model.state_dict())
swa_model = AveragedModel(model)
swa_start = CFG["epochs_stage1"]
swa_scheduler = SWALR(optimizer, swa_lr=1e-5)

def train_model(stage_epochs, freeze_back=True, use_cutmix=False):
    global best_acc, patience_counter, best_model_wts
    freeze_backbone(model, freeze_back)
    for epoch in range(stage_epochs):
        print(f"\nEpoch {epoch+1}/{stage_epochs} | {'Freeze' if freeze_back else 'Unfreeze'} Backbone")
        train_loss, train_acc = train_one_epoch(train_loader, use_cutmix=use_cutmix)
        val_loss, val_acc = evaluate(val_loader, model)

        if not freeze_back and (epoch >= swa_start):
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            scheduler.step()

        print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.2f}% || Val Loss {val_loss:.4f} | Acc {val_acc:.2f}%")
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "best_model.pth")
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= CFG["patience"]:
                print("Early stopping.")
                return
            elif patience_counter == 3:
                for g in optimizer.param_groups:
                    g['lr'] *= 0.2
                print(f"🔻 Reduce LR to {optimizer.param_groups[0]['lr']}")

# ============================
# 8) RUN TRAINING
# ============================
if device.type == "cuda":
    torch.backends.cudnn.benchmark = True

train_model(CFG["epochs_stage1"], freeze_back=True, use_cutmix=False)
train_model(CFG["epochs_stage2"], freeze_back=False, use_cutmix=True)

torch.optim.swa_utils.update_bn(train_loader, swa_model, device=device)
torch.save(swa_model.module.state_dict(), "swa_model.pth")
model.load_state_dict(best_model_wts)
ema_model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), "last_model.pth")
print(f"✅ Best Val Acc: {best_acc:.2f}% | saved best_model.pth, last_model.pth & swa_model.pth")

# ============================
# 9) INFERENCE + TTA + UNK
# ============================
idx_to_label[num_classes-1] = "UNK"

def predict_with_tta_paths(paths, eval_model, tta_transforms, batch_size=CFG["batch_size"], 
                           temperature=CFG["temperature"], dynamic_unk=True):
    eval_model.eval()
    preds_out = []
    unk_count, seen = 0, 0
    unk_threshold = CFG["unk_threshold"]
    unk_margin = CFG["unk_margin"]

    with torch.no_grad():
        for i in tqdm(range(0, len(paths), batch_size), desc="Predicting"):
            batch_paths = paths[i:i+batch_size]
            imgs_loaded = [Image.open(p).convert("RGB") for p in batch_paths]
            batch_sum = None
            for tform in tta_transforms:
                imgs = torch.stack([tform(img) for img in imgs_loaded]).to(device, non_blocking=True)
                with torch.cuda.amp.autocast(enabled=use_amp):
                    outputs = eval_model(imgs)
                    probs = torch.softmax(outputs / temperature, dim=1)
                batch_sum = probs if batch_sum is None else (batch_sum + probs)
            avg_probs = batch_sum / len(tta_transforms)
            top2_probs, top2_idxs = avg_probs.topk(2, dim=1)
            final_idx = []
            for t1_prob, t2_prob, t1 in zip(top2_probs[:, 0], top2_probs[:, 1], top2_idxs[:, 0]):
                t1p, t2p = t1_prob.item(), t2_prob.item()
                if (t1p >= unk_threshold) and ((t1p - t2p) >= unk_margin):
                    final_idx.append(t1.item())
                else:
                    final_idx.append(num_classes - 1)
            if dynamic_unk:
                seen += len(final_idx)
                unk_count += sum(1 for j in final_idx if j == num_classes - 1)
                curr_unk_rate = unk_count / max(1, seen)
                if curr_unk_rate > 0.30:
                    unk_threshold = max(0.20, unk_threshold - 0.05)
                elif curr_unk_rate < 0.05:
                    unk_threshold = min(0.80, unk_threshold + 0.02)
            preds_out.extend([idx_to_label[j] for j in final_idx])

    if dynamic_unk:
        print(f"ℹ️ Final UNK rate: {unk_count/max(1,seen):.2%} | final unk_threshold used: {unk_threshold:.2f}")
    return preds_out

submission = pd.read_csv("/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/sample_submission.csv")
submission['path'] = submission['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

tta_transforms = [
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
    ]),
    transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.ToTensor(),
        transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
    ]),
]

preds = predict_with_tta_paths(
    submission['path'].tolist(),
    eval_model=ema_model,
    tta_transforms=tta_transforms,
    batch_size=CFG["batch_size"],
    temperature=CFG["temperature"],
    dynamic_unk=True
)

submission['style'] = preds
out_path = "submission_TTA_EMA_UNK_optimized.csv"
submission.to_csv(out_path, index=False)
print(f"🎯 Saved to {out_path}")


0.87108


In [None]:
# ============================
# 1) IMPORTS
# ============================
import os, copy, random, warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.optim.swa_utils import AveragedModel, SWALR
from timm.models.layers import DropPath

warnings.filterwarnings("ignore", category=UserWarning)
ImageFile.LOAD_TRUNCATED_IMAGES = True

# ============================
# 2) CONFIG & SEED
# ============================
CFG = {
    "image_dir": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/image",
    "train_csv": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv",
    "submission_csv": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/sample_submission.csv",
    "batch_size": 32,
    "epochs_stage1": 5,
    "epochs_stage2": 25,
    "patience": 6,
    "lr": 3e-4,
    "weight_decay": 1e-4,
    "label_smoothing": 0.1,
    "ema_decay": 0.999,
    "seed": 42,
    "unk_threshold": 0.4,
    "unk_margin": 0.1,
    "temperature": 1.5,
    "mixup_alpha": 0.4
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_amp = (device.type=="cuda")

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG["seed"])

# ============================
# 3) DATASET
# ============================
class ArtDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.loc[idx]
        img_path = row['path']
        label = row['label']
        with Image.open(img_path) as im:
            image = im.convert("RGB")
        if self.transform: image = self.transform(image)
        return image, label

# ============================
# 4) DATA PREP
# ============================
df = pd.read_csv(CFG["train_csv"])
df['path'] = df['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))
label_to_idx = {label: idx for idx, label in enumerate(sorted(df['style'].unique()))}
idx_to_label = {v:k for k,v in label_to_idx.items()}
df['label'] = df['style'].map(label_to_idx).astype(int)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=CFG["seed"])

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
])
val_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
])

num_workers = os.cpu_count()
use_persistent = (num_workers>0) and (device.type=="cuda")

train_loader = DataLoader(ArtDataset(train_df, train_transform), batch_size=CFG["batch_size"],
                          shuffle=True, num_workers=num_workers, pin_memory=(device.type=="cuda"),
                          persistent_workers=use_persistent)
val_loader = DataLoader(ArtDataset(val_df, val_transform), batch_size=CFG["batch_size"],
                        shuffle=False, num_workers=num_workers, pin_memory=(device.type=="cuda"),
                        persistent_workers=use_persistent)

# ============================
# 5) MODEL: EfficientNetB4 + Dropout + Stochastic Depth
# ============================
class EfficientNetB4_Enhanced(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        base_model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
        # stochastic depth
        for name,module in base_model.features.named_modules():
            if hasattr(module,'drop_path'): module.drop_path = DropPath(0.2)
        in_features = base_model.classifier[1].in_features
        self.model = base_model
        self.model.classifier[1] = nn.Sequential(nn.Dropout(0.5), nn.Linear(in_features,num_classes))
    def forward(self,x): return self.model(x)

num_classes_orig = len(label_to_idx)
num_classes = num_classes_orig + 1  # เพิ่ม UNK
model = EfficientNetB4_Enhanced(num_classes).to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=CFG["label_smoothing"])
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
optimizer = optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG["epochs_stage1"]+CFG["epochs_stage2"])

ema_model = copy.deepcopy(model)
for p in ema_model.parameters(): p.requires_grad=False
swa_model = AveragedModel(model)
swa_start = CFG["epochs_stage1"]
swa_scheduler = SWALR(optimizer, swa_lr=1e-5)

@torch.no_grad()
def update_ema(model, ema_model, decay):
    msd = model.state_dict()
    emsd = ema_model.state_dict()
    for k,v in msd.items():
        if v.dtype.is_floating_point: emsd[k].mul_(decay).add_(v.detach(), alpha=1-decay)
        else: emsd[k].copy_(v)

# ============================
# 6) MIXUP + CUTMIX ADVANCED
# ============================
def mixup_data(x, y, alpha=0.4):
    lam = np.random.beta(alpha, alpha) if alpha>0 else 1.0
    index = torch.randperm(x.size(0)).to(x.device)
    return lam*x + (1-lam)*x[index,:], y, y[index], lam

def cutmix_data(x, y, alpha=0.4):
    lam = np.random.beta(alpha, alpha) if alpha>0 else 1.0
    index = torch.randperm(x.size(0)).to(x.device)
    W,H = x.size(3), x.size(2)
    cut_rat = np.sqrt(1-lam)
    cut_w, cut_h = int(W*cut_rat), int(H*cut_rat)
    cx,cy = np.random.randint(W), np.random.randint(H)
    bbx1, bby1 = np.clip(cx-cut_w//2,0,W), np.clip(cy-cut_h//2,0,H)
    bbx2, bby2 = np.clip(cx+cut_w//2,0,W), np.clip(cy+cut_h//2,0,H)
    x[:,:,bby1:bby2,bbx1:bbx2] = x[index,:,bby1:bby2,bbx1:bbx2]
    lam = 1-((bbx2-bbx1)*(bby2-bby1)/(W*H))
    return x, y, y[index], lam

def mixup_cutmix_prob(x,y,alpha=0.4,prob_cutmix=0.5):
    return cutmix_data(x,y,alpha) if np.random.rand()<prob_cutmix else mixup_data(x,y,alpha)

def mixup_cutmix_criterion(pred,y_a,y_b,lam):
    return lam*criterion(pred,y_a)+(1-lam)*criterion(pred,y_b)

# ============================
# 7) TRAINING FUNCTIONS
# ============================
def freeze_backbone(model, freeze=True):
    for name,param in model.named_parameters():
        param.requires_grad = not freeze if "classifier" not in name else True

def train_one_epoch_adv(loader):
    model.train(); running_loss, correct, total=0.,0,0
    for images, labels in tqdm(loader, leave=False, desc="Train"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        images, y_a, y_b, lam = mixup_cutmix_prob(images, labels, CFG["mixup_alpha"], 0.5)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = mixup_cutmix_criterion(outputs,y_a,y_b,lam)
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),10.0)
        scaler.step(optimizer); scaler.update()
        update_ema(model, ema_model, CFG["ema_decay"])
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds==labels).sum().item()
        total += labels.size(0)
    return running_loss/max(1,len(loader)),100.*correct/max(1,total)

@torch.no_grad()
def evaluate(loader, eval_model):
    eval_model.eval(); running_loss, correct, total=0.,0,0
    for images, labels in tqdm(loader, leave=False, desc="Val"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = eval_model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds==labels).sum().item()
        total += labels.size(0)
    return running_loss/max(1,len(loader)),100.*correct/max(1,total)

best_acc, patience_counter = 0.,0
best_model_wts = copy.deepcopy(model.state_dict())

def train_model(stage_epochs, freeze_back=True):
    global best_acc, patience_counter, best_model_wts
    freeze_backbone(model, freeze_back)
    for epoch in range(stage_epochs):
        print(f"\nEpoch {epoch+1}/{stage_epochs} | {'Freeze' if freeze_back else 'Unfreeze'} Backbone")
        train_loss, train_acc = train_one_epoch_adv(train_loader)
        val_loss, val_acc = evaluate(val_loader, model)
        if not freeze_back and epoch>=swa_start: swa_model.update_parameters(model); swa_scheduler.step()
        else: scheduler.step()
        print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.2f}% || Val Loss {val_loss:.4f} | Acc {val_acc:.2f}%")
        if val_acc>best_acc:
            best_acc=val_acc; best_model_wts=copy.deepcopy(model.state_dict())
            torch.save(best_model_wts,"best_model.pth"); patience_counter=0
        else:
            patience_counter+=1
            if patience_counter>=CFG["patience"]: print("Early stopping."); return
            elif patience_counter==3:
                for g in optimizer.param_groups: g['lr']*=0.2
                print(f"🔻 Reduce LR to {optimizer.param_groups[0]['lr']}")

# ============================
# 8) RUN TRAINING
# ============================
if device.type=="cuda": torch.backends.cudnn.benchmark=True
train_model(CFG["epochs_stage1"], freeze_back=True)
train_model(CFG["epochs_stage2"], freeze_back=False)
torch.optim.swa_utils.update_bn(train_loader, swa_model, device=device)

torch.save(swa_model.state_dict(), "swa_model.pth")
torch.save(model.state_dict(), "last_model.pth")
torch.save(best_model_wts, "best_model.pth")
model.load_state_dict(best_model_wts)
ema_model.load_state_dict(best_model_wts)
print(f"✅ Best Val Acc: {best_acc:.2f}% | saved best_model.pth, last_model.pth & swa_model.pth")

# ============================
# 9) INFERENCE + TTA + Dynamic UNK + Ensemble
# ============================
idx_to_label[num_classes-1] = "UNK"
def combine_ema_swa(ema_model, swa_model, base_model):
    final_model = copy.deepcopy(base_model)
    final_sd = final_model.state_dict()
    ema_sd = ema_model.state_dict(); swa_sd = swa_model.state_dict()
    for k in final_sd.keys():
        if k in ema_sd and k in swa_sd: final_sd[k].copy_(0.5*ema_sd[k]+0.5*swa_sd[k])
    final_model.load_state_dict(final_sd)
    return final_model

ensemble_model = combine_ema_swa(ema_model, swa_model, model).to(device)

tta_transforms = [
    transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))]),
    transforms.Compose([transforms.Resize((224,224)), transforms.RandomHorizontalFlip(p=1.0), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))]),
    transforms.Compose([transforms.Resize((224,224)), transforms.RandomRotation(15), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))]),
    transforms.Compose([transforms.Resize((224,224)), transforms.ColorJitter(0.2,0.2,0.2,0.1), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))])
]

submission = pd.read_csv(CFG["submission_csv"])
submission['path'] = submission['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

preds = predict_with_tta_paths(submission['path'].tolist(), eval_model=ensemble_model,
                               tta_transforms=tta_transforms, batch_size=CFG["batch_size"],
                               temperature=CFG["temperature"], dynamic_unk=True)

submission['style'] = preds
submission.to_csv("submission_TTA_EMA_SWA_UNK_Enhanced.csv", index=False)
print(f"🎯 Saved to submission_TTA_EMA_SWA_UNK_Enhanced.csv")


0.87277

In [3]:
# ============================
# 1) IMPORTS
# ============================
import os, copy, random, warnings
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.optim.swa_utils import AveragedModel, SWALR
from timm.models.layers import DropPath

warnings.filterwarnings("ignore", category=UserWarning)
ImageFile.LOAD_TRUNCATED_IMAGES = True

# ============================
# 2) CONFIG & SEED
# ============================
CFG = {
    "image_dir": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/image",
    "train_csv": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/train.csv",
    "submission_csv": "/kaggle/input/saig-tech-mastery-2025-art-style-classification/competition/sample_submission.csv",
    "batch_size": 32,
    "epochs_stage1": 5,
    "epochs_stage2": 25,
    "patience": 6,
    "lr": 3e-4,
    "weight_decay": 1e-4,
    "label_smoothing": 0.1,
    "ema_decay": 0.999,
    "seed": 42,
    "unk_threshold": 0.4,
    "unk_margin": 0.1,
    "temperature": 1.5,
    "mixup_alpha": 0.4
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_amp = (device.type=="cuda")

def seed_everything(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG["seed"])

# ============================
# 3) DATASET
# ============================
class ArtDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.loc[idx]
        img_path = row['path']
        label = row['label']
        with Image.open(img_path) as im:
            image = im.convert("RGB")
        if self.transform: image = self.transform(image)
        return image, label

# ============================
# 4) DATA PREP
# ============================
df = pd.read_csv(CFG["train_csv"])
df['path'] = df['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))
label_to_idx = {label: idx for idx, label in enumerate(sorted(df['style'].unique()))}
idx_to_label = {v:k for k,v in label_to_idx.items()}
df['label'] = df['style'].map(label_to_idx).astype(int)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=CFG["seed"])

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.7,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
])
val_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
])

num_workers = os.cpu_count()
use_persistent = (num_workers>0) and (device.type=="cuda")

train_loader = DataLoader(ArtDataset(train_df, train_transform), batch_size=CFG["batch_size"],
                          shuffle=True, num_workers=num_workers, pin_memory=(device.type=="cuda"),
                          persistent_workers=use_persistent)
val_loader = DataLoader(ArtDataset(val_df, val_transform), batch_size=CFG["batch_size"],
                        shuffle=False, num_workers=num_workers, pin_memory=(device.type=="cuda"),
                        persistent_workers=use_persistent)

# ============================
# 5) MODEL: EfficientNetB4 + Dropout + Stochastic Depth
# ============================
class EfficientNetB4_Enhanced(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        base_model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1)
        # stochastic depth
        for name,module in base_model.features.named_modules():
            if hasattr(module,'drop_path'): module.drop_path = DropPath(0.2)
        in_features = base_model.classifier[1].in_features
        self.model = base_model
        self.model.classifier[1] = nn.Sequential(nn.Dropout(0.5), nn.Linear(in_features,num_classes))
    def forward(self,x): return self.model(x)

num_classes_orig = len(label_to_idx)
num_classes = num_classes_orig + 1  # เพิ่ม UNK
model = EfficientNetB4_Enhanced(num_classes).to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=CFG["label_smoothing"])
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
optimizer = optim.AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG["epochs_stage1"]+CFG["epochs_stage2"])

ema_model = copy.deepcopy(model)
for p in ema_model.parameters(): p.requires_grad=False
swa_model = AveragedModel(model)
swa_start = CFG["epochs_stage1"]
swa_scheduler = SWALR(optimizer, swa_lr=1e-5)

@torch.no_grad()
def update_ema(model, ema_model, decay):
    msd = model.state_dict()
    emsd = ema_model.state_dict()
    for k,v in msd.items():
        if v.dtype.is_floating_point: emsd[k].mul_(decay).add_(v.detach(), alpha=1-decay)
        else: emsd[k].copy_(v)

# ============================
# 6) MIXUP + CUTMIX ADVANCED
# ============================
def mixup_data(x, y, alpha=0.4):
    lam = np.random.beta(alpha, alpha) if alpha>0 else 1.0
    index = torch.randperm(x.size(0)).to(x.device)
    return lam*x + (1-lam)*x[index,:], y, y[index], lam

def cutmix_data(x, y, alpha=0.4):
    lam = np.random.beta(alpha, alpha) if alpha>0 else 1.0
    index = torch.randperm(x.size(0)).to(x.device)
    W,H = x.size(3), x.size(2)
    cut_rat = np.sqrt(1-lam)
    cut_w, cut_h = int(W*cut_rat), int(H*cut_rat)
    cx,cy = np.random.randint(W), np.random.randint(H)
    bbx1, bby1 = np.clip(cx-cut_w//2,0,W), np.clip(cy-cut_h//2,0,H)
    bbx2, bby2 = np.clip(cx+cut_w//2,0,W), np.clip(cy+cut_h//2,0,H)
    x[:,:,bby1:bby2,bbx1:bbx2] = x[index,:,bby1:bby2,bbx1:bbx2]
    lam = 1-((bbx2-bbx1)*(bby2-bby1)/(W*H))
    return x, y, y[index], lam

def mixup_cutmix_prob(x,y,alpha=0.4,prob_cutmix=0.5):
    return cutmix_data(x,y,alpha) if np.random.rand()<prob_cutmix else mixup_data(x,y,alpha)

def mixup_cutmix_criterion(pred,y_a,y_b,lam):
    return lam*criterion(pred,y_a)+(1-lam)*criterion(pred,y_b)

# ============================
# 7) TRAINING FUNCTIONS
# ============================
def freeze_backbone(model, freeze=True):
    for name,param in model.named_parameters():
        param.requires_grad = not freeze if "classifier" not in name else True

def train_one_epoch_adv(loader):
    model.train(); running_loss, correct, total=0.,0,0
    for images, labels in tqdm(loader, leave=False, desc="Train"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        images, y_a, y_b, lam = mixup_cutmix_prob(images, labels, CFG["mixup_alpha"], 0.5)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = mixup_cutmix_criterion(outputs,y_a,y_b,lam)
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),10.0)
        scaler.step(optimizer); scaler.update()
        update_ema(model, ema_model, CFG["ema_decay"])
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds==labels).sum().item()
        total += labels.size(0)
    return running_loss/max(1,len(loader)),100.*correct/max(1,total)

@torch.no_grad()
def evaluate(loader, eval_model):
    eval_model.eval(); running_loss, correct, total=0.,0,0
    for images, labels in tqdm(loader, leave=False, desc="Val"):
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = eval_model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        preds = outputs.argmax(1)
        correct += (preds==labels).sum().item()
        total += labels.size(0)
    return running_loss/max(1,len(loader)),100.*correct/max(1,total)

best_acc, patience_counter = 0.,0
best_model_wts = copy.deepcopy(model.state_dict())

def train_model(stage_epochs, freeze_back=True):
    global best_acc, patience_counter, best_model_wts
    freeze_backbone(model, freeze_back)
    for epoch in range(stage_epochs):
        print(f"\nEpoch {epoch+1}/{stage_epochs} | {'Freeze' if freeze_back else 'Unfreeze'} Backbone")
        train_loss, train_acc = train_one_epoch_adv(train_loader)
        val_loss, val_acc = evaluate(val_loader, model)
        if not freeze_back and epoch>=swa_start: swa_model.update_parameters(model); swa_scheduler.step()
        else: scheduler.step()
        print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.2f}% || Val Loss {val_loss:.4f} | Acc {val_acc:.2f}%")
        if val_acc>best_acc:
            best_acc=val_acc; best_model_wts=copy.deepcopy(model.state_dict())
            torch.save(best_model_wts,"best_model.pth"); patience_counter=0
        else:
            patience_counter+=1
            if patience_counter>=CFG["patience"]: print("Early stopping."); return
            elif patience_counter==3:
                for g in optimizer.param_groups: g['lr']*=0.2
                print(f"🔻 Reduce LR to {optimizer.param_groups[0]['lr']}")

# ============================
# 8) RUN TRAINING
# ============================
if device.type=="cuda": torch.backends.cudnn.benchmark=True
train_model(CFG["epochs_stage1"], freeze_back=True)
train_model(CFG["epochs_stage2"], freeze_back=False)
torch.optim.swa_utils.update_bn(train_loader, swa_model, device=device)

torch.save(swa_model.state_dict(), "swa_model.pth")
torch.save(model.state_dict(), "last_model.pth")
torch.save(best_model_wts, "best_model.pth")
model.load_state_dict(best_model_wts)
ema_model.load_state_dict(best_model_wts)
print(f"✅ Best Val Acc: {best_acc:.2f}% | saved best_model.pth, last_model.pth & swa_model.pth")

# ============================
# 9) INFERENCE + TTA + Dynamic UNK + Ensemble
# ============================
idx_to_label[num_classes-1] = "UNK"
def combine_ema_swa(ema_model, swa_model, base_model):
    final_model = copy.deepcopy(base_model)
    final_sd = final_model.state_dict()
    ema_sd = ema_model.state_dict(); swa_sd = swa_model.state_dict()
    for k in final_sd.keys():
        if k in ema_sd and k in swa_sd: final_sd[k].copy_(0.5*ema_sd[k]+0.5*swa_sd[k])
    final_model.load_state_dict(final_sd)
    return final_model

ensemble_model = combine_ema_swa(ema_model, swa_model, model).to(device)

tta_transforms = [
    transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))]),
    transforms.Compose([transforms.Resize((224,224)), transforms.RandomHorizontalFlip(p=1.0), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))]),
    transforms.Compose([transforms.Resize((224,224)), transforms.RandomRotation(15), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))]),
    transforms.Compose([transforms.Resize((224,224)), transforms.ColorJitter(0.2,0.2,0.2,0.1), transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))])
]

# ============================
# 10) PREDICT FUNCTION
# ============================
@torch.no_grad()
def predict_with_tta_paths(image_paths, eval_model, tta_transforms, batch_size, temperature=1.0, dynamic_unk=True):
    eval_model.eval()
    preds_all = []

    for transform in tta_transforms:
        dataset = ArtDataset(
            pd.DataFrame({"path": image_paths, "label": [0]*len(image_paths)}),
            transform=transform
        )
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                            num_workers=os.cpu_count(),
                            pin_memory=(device.type=="cuda"))

        probs_list = []
        for images, _ in tqdm(loader, leave=False, desc="TTA"):
            images = images.to(device, non_blocking=True)
            outputs = eval_model(images) / temperature
            probs = torch.softmax(outputs, dim=1)
            probs_list.append(probs.cpu().numpy())
        
        preds_all.append(np.concatenate(probs_list, axis=0))
        torch.cuda.empty_cache()  # cleanup GPU memory

    # average probs across TTA runs
    probs_mean = np.mean(preds_all, axis=0)

    if dynamic_unk:
        max_probs = probs_mean.max(axis=1)
        preds = probs_mean.argmax(axis=1)
        unk_idx = num_classes - 1
        preds[max_probs < CFG["unk_threshold"]] = unk_idx
    else:
        preds = probs_mean.argmax(axis=1)

    return [idx_to_label[p] for p in preds]

# ============================
# 11) RUN FINAL INFERENCE
# ============================
submission = pd.read_csv(CFG["submission_csv"])
submission['path'] = submission['uuid'].apply(lambda x: os.path.join(CFG["image_dir"], f"{x}.png"))

preds = predict_with_tta_paths(
    submission['path'].tolist(),
    eval_model=ensemble_model,
    tta_transforms=tta_transforms,
    batch_size=CFG["batch_size"],
    temperature=CFG["temperature"],
    dynamic_unk=True
)

submission['style'] = preds
submission.to_csv("submission_TTA_EMA_SWA_UNK_Enhanced.csv", index=False)
print(f"🎯 Saved to submission_TTA_EMA_SWA_UNK_Enhanced.csv")


Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 216MB/s]
  scaler = torch.cuda.amp.GradScaler(enabled=use_amp)



Epoch 1/5 | Freeze Backbone


  with torch.cuda.amp.autocast(enabled=use_amp):
                                                        

Train Loss 2.2070 | Acc 25.75% || Val Loss 2.0334 | Acc 60.46%

Epoch 2/5 | Freeze Backbone


                                                        

Train Loss 2.0083 | Acc 38.72% || Val Loss 1.8487 | Acc 68.49%

Epoch 3/5 | Freeze Backbone


                                                        

Train Loss 1.8745 | Acc 42.14% || Val Loss 1.6788 | Acc 72.05%

Epoch 4/5 | Freeze Backbone


                                                        

Train Loss 1.8059 | Acc 42.87% || Val Loss 1.5746 | Acc 73.59%

Epoch 5/5 | Freeze Backbone


                                                        

Train Loss 1.7329 | Acc 43.51% || Val Loss 1.4873 | Acc 75.14%

Epoch 1/25 | Unfreeze Backbone


                                                        

Train Loss 1.5407 | Acc 51.80% || Val Loss 1.0233 | Acc 83.47%

Epoch 2/25 | Unfreeze Backbone


                                                        

Train Loss 1.3964 | Acc 55.20% || Val Loss 0.9046 | Acc 87.18%

Epoch 3/25 | Unfreeze Backbone


                                                        

Train Loss 1.3311 | Acc 57.40% || Val Loss 0.8551 | Acc 89.50%

Epoch 4/25 | Unfreeze Backbone


                                                        

Train Loss 1.2975 | Acc 56.84% || Val Loss 0.8339 | Acc 89.65%

Epoch 5/25 | Unfreeze Backbone


                                                        

Train Loss 1.2014 | Acc 61.57% || Val Loss 0.7741 | Acc 90.35%

Epoch 6/25 | Unfreeze Backbone


                                                        

Train Loss 1.2089 | Acc 61.11% || Val Loss 0.7605 | Acc 91.20%

Epoch 7/25 | Unfreeze Backbone


                                                        

Train Loss 1.1509 | Acc 65.05% || Val Loss 0.7534 | Acc 91.58%

Epoch 8/25 | Unfreeze Backbone


                                                        

Train Loss 1.1490 | Acc 64.49% || Val Loss 0.7602 | Acc 91.97%

Epoch 9/25 | Unfreeze Backbone


                                                        

Train Loss 1.1690 | Acc 63.14% || Val Loss 0.7435 | Acc 92.74%

Epoch 10/25 | Unfreeze Backbone


                                                        

Train Loss 1.1496 | Acc 62.40% || Val Loss 0.7252 | Acc 92.36%

Epoch 11/25 | Unfreeze Backbone


                                                        

Train Loss 1.1070 | Acc 66.73% || Val Loss 0.7279 | Acc 92.43%

Epoch 12/25 | Unfreeze Backbone


                                                        

Train Loss 1.1020 | Acc 62.40% || Val Loss 0.7241 | Acc 93.20%

Epoch 13/25 | Unfreeze Backbone


                                                        

Train Loss 1.1075 | Acc 62.81% || Val Loss 0.7213 | Acc 92.90%

Epoch 14/25 | Unfreeze Backbone


                                                        

Train Loss 1.1518 | Acc 60.88% || Val Loss 0.7285 | Acc 93.05%

Epoch 15/25 | Unfreeze Backbone


                                                        

Train Loss 1.1506 | Acc 64.32% || Val Loss 0.7376 | Acc 92.90%
🔻 Reduce LR to 2.0000000000000003e-06

Epoch 16/25 | Unfreeze Backbone


                                                        

Train Loss 1.0887 | Acc 64.28% || Val Loss 0.7174 | Acc 92.97%

Epoch 17/25 | Unfreeze Backbone


                                                        

Train Loss 1.1146 | Acc 63.37% || Val Loss 0.7111 | Acc 92.97%

Epoch 18/25 | Unfreeze Backbone


                                                        

Train Loss 1.0874 | Acc 65.69% || Val Loss 0.7227 | Acc 93.20%
Early stopping.
✅ Best Val Acc: 93.20% | saved best_model.pth, last_model.pth & swa_model.pth


                                                    

🎯 Saved to submission_TTA_EMA_SWA_UNK_Enhanced.csv


