In [None]:
import os
import copy
import random
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models
from torch.cuda.amp import GradScaler, autocast

# --- Experiment configuration -------------------------------------------
seed = 42
data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/train"

# Problem size and cross-validation layout.
num_classes, num_folds = 9, 3

# Optimisation schedule; gradients are accumulated over `accum_steps`
# mini-batches of `batch_size` samples before each optimizer update.
num_epochs, batch_size, accum_steps = 10, 8, 2
learning_rate = 1e-4

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Reproducibility: seed every RNG this script touches ----------------
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Square side length (in pixels) used by every resize/crop below.
_IMAGE_SIZE = 384

# Per-channel normalisation statistics shared by both pipelines
# (the standard ImageNet values, matching the pretrained backbone weights).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random crop / flip / colour / rotation
# augmentation, then tensor conversion and normalisation.
_train_steps = [
    transforms.Resize((_IMAGE_SIZE, _IMAGE_SIZE)),
    transforms.RandomResizedCrop(_IMAGE_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Validation pipeline: deterministic resize + centre crop, no augmentation.
_val_steps = [
    transforms.Resize((_IMAGE_SIZE, _IMAGE_SIZE)),
    transforms.CenterCrop(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
val_transform = transforms.Compose(_val_steps)

# Index the image folder once; no transform is passed to the constructor here.
full_dataset = datasets.ImageFolder(data_dir)
class_names = full_dataset.classes

# One integer class index per sample, in the same order as `full_dataset.samples`;
# used to stratify the folds.
labels = list(full_dataset.targets)

# Shuffled, seeded stratified K-fold splitter over those labels.
skf = StratifiedKFold(n_splits=num_folds, random_state=seed, shuffle=True)

def create_model(num_classes=num_classes):
    """Build an ImageNet-pretrained EfficientNetV2-L with a fresh classifier head.

    Args:
        num_classes: Number of output logits for the replacement linear layer.

    Returns:
        The model, moved to the module-level ``device``.
    """
    pretrained = models.EfficientNet_V2_L_Weights.IMAGENET1K_V1
    net = models.efficientnet_v2_l(weights=pretrained)

    # Swap the final linear layer for one sized to this task, keeping its input width.
    old_head = net.classifier[1]
    net.classifier[1] = nn.Linear(old_head.in_features, num_classes)

    net = net.to(device)
    return net

def train_one_epoch(model, loader, criterion, optimizer, scaler):
    """Run one training epoch with mixed precision and gradient accumulation.

    Gradients are accumulated over ``accum_steps`` mini-batches before each
    optimizer update. If the number of batches is not a multiple of
    ``accum_steps``, the gradients of the trailing partial group are applied
    after the loop instead of being silently discarded.

    Args:
        model: Network to train; switched to train mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scaler: ``GradScaler`` used to scale the loss and step the optimizer.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and the
        fraction of correctly classified samples over the epoch.
    """

    def _apply_update():
        # Unscale + step through the scaler, refresh its scale, clear gradients.
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

    model.train()
    running_loss, correct, total = 0.0, 0, 0
    pending_grads = False  # True while backward() results are not yet applied
    optimizer.zero_grad()
    for step, (inputs, labels) in enumerate(tqdm(loader, leave=False)):
        inputs, labels = inputs.to(device), labels.to(device)
        with autocast():
            outputs = model(inputs)
            # Divide so the accumulated gradient is the mean over the group.
            loss = criterion(outputs, labels) / accum_steps
        scaler.scale(loss).backward()
        pending_grads = True
        if (step + 1) % accum_steps == 0:
            _apply_update()
            pending_grads = False
        # Undo the accum_steps division so the logged loss is per-sample.
        running_loss += loss.item() * inputs.size(0) * accum_steps
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # Flush the last, incomplete accumulation group. Its gradient is still
    # divided by accum_steps, so this update is slightly down-weighted.
    if pending_grads:
        _apply_update()

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` and report overall and per-class accuracy.

    Args:
        model: Network to evaluate; switched to eval mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.

    Returns:
        Tuple ``(val_loss, val_acc, per_class_acc_dict)`` where
        ``per_class_acc_dict`` maps each name in ``class_names`` to that
        class's accuracy (correct / true samples of the class). A class with
        no samples in ``loader`` gets ``nan``.
    """
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in tqdm(loader, leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            preds = outputs.argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            # Plain Python ints are enough for the confusion matrix.
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    val_loss = running_loss / total
    val_acc = correct / total

    # Rows are true classes, columns are predictions; fixing `labels` keeps
    # the matrix aligned with class_names even if a class never appears.
    cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
    class_support = cm.sum(axis=1)
    # Guarded division: classes with zero support stay NaN instead of
    # triggering a divide-by-zero RuntimeWarning.
    per_class_acc = np.full(len(class_names), np.nan)
    np.divide(cm.diagonal(), class_support, out=per_class_acc, where=class_support > 0)
    per_class_acc_dict = {cls: acc for cls, acc in zip(class_names, per_class_acc)}
    return val_loss, val_acc, per_class_acc_dict

# Stratified K-fold training: for every fold, fine-tune a fresh model and keep
# the checkpoint with the best validation accuracy.
#
# Each split gets its own shallow copy of the dataset so that it can carry its
# own transform. Assigning both transforms through Subset.dataset on the same
# underlying object would let the second assignment overwrite the first, and
# training would then run with the validation transform (no augmentation).
# The copies share the same sample list, so fold indices remain valid.
train_dataset = copy.copy(full_dataset)
train_dataset.transform = train_transform
val_dataset = copy.copy(full_dataset)
val_dataset.transform = val_transform

for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(labels)), labels)):
    print(f"\n========== Fold {fold + 1}/{num_folds} ==========")
    train_subset = Subset(train_dataset, train_idx)
    val_subset = Subset(val_dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Fresh model, optimizer, LR schedule and AMP scaler for every fold.
    model = create_model()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    scaler = GradScaler()

    best_val_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}] — Fold {fold+1}")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, scaler)
        val_loss, val_acc, per_class_acc = validate(model, val_loader, criterion)
        # Cosine schedule advances once per epoch.
        scheduler.step()
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
        print("Per-class accuracies:")
        for cls, acc in per_class_acc.items():
            print(f"  {cls:25s} : {acc*100:.2f}%")
        # Checkpoint only on a strict improvement in validation accuracy.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            model_path = f"efficientnetv2l_best_fold{fold+1}.pth"
            torch.save(best_model_wts, model_path)
            print(f"✅ Model improved & saved: {model_path}")
    print(f"Best Val Accuracy for Fold {fold+1}: {best_val_acc:.4f}")

print("\n🎉 Training Complete for All Folds!")




  scaler = GradScaler()



Epoch [1/10] — Fold 1


  with autocast():
                                                   

Train Loss: 0.6148 | Train Acc: 0.8023
Val Loss:   0.2356 | Val Acc:   0.9305
Per-class accuracies:
  0                         : 89.73%
  1                         : 94.05%
  2                         : 92.00%
  3                         : 98.32%
  4                         : 97.79%
  5                         : 88.45%
  6                         : 96.97%
  7                         : 94.19%
  8                         : 95.04%
✅ Model improved & saved: efficientnetv2l_best_fold1.pth

Epoch [2/10] — Fold 1


  with autocast():
                                                   

Train Loss: 0.2314 | Train Acc: 0.9251
Val Loss:   0.1835 | Val Acc:   0.9403
Per-class accuracies:
  0                         : 93.92%
  1                         : 85.71%
  2                         : 93.13%
  3                         : 99.16%
  4                         : 98.23%
  5                         : 90.84%
  6                         : 99.57%
  7                         : 99.61%
  8                         : 93.80%
✅ Model improved & saved: efficientnetv2l_best_fold1.pth

Epoch [3/10] — Fold 1


  with autocast():
 24%|██▍       | 122/511 [00:31<01:38,  3.93it/s]  