In [None]:
import os
import copy
import random
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models

data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/train"
num_classes = 9
num_epochs = 10
batch_size = 16
learning_rate = 1e-4
num_folds = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

train_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

dataset = datasets.ImageFolder(root=data_dir, transform=train_transform)
class_names = dataset.classes
labels = [label for _, label in dataset.samples]

def create_model(model_name="swin_v2_s", num_classes=num_classes, pretrained=True):
    if model_name == "regnet_y_16gf":
        model = models.regnet_y_16gf(weights=models.RegNet_Y_16GF_Weights.IMAGENET1K_V2 if pretrained else None)
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, num_classes)
    elif model_name == "swin_v2_s":
        model = models.swin_v2_s(weights=models.Swin_V2_S_Weights.IMAGENET1K_V1 if pretrained else None)
        in_features = model.head.in_features
        model.head = nn.Linear(in_features, num_classes)
    else:
        raise ValueError("Unknown model name. Use 'regnet_y_16gf' or 'swin_v2_s'.")
    return model.to(device)

def train_one_epoch(model, loader, criterion, optimizer):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for inputs, labels in tqdm(loader, leave=False):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate(model, loader, criterion, class_names):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in tqdm(loader, leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    val_loss = running_loss / len(loader.dataset)
    val_acc = correct / total
    cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
    per_class_acc = cm.diagonal() / cm.sum(axis=1)
    per_class_acc_dict = {cls: acc for cls, acc in zip(class_names, per_class_acc)}
    return val_loss, val_acc, per_class_acc_dict

skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)

for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(labels)), labels)):
    print(f"\n========== Fold {fold + 1}/{num_folds} ==========")
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)
    model = create_model(model_name="swin_v2_s")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    best_val_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}] — Fold {fold+1}")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc, per_class_acc = validate(model, val_loader, criterion, class_names)
        scheduler.step()
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
        print("Per-class accuracies:")
        for cls, acc in per_class_acc.items():
            print(f"  {cls:25s} : {acc*100:.2f}%")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            model_path = f"best_model_fold{fold+1}_epoch{epoch+1}.pth"
            torch.save(best_model_wts, model_path)
            print(f"✅ Model improved & saved: {model_path}")
    print(f"Best Val Accuracy for Fold {fold+1}: {best_val_acc:.4f}")

print("\n🎉 Training Complete for All Folds!")

In [7]:
import os
import copy
import random
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models

data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/train"
num_classes = 9
num_epochs = 10
batch_size = 16
learning_rate = 1e-4
num_folds = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

train_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

dataset = datasets.ImageFolder(root=data_dir, transform=train_transform)
class_names = dataset.classes
labels = [label for _, label in dataset.samples]

def create_model(model_name="swin_v2_s", num_classes=num_classes, pretrained=True):
    if model_name == "regnet_y_16gf":
        model = models.regnet_y_16gf(weights=models.RegNet_Y_16GF_Weights.IMAGENET1K_V2 if pretrained else None)
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, num_classes)
    elif model_name == "swin_v2_s":
        model = models.swin_v2_s(weights=models.Swin_V2_S_Weights.IMAGENET1K_V1 if pretrained else None)
        in_features = model.head.in_features
        model.head = nn.Linear(in_features, num_classes)
    else:
        raise ValueError("Unknown model name. Use 'regnet_y_16gf' or 'swin_v2_s'.")
    return model.to(device)

def train_one_epoch(model, loader, criterion, optimizer):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for inputs, labels in tqdm(loader, leave=False):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    epoch_loss = running_loss / len(loader.dataset)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate(model, loader, criterion, class_names):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in tqdm(loader, leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    val_loss = running_loss / len(loader.dataset)
    val_acc = correct / total
    cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
    per_class_acc = cm.diagonal() / cm.sum(axis=1)
    per_class_acc_dict = {cls: acc for cls, acc in zip(class_names, per_class_acc)}
    return val_loss, val_acc, per_class_acc_dict

skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)
fold2_checkpoint = "/kaggle/input/swin_v2_fold2_epoch3/pytorch/default/1/best_model_fold2_epoch3.pth"

for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(labels)), labels)):
    if fold == 0:
        print("\n========== Skipping Fold 1 ==========")
        continue

    print(f"\n========== Fold {fold + 1}/{num_folds} ==========")
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

    model = create_model(model_name="swin_v2_s")
    start_epoch = 0

    if fold == 1:
        state_dict = torch.load(fold2_checkpoint, map_location=device)
        model.load_state_dict(state_dict)
        start_epoch = 3
        print(f"✅ Loaded checkpoint for Fold 2, resuming from epoch {start_epoch}")

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        print(f"✅ Using {torch.cuda.device_count()} GPUs")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    best_val_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(start_epoch, num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}] — Fold {fold+1}")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc, per_class_acc = validate(model, val_loader, criterion, class_names)
        scheduler.step()
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")
        print("Per-class accuracies:")
        for cls, acc in per_class_acc.items():
            print(f"  {cls:25s} : {acc*100:.2f}%")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            model_path = f"best_model_fold{fold+1}_epoch{epoch+1}.pth"
            torch.save(best_model_wts, model_path)
            print(f"✅ Model improved & saved: {model_path}")

    print(f"Best Val Accuracy for Fold {fold+1}: {best_val_acc:.4f}")

print("\n🎉 Training Complete for All Folds!")



✅ Loaded checkpoint for Fold 2, resuming from epoch 3
✅ Using 2 GPUs

Epoch [4/10] — Fold 2


                                                 

Train Loss: 0.2095 | Train Acc: 0.9338
Val Loss:   0.2517 | Val Acc:   0.9182
Per-class accuracies:
  0                         : 88.21%
  1                         : 96.02%
  2                         : 92.47%
  3                         : 98.73%
  4                         : 84.58%
  5                         : 92.43%
  6                         : 99.57%
  7                         : 94.96%
  8                         : 74.27%
✅ Model improved & saved: best_model_fold2_epoch4.pth

Epoch [5/10] — Fold 2


                                                 

Train Loss: 0.1464 | Train Acc: 0.9513
Val Loss:   0.1857 | Val Acc:   0.9395
Per-class accuracies:
  0                         : 90.87%
  1                         : 93.23%
  2                         : 98.12%
  3                         : 97.89%
  4                         : 85.90%
  5                         : 90.04%
  6                         : 90.04%
  7                         : 87.98%
  8                         : 79.25%
✅ Model improved & saved: best_model_fold2_epoch5.pth

Epoch [6/10] — Fold 2


                                                 

Train Loss: 0.1320 | Train Acc: 0.9573
Val Loss:   0.1847 | Val Acc:   0.9405
Per-class accuracies:
  0                         : 82.89%
  1                         : 90.44%
  2                         : 96.71%
  3                         : 91.98%
  4                         : 91.19%
  5                         : 93.23%
  6                         : 94.81%
  7                         : 93.80%
  8                         : 91.70%
✅ Model improved & saved: best_model_fold2_epoch6.pth

Epoch [7/10] — Fold 2


                                                 

Train Loss: 0.0891 | Train Acc: 0.9706
Val Loss:   0.1618 | Val Acc:   0.9530
Per-class accuracies:
  0                         : 89.73%
  1                         : 97.21%
  2                         : 97.37%
  3                         : 90.72%
  4                         : 88.99%
  5                         : 98.80%
  6                         : 98.27%
  7                         : 93.80%
  8                         : 86.72%
✅ Model improved & saved: best_model_fold2_epoch7.pth

Epoch [8/10] — Fold 2


                                                 

Train Loss: 0.0694 | Train Acc: 0.9775
Val Loss:   0.1799 | Val Acc:   0.9550
Per-class accuracies:
  0                         : 91.63%
  1                         : 93.63%
  2                         : 97.93%
  3                         : 96.62%
  4                         : 92.07%
  5                         : 95.62%
  6                         : 98.27%
  7                         : 93.41%
  8                         : 81.74%
✅ Model improved & saved: best_model_fold2_epoch8.pth

Epoch [9/10] — Fold 2


                                                 

Train Loss: 0.0406 | Train Acc: 0.9878
Val Loss:   0.1410 | Val Acc:   0.9677
Per-class accuracies:
  0                         : 93.92%
  1                         : 97.61%
  2                         : 98.02%
  3                         : 97.47%
  4                         : 96.04%
  5                         : 96.81%
  6                         : 96.97%
  7                         : 97.67%
  8                         : 86.72%
✅ Model improved & saved: best_model_fold2_epoch9.pth

Epoch [10/10] — Fold 2


                                                 

Train Loss: 0.0286 | Train Acc: 0.9902
Val Loss:   0.1390 | Val Acc:   0.9667
Per-class accuracies:
  0                         : 84.41%
  1                         : 98.80%
  2                         : 98.35%
  3                         : 97.89%
  4                         : 95.15%
  5                         : 96.81%
  6                         : 97.84%
  7                         : 97.67%
  8                         : 90.87%
Best Val Accuracy for Fold 2: 0.9677

✅ Using 2 GPUs

Epoch [1/10] — Fold 3


                                                 

Train Loss: 0.7461 | Train Acc: 0.7520
Val Loss:   0.3745 | Val Acc:   0.8869
Per-class accuracies:
  0                         : 70.23%
  1                         : 82.14%
  2                         : 91.01%
  3                         : 94.54%
  4                         : 94.71%
  5                         : 88.84%
  6                         : 97.39%
  7                         : 99.23%
  8                         : 63.90%
✅ Model improved & saved: best_model_fold3_epoch1.pth

Epoch [2/10] — Fold 3


                                                 

Train Loss: 0.3046 | Train Acc: 0.9015
Val Loss:   0.2632 | Val Acc:   0.9094
Per-class accuracies:
  0                         : 54.58%
  1                         : 80.95%
  2                         : 95.53%
  3                         : 97.06%
  4                         : 93.83%
  5                         : 84.06%
  6                         : 95.22%
  7                         : 95.75%
  8                         : 89.63%
✅ Model improved & saved: best_model_fold3_epoch2.pth

Epoch [3/10] — Fold 3


                                                 

Train Loss: 0.2162 | Train Acc: 0.9309
Val Loss:   0.1966 | Val Acc:   0.9390
Per-class accuracies:
  0                         : 71.76%
  1                         : 92.86%
  2                         : 95.67%
  3                         : 95.38%
  4                         : 97.36%
  5                         : 96.81%
  6                         : 95.65%
  7                         : 98.84%
  8                         : 88.80%
✅ Model improved & saved: best_model_fold3_epoch3.pth

Epoch [4/10] — Fold 3


                                                 

Train Loss: 0.1449 | Train Acc: 0.9541
Val Loss:   0.2437 | Val Acc:   0.9266
Per-class accuracies:
  0                         : 87.79%
  1                         : 78.17%
  2                         : 94.92%
  3                         : 98.74%
  4                         : 96.48%
  5                         : 97.21%
  6                         : 96.52%
  7                         : 77.61%
  8                         : 91.29%

Epoch [5/10] — Fold 3


                                                 

Train Loss: 0.1058 | Train Acc: 0.9645
Val Loss:   0.1716 | Val Acc:   0.9461
Per-class accuracies:
  0                         : 84.35%
  1                         : 88.89%
  2                         : 98.16%
  3                         : 97.90%
  4                         : 87.22%
  5                         : 90.84%
  6                         : 95.22%
  7                         : 94.59%
  8                         : 87.55%
✅ Model improved & saved: best_model_fold3_epoch5.pth

Epoch [6/10] — Fold 3


                                                 

Train Loss: 0.0641 | Train Acc: 0.9804
Val Loss:   0.1690 | Val Acc:   0.9542
Per-class accuracies:
  0                         : 91.98%
  1                         : 87.30%
  2                         : 98.45%
  3                         : 94.54%
  4                         : 94.27%
  5                         : 94.82%
  6                         : 94.35%
  7                         : 92.66%
  8                         : 87.55%
✅ Model improved & saved: best_model_fold3_epoch6.pth

Epoch [7/10] — Fold 3


                                                 

Train Loss: 0.0401 | Train Acc: 0.9879
Val Loss:   0.1414 | Val Acc:   0.9633
Per-class accuracies:
  0                         : 92.75%
  1                         : 96.43%
  2                         : 96.52%
  3                         : 97.90%
  4                         : 98.68%
  5                         : 95.62%
  6                         : 98.70%
  7                         : 96.91%
  8                         : 92.53%
✅ Model improved & saved: best_model_fold3_epoch7.pth

Epoch [8/10] — Fold 3


                                                 

Train Loss: 0.0234 | Train Acc: 0.9923
Val Loss:   0.1326 | Val Acc:   0.9682
Per-class accuracies:
  0                         : 94.27%
  1                         : 93.65%
  2                         : 98.02%
  3                         : 98.32%
  4                         : 98.24%
  5                         : 96.81%
  6                         : 96.96%
  7                         : 97.30%
  8                         : 88.80%
✅ Model improved & saved: best_model_fold3_epoch8.pth

Epoch [9/10] — Fold 3


                                                 

Train Loss: 0.0173 | Train Acc: 0.9941
Val Loss:   0.1231 | Val Acc:   0.9721
Per-class accuracies:
  0                         : 92.75%
  1                         : 96.03%
  2                         : 98.16%
  3                         : 99.16%
  4                         : 97.80%
  5                         : 97.61%
  6                         : 96.96%
  7                         : 98.07%
  8                         : 91.29%
✅ Model improved & saved: best_model_fold3_epoch9.pth

Epoch [10/10] — Fold 3


                                                 

Train Loss: 0.0127 | Train Acc: 0.9967
Val Loss:   0.1126 | Val Acc:   0.9745
Per-class accuracies:
  0                         : 91.98%
  1                         : 96.03%
  2                         : 98.45%
  3                         : 99.58%
  4                         : 98.68%
  5                         : 97.61%
  6                         : 97.83%
  7                         : 98.46%
  8                         : 91.29%
✅ Model improved & saved: best_model_fold3_epoch10.pth
Best Val Accuracy for Fold 3: 0.9745

🎉 Training Complete for All Folds!


In [32]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import numpy as np
from tqdm import tqdm
import pandas as pd

test_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/test/images"
num_classes = 9
batch_size = 16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_paths = [
    "/kaggle/input/swin-v2_s-fold-1/pytorch/default/1/best_model_fold1_epoch10.pth",
    "/kaggle/working/best_model_fold2_epoch9.pth",
    "/kaggle/working/best_model_fold3_epoch9.pth",
]

test_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

test_dataset = datasets.ImageFolder(root=os.path.dirname(test_dir), transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

def create_swin_v2_s(num_classes, pretrained=False):
    model = models.swin_v2_s(weights=models.Swin_V2_S_Weights.IMAGENET1K_V1 if pretrained else None)
    in_features = model.head.in_features
    model.head = nn.Linear(in_features, num_classes)
    return model

models_list = []
for i, path in enumerate(model_paths):
    print(f"\n🔹 Loading model {i+1} from {path}")
    model = create_swin_v2_s(num_classes=num_classes)
    state_dict = torch.load(path, map_location=device)
    if any(k.startswith("module.") for k in state_dict.keys()):
        state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
    model.load_state_dict(state_dict, strict=True)
    model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        print(f"✅ Using {torch.cuda.device_count()} GPUs")
    model.eval()
    models_list.append(model)

all_outputs = []
all_preds = []

print("\n🚀 Running Inference...")
with torch.no_grad():
    for inputs, _ in tqdm(test_loader):
        inputs = inputs.to(device)
        outputs_per_model = []
        for model in models_list:
            outputs = model(inputs)
            outputs_per_model.append(torch.softmax(outputs, dim=1))
        stacked_outputs = torch.stack(outputs_per_model)
        avg_outputs = torch.mean(stacked_outputs, dim=0)
        all_outputs.append(avg_outputs.cpu().numpy())
        preds_each_model = [torch.argmax(o, dim=1).cpu().numpy() for o in outputs_per_model]
        all_preds.append(preds_each_model)

avg_outputs = np.concatenate(all_outputs, axis=0)
final_preds = np.argmax(avg_outputs, axis=1)
model_preds = [np.concatenate([batch[i] for batch in all_preds]) for i in range(len(model_paths))]

os.makedirs("/kaggle/working/predictions", exist_ok=True)
filenames = [os.path.basename(p[0]) for p in test_dataset.samples]

for i, preds in enumerate(model_preds):
    df = pd.DataFrame({"filename": filenames, "pred_class": preds})
    df.to_csv(f"/kaggle/working/predictions/model{i+1}_preds.csv", index=False)
    print(f"✅ Saved: model{i+1}_preds.csv")

ensemble_df = pd.DataFrame({"filename": filenames, "ensemble_pred_class": final_preds})
ensemble_df.to_csv("/kaggle/working/predictions/ensemble_preds.csv", index=False)
print("✅ Saved: ensemble_preds.csv")

print("\n🎉 Inference Complete! All results are in /kaggle/working/predictions")


🔹 Loading model 1 from /kaggle/input/swin-v2_s-fold-1/pytorch/default/1/best_model_fold1_epoch10.pth
✅ Using 2 GPUs

🔹 Loading model 2 from /kaggle/working/best_model_fold2_epoch9.pth
✅ Using 2 GPUs

🔹 Loading model 3 from /kaggle/working/best_model_fold3_epoch9.pth
✅ Using 2 GPUs

🚀 Running Inference...


100%|██████████| 329/329 [04:25<00:00,  1.24it/s]

✅ Saved: model1_preds.csv
✅ Saved: model2_preds.csv
✅ Saved: model3_preds.csv
✅ Saved: ensemble_preds.csv

🎉 Inference Complete! All results are in /kaggle/working/predictions



