In [1]:
# ----------------------------
# Imports
# ----------------------------
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, confusion_matrix, accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.isotonic import IsotonicRegression
from scipy.special import softmax
import numpy as np
import pandas as pd
import os

# ----------------------------
# Device
# ----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ----------------------------
# Paths & Hyperparameters
# ----------------------------
data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/train"
test_data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/test"
num_classes = 9
batch_size = 32
lr = 1e-4
epochs = 5
k_folds = 5
test_split_ratio = 0.05

# ----------------------------
# Transforms - use official ImageNet stats!
# ----------------------------

mean = [0.3789, 0.3898, 0.3800]
std = [0.2241, 0.2245, 0.2227]

train_tfms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_tfms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# ----------------------------
# Load Datasets
# ----------------------------
full_dataset = datasets.ImageFolder(root=data_dir, transform=train_tfms)
test_dataset = datasets.ImageFolder(root=test_data_dir, transform=val_tfms)
num_samples = len(full_dataset)

# ----------------------------
# Initial Split for Hold-out Test
# ----------------------------
test_size = int(test_split_ratio * num_samples)
trainval_size = num_samples - test_size

indices = np.arange(num_samples)
np.random.shuffle(indices)
trainval_indices = indices[:trainval_size]
holdout_test_indices = indices[trainval_size:]

trainval_dataset = Subset(full_dataset, trainval_indices)
holdout_test_dataset = Subset(full_dataset, holdout_test_indices)
holdout_test_dataset.dataset.transform = val_tfms

# For stratified k-fold: get labels for trainval split
trainval_labels = [full_dataset.targets[i] for i in trainval_indices]

# ----------------------------
# Stratified K-Fold CV
# ----------------------------
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)
fold_best_metrics = []
fold_best_models = []
for fold, (fold_train_idx, fold_val_idx) in enumerate(skf.split(trainval_indices, trainval_labels)):
    print(f"\n========== Fold {fold+1}/{k_folds} ==========")
    train_indices = [trainval_indices[i] for i in fold_train_idx]
    val_indices = [trainval_indices[i] for i in fold_val_idx]
    train_subset = Subset(full_dataset, train_indices)
    val_subset = Subset(full_dataset, val_indices)
    val_subset.dataset.transform = val_tfms

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

    model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.IMAGENET1K_V1)
    for param in model.parameters():
        param.requires_grad = True   # <-- FULL FINE-TUNING
    in_features = model.classifier[2].in_features
    model.classifier[2] = nn.Linear(in_features, num_classes)
    model = model.to(device)


    fold_train_labels = [full_dataset.targets[i] for i in train_indices]
    class_counts = np.bincount(fold_train_labels, minlength=num_classes)
    class_weights = 1.0 / (np.sqrt(np.maximum(class_counts, 1)))
    weights_tensor = torch.FloatTensor(class_weights).to(device)

    criterion = nn.CrossEntropyLoss(weight=weights_tensor)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_metric = -1
    best_model_state = None
    best_filename = None

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader, desc=f"Fold {fold+1} Epoch {epoch+1} [Train]"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch [{epoch+1}] Loss: {running_loss/len(train_loader):.4f}")

        model.eval()
        all_labels, all_preds = [], []
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Fold {fold+1} Epoch {epoch+1} [Val]"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(predicted.cpu().numpy())
        all_labels = np.array(all_labels)
        all_preds = np.array(all_preds)

        cm = confusion_matrix(all_labels, all_preds, labels=range(num_classes))
        per_class_acc = cm.diagonal() / cm.sum(axis=1)
        per_class_prec = precision_score(all_labels, all_preds, labels=range(num_classes), average=None, zero_division=0)
        per_class_rec = recall_score(all_labels, all_preds, labels=range(num_classes), average=None, zero_division=0)
        overall_acc = np.mean(all_preds == all_labels) * 100
        overall_prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
        overall_rec = recall_score(all_labels, all_preds, average='macro', zero_division=0)

        print(f"\n{'='*15} Fold {fold+1} Epoch {epoch+1} Validation Metrics {'='*15}")
        print(f"{'Class':<8}{'Accuracy':>10}{'Precision':>12}{'Recall':>10}")
        print("-" * 40)
        for i in range(num_classes):
            acc = per_class_acc[i] if not np.isnan(per_class_acc[i]) else 0.0
            prec = per_class_prec[i] if not np.isnan(per_class_prec[i]) else 0.0
            rec = per_class_rec[i] if not np.isnan(per_class_rec[i]) else 0.0
            print(f"{i:<8}{acc:>10.4f}{prec:>12.4f}{rec:>10.4f}")
        print("-" * 40)
        print(f"{'Overall':<8}{overall_acc:>10.2f}{overall_prec*100:>12.2f}{overall_rec*100:>10.2f}")
        print(f"{'='*55}\n")

        composite_metric = overall_acc + overall_prec * 100 + overall_rec * 100
        if composite_metric > best_metric:
            best_metric = composite_metric
            best_model_state = model.state_dict()
            best_filename = f"bestmodel_fold{fold+1}_epoch{epoch+1}_acc{overall_acc:.2f}_prec{overall_prec:.4f}_rec{overall_rec:.4f}.pt"
            torch.save(best_model_state, best_filename)
            print(f"Best model for fold {fold+1} updated and saved as {best_filename}")

    fold_best_metrics.append(best_metric)
    fold_best_models.append(best_filename)

# ----------------------------
# Retrain Best Fold Model On Full Trainval
# ----------------------------
best_fold_idx = np.argmax(fold_best_metrics)
best_model_path = fold_best_models[best_fold_idx]
print(f"\nUsing best model from fold {best_fold_idx+1}: {best_model_path}")

model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = True   # <-- FULL FINE-TUNING
in_features = model.classifier[2].in_features
model.classifier[2] = nn.Linear(in_features, num_classes)
model = model.to(device)
model.load_state_dict(torch.load(best_model_path))

train_labels = [full_dataset.targets[i] for i in trainval_indices]
class_counts = np.bincount(train_labels, minlength=num_classes)
class_weights = 1. / (np.sqrt(np.maximum(class_counts, 1)))
weights_tensor = torch.FloatTensor(class_weights).to(device)
criterion = nn.CrossEntropyLoss(weight=weights_tensor)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

full_train_loader = DataLoader(trainval_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(full_train_loader, desc=f"Retrain Epoch {epoch+1}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}] Loss: {running_loss/len(full_train_loader):.4f}")

# ----------------------------
# CALIBRATION AND TEST EVALUATION
# ----------------------------
# --- Collect validation logits/labels for calibration
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)
val_logits, val_labels = [], []
model.eval()
with torch.no_grad():
    for images, labels in tqdm(val_loader, desc="Collecting validation logits"):
        images = images.to(device)
        outputs = model(images)
        val_logits.append(outputs.cpu().numpy())
        val_labels.append(labels.cpu().numpy())
val_logits = np.concatenate(val_logits)
val_labels = np.concatenate(val_labels)
val_probs = softmax(val_logits, axis=1)

# --- Fit isotonic regression per class
calibrators = []
for i in range(num_classes):
    y_bin = (val_labels == i).astype(int)
    calibrator = IsotonicRegression(out_of_bounds="clip")
    calibrator.fit(val_probs[:, i], y_bin)
    calibrators.append(calibrator)

# --- Gather logits for hold-out test set
holdout_test_loader = DataLoader(holdout_test_dataset, batch_size=1, shuffle=False, num_workers=2)
test_logits, test_labels = [], []
with torch.no_grad():
    for images, labels in tqdm(holdout_test_loader, desc="Holdout test logits"):
        images = images.to(device)
        outputs = model(images)
        test_logits.append(outputs.cpu().numpy())
        test_labels.append(labels.cpu().numpy())
test_logits = np.concatenate(test_logits)
test_labels = np.concatenate(test_labels)
test_probs = softmax(test_logits, axis=1)

# --- Calibrate hold-out test probabilities
cal_test_probs = np.zeros_like(test_probs)
for i in range(num_classes):
    cal_test_probs[:, i] = calibrators[i].transform(test_probs[:, i])
cal_test_preds = np.argmax(cal_test_probs, axis=1)

# --- Print calibrated test metrics
cm = confusion_matrix(test_labels, cal_test_preds, labels=range(num_classes))
per_class_acc = cm.diagonal() / cm.sum(axis=1)
per_class_prec = precision_score(test_labels, cal_test_preds, labels=range(num_classes), average=None, zero_division=0)
per_class_rec = recall_score(test_labels, cal_test_preds, labels=range(num_classes), average=None, zero_division=0)
overall_acc = accuracy_score(test_labels, cal_test_preds) * 100
overall_prec = precision_score(test_labels, cal_test_preds, average='macro', zero_division=0)
overall_rec = recall_score(test_labels, cal_test_preds, average='macro', zero_division=0)

print(f"\n{'='*15} Hold-out Test Metrics (Calibrated) {'='*15}")
print(f"{'Class':<8}{'Accuracy':>10}{'Precision':>12}{'Recall':>10}")
print("-" * 40)
for i in range(num_classes):
    acc = per_class_acc[i] if not np.isnan(per_class_acc[i]) else 0.0
    prec = per_class_prec[i] if not np.isnan(per_class_prec[i]) else 0.0
    rec = per_class_rec[i] if not np.isnan(per_class_rec[i]) else 0.0
    print(f"{i:<8}{acc:>10.4f}{prec:>12.4f}{rec:>10.4f}")
print("-" * 40)
print(f"{'Overall':<8}{overall_acc:>10.2f}{overall_prec*100:>12.2f}{overall_rec*100:>10.2f}")
print(f"{'='*55}\n")


Using device: cuda



Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:01<00:00, 78.6MB/s] 
Fold 1 Epoch 1 [Train]: 100%|██████████| 292/292 [02:40<00:00,  1.82it/s]


Epoch [1] Loss: 0.6371


Fold 1 Epoch 1 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.16it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.6887      0.9369    0.6887
1           0.8741      0.9843    0.8741
2           0.9373      0.9660    0.9373
3           1.0000      0.9172    1.0000
4           0.9924      0.9091    0.9924
5           0.9790      0.8917    0.9790
6           0.9699      0.9214    0.9699
7           0.9728      0.8034    0.9728
8           0.9044      0.8092    0.9044
----------------------------------------
Overall      92.87       90.44     92.43

Best model for fold 1 updated and saved as bestmodel_fold1_epoch1_acc92.87_prec0.9044_rec0.9243.pt


Fold 1 Epoch 2 [Train]: 100%|██████████| 292/292 [02:52<00:00,  1.69it/s]


Epoch [2] Loss: 0.1614


Fold 1 Epoch 2 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.10it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8278      0.9328    0.8278
1           0.9720      0.8742    0.9720
2           0.9530      0.9593    0.9530
3           0.9323      0.9841    0.9323
4           1.0000      0.8733    1.0000
5           0.9580      0.9013    0.9580
6           0.9474      0.9921    0.9474
7           0.8844      0.8966    0.8844
8           0.8971      0.9242    0.8971
----------------------------------------
Overall      93.99       92.64     93.02

Best model for fold 1 updated and saved as bestmodel_fold1_epoch2_acc93.99_prec0.9264_rec0.9302.pt


Fold 1 Epoch 3 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.68it/s]


Epoch [3] Loss: 0.0545


Fold 1 Epoch 3 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.7483      0.9741    0.7483
1           0.9720      0.9205    0.9720
2           0.9323      0.9809    0.9323
3           0.9850      0.9850    0.9850
4           1.0000      0.7486    1.0000
5           0.9510      0.9714    0.9510
6           0.9774      0.9848    0.9774
7           0.9592      0.9338    0.9592
8           0.9412      0.7151    0.9412
----------------------------------------
Overall      93.56       91.27     94.07



Fold 1 Epoch 4 [Train]: 100%|██████████| 292/292 [02:51<00:00,  1.70it/s]


Epoch [4] Loss: 0.0412


Fold 1 Epoch 4 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8146      0.9462    0.8146
1           0.9510      0.9067    0.9510
2           0.9538      0.9706    0.9538
3           1.0000      0.9433    1.0000
4           1.0000      0.9357    1.0000
5           0.9301      0.9110    0.9301
6           0.9850      0.9493    0.9850
7           0.9252      0.9510    0.9252
8           0.9191      0.8333    0.9191
----------------------------------------
Overall      94.63       92.74     94.21

Best model for fold 1 updated and saved as bestmodel_fold1_epoch4_acc94.63_prec0.9274_rec0.9421.pt


Fold 1 Epoch 5 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.67it/s]


Epoch [5] Loss: 0.0332


Fold 1 Epoch 5 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.6887      0.9811    0.6887
1           0.9371      0.9178    0.9371
2           0.9612      0.9620    0.9612
3           0.9624      0.9922    0.9624
4           0.9466      0.9538    0.9466
5           0.9510      0.9252    0.9510
6           0.9925      0.9103    0.9925
7           0.9796      0.8675    0.9796
8           0.8676      0.7919    0.8676
----------------------------------------
Overall      93.82       92.24     92.08




Fold 2 Epoch 1 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.68it/s]


Epoch [1] Loss: 0.6423


Fold 2 Epoch 1 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8808      0.7644    0.8808
1           0.8951      0.9481    0.8951
2           0.9349      0.9742    0.9349
3           0.8872      0.9916    0.8872
4           0.9154      0.9520    0.9154
5           0.9650      0.7931    0.9650
6           0.9774      0.9220    0.9774
7           0.9796      0.8623    0.9796
8           0.8456      0.8846    0.8456
----------------------------------------
Overall      92.70       89.91     92.01

Best model for fold 2 updated and saved as bestmodel_fold2_epoch1_acc92.70_prec0.8991_rec0.9201.pt


Fold 2 Epoch 2 [Train]: 100%|██████████| 292/292 [02:51<00:00,  1.70it/s]


Epoch [2] Loss: 0.1655


Fold 2 Epoch 2 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9470      0.7688    0.9470
1           0.8881      0.9922    0.8881
2           0.9365      0.9776    0.9365
3           0.9474      0.9767    0.9474
4           0.9538      0.9764    0.9538
5           0.9161      0.9161    0.9161
6           0.9925      0.8250    0.9925
7           0.9932      0.9241    0.9932
8           0.8897      0.8897    0.8897
----------------------------------------
Overall      93.86       91.63     94.05

Best model for fold 2 updated and saved as bestmodel_fold2_epoch2_acc93.86_prec0.9163_rec0.9405.pt


Fold 2 Epoch 3 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.68it/s]


Epoch [3] Loss: 0.0599


Fold 2 Epoch 3 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.07it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8146      0.9179    0.8146
1           0.8951      0.9624    0.8951
2           0.9827      0.9342    0.9827
3           0.9774      0.9559    0.9774
4           0.9538      0.9688    0.9538
5           0.9371      0.9504    0.9371
6           0.9474      0.9844    0.9474
7           0.9660      0.9281    0.9660
8           0.7279      0.9900    0.7279
----------------------------------------
Overall      94.38       95.47     91.13

Best model for fold 2 updated and saved as bestmodel_fold2_epoch3_acc94.38_prec0.9547_rec0.9113.pt


Fold 2 Epoch 4 [Train]: 100%|██████████| 292/292 [02:52<00:00,  1.69it/s]


Epoch [4] Loss: 0.0443


Fold 2 Epoch 4 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.07it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9205      0.8528    0.9205
1           0.9091      0.9701    0.9091
2           0.9258      0.9851    0.9258
3           0.9925      0.9429    0.9925
4           0.9923      0.7371    0.9923
5           0.9441      0.9122    0.9441
6           0.9624      0.9552    0.9624
7           0.9932      0.9125    0.9932
8           0.8750      0.8815    0.8750
----------------------------------------
Overall      93.65       90.55     94.61



Fold 2 Epoch 5 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.67it/s]


Epoch [5] Loss: 0.0376


Fold 2 Epoch 5 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8212      0.9254    0.8212
1           0.9301      0.9500    0.9301
2           0.9819      0.9430    0.9819
3           0.9850      0.9776    0.9850
4           0.9154      0.9754    0.9154
5           0.9510      0.9128    0.9510
6           0.9474      1.0000    0.9474
7           0.9456      0.9456    0.9456
8           0.8015      0.9561    0.8015
----------------------------------------
Overall      94.80       95.40     91.99

Best model for fold 2 updated and saved as bestmodel_fold2_epoch5_acc94.80_prec0.9540_rec0.9199.pt



Fold 3 Epoch 1 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.68it/s]


Epoch [1] Loss: 0.6400


Fold 3 Epoch 1 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.07it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.7533      0.8760    0.7533
1           0.9792      0.7382    0.9792
2           0.8038      0.9909    0.8038
3           1.0000      0.7037    1.0000
4           0.9846      0.6845    0.9846
5           0.8741      0.8446    0.8741
6           0.9697      0.8366    0.9697
7           0.9932      0.7737    0.9932
8           0.8971      0.7722    0.8971
----------------------------------------
Overall      86.39       80.23     91.72

Best model for fold 3 updated and saved as bestmodel_fold3_epoch1_acc86.39_prec0.8023_rec0.9172.pt


Fold 3 Epoch 2 [Train]: 100%|██████████| 292/292 [02:52<00:00,  1.70it/s]


Epoch [2] Loss: 0.1660


Fold 3 Epoch 2 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9067      0.8662    0.9067
1           0.8333      0.9917    0.8333
2           0.9736      0.9395    0.9736
3           0.9549      0.9769    0.9549
4           0.9462      0.9609    0.9462
5           0.9441      0.9000    0.9441
6           0.9848      0.9220    0.9848
7           0.8581      0.9845    0.8581
8           0.8235      0.9655    0.8235
----------------------------------------
Overall      94.07       94.53     91.39

Best model for fold 3 updated and saved as bestmodel_fold3_epoch2_acc94.07_prec0.9453_rec0.9139.pt


Fold 3 Epoch 3 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.68it/s]


Epoch [3] Loss: 0.0582


Fold 3 Epoch 3 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.7867      0.9516    0.7867
1           0.9861      0.7396    0.9861
2           0.9489      0.9812    0.9489
3           0.9925      0.9778    0.9925
4           0.9692      0.9692    0.9692
5           0.9371      0.9371    0.9371
6           0.9848      0.9559    0.9848
7           0.9730      0.9231    0.9730
8           0.8676      0.8429    0.8676
----------------------------------------
Overall      94.25       91.98     93.84

Best model for fold 3 updated and saved as bestmodel_fold3_epoch3_acc94.25_prec0.9198_rec0.9384.pt


Fold 3 Epoch 4 [Train]: 100%|██████████| 292/292 [02:51<00:00,  1.70it/s]


Epoch [4] Loss: 0.0332


Fold 3 Epoch 4 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.6200      0.9894    0.6200
1           0.8889      0.8889    0.8889
2           0.9621      0.9358    0.9621
3           0.9925      0.9565    0.9925
4           0.9615      0.8013    0.9615
5           0.8531      0.9173    0.8531
6           0.9394      0.9538    0.9394
7           0.9730      0.8471    0.9730
8           0.7647      0.8889    0.7647
----------------------------------------
Overall      91.84       90.88     88.39



Fold 3 Epoch 5 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.68it/s]


Epoch [5] Loss: 0.0356


Fold 3 Epoch 5 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.10it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8533      0.9078    0.8533
1           0.9722      0.9032    0.9722
2           0.9555      0.9772    0.9555
3           0.9850      0.9704    0.9850
4           0.9462      0.9840    0.9462
5           0.9510      0.9067    0.9510
6           0.9848      0.9701    0.9848
7           0.9865      0.9241    0.9865
8           0.9118      0.8552    0.9118
----------------------------------------
Overall      95.19       93.32     94.96

Best model for fold 3 updated and saved as bestmodel_fold3_epoch5_acc95.19_prec0.9332_rec0.9496.pt



Fold 4 Epoch 1 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.68it/s]


Epoch [1] Loss: 0.6604


Fold 4 Epoch 1 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8267      0.8052    0.8267
1           0.9861      0.6368    0.9861
2           0.8384      0.9760    0.8384
3           0.9925      0.7097    0.9925
4           0.9846      0.7033    0.9846
5           0.6875      0.9519    0.6875
6           0.9318      0.9389    0.9318
7           0.9932      0.7766    0.9932
8           0.7794      0.8908    0.7794
----------------------------------------
Overall      86.60       82.10     89.11

Best model for fold 4 updated and saved as bestmodel_fold4_epoch1_acc86.60_prec0.8210_rec0.8911.pt


Fold 4 Epoch 2 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.68it/s]


Epoch [2] Loss: 0.1596


Fold 4 Epoch 2 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.7000      0.9459    0.7000
1           0.9583      0.9200    0.9583
2           0.9744      0.9494    0.9744
3           0.9925      0.9565    0.9925
4           0.9769      0.8944    0.9769
5           0.8750      0.9065    0.8750
6           0.9394      0.9841    0.9394
7           0.9864      0.9603    0.9864
8           0.8162      0.8740    0.8162
----------------------------------------
Overall      94.03       93.23     91.32

Best model for fold 4 updated and saved as bestmodel_fold4_epoch2_acc94.03_prec0.9323_rec0.9132.pt


Fold 4 Epoch 3 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.68it/s]


Epoch [3] Loss: 0.0643


Fold 4 Epoch 3 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9133      0.7527    0.9133
1           0.8958      0.9773    0.8958
2           0.9431      0.9720    0.9431
3           0.9850      0.9357    0.9850
4           0.9385      0.9683    0.9385
5           0.8542      0.9389    0.8542
6           0.9697      0.9014    0.9697
7           0.9796      0.9290    0.9796
8           0.9118      0.8611    0.9118
----------------------------------------
Overall      93.69       91.52     93.23



Fold 4 Epoch 4 [Train]: 100%|██████████| 292/292 [02:52<00:00,  1.70it/s]


Epoch [4] Loss: 0.0308


Fold 4 Epoch 4 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8533      0.8767    0.8533
1           0.9375      0.9507    0.9375
2           0.9662      0.9750    0.9662
3           1.0000      0.9638    1.0000
4           0.9308      0.9918    0.9308
5           0.9306      0.9116    0.9306
6           0.9773      0.9627    0.9773
7           0.9932      0.9481    0.9932
8           0.9265      0.8750    0.9265
----------------------------------------
Overall      95.49       93.95     94.61

Best model for fold 4 updated and saved as bestmodel_fold4_epoch4_acc95.49_prec0.9395_rec0.9461.pt


Fold 4 Epoch 5 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.69it/s]


Epoch [5] Loss: 0.0266


Fold 4 Epoch 5 [Val]: 100%|██████████| 73/73 [00:11<00:00,  6.09it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.8333      0.8803    0.8333
1           0.9514      0.9580    0.9514
2           0.9662      0.9513    0.9662
3           0.9549      0.9922    0.9549
4           0.7769      1.0000    0.7769
5           0.9375      0.9247    0.9375
6           0.9167      0.9918    0.9167
7           0.9864      0.9295    0.9864
8           0.9044      0.7736    0.9044
----------------------------------------
Overall      93.86       93.35     91.42




Fold 5 Epoch 1 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.68it/s]


Epoch [1] Loss: 0.5958


Fold 5 Epoch 1 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.7020      0.9138    0.7020
1           0.9790      0.6635    0.9790
2           0.9167      0.9841    0.9167
3           0.9699      0.9416    0.9699
4           0.9308      0.8121    0.9308
5           0.9371      0.8645    0.9371
6           0.9015      0.9917    0.9015
7           0.9388      0.9718    0.9388
8           0.9265      0.7500    0.9265
----------------------------------------
Overall      91.28       87.70     91.14

Best model for fold 5 updated and saved as bestmodel_fold5_epoch1_acc91.28_prec0.8770_rec0.9114.pt


Fold 5 Epoch 2 [Train]: 100%|██████████| 292/292 [02:55<00:00,  1.67it/s]


Epoch [2] Loss: 0.1547


Fold 5 Epoch 2 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9272      0.8434    0.9272
1           0.9650      0.7005    0.9650
2           0.9497      0.9746    0.9497
3           0.9774      0.9420    0.9774
4           0.9231      0.9524    0.9231
5           0.9021      0.9556    0.9021
6           0.9470      0.9542    0.9470
7           0.9320      0.9514    0.9320
8           0.7574      0.9450    0.7574
----------------------------------------
Overall      93.38       91.32     92.01

Best model for fold 5 updated and saved as bestmodel_fold5_epoch2_acc93.38_prec0.9132_rec0.9201.pt


Fold 5 Epoch 3 [Train]: 100%|██████████| 292/292 [02:52<00:00,  1.69it/s]


Epoch [3] Loss: 0.0548


Fold 5 Epoch 3 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.08it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9603      0.7632    0.9603
1           0.9231      0.9851    0.9231
2           0.9382      0.9777    0.9382
3           0.9925      0.9362    0.9925
4           0.8000      0.9811    0.8000
5           0.9860      0.8011    0.9860
6           0.9470      0.8446    0.9470
7           0.9252      1.0000    0.9252
8           0.8676      0.8872    0.8676
----------------------------------------
Overall      93.26       90.85     92.66

Best model for fold 5 updated and saved as bestmodel_fold5_epoch3_acc93.26_prec0.9085_rec0.9266.pt


Fold 5 Epoch 4 [Train]: 100%|██████████| 292/292 [02:53<00:00,  1.68it/s]


Epoch [4] Loss: 0.0297


Fold 5 Epoch 4 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.05it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9603      0.7923    0.9603
1           0.8951      0.9922    0.8951
2           0.9580      0.9773    0.9580
3           0.9925      0.9362    0.9925
4           0.9308      0.9237    0.9308
5           0.9720      0.9026    0.9720
6           0.9470      0.8929    0.9470
7           0.9592      0.9792    0.9592
8           0.8235      0.9573    0.8235
----------------------------------------
Overall      94.72       92.82     93.76

Best model for fold 5 updated and saved as bestmodel_fold5_epoch4_acc94.72_prec0.9282_rec0.9376.pt


Fold 5 Epoch 5 [Train]: 100%|██████████| 292/292 [02:54<00:00,  1.67it/s]


Epoch [5] Loss: 0.0449


Fold 5 Epoch 5 [Val]: 100%|██████████| 73/73 [00:12<00:00,  6.06it/s]



Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9205      0.8688    0.9205
1           0.9161      0.9493    0.9161
2           0.9670      0.9568    0.9670
3           0.9699      0.9773    0.9699
4           0.8846      0.9746    0.8846
5           0.9790      0.8750    0.9790
6           0.9318      0.9248    0.9318
7           0.9796      0.9351    0.9796
8           0.7721      0.9813    0.7721
----------------------------------------
Overall      94.46       93.81     92.45


Using best model from fold 4: bestmodel_fold4_epoch4_acc95.49_prec0.9395_rec0.9461.pt


Retrain Epoch 1: 100%|██████████| 364/364 [03:36<00:00,  1.68it/s]


Epoch [1] Loss: 0.0884


Retrain Epoch 2: 100%|██████████| 364/364 [03:36<00:00,  1.68it/s]


Epoch [2] Loss: 0.0313


Retrain Epoch 3: 100%|██████████| 364/364 [03:35<00:00,  1.69it/s]


Epoch [3] Loss: 0.0359


Retrain Epoch 4: 100%|██████████| 364/364 [03:35<00:00,  1.69it/s]


Epoch [4] Loss: 0.0368


Retrain Epoch 5: 100%|██████████| 364/364 [03:35<00:00,  1.69it/s]


Epoch [5] Loss: 0.0192


Collecting validation logits: 100%|██████████| 73/73 [00:12<00:00,  6.07it/s]
Holdout test logits: 100%|██████████| 612/612 [00:05<00:00, 112.57it/s]


Class     Accuracy   Precision    Recall
----------------------------------------
0           0.9143      0.9143    0.9143
1           0.9737      0.9487    0.9737
2           0.9808      0.9503    0.9808
3           0.8750      0.9545    0.8750
4           0.9655      0.9655    0.9655
5           0.8919      0.9706    0.8919
6           0.9667      1.0000    0.9667
7           0.8974      0.9459    0.8974
8           0.9545      0.9767    0.9545
----------------------------------------
Overall      95.42       95.85     93.55






In [2]:
import pandas as pd
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from scipy.special import softmax

# Set model to evaluation mode
model.eval()

test_dataset = datasets.ImageFolder(root=test_data_dir, transform=val_tfms)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

submission_rows = []
with torch.no_grad():
    for batch_idx, (images, _) in enumerate(tqdm(test_loader, desc="Generating Submission")):
        batch_start = batch_idx * test_loader.batch_size
        filenames = [
            test_dataset.imgs[batch_start + i][0].split('/')[-1]
            for i in range(len(images))
        ]
        images = images.to(device)
        outputs = model(images)                       # [batch, num_classes]
        logits = outputs.cpu().numpy()                # [batch, num_classes]
        probs = softmax(logits, axis=1)               # [batch, num_classes]

        # Calibrate probabilities using fitted calibrators
        calibrated_probs = np.zeros_like(probs)
        for i in range(num_classes):
            calibrated_probs[:, i] = calibrators[i].transform(probs[:, i])
        pred_labels = np.argmax(calibrated_probs, axis=1)

        for fname, label in zip(filenames, pred_labels):
            submission_rows.append({'filename': fname, 'label': label})

submission_df = pd.DataFrame(submission_rows)
submission_df.to_csv('submission.csv', index=False)
print("Saved sample submission to submission.csv")


Generating Submission: 100%|██████████| 165/165 [00:27<00:00,  6.10it/s]

Saved sample submission to submission.csv



