In [2]:
import os, copy, random
from collections import defaultdict
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, Dataset
from torchvision import datasets, transforms
from torchvision.models import efficientnet_b2, EfficientNet_B2_Weights
from PIL import Image

# ============ CONFIG ============
data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/train"
num_classes = 9
num_epochs = 10
batch_size = 4                    # 🔻 smaller to prevent OOM
learning_rate = 1e-4
num_folds = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# ============ TRANSFORMS ============
base_transform = transforms.Compose([
    transforms.Resize((256, 256)),        # 🔻 smaller than 384x384
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x if x.size(0)==3 else x.expand(3,*x.shape[1:])),
    transforms.Normalize(mean=[0.485,0.456,0.406],
                         std=[0.229,0.224,0.225])
])
train_augment = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05)
])

# ============ DATASET ============
dataset_original = datasets.ImageFolder(root=data_dir, transform=base_transform)
samples = dataset_original.samples
labels_all = [label for _, label in samples]
class_names = dataset_original.classes

# Holdout (1%)
from sklearn.model_selection import StratifiedShuffleSplit
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.01, random_state=seed)
trainval_idx, holdout_idx = next(sss.split(np.zeros(len(labels_all)), labels_all))
holdout_dataset = Subset(dataset_original, holdout_idx)
trainval_indices = trainval_idx
trainval_labels = [labels_all[i] for i in trainval_indices]
print(f"Train/Val size: {len(trainval_indices)}, Holdout size: {len(holdout_idx)}")

# ============ KFOLD ============
skf_inner = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)
fold_best_models = []

for fold, (train_idx_raw, val_idx_raw) in enumerate(skf_inner.split(trainval_indices, trainval_labels)):
    print(f"\n========== Fold {fold+1}/{num_folds} ==========")
    fold_train_idx = [trainval_indices[i] for i in train_idx_raw]
    fold_val_idx = [trainval_indices[i] for i in val_idx_raw]

    # --- Oversampling with augmentation ---
    class_items = defaultdict(list)
    for i in fold_train_idx:
        img_path, label = samples[i]
        class_items[label].append((img_path, label))
    max_n = max(len(v) for v in class_items.values())

    fold_train_samples = []
    for label, items in class_items.items():
        fold_train_samples += [(p, label, False) for p, label in items]
        n_needed = max_n - len(items)
        for _ in range(n_needed):
            p, label = random.choice(items)
            img = Image.open(p).convert("RGB")
            img = train_augment(img)
            fold_train_samples.append((img, label, True))

    # --- Custom dataset ---
    class FoldAugDataset(Dataset):
        def __init__(self, samples, transform):
            self.samples = samples
            self.transform = transform
        def __len__(self): return len(self.samples)
        def __getitem__(self, idx):
            s = self.samples[idx]
            if s[2]:  # pre-augmented PIL
                image = self.transform(s[0])
                return image, s[1]
            else:
                img = Image.open(s[0]).convert("RGB")
                image = self.transform(img)
                return image, s[1]

    train_dataset = FoldAugDataset(fold_train_samples, base_transform)
    val_dataset = Subset(dataset_original, fold_val_idx)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

    # --- Model ---
    def create_model(num_classes):
        weights = EfficientNet_B2_Weights.IMAGENET1K_V1
        model = efficientnet_b2(weights=weights)
        in_f = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(in_f, num_classes)
        return model

    model = create_model(num_classes)
    model = model.to(device)  # ✅ before wrapping
    # (Disable DataParallel for single-GPU Kaggle; it adds memory overhead)
    # if torch.cuda.device_count() > 1:
    #     model = nn.DataParallel(model)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # --- Mixed precision for memory ---
    from torch.cuda.amp import GradScaler, autocast
    scaler = GradScaler()

    def train_one_epoch(model, loader, criterion, optimizer):
        model.train()
        total_loss, correct, total = 0.0, 0, 0
        for inputs, labels in tqdm(loader, leave=False):
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            optimizer.zero_grad()
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item() * inputs.size(0)
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)
        return total_loss/len(loader.dataset), correct/total

    def validate(model, loader, criterion):
        model.eval()
        total_loss, correct, total = 0.0, 0, 0
        all_preds, all_labels = [], []
        with torch.no_grad(), autocast():
            for inputs, labels in tqdm(loader, leave=False):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item() * inputs.size(0)
                correct += (outputs.argmax(1) == labels).sum().item()
                total += labels.size(0)
                all_preds += outputs.argmax(1).cpu().tolist()
                all_labels += labels.cpu().tolist()
        cm = confusion_matrix(all_labels, all_preds, labels=list(range(num_classes)))
        per_cls = {c: (cm[i,i]/cm[i].sum() if cm[i].sum()>0 else 0.0) for i,c in enumerate(class_names)}
        return total_loss/len(loader.dataset), correct/total, per_cls

    # --- Train ---
    best_val_acc = 0
    best_wts = copy.deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}] — Fold {fold+1}")
        tr_loss, tr_acc = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc, per_cls = validate(model, val_loader, criterion)
        scheduler.step()
        print(f"Train Loss {tr_loss:.4f} | Train Acc {tr_acc:.4f}")
        print(f"Val   Loss {val_loss:.4f} | Val   Acc {val_acc:.4f}")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_wts = copy.deepcopy(model.state_dict())
            torch.save(best_wts, f"best_model_fold{fold+1}.pth")
            print("✅ Improved model saved.")
        torch.cuda.empty_cache()
    fold_best_models.append(best_val_acc)
    print(f"Best Val Acc Fold {fold+1}: {best_val_acc:.4f}")

print("\n🎉 Training complete for all folds.")

Train/Val size: 12133, Holdout size: 123



  scaler = GradScaler()



Epoch [1/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.5038 | Train Acc 0.8369
Val   Loss 0.3023 | Val   Acc 0.9058
✅ Improved model saved.

Epoch [2/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.1444 | Train Acc 0.9540
Val   Loss 0.3270 | Val   Acc 0.9075
✅ Improved model saved.

Epoch [3/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0812 | Train Acc 0.9748
Val   Loss 0.2993 | Val   Acc 0.9172
✅ Improved model saved.

Epoch [4/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0490 | Train Acc 0.9853
Val   Loss 0.2364 | Val   Acc 0.9377
✅ Improved model saved.

Epoch [5/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0330 | Train Acc 0.9894
Val   Loss 0.3506 | Val   Acc 0.9234

Epoch [6/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0196 | Train Acc 0.9938
Val   Loss 0.2924 | Val   Acc 0.9337

Epoch [7/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0121 | Train Acc 0.9965
Val   Loss 0.2987 | Val   Acc 0.9335

Epoch [8/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0065 | Train Acc 0.9980
Val   Loss 0.2803 | Val   Acc 0.9412
✅ Improved model saved.

Epoch [9/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0046 | Train Acc 0.9988
Val   Loss 0.2723 | Val   Acc 0.9407

Epoch [10/10] — Fold 1


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0036 | Train Acc 0.9991
Val   Loss 0.2898 | Val   Acc 0.9444
✅ Improved model saved.
Best Val Acc Fold 1: 0.9444



  scaler = GradScaler()



Epoch [1/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.5151 | Train Acc 0.8344
Val   Loss 0.3213 | Val   Acc 0.9001
✅ Improved model saved.

Epoch [2/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.1440 | Train Acc 0.9545
Val   Loss 0.4379 | Val   Acc 0.8731

Epoch [3/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0787 | Train Acc 0.9748
Val   Loss 0.3067 | Val   Acc 0.9288
✅ Improved model saved.

Epoch [4/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0501 | Train Acc 0.9841
Val   Loss 0.2577 | Val   Acc 0.9360
✅ Improved model saved.

Epoch [5/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0315 | Train Acc 0.9901
Val   Loss 0.3521 | Val   Acc 0.9238

Epoch [6/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0203 | Train Acc 0.9935
Val   Loss 0.2890 | Val   Acc 0.9424
✅ Improved model saved.

Epoch [7/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0124 | Train Acc 0.9961
Val   Loss 0.3220 | Val   Acc 0.9384

Epoch [8/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0069 | Train Acc 0.9980
Val   Loss 0.2901 | Val   Acc 0.9444
✅ Improved model saved.

Epoch [9/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0048 | Train Acc 0.9989
Val   Loss 0.2976 | Val   Acc 0.9446
✅ Improved model saved.

Epoch [10/10] — Fold 2


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0031 | Train Acc 0.9991
Val   Loss 0.3415 | Val   Acc 0.9434
Best Val Acc Fold 2: 0.9446



  scaler = GradScaler()



Epoch [1/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.5107 | Train Acc 0.8340
Val   Loss 0.3224 | Val   Acc 0.8909
✅ Improved model saved.

Epoch [2/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.1442 | Train Acc 0.9550
Val   Loss 0.2637 | Val   Acc 0.9221
✅ Improved model saved.

Epoch [3/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0758 | Train Acc 0.9760
Val   Loss 0.2630 | Val   Acc 0.9241
✅ Improved model saved.

Epoch [4/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0513 | Train Acc 0.9845
Val   Loss 0.3024 | Val   Acc 0.9179

Epoch [5/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0314 | Train Acc 0.9902
Val   Loss 0.3371 | Val   Acc 0.9186

Epoch [6/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0196 | Train Acc 0.9937
Val   Loss 0.2528 | Val   Acc 0.9342
✅ Improved model saved.

Epoch [7/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0105 | Train Acc 0.9969
Val   Loss 0.2358 | Val   Acc 0.9444
✅ Improved model saved.

Epoch [8/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0072 | Train Acc 0.9975
Val   Loss 0.2452 | Val   Acc 0.9392

Epoch [9/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0053 | Train Acc 0.9984
Val   Loss 0.2236 | Val   Acc 0.9451
✅ Improved model saved.

Epoch [10/10] — Fold 3


  with autocast():
  with torch.no_grad(), autocast():
                                                   

Train Loss 0.0037 | Train Acc 0.9990
Val   Loss 0.2104 | Val   Acc 0.9535
✅ Improved model saved.
Best Val Acc Fold 3: 0.9535

🎉 Training complete for all folds.




In [12]:
import os
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, classification_report
from scipy.special import softmax  # To use softmax on the ensemble logits
from tqdm import tqdm  # Ensure you have tqdm imported for progress bar

# ==============================
# ENSEMBLE ON HOLDOUT
# ==============================

# Ensure base_transform and dataset_original are defined and available
dataset_original.transform = base_transform

# Define the DataLoader for the holdout dataset
holdout_loader = DataLoader(holdout_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Initialize an empty list to store logits from each model in the ensemble
ensemble_logits = []

# Define the paths for the best fold models (adjust paths as needed)
fold_best_models = [
    "/kaggle/working/best_model_fold1.pth",
    "/kaggle/working/best_model_fold2.pth",
    "/kaggle/working/best_model_fold3.pth"
]

# Check if the model paths exist
for best_model_path in fold_best_models:
    if not os.path.exists(best_model_path):
        print(f"Error: Model path '{best_model_path}' does not exist!")
        continue  # Skip this model if the path is invalid
    else:
        print(f"Loading model from: {best_model_path}")
    
    # Create the model (assuming `create_model` initializes the model architecture)
    model = create_model(num_classes=9)

    # If you're using multiple GPUs, use DataParallel
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    
    # Load the state dict for the current model
    try:
        model.load_state_dict(torch.load(best_model_path, map_location=device))
        model.eval()  # Set model to evaluation mode
    except Exception as e:
        print(f"Error loading model from {best_model_path}: {e}")
        continue  # Skip to the next model if there's an issue
    
    # Collect logits for the holdout dataset
    logits = []
    with torch.no_grad():  # No need to track gradients during inference
        for images, _ in tqdm(holdout_loader, desc=f"Holdout Inference - {best_model_path}", leave=False):
            images = images.to(device)
            outputs = model(images)
            logits.append(outputs.cpu().numpy())
    
    # Concatenate all logits into one array for this model
    logits = np.concatenate(logits, axis=0)
    
    # Append this model's logits to the ensemble logits list
    ensemble_logits.append(logits)

# Check if ensemble_logits has at least one model
if len(ensemble_logits) == 0:
    raise ValueError("No valid models were loaded. Please check your model paths.")

# Combine all logits from the models in the ensemble by averaging
ensemble_logits = np.mean(np.stack(ensemble_logits, axis=0), axis=0)

# Apply softmax to convert logits into class probabilities
ensemble_probs = softmax(ensemble_logits, axis=1)

# Get the predicted class by taking the argmax over the class dimension
ensemble_preds = np.argmax(ensemble_probs, axis=1)

# Assuming you have the true labels for the holdout set
true_labels = holdout_labels  # Ensure this variable exists and is correctly defined

# Print the ensemble results
print("\n========== ENSEMBLE RESULTS ON HOLDOUT ==========")
print("Holdout Ensemble Accuracy:", accuracy_score(true_labels, ensemble_preds))
print(classification_report(true_labels, ensemble_preds, target_names=class_names))


Loading model from: /kaggle/working/best_model_fold1.pth
Error loading model from /kaggle/working/best_model_fold1.pth: Error(s) in loading state_dict for DataParallel:
	Missing key(s) in state_dict: "module.features.0.0.weight", "module.features.0.1.weight", "module.features.0.1.bias", "module.features.0.1.running_mean", "module.features.0.1.running_var", "module.features.1.0.block.0.0.weight", "module.features.1.0.block.0.1.weight", "module.features.1.0.block.0.1.bias", "module.features.1.0.block.0.1.running_mean", "module.features.1.0.block.0.1.running_var", "module.features.1.0.block.1.fc1.weight", "module.features.1.0.block.1.fc1.bias", "module.features.1.0.block.1.fc2.weight", "module.features.1.0.block.1.fc2.bias", "module.features.1.0.block.2.0.weight", "module.features.1.0.block.2.1.weight", "module.features.1.0.block.2.1.bias", "module.features.1.0.block.2.1.running_mean", "module.features.1.0.block.2.1.running_var", "module.features.1.1.block.0.0.weight", "module.features.1.

ValueError: No valid models were loaded. Please check your model paths.

In [13]:
import os
import torch
import pandas as pd
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from PIL import Image
from sklearn.preprocessing import LabelEncoder

# ============ CONFIG ============
test_data_dir = "/kaggle/input/AI-OF-GOD-4/aog_data/test"  # Adjust the path to your test dataset
submission_file = "/kaggle/working/submission.csv"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 4  # Ensure to use the same batch size as training

# ============ TRANSFORMS ============
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),        # Same as the train transform size
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x if x.size(0) == 3 else x.expand(3, *x.shape[1:])),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# ============ LOAD TEST DATA ============
test_dataset = datasets.ImageFolder(root=test_data_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# ============ LOAD BEST MODELS ============
def load_model(fold_num, num_classes=9):
    model = efficientnet_b2(weights=EfficientNet_B2_Weights.IMAGENET1K_V1)
    in_f = model.classifier[1].in_features
    model.classifier[1] = torch.nn.Linear(in_f, num_classes)
    model.load_state_dict(torch.load(f"/kaggle/working/best_model_fold{fold_num}.pth"))
    model = model.to(device)
    model.eval()
    return model

models = [load_model(fold_num) for fold_num in range(1, 4)]  # Load models for all 3 folds

# ============ PREDICT AND AVERAGE ============
def predict(models, test_loader):
    all_preds = []
    with torch.no_grad():
        for inputs, _ in test_loader:
            inputs = inputs.to(device)
            fold_preds = []
            # Get predictions from all 3 models (folds)
            for model in models:
                outputs = model(inputs)
                fold_preds.append(outputs.cpu())
            # Average the predictions from all 3 folds
            avg_preds = torch.stack(fold_preds).mean(0)
            all_preds.append(avg_preds)
    return torch.cat(all_preds, 0)

# Get the predictions for the test set
test_preds = predict(models, test_loader)

# Convert to class labels (get the index with the highest probability)
predicted_labels = torch.argmax(test_preds, dim=1).numpy()

# ============ CREATE SUBMISSION FILE ============
# Get the filenames in the test set (required for the submission format)
test_filenames = [os.path.basename(path[0]) for path in test_dataset.samples]

# Create the submission DataFrame
submission_df = pd.DataFrame({
    'filename': test_filenames,
    'label': predicted_labels
})

# ============ SAVE TO CSV ============
submission_df.to_csv(submission_file, index=False)

print(f"Submission saved to {submission_file}")


Submission saved to /kaggle/working/submission.csv


In [None]:
print("HI")

In [None]:
print("HI")