Scan the dataset and create mappings

In [1]:
import os

def scan_dataset(root_dir, extensions=('.jpg', '.jpeg', '.png')):
    """
    Collect image files from a directory laid out as one sub-folder per class.

    root_dir: 'data/train' or 'data/val' or 'data/test'
    extensions: file-name suffix (or iterable of suffixes) accepted as images;
                matched case-insensitively. Defaults to JPEG and PNG.
    Returns: (image_paths, labels) -- two parallel lists where labels[i] is the
             name of the sub-folder containing image_paths[i]. Entries are
             ordered by class name, then by file name.
    Raises: whatever os.listdir raises when root_dir cannot be listed
            (e.g. FileNotFoundError).
    """
    # Accept a single suffix as well as an iterable of suffixes.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith needs a tuple; lower-case it because file names are
    # lower-cased before the comparison.
    suffixes = tuple(ext.lower() for ext in extensions)

    image_paths = []
    labels = []

    for class_name in sorted(os.listdir(root_dir)):
        class_folder = os.path.join(root_dir, class_name)
        # Loose files directly under root_dir do not belong to any class.
        if not os.path.isdir(class_folder):
            continue
        for fname in sorted(os.listdir(class_folder)):
            if not fname.lower().endswith(suffixes):
                continue
            candidate = os.path.join(class_folder, fname)
            # A nested directory can carry an image-like name; only regular
            # files can be opened as images later on.
            if os.path.isfile(candidate):
                image_paths.append(candidate)
                labels.append(class_name)

    return image_paths, labels

# Walk each split once; every call yields (image paths, class-name labels).
train_paths, train_labels = scan_dataset("data/train")
val_paths, val_labels = scan_dataset("data/val")
test_paths, _ = scan_dataset("data/test")  # labels of the test split are discarded

# The class vocabulary is taken from the training split only, sorted by name
# (e.g. ['eiffel', 'stonehenge', ...]), and each class gets its position as id.
classes = sorted({*train_labels})
class_to_idx = dict(zip(classes, range(len(classes))))

# Integer-encode the string labels. A validation class that never occurs in
# the training split is absent from class_to_idx and raises KeyError here.
train_labels_idx = list(map(class_to_idx.__getitem__, train_labels))
val_labels_idx = list(map(class_to_idx.__getitem__, val_labels))


PyTorch dataset

In [2]:
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms

class LandmarkDataset(Dataset):
    """
    Dataset over a list of image files, optionally paired with labels.

    With labels, each item is (image, label). Without labels (labels=None,
    e.g. a test / query set), each item is (image, image_path) so the caller
    can tell which file the image came from.
    """

    def __init__(self, image_paths, labels=None, transform=None):
        """
        image_paths: sequence of paths to image files
        labels: sequence parallel to image_paths, or None for an unlabeled set
        transform: optional callable applied to the RGB PIL image
        Raises: ValueError if labels is given but its length differs from
                image_paths.
        """
        # Catch misaligned inputs up front instead of failing with an
        # IndexError (or silently ignoring surplus labels) during iteration.
        if labels is not None and len(labels) != len(image_paths):
            raise ValueError(
                f"image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        self.image_paths = image_paths
        self.labels = labels  # None for test set
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """
        Load the image at position idx, convert it to RGB and apply the
        transform if one was given.

        Returns: (image, label) when labels are available, otherwise
                 (image, image_path).
        """
        img_path = self.image_paths[idx]
        # Close the underlying file explicitly; convert() returns a separate
        # image object, so it stays usable after the source handle is closed.
        with Image.open(img_path) as source:
            img = source.convert("RGB")

        # Compare against None so a falsy-but-valid callable is still applied.
        if self.transform is not None:
            img = self.transform(img)

        if self.labels is not None:
            return img, self.labels[idx]
        return img, img_path  # return path for query images


Transforms

In [4]:
from torchvision import transforms

# Training pipeline: resize, then random crop / horizontal flip / colour
# jitter, followed by tensor conversion and per-channel normalization.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics -- confirm they match the backbone's pretraining.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Evaluation pipeline: same resize and normalization, but a fixed centre crop
# and no random operations.
val_transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


DataLoaders

train_loader → used for global/local feature training. <br>
val_loader → used for monitoring accuracy / mAP. <br>
test_loader → used for retrieval / feature extraction, returns image path so you can save .npy or .npz. <br>
To match the input size of your DELF / DELG backbone (e.g., 512 or 1024), change the `Resize` and crop sizes in the transforms above. <br>

In [5]:
from torch.utils.data import DataLoader

# Training split: integer labels, augmenting transform, reshuffled by the loader.
train_dataset = LandmarkDataset(
    image_paths=train_paths,
    labels=train_labels_idx,
    transform=train_transform,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)

# Validation split: integer labels, evaluation transform, fixed order.
val_dataset = LandmarkDataset(
    image_paths=val_paths,
    labels=val_labels_idx,
    transform=val_transform,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4,
)

# Test split: no labels, so each item carries its image path instead; one
# image per batch, fixed order, loader's default worker setting.
test_dataset = LandmarkDataset(
    image_paths=test_paths,
    labels=None,
    transform=val_transform,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
)

In [None]:
# Sanity check: fetch the first training sample through train_transform and
# print it. train_dataset was built with labels, so the item is (image, label).
img, label = train_dataset[0]
print("Image object:", img)
print("Label ID:", label)

# train_transform ends with ToTensor/Normalize, so img is a tensor here and
# exposes .shape
print("Image shape:", img.shape)  # [C,H,W]
