In [1]:
import os
import json

# Locations of the image folder and of the annotation JSON
image_dir = "/home/renaldy_fredyan/PhDResearch/LOCA/Dataset/images_384_VarV2"
annotation_file = "/home/renaldy_fredyan/PhDResearch/LOCA/Dataset/annotation_FSC147_384.json"

# Parse the annotation file; its top-level keys are the annotated image names
with open(annotation_file, "r") as f:
    annotations = json.load(f)

# How many images carry an annotation
annotated_images = list(annotations.keys())
print(f"Total annotated images: {len(annotated_images)}")

# Every annotated image whose file cannot be found inside the image folder
missing_images = [
    img_name
    for img_name in annotated_images
    if not os.path.exists(os.path.join(image_dir, img_name))
]

# Note: the folder count is the number of directory entries of any kind
print(f"Total available images in folder: {len(os.listdir(image_dir))}")
print(f"Missing images in folder: {len(missing_images)}")
if missing_images:
    # Show at most the first five offenders
    print("Missing image examples:", missing_images[:5])


Total annotated images: 6146
Total available images in folder: 6146
Missing images in folder: 0


In [3]:
import os
import json
from torch.utils.data import Dataset

class ObjectCountingDataset(Dataset):
    """Index over the images belonging to one split of a counting dataset.

    The constructor reads two JSON files from ``data_path``: the annotation
    file, whose top-level keys are used as image file names, and (for the
    named splits) the split file, which maps a split name to a collection of
    image file names.  Indexing currently returns only the image file name --
    a debugging placeholder -- after checking that the image file is present.

    The file and directory names are class attributes so that a subclass can
    target a differently named layout without re-implementing any method.
    """

    # Names resolved relative to ``data_path``; override in a subclass if needed.
    ANNOTATION_FILE = 'annotation_FSC147_384.json'
    SPLIT_FILE = 'Train_Test_Val_FSC_147.json'
    IMAGE_DIR = 'images_384_VarV2'
    # Split names that are looked up in the split file; any other value of
    # ``split`` selects every annotated image.
    KNOWN_SPLITS = ('train', 'val', 'test')

    def __init__(self, data_path, img_size, split='train', tiling_p=0.5):
        """Load the annotations and the list of image names for ``split``.

        Args:
            data_path: Directory holding the annotation JSON, the split JSON
                and the image directory.
            img_size: Stored on the instance; not otherwise used by the
                methods defined here.
            split: ``'train'``, ``'val'`` or ``'test'`` selects that entry of
                the split file (an entry absent from the file yields an empty
                dataset).  Any other value selects all annotated images.
            tiling_p: Stored on the instance; not otherwise used by the
                methods defined here.

        Raises:
            FileNotFoundError: If a JSON file that has to be read is missing.
        """
        self.data_path = data_path
        self.img_size = img_size
        self.split = split
        self.tiling_p = tiling_p

        # JSON is read explicitly as UTF-8 so that loading does not depend on
        # the platform's default text encoding.
        annotation_file = os.path.join(data_path, self.ANNOTATION_FILE)
        with open(annotation_file, 'r', encoding='utf-8') as f:
            self.annotations = json.load(f)

        # Log total annotations loaded
        print(f"Total annotations loaded: {len(self.annotations)}")

        if split in self.KNOWN_SPLITS:
            split_file = os.path.join(data_path, self.SPLIT_FILE)
            with open(split_file, 'r', encoding='utf-8') as f:
                splits = json.load(f)
            # A split missing from the file is treated as empty, not an error.
            self.image_names = splits.get(split, [])
        else:
            # Unrecognised split name: fall back to every annotated image.
            self.image_names = list(self.annotations.keys())

        # Log total images in the specified split
        print(f"Split: {split}, Total images in split: {len(self.image_names)}")

    def __len__(self):
        """Return the number of image names in the selected split."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return the file name of the image at position ``idx``.

        Only the name is returned for now (placeholder for debugging); the
        image itself is not opened.

        Raises:
            IndexError: If ``idx`` is out of range for the split.
            FileNotFoundError: If no regular file exists at the image path.
        """
        img_name = self.image_names[idx]
        img_path = os.path.join(self.data_path, self.IMAGE_DIR, img_name)

        # isfile rather than exists: a directory that happens to carry the
        # image's name must not be accepted as the image.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"{img_path} not found.")

        return img_name

# Example usage: index the training split and report how many images it holds
dataset_path = "/home/renaldy_fredyan/PhDResearch/LOCA/Dataset/"
dataset = ObjectCountingDataset(dataset_path, 512, split='train')
print(f"Dataset size: {len(dataset)}")


Total annotations loaded: 6146
Split: train, Total images in split: 3659
Dataset size: 3659
