In [8]:
import os
import json
from PIL import Image

def load_train_data(dataset_path):
    """
    Loads image paths and annotations from only the training dataset.

    Args:
        dataset_path (str): Path to the main dataset directory

    Returns:
        tuple: (image_paths, annotations) - Lists of image file paths and annotation data
    """
    image_paths = []
    annotations = []

    # Only process train split
    img_dir = os.path.join(dataset_path, 'train', 'img')
    ann_dir = os.path.join(dataset_path, 'train', 'ann')

    # Ensure directories exist
    if not os.path.exists(img_dir) or not os.path.exists(ann_dir):
        print(f"Error: Directory {img_dir} or {ann_dir} not found.")
        return image_paths, annotations

    for filename in os.listdir(img_dir):
        if filename.endswith('.jpg'):
            image_path = os.path.join(img_dir, filename)
            annotation_path = os.path.join(ann_dir, filename + '.json')

            if os.path.exists(annotation_path):
                try:
                    with open(annotation_path, 'r') as f:
                        annotation = json.load(f)
                    image_paths.append(image_path)
                    annotations.append(annotation)
                except json.JSONDecodeError:
                    print(f"Error: Invalid JSON in {annotation_path}")
            else:
                print(f"Warning: No annotation found for {filename}")

    return image_paths, annotations

# Example Usage
if __name__ == '__main__':
    dataset_root = '/Users/spgohil/Developer/Projects/ROAD-CONDITION-DETECTION/dataset/DatasetNinja_RDD2022_MIX/'
    
    image_paths, annotations = load_train_data(dataset_root)
    
    print(f"Loaded {len(image_paths)} training images and {len(annotations)} annotations.")
    
    # Example: Print the first image path and its annotation
    if image_paths:
        print(f"First image path: {image_paths[0]}")
        print(f"First annotation: {annotations[0]}")

Loaded 804 training images and 804 annotations.
First image path: /Users/spgohil/Developer/Projects/ROAD-CONDITION-DETECTION/dataset/DatasetNinja_RDD2022_MIX/train/img/Japan_005850.jpg
First annotation: {'description': '', 'tags': [{'id': 16552613, 'tagId': 29663, 'name': 'Japan', 'value': None, 'labelerLogin': 'inbox@datasetninja.com', 'createdAt': '2024-01-22T15:32:59.633Z', 'updatedAt': '2024-01-22T15:32:59.633Z'}], 'size': {'height': 600, 'width': 600}, 'objects': [{'id': 138956045, 'classId': 6512000, 'objectId': None, 'description': '', 'geometryType': 'rectangle', 'labelerLogin': 'inbox@datasetninja.com', 'createdAt': '2024-01-22T15:32:59.634Z', 'updatedAt': '2024-01-22T15:32:59.634Z', 'tags': [{'id': 104942971, 'tagId': 29666, 'name': 'detail', 'value': 'equal interval', 'labelerLogin': 'inbox@datasetninja.com', 'createdAt': '2024-01-22T15:32:59.634Z', 'updatedAt': '2024-01-22T15:32:59.634Z'}], 'classTitle': 'transverse crack', 'points': {'exterior': [[250, 352], [522, 388]], '

class MyDataset(torchvision.datasets.ImageFolder):

def __init__(self, train_folder_path='.', transform=None, target_transform=None):
    super().__init__(train_folder_path, transform, target_transform)