# Implementation: Custom Image Dataset

**Goal**: Build a class that behaves like a PyTorch `Dataset` and loads each sample lazily, i.e. only when that sample is requested.

In [None]:
import numpy as np
import os

# 1. Mock File System
# Stand-in for a directory listing: 100 image file names, none of which exist on disk.
all_files = [f"img_{idx}.jpg" for idx in range(100)]
# Two classes, alternating: even-numbered images are class 0, odd-numbered are class 1.
labels = [0, 1] * 50

# 2. Dataset Class
class ImageDataset:
    """Lazily-loaded image dataset modelled on the PyTorch ``Dataset`` protocol.

    Only the file paths and labels are kept on the instance; an image array is
    produced on demand each time an item is indexed.

    Args:
        file_paths: Sequence of image paths, one per sample.
        targets: Sequence of labels, aligned index-for-index with
            ``file_paths``.
        transform: Optional callable applied to each image before it is
            returned (e.g. an augmentation). ``None`` disables it.

    Raises:
        ValueError: If ``file_paths`` and ``targets`` differ in length.
    """

    def __init__(self, file_paths, targets, transform=None):
        # Fail fast: a mismatch would otherwise only show up later as an
        # IndexError in __getitem__ or as labels that are silently never used.
        if len(file_paths) != len(targets):
            raise ValueError(
                f"file_paths and targets must have the same length "
                f"(got {len(file_paths)} and {len(targets)})"
            )
        self.files = file_paths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per stored file path)."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for the sample at ``idx``.

        Raises whatever the underlying sequences raise for an invalid index
        (``IndexError`` for lists).
        """
        # The path lookup also validates idx. The mock loader below does not
        # read the path; a real loader would, e.g. cv2.imread(f_path).
        f_path = self.files[idx]
        label = self.targets[idx]

        # Mock Load Image: a fresh random 28x28x3 array on every access.
        image = np.random.randn(28, 28, 3)

        # Compare against None rather than testing truthiness: a callable that
        # defines __len__ or __bool__ may be falsy and must still be applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

# 3. Usage
# Constructing the dataset only records the paths and labels handed to it.
dataset = ImageDataset(file_paths=all_files, targets=labels)
print(f"Dataset Size: {len(dataset)}")

# Indexing goes through __getitem__, which yields an (image, label) pair.
img, y = dataset[5]
print(f"Item 5: Image Shape {img.shape}, Label {y}")

## Conclusion
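This class doesn't hold 100 images in RAM. It stores only 100 path strings (plus their labels) and produces an image only when `dataset[i]` is evaluated. In this mock the image is a random array; a real implementation would read the file at that path at the same point.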