In [4]:
import os
import cv2
import numpy as np

def load_casia_b_data(dataset_path):
    """
    Read every image of a CASIA-B style directory tree into memory.

    The tree is walked as ``<dataset_path>/<subject>/<condition>/<view>/<image>``.
    Entries are visited in sorted name order at every level. Anything that is
    not a directory at the subject, condition or view level is ignored, and
    files for which ``cv2.imread`` returns ``None`` are skipped.

    Args:
        dataset_path: Path to the root of the CASIA-B dataset.
    Returns:
        X: ``np.array`` built from the grayscale images, in traversal order.
        y: ``np.array`` built from one ``(subject, condition, view)`` tuple
           per image, where subject and view are parsed with ``int`` and
           condition is the directory name (e.g. "nm-01").

    NOTE(review): each label tuple mixes ints with a string, so ``np.array``
    will most likely store all three fields as strings -- confirm before
    comparing subject/view against integers downstream.
    """
    def _sorted_subdirs(parent):
        # Lazily yield (name, full_path) for each directory directly under
        # `parent`, in sorted name order; plain files are silently skipped.
        for entry in sorted(os.listdir(parent)):
            full_path = os.path.join(parent, entry)
            if os.path.isdir(full_path):
                yield entry, full_path

    images = []
    labels = []

    # subject (e.g. "001") -> condition (e.g. "nm-01") -> view (e.g. "018")
    for subject, subject_root in _sorted_subdirs(dataset_path):
        for condition, condition_root in _sorted_subdirs(subject_root):
            for view, view_root in _sorted_subdirs(condition_root):
                for filename in sorted(os.listdir(view_root)):
                    frame = cv2.imread(
                        os.path.join(view_root, filename),
                        cv2.IMREAD_GRAYSCALE,
                    )
                    # imread signals an unreadable file by returning None
                    if frame is None:
                        continue

                    images.append(frame)
                    labels.append((int(subject), condition, int(view)))

    return np.array(images), np.array(labels)

# Root directory passed to the loader; it is walked as
# <root>/<subject>/<condition>/<view>/<image>.
dataset_path = "output"

# Load every readable image into memory at once: X holds the grayscale
# images, y the matching (subject, condition, view) labels.
X, y = load_casia_b_data(dataset_path)

# Report how many images were actually loaded (unreadable files are skipped
# by the loader, so this can be lower than the number of files on disk).
print(f"Loaded {len(X)} images from CASIA-B dataset.")


Loaded 1117083 images from CASIA-B dataset.
