# Data Loading using CPU

In [None]:
import os
import cv2
import numpy as np
import concurrent.futures

def load_images_from_folder(folder_path):
    """Recursively load every readable image found under ``folder_path``.

    Files are selected by extension (``.jpg``, ``.jpeg``, ``.png``). The
    comparison is case-insensitive, so names such as ``IMG_0001.JPG`` are
    included. Directories and files are visited in sorted order, which makes
    the order of the returned list reproducible across runs and platforms.

    Args:
        folder_path: Root directory to search. A path that does not exist
            produces an empty list, because ``os.walk`` ignores directory
            listing errors by default.

    Returns:
        A list holding the value returned by ``cv2.imread`` for each matching
        file, excluding files for which it returned ``None``.
    """
    image_extensions = (".jpg", ".jpeg", ".png")
    images = []
    for root, dir_names, file_names in os.walk(folder_path):
        # Sorting the directory list in place controls the order in which
        # os.walk (top-down by default) descends into subdirectories.
        dir_names.sort()
        for file_name in sorted(file_names):
            if not file_name.lower().endswith(image_extensions):
                continue
            image = cv2.imread(os.path.join(root, file_name))
            # Files that could not be decoded come back as None; skip them.
            if image is not None:
                images.append(image)
    return images

def load_images_parallel(folder_paths, num_workers):
    """Load images from several folders using a pool of worker threads.

    Every entry of ``folder_paths`` is handed to ``load_images_from_folder``
    as one task. The per-folder results are then concatenated in the order
    the folders were given.

    Args:
        folder_paths: Iterable of directories to load.
        num_workers: Forwarded to ``ThreadPoolExecutor`` as ``max_workers``.

    Returns:
        One flat list containing the images of all folders.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as pool:
        # Materialise the results while the pool is still open so that any
        # exception raised by a task surfaces here.
        per_folder = list(pool.map(load_images_from_folder, folder_paths))
    return [image for folder_images in per_folder for image in folder_images]

base_folder = "Vision_data"
train_folder = os.path.join(base_folder, "train")
test_folder = os.path.join(base_folder, "test")
validation_folder = os.path.join(base_folder, "validation")

# Load images from the train, test, and validation folders concurrently.
# Each split is submitted as its own task to one shared thread pool; loading
# them through three separate single-folder calls would run only one task at
# a time and leave the remaining workers idle.
num_workers = 4  # Only three tasks are submitted, so values above 3 add nothing
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
    # executor.map yields results in input order, so the unpacking below
    # matches train / test / validation.
    train_images, test_images, validation_images = executor.map(
        load_images_from_folder,
        (train_folder, test_folder, validation_folder),
    )

# Convert the image lists to NumPy arrays for further processing
# NOTE(review): this presumably relies on every image within a split having
# the same shape; differently sized images would not stack into a regular
# array. Confirm against the dataset, or resize before converting.
train_images = np.array(train_images)
test_images = np.array(test_images)
validation_images = np.array(validation_images)