# In [None]:
def get_png_files(data_path, recursive=True):
    """Collect the paths of the PNG files located under ``data_path``.

    Args:
        data_path: Directory to search (a string or any path-like object).
        recursive: Forwarded to ``glob.glob``. When True, the ``**`` segment
            matches any depth of sub-directories (including none), so files
            directly inside ``data_path`` are found as well. When False,
            ``**`` behaves like ``*`` and only files exactly one directory
            below ``data_path`` are matched.

    Returns:
        A list of matching file paths, in the order ``glob.glob`` yields them.
    """
    # Imported locally so this function does not rely on a file-level import.
    import os

    # Build the pattern with the platform's own separator: a hard-coded "\\"
    # only works on Windows and matches nothing on POSIX systems.
    pattern = os.path.join(data_path, "**", "*.png")
    png_files = glob.glob(pattern, recursive=recursive)
    print(f"Found {len(png_files)} PNG files.")
    return png_files

def save_npy_array_from_png(png_files, image_shape, save_npy=True, npy_filename='images'):
    """Convert PNG files into one stacked grayscale NumPy array.

    Every image is opened, converted to 8-bit grayscale (mode "L"), resized,
    and given a trailing channel axis of length 1. The per-image arrays are
    stacked along a new first axis.

    Args:
        png_files: Sequence of paths to the image files to convert.
        image_shape: Two-element ``(height, width)`` target shape. The stacked
            result has shape ``(len(png_files), height, width, 1)``.
        save_npy: When True, write the stacked array to
            ``npy_filename + ".npy"`` and return None. When False, return
            the array instead of saving it.
        npy_filename: Output file name without the ``.npy`` extension.

    Returns:
        None when ``save_npy`` is True, otherwise the stacked array.

    Raises:
        ValueError: If ``png_files`` is empty.
    """
    total = len(png_files)
    if total == 0:
        # np.stack cannot stack an empty list; fail early with a clear message.
        raise ValueError("No PNG files were provided; nothing to convert.")

    # PIL's resize expects (width, height) while NumPy arrays built from an
    # image are (height, width). Treating image_shape as (height, width) keeps
    # the output shape equal to image_shape + (1,) and avoids scrambling the
    # pixels of non-square targets with a mismatched reshape.
    height, width = image_shape

    print("Starting pipeline...")
    np_arrays = []
    prev_progress = -0.5
    for i, png_file in enumerate(png_files):

        # Open the image inside a context manager so the file handle is
        # released promptly, and convert it to grayscale
        with Image.open(png_file) as img:
            img_gray = img.convert("L")

        # Resize the image to the desired shape and turn it into a NumPy array
        img_resized = img_gray.resize((width, height))
        img_array = np.array(img_resized)

        # Append a trailing channel axis: (height, width) -> (height, width, 1)
        np_arrays.append(img_array.reshape((height, width, 1)))

        # Print progress, but only after advancing by at least 0.5 percent
        curr_progress = round(100 * (i + 1) / total, 2)
        if (curr_progress - prev_progress) >= 0.5:
            prev_progress = float(curr_progress)
            print(f"{curr_progress}%", end=" ")
            # Close the line with a file count at every multiple of 10 percent
            if curr_progress != 0.0 and curr_progress % 10.0 == 0:
                print(f"({i+1} files done)")

    # Stack the per-image arrays along a new leading axis
    np_arrays_stacked = np.stack(np_arrays, axis=0)
    print(f"\nFinal NumPy array shape: {np_arrays_stacked.shape}")

    # Save the NumPy array as an NPY file if specified, otherwise, return the array
    if save_npy:
        np.save(npy_filename + ".npy", np_arrays_stacked)
        return None
    return np_arrays_stacked

def save_npy_array_from_png_per_category(data_path, image_shape):
    """Convert the PNG images of every category into one NPY file per category.

    The category names come from ``get_category_names(data_path)``. Each
    category is treated as a sub-directory of ``data_path``, and its images
    are saved to ``<category>_images.npy`` via ``save_npy_array_from_png``.

    Args:
        data_path: Root directory that contains one sub-directory per category.
        image_shape: Target image shape, forwarded unchanged to
            ``save_npy_array_from_png``.

    Raises:
        ValueError / IndexError: If a file name does not have an integer as
            its third underscore-separated token (needed for ordering).
    """
    # Imported locally so this function does not rely on a file-level import.
    import os

    # Get the category names
    categories = get_category_names(data_path)
    print(f"Found {len(categories)} categories.")

    # Loop through the categories
    for i, category in enumerate(categories):
        print(f"================ Starting process for category #{i+1}, name: {category} ================")

        # Set the category data path (platform-independent join instead of a
        # hard-coded backslash) and retrieve all images in PNG format
        category_path = os.path.join(data_path, category)
        png_files = get_png_files(category_path, recursive=True)

        # One empty category should not abort the remaining ones
        if not png_files:
            print(f"No PNG files found for category {category}, skipping.")
            continue

        # Fix the file order caused by a naming error during generation: sort
        # numerically by the third underscore-separated token of the file
        # name, so that files end up in proper id order
        png_files = sorted(
            png_files,
            key=lambda path: int(os.path.basename(path).split('_')[2]),
        )

        # Convert PNG files into Numpy arrays
        save_npy_array_from_png(png_files, image_shape, save_npy=True, npy_filename=category + "_images")

# Script entry point: read the PNG images of every category found under data_path,
# convert them to 128x128 arrays, and save them as NumPy (.npy) files.
# NOTE: data_path is an absolute, machine-specific Windows path; adjust it before
# running this on another machine.
data_path = r"C:\Users\aanal\Documents\sem3\nureal_network_and_deep_learning\Project\op"
save_npy_array_from_png_per_category(data_path, (128, 128))