In [1]:
from PIL import Image
import numpy as np

def extract_sub_images(image_path):
    """Split an image into sub-images separated by white rows and columns.

    The image is first cut into horizontal bands at every row whose values
    are all exactly 255. Each band is then cut into pieces at every column
    whose values (within that band) are all >= 250.

    Args:
        image_path: Location of the image, passed straight to
            ``PIL.Image.open``.

    Returns:
        A list with one entry per non-empty band, ordered top to bottom.
        Every entry is a list of numpy arrays holding the pieces of that
        band from left to right. A band that contains no white column
        yields a one-element list with the whole band, so
        ``result[i][j]`` is always a sub-image. A band made up solely of
        white columns yields an empty list.

    Notes:
        * Only bands enclosed by two all-white rows are returned; anything
          above the first or below the last all-white row is ignored, and
          an image with fewer than two all-white rows gives ``[]``.
        * NOTE(review): rows must be exactly 255 to count as separators
          while columns only need to be >= 250. Kept as-is to preserve
          existing results; confirm whether the mismatch is intentional.
        * Greyscale (2-D) images are supported; their pieces stay 2-D.
    """
    # The context manager releases the file handle as soon as the pixel
    # data has been copied into the array.
    with Image.open(image_path) as img:
        img_array = np.array(img)

    # View (H, W) greyscale data as (H, W, 1) so that the whiteness tests
    # can always reduce over a channel axis. Slices are still taken from
    # img_array so the pieces keep the original dimensionality.
    pixels = np.atleast_3d(img_array)

    # Indices of the rows in which every value is pure white
    white_rows = np.all(pixels == 255, axis=(1, 2))
    row_separators = np.where(white_rows)[0]

    all_sub_images = []

    # Walk over consecutive separator pairs; the band lies strictly between
    for top, bottom in zip(row_separators[:-1] + 1, row_separators[1:]):
        row_section = img_array[top:bottom]

        # Adjacent separator rows enclose nothing
        if row_section.shape[0] == 0:
            continue

        # Columns that are (nearly) white over the full height of the band
        white_cols = np.all(pixels[top:bottom] >= 250, axis=(0, 2))
        col_separators = np.where(white_cols)[0]

        # Sentinel edges just outside the band let the leading piece, the
        # pieces between separators, the trailing piece and the
        # "no separator at all" case share one code path.
        edges = [-1, *col_separators.tolist(), row_section.shape[1]]

        row_sub_images = []
        for left, right in zip(edges, edges[1:]):
            piece = row_section[:, left + 1:right]
            if piece.size > 0:
                row_sub_images.append(piece)

        all_sub_images.append(row_sub_images)

    return all_sub_images

# Optional: To visualize a specific sub-image
def show_sub_image(sub_image):
    """Display one sub-image array by converting it to a PIL image.

    Args:
        sub_image: Array accepted by ``PIL.Image.fromarray``.
    """
    Image.fromarray(sub_image).show()

# Usage example:
# sub_images = extract_sub_images('your_image.png')
# Access first row's first sub-image: sub_images[0][0]
# Show it: show_sub_image(sub_images[0][0])

In [2]:
import os

def extract_and_save_sub_images(image_path, min_size, output_folder):
    """Extract the sub-images of one image, trim them and save them as JPEG.

    Every sub-image returned by ``extract_sub_images`` has its pure-white
    (all values == 255) rows and columns removed. If the trimmed result is
    strictly larger than ``min_size`` in both height and width it is
    written to ``output_folder`` as
    ``{base_name}_{i}_{j}_{w}_{h}.jpg``, where ``base_name`` is the input
    file name without extension, ``i``/``j`` are the band and piece
    indices, and ``w``/``h`` are the trimmed width and height.

    A line is printed for every saved file, followed by the total count.

    Args:
        image_path: Path of the image to split.
        min_size: Exclusive lower bound for both the height and the width
            of a trimmed sub-image.
        output_folder: Directory receiving the files; created if missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder, exist_ok=True)

    # Input file name without directory and extension
    base_name = os.path.splitext(os.path.basename(image_path))[0]

    def squeeze_white_pixels(sub_image):
        # View 2-D greyscale data as (h, w, 1) so both reductions always
        # have a channel axis to collapse.
        pixels = np.atleast_3d(sub_image)

        # Keep every row / column holding at least one non-white value
        keep_rows = np.any(pixels != 255, axis=(1, 2))
        keep_cols = np.any(pixels != 255, axis=(0, 2))

        return sub_image[keep_rows][:, keep_cols]

    sub_images = extract_sub_images(image_path)

    saved_count = 0

    for i, row in enumerate(sub_images):
        # A band handed over as a bare array (instead of a list of pieces)
        # is treated as a single piece; iterating it directly would walk
        # over its pixel rows.
        if isinstance(row, np.ndarray):
            row = [row]

        for j, sub_image in enumerate(row):
            squeezed_image = squeeze_white_pixels(sub_image)

            h, w = squeezed_image.shape[:2]

            # Both dimensions have to exceed min_size (strict comparison)
            if h > min_size and w > min_size:
                filename = f"{base_name}_{i}_{j}_{w}_{h}.jpg"

                img = Image.fromarray(squeezed_image)
                save_path = os.path.join(output_folder, filename)
                img.save(save_path, "JPEG")
                saved_count += 1
                print(f"Saved: {filename}")

    print(f"\nTotal images saved: {saved_count}")

# Usage:
# extract_and_save_sub_images('your_image.png', min_size=100, output_folder='output_images')

In [4]:
import os

# Root folder that is searched recursively for scans to split.
# NOTE(review): the input and output locations are machine-specific absolute paths.
folder_path = r"C:\Users\acer\Desktop\Work_IGB\Georgia Zooplankton\igb-georgia\input"
for root, dirs, files in os.walk(folder_path):
    for file in files:
        # Only files named "*_sorted.jpg" are processed; skip everything else
        if not file.endswith('_sorted.jpg'):
            continue

        # Full path of the matching file inside the directory being visited
        img_path = os.path.join(root, file)
        print(f"Processing: {img_path}")
        extract_and_save_sub_images(
            img_path,
            min_size=10,
            output_folder=r"C:\Users\acer\Desktop\Work_IGB\Georgia Zooplankton\igb-georgia\output",
        )

Processing: C:\Users\acer\Desktop\Work_IGB\Georgia Zooplankton\igb-georgia\input\M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_1_588_958.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_3_698_200.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_6_227_485.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_8_296_496.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_9_111_104.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_12_195_344.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_14_208_200.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_16_272_344.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_17_164_208.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-of-3_sorted_0_19_279_232.jpg
Saved: M3A_2011-08-27__45um_above200um_x1_2400dpi_1-o