<a href="https://colab.research.google.com/github/Jose-Augusto-C-M/deep_learning_toolbox/blob/main/patch_and_count_images_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so that the dataset folders referenced
# later in this notebook (under /content/drive/MyDrive/...) are reachable.
# This import only exists inside Google Colab runtimes.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install imagecodecs, which provides the `imread` function imported below.
!pip install imagecodecs



In [None]:
import os
from datetime import date
from tqdm import tqdm
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from imagecodecs import imread
import random

In [None]:
def _to_display_uint8(img):
    """Rescale ``img`` so that its maximum maps to 255 and cast to uint8.

    An image whose maximum is 0 is returned as all zeros instead of being
    divided by zero (which would yield NaN and an undefined uint8 cast).
    """
    peak = img.max()
    if peak == 0:
        return np.zeros(img.shape, dtype=np.uint8)
    return (img * 255 / peak).astype(np.uint8)


def plot_random_patches(original_output_folder, segmented_output_folder, num_patches_to_plot, random_seed=None):
    """Show randomly chosen original patches next to their segmented patches.

    Parameters
    ----------
    original_output_folder : str
        Folder containing the original ``.tif`` patches.
    segmented_output_folder : str
        Folder containing the segmented patches; each one is looked up under
        the same file name as its original patch.
    num_patches_to_plot : int
        Maximum number of patch pairs to display. Values larger than the
        number of available patches are clamped; values <= 0 plot nothing.
    random_seed : int or None, optional
        Seed for the selection. The same seed selects the same patches.
        ``None`` gives a different selection on every call.

    Returns
    -------
    None
        One matplotlib figure is shown per selected patch pair.
    """
    # Use a private generator so the caller's global `random` state is not
    # reseeded as a side effect of plotting.
    rng = random.Random(random_seed)

    # Sort the listing: os.listdir order is arbitrary, and a fixed seed is only
    # reproducible when the population is in a stable order.
    original_files = sorted(f for f in os.listdir(original_output_folder) if f.endswith(".tif"))

    # Clamp to [0, number of files] so random.sample never raises.
    sample_size = max(0, min(num_patches_to_plot, len(original_files)))
    selected_original_files = rng.sample(original_files, sample_size)

    for selected_original_file in selected_original_files:
        selected_original_path = os.path.join(original_output_folder, selected_original_file)
        selected_segmented_path = os.path.join(segmented_output_folder, selected_original_file)

        # A patch without a segmented counterpart cannot be shown side by side.
        if not os.path.isfile(selected_segmented_path):
            print(f"Skipping {selected_original_file}: no matching segmented patch found.")
            continue

        selected_original_img = np.array(imread(selected_original_path), dtype=np.float32)
        selected_segmented_img = np.array(imread(selected_segmented_path), dtype=np.float32)

        # Convert both images to 8-bit for display.
        selected_original_patch_8bit = _to_display_uint8(selected_original_img)
        selected_segmented_patch_8bit = _to_display_uint8(selected_segmented_img)

        fig, (ax_original, ax_segmented) = plt.subplots(1, 2, figsize=(10, 5))

        ax_original.imshow(selected_original_patch_8bit, cmap='gray')
        ax_original.set_title('Randomly Selected Original Patch')

        # Fixed display range: with auto-scaling a uniform mask (all 0 or all
        # max) would render identically regardless of its value.
        ax_segmented.imshow(selected_segmented_patch_8bit, cmap='gray', vmin=0, vmax=255)
        ax_segmented.set_title('Randomly Selected Segmented Patch')

        plt.show()
        # Release the figure so plotting many patches does not accumulate memory.
        plt.close(fig)

In [None]:
def _scale_to_uint8(patch, scale):
    """Return ``patch * scale / patch.max()`` cast to uint8.

    A patch whose maximum is 0 is returned as all zeros rather than being
    divided by zero (which would yield NaN and an undefined uint8 cast).
    """
    peak = patch.max()
    if peak == 0:
        return np.zeros(patch.shape, dtype=np.uint8)
    return (patch * scale / peak).astype(np.uint8)


def patch_images(original_folder, segmented_folder, original_output_folder, segmented_output_folder, patch_size, background_threshold):
    """Cut image/mask pairs into square patches and save the useful ones.

    For every ``.tif`` file in ``original_folder`` the mask with the same file
    name is loaded from ``segmented_folder``. Mask labels are remapped
    (1 -> 0, 2 -> 1), both images are tiled into ``patch_size`` x
    ``patch_size`` patches in row-major order, and each patch pair is written
    as ``<image name>_patch_<idx>.tif`` into the two output folders.

    A patch is dropped when it is smaller than ``patch_size`` in either
    dimension (image border) or when the fraction of mask pixels equal to 1
    (treated as background here) exceeds ``background_threshold``.
    ``idx`` counts every tile of an image, including dropped ones.

    Parameters
    ----------
    original_folder : str
        Folder with the source ``.tif`` images.
    segmented_folder : str
        Folder with the label masks, named like the source images.
    original_output_folder : str
        Destination for image patches (created if missing).
    segmented_output_folder : str
        Destination for mask patches (created if missing).
    patch_size : int
        Side length of the square patches, in pixels. Must be positive.
    background_threshold : float
        Maximum allowed background fraction, in the range 0..1.

    Returns
    -------
    None
        A summary of the patch counts is printed.

    Raises
    ------
    ValueError
        If ``patch_size`` is not positive.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be a positive integer, got {patch_size}")

    os.makedirs(original_output_folder, exist_ok=True)
    os.makedirs(segmented_output_folder, exist_ok=True)

    # Sorted so that processing order does not depend on the file system.
    original_files = sorted(f for f in os.listdir(original_folder) if f.endswith(".tif"))

    total_original_patches = 0
    removed_due_to_size = 0
    removed_due_to_background = 0

    for original_file in tqdm(original_files, desc="Processing Images", unit="image"):
        base_name = os.path.splitext(original_file)[0]
        original_path = os.path.join(original_folder, original_file)
        segmented_path = os.path.join(segmented_folder, base_name + ".tif")

        # One missing mask should not abort the whole run; skip that image,
        # in the same way mismatched dimensions are handled below.
        if not os.path.isfile(segmented_path):
            print(f"Skipping {original_file} due to missing segmented image.")
            continue

        original_img = np.array(imread(original_path), dtype=np.float32)
        segmented_img = np.array(imread(segmented_path), dtype=np.float32)

        # Check that the images have the same spatial dimensions
        if original_img.shape[:2] != segmented_img.shape[:2]:
            print(f"Skipping {original_file} due to mismatched dimensions.")
            continue

        # Convert the mask labels from 1 and 2 to 0 and 1. Order matters:
        # 1 must be cleared before 2 is rewritten to 1.
        segmented_img[segmented_img == 1] = 0
        segmented_img[segmented_img == 2] = 1

        height, width = original_img.shape[:2]
        patch_origins = ((row, col)
                         for row in range(0, height, patch_size)
                         for col in range(0, width, patch_size))

        for idx, (row, col) in enumerate(patch_origins):
            # Tiles that run past the image border would be undersized.
            if row + patch_size > height or col + patch_size > width:
                removed_due_to_size += 1
                continue

            original_patch = original_img[row:row + patch_size, col:col + patch_size]
            segmented_patch = segmented_img[row:row + patch_size, col:col + patch_size]

            # Fraction (0..1) of mask pixels that are background.
            background_fraction = np.mean(segmented_patch == 1)

            # Skip patches with more than the allowed share of background
            if background_fraction > background_threshold:
                removed_due_to_background += 1
                continue

            patch_name = f"{base_name}_patch_{idx}.tif"
            output_file_original = os.path.join(original_output_folder, patch_name)
            output_file_segmented = os.path.join(segmented_output_folder, patch_name)

            # Image patches are stretched to 0..255; mask patches keep their
            # 0/1 labels. Both are stored as 8-bit.
            original_patch_8bit = _scale_to_uint8(original_patch, 255)
            segmented_patch_8bit = _scale_to_uint8(segmented_patch, 1)

            Image.fromarray(original_patch_8bit).save(output_file_original)
            Image.fromarray(segmented_patch_8bit).save(output_file_segmented)

            total_original_patches += 1

    today = date.today()

    print("Today's date:", today)
    print("Patching complete.")
    print(f"Total patches created: {total_original_patches + removed_due_to_size + removed_due_to_background}")
    print(f"Patches removed due to size: {removed_due_to_size}")
    print(f"Patches removed due to background: {removed_due_to_background}")
    print(f"Total patches after removal: {total_original_patches}")

In [None]:
# Example usage: patch the corn-hole dataset stored on the mounted Drive.

# Input folders: raw images and their label masks (matched by file name).
original_folder_path = '/content/drive/MyDrive/Embrapa_Milho_Javali_2/corn_holes/raw_images_2'
segmented_folder_path = '/content/drive/MyDrive/Embrapa_Milho_Javali_2/corn_holes/labels_3'

# Output folders: where the image patches and mask patches are written.
original_output_folder_path = '/content/drive/MyDrive/Embrapa_Milho_Javali_2/corn_holes/original_patches_3/'
segmented_output_folder_path = '/content/drive/MyDrive/Embrapa_Milho_Javali_2/corn_holes/segmented_patches_3/'

# 256x256 patches; drop any patch whose background fraction exceeds 0.90.
patch_images(
    original_folder=original_folder_path,
    segmented_folder=segmented_folder_path,
    original_output_folder=original_output_folder_path,
    segmented_output_folder=segmented_output_folder_path,
    patch_size=256,
    background_threshold=0.90,
)

Processing Images: 100%|██████████| 9/9 [02:20<00:00, 15.60s/image]

Today's date: 2024-04-17
Patching complete.
Total patches created: 8512
Patches removed due to size: 317
Patches removed due to background: 3941
Total patches after removal: 4254



