# Define our functions

In [2]:
import os
import glob
import cv2
from skimage.data import page
import matplotlib.pyplot as plt
# from cupy_common import check_cupy_available
# gpu_accelerated = check_cupy_available()

In [2]:
class Preprocess:
    def __init__(self, image_path, output_directory):
        self.global_thresh_value = 350
        self.adaptive_thresh_window_size = 25
        self.adaptive_thresh_C = 2
        self.morph_kernel_size = 2
        self.morph_iterations = 1
        self.image_path = image_path
        self.output_directory = output_directory

    def read_image(self):
        return cv2.imread(self.image_path, cv2.IMREAD_GRAYSCALE)

    def global_threshold(self, image):
        _, global_thresh_mask = cv2.threshold(image, self.global_thresh_value, 255, cv2.THRESH_BINARY)
        return global_thresh_mask

    def adaptive_threshold(self, image):
        adaptive_mask = cv2.adaptiveThreshold(
            image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
            self.adaptive_thresh_window_size, self.adaptive_thresh_C)
        return adaptive_mask

    def combine_masks(self, global_mask, adaptive_mask):
        return cv2.bitwise_or(global_mask, adaptive_mask)

    def morphological_operations(self, combined_mask):
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (self.morph_kernel_size, self.morph_kernel_size))
        return cv2.morphologyEx(combined_mask, cv2.MORPH_OPEN, kernel, iterations=self.morph_iterations)

    def save_cleaned_image(self, cleaned_image):
        output_path = os.path.join(self.output_directory, os.path.basename(self.image_path))
        cv2.imwrite(output_path, cleaned_image)

    def process_image(self):
        image = self.read_image()
        global_mask = self.global_threshold(image)
        adaptive_mask = self.adaptive_threshold(image)
        combined_mask = self.combine_masks(global_mask, adaptive_mask)
        cleaned_image = self.morphological_operations(combined_mask)
        self.save_cleaned_image(cleaned_image)
        return cleaned_image

In [3]:
# Define input and output directories
input_folder = "datasets/v1/Z/"
output_folder = "datasets/v1/Z/processed"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get a list of all image files in the input folder
# List of desired extensions
extensions = ["*.tiff", "*.png"]

# Find all files with matching extensions
image_files = [f for ext in extensions for f in glob.glob(os.path.join(input_folder, ext))]



for image_file in image_files:
    # Extract the base name of the file for output folder naming
    image_name = os.path.splitext(os.path.basename(image_file))[0]
    print(f"Processing {image_name}")
 
    # Create an instance of Preprocess and process the image
    binarize_image = Preprocess(image_file, output_folder)
    binarize_image.process_image()
   
    print(f"Finished processing {image_name}")

Processing Z4
Finished processing Z4
Processing Z7
Finished processing Z7
Processing Z5
Finished processing Z5
Processing Z6
Finished processing Z6
Processing Z2
Finished processing Z2
Processing Z3
Finished processing Z3
Processing Z1
Finished processing Z1
Processing Z10
Finished processing Z10
Processing Z8
Finished processing Z8
Processing Z11
Finished processing Z11
Processing Z9
Finished processing Z9


# For images that need manual tuning

In [6]:
import os
import glob
import cv2
import numpy as np
from itertools import product

class Preprocess:
    def __init__(self, image_path, output_directory, global_thresh_value=350, adaptive_thresh_window_size=15, 
                 adaptive_thresh_C=3, morph_kernel_size=2, morph_iterations=3):
        self.global_thresh_value = global_thresh_value
        self.adaptive_thresh_window_size = adaptive_thresh_window_size
        self.adaptive_thresh_C = adaptive_thresh_C
        self.morph_kernel_size = morph_kernel_size
        self.morph_iterations = morph_iterations
        self.image_path = image_path
        self.output_directory = output_directory

    def read_image(self):
        return cv2.imread(self.image_path, cv2.IMREAD_GRAYSCALE)

    def global_threshold(self, image):
        _, global_thresh_mask = cv2.threshold(image, self.global_thresh_value, 255, cv2.THRESH_BINARY)
        return global_thresh_mask

    def adaptive_threshold(self, image):
        adaptive_mask = cv2.adaptiveThreshold(
            image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
            self.adaptive_thresh_window_size, self.adaptive_thresh_C)
        return adaptive_mask

    def combine_masks(self, global_mask, adaptive_mask):
        return cv2.bitwise_or(global_mask, adaptive_mask)

    def morphological_operations(self, combined_mask):
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (self.morph_kernel_size, self.morph_kernel_size))
        return cv2.morphologyEx(combined_mask, cv2.MORPH_OPEN, kernel, iterations=self.morph_iterations)

    def save_cleaned_image(self, cleaned_image, param_set):
        param_str = f"G{self.global_thresh_value}_A{self.adaptive_thresh_window_size}_C{self.adaptive_thresh_C}_M{self.morph_kernel_size}_I{self.morph_iterations}"
        output_path = os.path.join(self.output_directory, f"{os.path.basename(self.image_path)}_{param_str}.png")
        cv2.imwrite(output_path, cleaned_image)

    def process_image(self):
        image = self.read_image()
        global_mask = self.global_threshold(image)
        adaptive_mask = self.adaptive_threshold(image)
        combined_mask = self.combine_masks(global_mask, adaptive_mask)
        cleaned_image = self.morphological_operations(combined_mask)
        self.save_cleaned_image(cleaned_image, (self.global_thresh_value, self.adaptive_thresh_window_size, self.adaptive_thresh_C, self.morph_kernel_size, self.morph_iterations))
        return cleaned_image

def grid_search(input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    
    # Parameter ranges for grid search
    global_thresh_values = [200, 300, 350, 400]
    adaptive_window_sizes = [11, 15, 19]
    adaptive_C_values = [2, 3, 4]
    morph_kernel_sizes = [2, 3, 4]
    morph_iterations_values = [1, 2, 3]

    # Generate parameter combinations
    param_combinations = list(product(global_thresh_values, adaptive_window_sizes, adaptive_C_values, morph_kernel_sizes, 
                                      morph_iterations_values))

    image_files = glob.glob(os.path.join(input_folder, 'Z1.png'))

    for image_file in image_files:
        print(f"Processing {os.path.basename(image_file)}")
        
        for params in param_combinations:
            print(f"  Trying parameters: {params}")
            binarize_image = Preprocess(image_file, output_folder, *params)
            binarize_image.process_image()
            print(f"  Finished with parameters: {params}")

        print(f"Finished processing {os.path.basename(image_file)}")

if __name__ == "__main__":
    input_folder = "datasets/v1/Z/"
    output_folder = "datasets/v1/Z/grid"
    grid_search(input_folder, output_folder)


Processing Z1.png
  Trying parameters: (200, 11, 2, 2, 1)
  Finished with parameters: (200, 11, 2, 2, 1)
  Trying parameters: (200, 11, 2, 2, 2)
  Finished with parameters: (200, 11, 2, 2, 2)
  Trying parameters: (200, 11, 2, 2, 3)
  Finished with parameters: (200, 11, 2, 2, 3)
  Trying parameters: (200, 11, 2, 3, 1)
  Finished with parameters: (200, 11, 2, 3, 1)
  Trying parameters: (200, 11, 2, 3, 2)
  Finished with parameters: (200, 11, 2, 3, 2)
  Trying parameters: (200, 11, 2, 3, 3)
  Finished with parameters: (200, 11, 2, 3, 3)
  Trying parameters: (200, 11, 2, 4, 1)
  Finished with parameters: (200, 11, 2, 4, 1)
  Trying parameters: (200, 11, 2, 4, 2)
  Finished with parameters: (200, 11, 2, 4, 2)
  Trying parameters: (200, 11, 2, 4, 3)
  Finished with parameters: (200, 11, 2, 4, 3)
  Trying parameters: (200, 11, 3, 2, 1)
  Finished with parameters: (200, 11, 3, 2, 1)
  Trying parameters: (200, 11, 3, 2, 2)
  Finished with parameters: (200, 11, 3, 2, 2)
  Trying parameters: (200

In [145]:
class Preprocess:
    def __init__(self, image_path, output_directory):
        self.global_thresh_value = 200
        self.adaptive_thresh_window_size = 11
        self.adaptive_thresh_C = 3
        self.morph_kernel_size = 3
        self.morph_iterations = 1
        self.image_path = image_path
        self.output_directory = output_directory

    def read_image(self):
        return cv2.imread(self.image_path, cv2.IMREAD_GRAYSCALE)

    def global_threshold(self, image):
        _, global_thresh_mask = cv2.threshold(image, self.global_thresh_value, 255, cv2.THRESH_BINARY)
        return global_thresh_mask

    def adaptive_threshold(self, image):
        adaptive_mask = cv2.adaptiveThreshold(
            image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
            self.adaptive_thresh_window_size, self.adaptive_thresh_C)
        return adaptive_mask

    def combine_masks(self, global_mask, adaptive_mask):
        return cv2.bitwise_or(global_mask, adaptive_mask)

    def morphological_operations(self, combined_mask):
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (self.morph_kernel_size, self.morph_kernel_size))
        return cv2.morphologyEx(combined_mask, cv2.MORPH_OPEN, kernel, iterations=self.morph_iterations)

    def save_cleaned_image(self, cleaned_image):
        output_path = os.path.join(self.output_directory, os.path.basename(self.image_path))
        cv2.imwrite(output_path, cleaned_image)

    def process_image(self):
        image = self.read_image()
        global_mask = self.global_threshold(image)
        adaptive_mask = self.adaptive_threshold(image)
        combined_mask = self.combine_masks(global_mask, adaptive_mask)
        cleaned_image = self.morphological_operations(combined_mask)
        self.save_cleaned_image(cleaned_image)
        return cleaned_image

In [148]:
# Define input and output directories
input_folder = "datasets/v0/raw_data/"
output_folder = "datasets/v0/filtered/"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get a list of all image files in the input folder
# List of desired extensions
extensions = ["X3.png"]

# Find all files with matching extensions
image_files = [f for ext in extensions for f in glob.glob(os.path.join(input_folder, ext))]


for image_file in image_files:
    # Extract the base name of the file for output folder naming
    image_name = os.path.splitext(os.path.basename(image_file))[0]
    print(f"Processing {image_name}")
 
    # Create an instance of Preprocess and process the image
    binarize_image = Preprocess(image_file, output_folder)
    binarize_image.process_image()
   
    print(f"Finished processing {image_name}")

Processing X3
Finished processing X3


In [11]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
from scipy.ndimage import label

def filter_small_clusters(image_path, min_cluster_size):
    # Load the image (assuming it's a binary image with 0 and 255 values)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Ensure it's a binary image (convert to 0 and 1)
    binary_image = (image < 128).astype(np.uint8)
    
    # Label clusters of black pixels (value = 1)
    labeled_image, num_features = label(binary_image)
    
    # Calculate sizes of each cluster
    cluster_sizes = np.bincount(labeled_image.ravel())
    
    # Create a mask for clusters larger than the threshold
    large_clusters_mask = np.isin(labeled_image, np.where(cluster_sizes >= min_cluster_size)[0])
    
    # Set pixels of small clusters to white (255)
    filtered_image = np.where(large_clusters_mask, image, 255).astype(np.uint8)

    # Plot the original and filtered images
    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
    axs[0].imshow(image, cmap='gray')
    axs[0].set_title('Original Image')
    axs[0].axis('off')

    axs[1].imshow(filtered_image, cmap='gray')
    axs[1].set_title('Filtered Image (Large Clusters Only)')
    axs[1].axis('off')

    plt.tight_layout()
    plt.show()

# Example usage
filter_small_clusters('processed/A_100.tiff', min_cluster_size=25)

[ WARN:0@343.679] global loadsave.cpp:241 findDecoder imread_('processed/A_100.tiff'): can't open/read file: check file path/integrity


TypeError: '<' not supported between instances of 'NoneType' and 'int'

In [12]:
def filter_small_clusters(image_path, output_path, min_cluster_size):
    # Load the image (assuming it's a binary image with 0 and 255 values)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Ensure it's a binary image (convert to 0 and 1)
    binary_image = (image < 128).astype(np.uint8)
    
    # Label clusters of black pixels (value = 1)
    labeled_image, num_features = label(binary_image)
    
    # Calculate sizes of each cluster
    cluster_sizes = np.bincount(labeled_image.ravel())
    
    # Create a mask for clusters larger than the threshold
    large_clusters_mask = np.isin(labeled_image, np.where(cluster_sizes >= min_cluster_size)[0])
    
    # Set pixels of small clusters to white (255)
    filtered_image = np.where(large_clusters_mask, image, 255).astype(np.uint8)

    output_path = os.path.join(output_path, os.path.basename(image_path))
    
    # Save the result
    cv2.imwrite(output_path, filtered_image)

In [13]:
# Define input and output directories
input_folder = "datasets/v1/Z/processed/"
output_folder = "datasets/v1/Z/filtered/"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Get a list of all image files in the input folder
# List of desired extensions
extensions = ["*.tiff", "*.png"]

# Find all files with matching extensions
image_files = [f for ext in extensions for f in glob.glob(os.path.join(input_folder, ext))]

for image_file in image_files:
    # Extract the base name of the file for output folder naming
    image_name = os.path.splitext(os.path.basename(image_file))[0]
    print(f"Processing {image_name}")
 
    # Create an instance of Preprocess and process the image
    filter_small_clusters(image_file, output_folder, min_cluster_size=15)
   
    print(f"Finished processing {image_name}")

Processing Z4
Finished processing Z4
Processing Z7
Finished processing Z7
Processing Z5
Finished processing Z5
Processing Z6
Finished processing Z6
Processing Z2
Finished processing Z2
Processing Z3
Finished processing Z3
Processing Z1
Finished processing Z1
Processing Z10
Finished processing Z10
Processing Z8
Finished processing Z8
Processing Z11
Finished processing Z11
Processing Z9
Finished processing Z9


# Rotate the dot type images to increase dataset size

In [3]:
def rotate_image(image, angle):
    """Rotates an image by the specified angle."""
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)

    # Calculate the rotation matrix
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Perform the rotation
    rotated = cv2.warpAffine(image, matrix, (w, h))
    return rotated

def rotate_images_in_folder(input_folder, output_folder, increments):
    """
    Rotates all images in the input folder by the specified increments and saves them in the output folder.
    
    Args:
        input_folder (str): Path to the folder containing input images.
        output_folder (str): Path to the folder where rotated images will be saved.
        increments (list of int): List of rotation angles (e.g., [90, 180, 270]).
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Get a list of all image files in the input folder
    image_files = glob.glob(os.path.join(input_folder, "*"))
    for image_file in image_files:
        image_name = os.path.splitext(os.path.basename(image_file))[0]
        extension = os.path.splitext(image_file)[1]

        # Read the image
        image = cv2.imread(image_file)
        if image is None:
            print(f"Failed to read {image_file}. Skipping...")
            continue

        for angle in increments:
            # Rotate the image
            rotated_image = rotate_image(image, angle)
            # Create a new file name for the rotated image
            output_name = f"{image_name}_rotated_{angle}{extension}"
            output_path = os.path.join(output_folder, output_name)
            # Save the rotated image
            cv2.imwrite(output_path, rotated_image)
            print(f"Saved rotated image: {output_path}")

# Example usage
if __name__ == "__main__":
    input_folder = "datasets/v1/raw_set/dots/"  # Replace with your input folder path
    increments = [90, 180, 270]  # Rotation angles

    rotate_images_in_folder(input_folder, input_folder, increments)


Saved rotated image: datasets/v1/raw_set/dots/D_AC_rotated_90.tiff
Saved rotated image: datasets/v1/raw_set/dots/D_AC_rotated_180.tiff
Saved rotated image: datasets/v1/raw_set/dots/D_AC_rotated_270.tiff
Saved rotated image: datasets/v1/raw_set/dots/E_AC_rotated_90.tiff
Saved rotated image: datasets/v1/raw_set/dots/E_AC_rotated_180.tiff
Saved rotated image: datasets/v1/raw_set/dots/E_AC_rotated_270.tiff
Saved rotated image: datasets/v1/raw_set/dots/F_AC_rotated_90.tiff
Saved rotated image: datasets/v1/raw_set/dots/F_AC_rotated_180.tiff
Saved rotated image: datasets/v1/raw_set/dots/F_AC_rotated_270.tiff
Saved rotated image: datasets/v1/raw_set/dots/B_AC_rotated_90.tiff
Saved rotated image: datasets/v1/raw_set/dots/B_AC_rotated_180.tiff
Saved rotated image: datasets/v1/raw_set/dots/B_AC_rotated_270.tiff
Saved rotated image: datasets/v1/raw_set/dots/A_AC_rotated_90.tiff
Saved rotated image: datasets/v1/raw_set/dots/A_AC_rotated_180.tiff
Saved rotated image: datasets/v1/raw_set/dots/A_AC_ro

In [5]:
def flip_images_in_folder(input_folder, output_folder):
    """
    Flips all images in the input folder vertically and horizontally, 
    and saves them in the output folder.
    
    Args:
        input_folder (str): Path to the folder containing input images.
        output_folder (str): Path to the folder where flipped images will be saved.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Get a list of all image files in the input folder
    image_files = glob.glob(os.path.join(input_folder, "*"))
    for image_file in image_files:
        image_name = os.path.splitext(os.path.basename(image_file))[0]
        extension = os.path.splitext(image_file)[1]

        # Read the image
        image = cv2.imread(image_file)
        if image is None:
            print(f"Failed to read {image_file}. Skipping...")
            continue

        # Flip vertically
        flipped_vert = cv2.flip(image, 0)
        output_vert_name = f"{image_name}_flipped_vert{extension}"
        output_vert_path = os.path.join(output_folder, output_vert_name)
        cv2.imwrite(output_vert_path, flipped_vert)
        print(f"Saved vertically flipped image: {output_vert_path}")

        # Flip horizontally
        flipped_horiz = cv2.flip(image, 1)
        output_horiz_name = f"{image_name}_flipped_horiz{extension}"
        output_horiz_path = os.path.join(output_folder, output_horiz_name)
        cv2.imwrite(output_horiz_path, flipped_horiz)
        print(f"Saved horizontally flipped image: {output_horiz_path}")

# Example usage
if __name__ == "__main__":
    input_folder = "datasets/v1/raw_set/mixed/"  # Replace with your input folder path

    flip_images_in_folder(input_folder, input_folder)


Saved vertically flipped image: datasets/v1/raw_set/mixed/X27_flipped_vert.png
Saved horizontally flipped image: datasets/v1/raw_set/mixed/X27_flipped_horiz.png
Saved vertically flipped image: datasets/v1/raw_set/mixed/A_75_flipped_vert.tiff
Saved horizontally flipped image: datasets/v1/raw_set/mixed/A_75_flipped_horiz.tiff
Saved vertically flipped image: datasets/v1/raw_set/mixed/D_25_flipped_vert.tiff
Saved horizontally flipped image: datasets/v1/raw_set/mixed/D_25_flipped_horiz.tiff
Saved vertically flipped image: datasets/v1/raw_set/mixed/C_50_flipped_vert.tiff
Saved horizontally flipped image: datasets/v1/raw_set/mixed/C_50_flipped_horiz.tiff
Saved vertically flipped image: datasets/v1/raw_set/mixed/B_75_flipped_vert.tiff
Saved horizontally flipped image: datasets/v1/raw_set/mixed/B_75_flipped_horiz.tiff
Saved vertically flipped image: datasets/v1/raw_set/mixed/D_50_flipped_vert.tiff
Saved horizontally flipped image: datasets/v1/raw_set/mixed/D_50_flipped_horiz.tiff
Saved vertical

# Move the images into correct subfolders

In [18]:
import os
import shutil

def copy_images_to_structure(labeled_folder, raw_data_folder, raw_set_folder):
    # Ensure the raw_set folder exists
    os.makedirs(raw_set_folder, exist_ok=True)

    # Iterate through the labeled folder's structure
    for subfolder in os.listdir(labeled_folder):
        subfolder_path = os.path.join(labeled_folder, subfolder)
        if os.path.isdir(subfolder_path):
            # Create matching subfolder in raw_set
            target_subfolder = os.path.join(raw_set_folder, subfolder)
            os.makedirs(target_subfolder, exist_ok=True)

            # Get the image filenames from the labeled subfolder
            image_filenames = os.listdir(subfolder_path)

            for image_filename in image_filenames:
                raw_image_path = os.path.join(raw_data_folder, image_filename)
                target_image_path = os.path.join(target_subfolder, image_filename)

                if os.path.exists(raw_image_path):
                    shutil.copy2(raw_image_path, target_image_path)
                else:
                    print(f"Warning: {image_filename} not found in raw_data folder.")

if __name__ == "__main__":
    labeled_folder = 'datasets/v1/labeled/'
    raw_data_folder = 'datasets/v1/raw_data/'
    raw_set_folder = 'datasets/v1/raw_set/'

    copy_images_to_structure(labeled_folder, raw_data_folder, raw_set_folder)
    print("Image copying complete.")


Image copying complete.
