## Image Segmentation

#### These are the different image segmentation techniques used. The outputs are .png files of the resulting masks.  

In [1]:
import os
import cv2 as cv
import os
import numpy as np

#### The source folder holds the original images and the destination folder receives the annotated output. Change the directories according to which masks are being generated.

### K-clustering

In [3]:
# K-means segmentation: cluster the grayscale intensities of every image in
# `source_folder`, binarize the clustered image, drop small connected
# components and save the resulting mask as a PNG in `destination_folder`.

# Define the source and destination folders
source_folder = 'Dataset/test/images'
destination_folder = 'Dataset/kclustering/kannotatedtest/'

#source_folder = 'Dataset/train/images'
#destination_folder = 'Dataset/kclustering/kannotatedtrain/'

# NOTE(review): this destination previously pointed at the train folder;
# 'kannotatedvalid/' follows the naming of the other cells - confirm.
#source_folder = 'Dataset/valid/images'
#destination_folder = 'Dataset/kclustering/kannotatedvalid/'

os.makedirs(destination_folder, exist_ok=True)

# Minimum area (in pixels) a connected component needs in order to be kept
min_area = 800
# Size passed to cv.resize as dsize, i.e. (width, height)
output_size = (640, 480)

# K-means settings are the same for every image, so build them once.
# Stop after 10 iterations or once the centers move by less than 1.0.
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)
K = 5  # Number of clusters
# Cluster centers above this gray level become foreground (255)
intensity_threshold = 70

# Loop through each image in the source folder
for filename in os.listdir(source_folder):
    # Accept upper- and lower-case extensions; add other formats if needed
    if not filename.lower().endswith(('.png', '.jpg')):
        continue

    img = cv.imread(os.path.join(source_folder, filename))
    if img is None:
        # cv.imread signals an unreadable/corrupt file by returning None
        print(f'Skipping unreadable image: {filename}')
        continue

    # Convert the image to grayscale
    gray_img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    # One float32 sample per pixel (a grayscale image has a single channel)
    Z = np.float32(gray_img.reshape((-1, 1)))

    # 10 attempts with random initial centers
    ret, label, center = cv.kmeans(Z, K, None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)

    # Replace every pixel by its (uint8) cluster center
    center = np.uint8(center)
    clustered_image = center[label.flatten()].reshape(gray_img.shape)

    # Binarize the clustered image by thresholding
    clustered_image = np.where(clustered_image > intensity_threshold, 255, 0).astype(np.uint8)

    # Find all connected components in the binary image
    num_labels, labels, stats, centroids = cv.connectedComponentsWithStats(clustered_image)

    # Keep only the components that are large enough. A per-label lookup
    # table paints the mask in one pass instead of one scan per component.
    keep = stats[:, cv.CC_STAT_AREA] >= min_area
    keep[0] = False  # label 0 is the background and is never kept
    filtered_image = np.where(keep, 255, 0).astype(np.uint8)[labels]

    # This is just for the test images.
    # Nearest-neighbour interpolation keeps the mask strictly 0/255; the
    # default bilinear interpolation would add gray pixels along the edges.
    filtered_image = cv.resize(filtered_image, output_size, interpolation=cv.INTER_NEAREST)

    # Always save as PNG so the mask is not degraded by lossy JPEG compression
    output_filename = os.path.splitext(filename)[0] + '.png'
    cv.imwrite(os.path.join(destination_folder, output_filename), filtered_image)


### Binary Segmentation (Threshold at 110)

In [8]:
# Fixed-threshold segmentation: threshold the grayscale version of every
# image in `source_folder`, drop small connected components and save the
# resulting mask as a PNG in `destination_folder`.

source_folder = 'Dataset/test/images'
destination_folder = 'Dataset/binary/binarytest/'

#source_folder = 'Dataset/train/images'
#destination_folder = 'Dataset/binary/binarytrain/'

#source_folder = 'Dataset/valid/images'
#destination_folder = 'Dataset/binary/binaryvalid/'


# Minimum area (in pixels) a connected component needs in order to be kept
min_area = 800
# Size passed to cv.resize as dsize, i.e. (width, height)
output_size = (640, 480)
# Gray levels strictly above this value become foreground (255)
threshold_value = 110


# Create the output directory if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

# Process each image in the input directory
for filename in os.listdir(source_folder):
    # Accept upper- and lower-case extensions
    if not filename.lower().endswith(('.jpg', '.png')):
        continue

    # Full path to the input image
    image_path = os.path.join(source_folder, filename)

    # Read the image
    image = cv.imread(image_path)
    if image is None:
        # cv.imread signals an unreadable/corrupt file by returning None
        print(f'Skipping unreadable image: {filename}')
        continue

    # Convert to grayscale
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    # Apply thresholding to create a binary mask
    _, binary_mask = cv.threshold(gray, threshold_value, 255, cv.THRESH_BINARY)

    # Find all connected components in the binary mask
    num_labels, labels, stats, centroids = cv.connectedComponentsWithStats(binary_mask)

    # Keep only the components that are large enough. A per-label lookup
    # table paints the mask in one pass instead of one scan per component.
    keep = stats[:, cv.CC_STAT_AREA] >= min_area
    keep[0] = False  # label 0 is the background and is never kept
    filtered_image = np.where(keep, 255, 0).astype(np.uint8)[labels]

    # Nearest-neighbour interpolation keeps the mask strictly 0/255; the
    # default bilinear interpolation would add gray pixels along the edges.
    filtered_image = cv.resize(filtered_image, output_size, interpolation=cv.INTER_NEAREST)

    # Always save as PNG so the mask is not degraded by lossy JPEG compression
    output_filename = os.path.splitext(filename)[0] + '.png'
    output_path = os.path.join(destination_folder, output_filename)

    # Save the binary mask to the output directory
    cv.imwrite(output_path, filtered_image)

### Otsu Method

In [9]:
# Otsu segmentation: blur the grayscale version of every image in
# `source_folder`, threshold it with Otsu's method, optionally drop small
# connected components and save the mask as a PNG in `destination_folder`.

source_folder = 'Dataset/test/images'
destination_folder = 'Dataset/otsu/otsutest/'

#source_folder = 'Dataset/train/images'
#destination_folder = 'Dataset/otsu/otsutrain/'

#source_folder = 'Dataset/valid/images'
#destination_folder = 'Dataset/otsu/otsuvalid/'


# Size passed to cv.resize as dsize, i.e. (width, height)
output_size = (640, 480)
# Minimum component area in pixels; 0 keeps every foreground component
min_area = 0

# Create the output directory if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

# Process each image in the input directory
for filename in os.listdir(source_folder):
    # Accept upper- and lower-case extensions
    if not filename.lower().endswith(('.jpg', '.png')):
        continue

    # Full path to the input image
    image_path = os.path.join(source_folder, filename)

    # Read the image
    image = cv.imread(image_path)
    if image is None:
        # cv.imread signals an unreadable/corrupt file by returning None
        print(f'Skipping unreadable image: {filename}')
        continue

    # Convert to grayscale
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    # Apply Gaussian Blur to reduce noise
    blurred = cv.GaussianBlur(gray, (9, 9), 0)

    # Apply Otsu's thresholding; the threshold argument (0) is ignored
    # because THRESH_OTSU picks the threshold from the image itself
    _, binary_mask = cv.threshold(
        blurred,
        0,
        255,
        cv.THRESH_BINARY + cv.THRESH_OTSU
    )

    # Find all connected components in the binary mask
    num_labels, labels, stats, centroids = cv.connectedComponentsWithStats(binary_mask)

    # Keep only the components that are large enough. A per-label lookup
    # table paints the mask in one pass; scanning the image once per
    # component is very slow when min_area is 0 and every component is kept.
    keep = stats[:, cv.CC_STAT_AREA] >= min_area
    keep[0] = False  # label 0 is the background and is never kept
    filtered_image = np.where(keep, 255, 0).astype(np.uint8)[labels]

    # Nearest-neighbour interpolation keeps the mask strictly 0/255; the
    # default bilinear interpolation would add gray pixels along the edges.
    filtered_image = cv.resize(filtered_image, output_size, interpolation=cv.INTER_NEAREST)

    # Create the output file path with .png extension
    output_filename = os.path.splitext(filename)[0] + '.png'
    output_path = os.path.join(destination_folder, output_filename)

    # Save the binary mask as PNG to the output directory
    cv.imwrite(output_path, filtered_image)