In [None]:
# import requirements
import os
import numpy as np
import pandas as pd
from math import ceil
from skimage import io
from skimage.filters import threshold_otsu
from skimage.color import rgb2gray
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops_table
from skimage.transform import hough_circle, hough_circle_peaks
from skimage.feature import canny
from skimage.draw import disk
from sklearn.cluster import KMeans
from scipy.stats import spearmanr

In [None]:
class pre_process_image:
    # initialize image to be segmented
    def __init__(self, image_dir):
        """Load an image and derive the binary masks used for segmentation.

        Parameters
        ----------
        image_dir : str
            Full path to the image file; passed directly to ``io.imread``.

        Attributes set
        --------------
        image : array read from ``image_dir``
        grey_image : greyscale conversion of ``image``
        bw_image : boolean mask, True where ``grey_image`` exceeds its Otsu threshold
        inv_bw_image : logical inverse of ``bw_image``
        clear_inv_bw_image : ``inv_bw_image`` with border-touching regions removed
        """
        self.image_dir = image_dir  # full path to the image file
        self.image = io.imread(image_dir)  # read image from disk
        # NOTE(review): rgb2gray assumes the file decodes to an RGB image -- confirm for other inputs
        self.grey_image = rgb2gray(self.image)  # convert image to greyscale
        # Binarize to black and white. The threshold must be computed from the
        # greyscale image stored on this instance (the previous code referenced
        # an undefined name and raised NameError).
        self.bw_image = self.grey_image > threshold_otsu(self.grey_image)
        self.inv_bw_image = np.invert(self.bw_image)  # invert the black and white image
        self.clear_inv_bw_image = clear_border(self.inv_bw_image)  # remove anything touching the image border
    
    # segment the image into smaller images
    def segment(self, cluster_num=2, image_edge_buffer=50):
        """Label foreground blobs and keep the size cluster containing the largest one.

        Connected regions of ``self.clear_inv_bw_image`` are labelled, their
        region properties are tabulated, and the regions are grouped with
        k-means on their major/minor axis lengths. The cluster that contains
        the region with the largest area is selected.

        Parameters
        ----------
        cluster_num : int, default 2
            Number of k-means clusters used to group regions by size.
        image_edge_buffer : int, default 50
            Stored on the instance as ``image_edge_buffer``; the clustering
            steps here do not read it.

        Attributes set
        --------------
        cluster_num, image_edge_buffer : the arguments as given
        labeled_image : labelled version of ``clear_inv_bw_image``
        image_properties_df : one row per labelled region, plus ``kmeans_label``
        max_kmeans_label : int, cluster label of the largest-area region
        image_selected_df : rows of ``image_properties_df`` in that cluster

        Raises
        ------
        ValueError
            If fewer regions were found than ``cluster_num`` (k-means cannot
            be fitted in that case).
        """
        self.cluster_num = cluster_num
        self.image_edge_buffer = image_edge_buffer
        self.labeled_image = label(self.clear_inv_bw_image)  # label connected regions
        # Properties of each labelled region, used to segment the blobs in the image.
        self.image_properties_df = pd.DataFrame(
            regionprops_table(
                self.labeled_image,
                properties=(
                    'centroid',
                    'bbox',
                    'orientation',
                    'axis_major_length',
                    'axis_minor_length',
                    'area',
                    'area_filled',
                ),
            )
        )

        region_count = len(self.image_properties_df)
        if region_count < cluster_num:
            # Fail early with a clear message instead of an error from inside KMeans.
            raise ValueError(
                f"found {region_count} region(s) in the image, "
                f"but cluster_num={cluster_num} clusters were requested"
            )

        # Cluster the blobs by size (major and minor axis lengths).
        axis_lengths = self.image_properties_df[
            ['axis_major_length', 'axis_minor_length']
        ].to_numpy()
        kmean_result = KMeans(n_clusters=cluster_num).fit(axis_lengths)
        self.image_properties_df['kmeans_label'] = kmean_result.labels_

        # Keep only the cluster holding the largest blob (ball bearing needs to
        # be a similar size as the beetles). idxmax returns a single row label
        # even when several regions tie for the maximum area, so the lookup
        # below always yields one scalar.
        largest_region_index = self.image_properties_df['area'].idxmax()
        self.max_kmeans_label = int(
            self.image_properties_df.loc[largest_region_index, 'kmeans_label']
        )
        in_selected_cluster = (
            self.image_properties_df['kmeans_label'] == self.max_kmeans_label
        )
        self.image_selected_df = self.image_properties_df[in_selected_cluster]