In [33]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from patchify import patchify,unpatchify
import tensorflow as tf
import keras.backend as K
import pandas as pd
import numpy as np
import os
from skimage.morphology import skeletonize
import matplotlib.pyplot as plt
import math
import cv2
from skan import Skeleton, summarize
import networkx as nx
np.set_printoptions(suppress=True)

In [34]:
def f1(y_true, y_pred):
    """Keras metric: F1 score (harmonic mean of precision and recall).

    Both inputs are clipped to [0, 1] and rounded before counting, so the
    counts are taken over 0/1 values.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor, same shape as ``y_true``.

    Returns:
        Scalar tensor ``2 * P * R / (P + R + eps)``, where ``eps`` is the
        Keras backend epsilon that keeps every division finite.
    """
    eps = K.epsilon()

    # Counts shared by precision and recall.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision = true_positives / (predicted_positives + eps)
    recall = true_positives / (actual_positives + eps)

    return 2*((precision*recall)/(precision+recall+eps))

def iou(y_true, y_pred):
    """Keras metric: soft intersection-over-union.

    Reduces over axes 1, 2 and 3, so both inputs must be at least 4-D
    (presumably batch, height, width, channels -- confirm against the model).

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor, same shape as ``y_true``.

    Returns:
        Mean over the last remaining axis of the per-sample ratio
        ``(intersection + eps) / (union + eps)``.
    """
    reduce_axes = [1, 2, 3]

    overlap = K.sum(K.abs(y_true * y_pred), axis=reduce_axes)
    squared_mass = K.sum(K.square(y_true), reduce_axes) + K.sum(K.square(y_pred), reduce_axes)

    # union = |A|^2 + |B|^2 - intersection; epsilon keeps 0/0 finite.
    per_sample = (overlap + K.epsilon()) / (squared_mass - overlap + K.epsilon())
    return K.mean(per_sample, axis=-1)

In [35]:
def padder(image, filename, patch_size):
    """Zero-pad an image so both spatial sides are multiples of ``patch_size``.

    The padding is split as evenly as possible between the two sides of each
    axis; when it is odd, the extra pixel goes to the bottom / right.

    Note that the padding amount is ``(side // patch_size + 1) * patch_size - side``,
    so a side that is already an exact multiple still grows by one full patch.
    This is kept as-is because downstream code filters components by their row
    position in the padded image.

    Args:
        image: Array whose first two axes are height and width.
        filename: Unused; kept so existing callers keep working. Masks
            (``'_mask'`` in the name) and ordinary images are padded identically.
        patch_size: Side length of the square patches the image will be cut into.

    Returns:
        The padded image as returned by ``cv2.copyMakeBorder``.
    """
    h, w = image.shape[:2]

    height_padding = ((h // patch_size) + 1) * patch_size - h
    width_padding = ((w // patch_size) + 1) * patch_size - w

    top_padding = height_padding // 2
    bottom_padding = height_padding - top_padding

    left_padding = width_padding // 2
    right_padding = width_padding - left_padding

    # The fill colour is passed by keyword: the positional slot after
    # borderType is the optional output array (dst), not a border option.
    return cv2.copyMakeBorder(
        image,
        top_padding, bottom_padding, left_padding, right_padding,
        cv2.BORDER_CONSTANT,
        value=[0, 0, 0],
    )

In [36]:
def predict_image(image,patch_size,model):
    """Run ``model`` patch-by-patch over ``image`` and stitch the result.

    The image is padded with ``padder``, cut into non-overlapping
    ``patch_size`` x ``patch_size`` tiles (step equals patch size), predicted
    in a single batch, and reassembled.

    Args:
        image: Input image; the reshapes below assume a 2-D (grayscale) array.
        patch_size: Side length of the square tiles.
        model: Object exposing ``predict(batch, verbose=0)`` whose output can be
            reshaped to one ``patch_size`` x ``patch_size`` map per tile.

    Returns:
        Prediction with the shape of the *padded* image (it is not cropped
        back to the original size).
    """
    padded = padder(image, 'none', patch_size)

    tile_grid = patchify(padded, patch_size, patch_size)
    n_rows, n_cols = tile_grid.shape[0], tile_grid.shape[1]

    # Flatten the tile grid into a batch for one predict call.
    batch = tile_grid.reshape(-1, patch_size, patch_size)
    predicted = model.predict(batch, verbose=0)

    predicted_grid = predicted.reshape(n_rows, n_cols, patch_size, patch_size)
    return unpatchify(predicted_grid, padded.shape)

In [37]:
# Segment every .tif scan in 'input' with the trained model and write one
# root-mask PNG per plant position into `plant_folder`.
if os.path.isdir("output"):
    print('Output folder exists')
else:
    os.mkdir('output')
if os.path.isdir("plants"):
    print('Plant folder exists')
else:
    os.mkdir('plants')
plant_folder = 'plants'
output_folder = 'output'
model_name = 'root_test.h5'
model = tf.keras.models.load_model(model_name, custom_objects={'f1': f1, 'iou': iou})

# Each prediction is split into this many equal-width vertical strips,
# and exactly one PNG is written per strip.
num_parts = 5

for filename in os.listdir('input'):
    if '.tif' not in filename:
        continue

    image_path = os.path.join('input', filename)
    filename = filename.replace('.tif', '')
    filename = filename.replace('.png', '')
    image = cv2.imread(image_path, 0)
    if image is None:
        # cv2.imread returns None instead of raising on unreadable files.
        print(image_path, 'could not be read, skipping')
        continue

    # Drop the right-most 250 columns of the scan.
    image = image[:, :-250]

    # Crop to the bounding box of all Canny edges.
    # np.argwhere yields (row, col) pairs, so min/max are per-axis extents.
    edges = cv2.Canny(image, 0, 125)
    points = np.argwhere(edges > 0)
    if points.size == 0:
        print(image_path, 'contains no edges, skipping')
        continue
    row_min, col_min = points.min(0)
    row_max, col_max = points.max(0)
    image = image[row_min:row_max, col_min:col_max]

    # Predict, discard low-confidence pixels, and convert to an 8-bit image.
    prediction = predict_image(image, 128, model)
    prediction[prediction < 0.4] = 0
    prediction *= 255
    prediction = prediction.astype('uint8')

    # Everything that survived the confidence cut becomes foreground.
    _, binary = cv2.threshold(prediction, 0, 255, cv2.THRESH_BINARY)
    retval, labels, stats, centroids = cv2.connectedComponentsWithStats(binary)

    # Keep only components whose pixel area is in (100, 99900) and whose
    # bounding box starts between rows 300 and 850 (exclusive).
    mask = np.zeros_like(binary)
    for label in range(1, retval):  # label 0 is the background
        area = stats[label, cv2.CC_STAT_AREA]
        top = stats[label, cv2.CC_STAT_TOP]
        if 100 < area < 99900 and 300 < top < 850:
            mask[labels == label] = 255
    prediction = cv2.bitwise_and(binary, binary, mask=mask)

    # Full-size black placeholder written for strips without any plant.
    empty_image = np.zeros(prediction.shape, dtype=np.uint8)

    height, width = prediction.shape
    part_width = width // num_parts

    # One entry per strip, in strip order; (None, None, None, None) marks an
    # empty strip so that plant numbering always matches the strip position.
    bounding_boxes = []
    for i in range(num_parts):
        start_col = i * part_width
        end_col = (i + 1) * part_width
        roi = prediction[:, start_col:end_col]

        n_labels, roi_labels, roi_stats, _ = cv2.connectedComponentsWithStats(roi)

        box = (None, None, None, None)
        if n_labels >= 2:  # at least one foreground component
            # Largest component by area, skipping background row 0.
            largest_component_index = np.argmax(roi_stats[1:, cv2.CC_STAT_AREA]) + 1
            largest_component_mask = np.uint8(roi_labels == largest_component_index) * 255

            contours, _ = cv2.findContours(largest_component_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if contours:
                x, y, w, h = cv2.boundingRect(contours[0])
                # Shift from strip coordinates to full-image coordinates.
                x += start_col
                box = (x, y, x + w, y + h)
        bounding_boxes.append(box)

    # Write exactly `num_parts` images per scan.
    for i, (x1, y1, x2, y2) in enumerate(bounding_boxes):
        if x1 is not None:
            component = prediction[y1:y2, x1:x2]
        else:
            component = empty_image
        component_filename = os.path.join(plant_folder, filename)
        component_filename = component_filename + f'_plant_{i + 1}.png'
        cv2.imwrite(component_filename, component)

In [38]:
# Print one root-end coordinate for every plant mask in `plant_folder`.
def _branch_endpoints(branches):
    """Return the (x, y) source and destination pixel of each branch row, in row order."""
    endpoints = []
    for _, row in branches.iterrows():
        # skan stores image coordinates as (axis 0, axis 1) = (row, col) = (y, x).
        endpoints.append((int(row['image-coord-src-1']), int(row['image-coord-src-0'])))
        endpoints.append((int(row['image-coord-dst-1']), int(row['image-coord-dst-0'])))
    return endpoints


for filename in os.listdir(plant_folder):
    # Read the current image from the folder in grayscale mode.
    img_path = os.path.join(plant_folder,filename)
    image = cv2.imread(img_path, 0)

    # Coordinates collected from the skeleton; both stay empty for blank images.
    root_tips = []
    junctions = []

    # Blank placeholder masks (and unreadable files) have no skeleton to
    # analyse, so they are reported as "No Plant" without calling skan.
    if image is not None and np.count_nonzero(image) != 0:
        # Thin every object to a one-pixel-wide line, then summarise its branches.
        skele = skeletonize(image)
        skele_data = summarize(Skeleton(skele))

        # Per skan's documentation, branch-type 1 is junction-to-endpoint and
        # branch-type 2 is junction-to-junction.
        # NOTE(review): endpoint-to-endpoint branches (type 0, i.e. a root with
        # no laterals) are not considered here -- confirm this is intended.
        root_tips = _branch_endpoints(skele_data[skele_data['branch-type'] == 1])
        junctions = _branch_endpoints(skele_data[skele_data['branch-type'] == 2])

    if len(root_tips) != 0:
        # The last collected endpoint is reported as the root end.
        print(filename, 'Root End Coordinates:', root_tips[-1])
    else:
        print(filename, 'Root End Coordinates: No Plant in Image', )


test_image_10_plant_1.png Root End Coordinates: (16, 566)
test_image_10_plant_2.png Root End Coordinates: (23, 657)
test_image_10_plant_3.png Root End Coordinates: (164, 290)
test_image_10_plant_4.png Root End Coordinates: (46, 307)
test_image_10_plant_5.png Root End Coordinates: (2, 675)
test_image_11_plant_1.png Root End Coordinates: (18, 495)
test_image_11_plant_2.png Root End Coordinates: (165, 1046)
test_image_11_plant_3.png Root End Coordinates: (178, 1172)
test_image_11_plant_4.png Root End Coordinates: (92, 1142)
test_image_1_plant_1.png Root End Coordinates: (44, 562)
test_image_1_plant_2.png Root End Coordinates: (8, 522)
test_image_1_plant_3.png Root End Coordinates: (1, 620)
test_image_1_plant_4.png Root End Coordinates: (17, 424)
test_image_1_plant_5.png Root End Coordinates: (52, 643)
test_image_2_plant_1.png Root End Coordinates: (47, 808)
test_image_2_plant_2.png Root End Coordinates: (110, 1139)
test_image_2_plant_3.png Root End Coordinates: (30, 1194)
test_image_2_pla

In [41]:
# Measure the total lateral-root length of every plant mask and save it as CSV.
lateral_lengths = []
images = []
for filename in os.listdir(plant_folder):
    # Read the current image in grayscale mode (0 signifies grayscale).
    img_path = os.path.join(plant_folder, filename)
    img = cv2.imread(img_path, 0)

    # Blank images keep a lateral length of 0.
    lateral_len = 0

    if np.count_nonzero(img) != 0:
        # Thin the mask to a one-pixel-wide skeleton and summarise its branches.
        skele_data = summarize(Skeleton(skeletonize(img)))

        # Only branch-type 1 rows are measured.
        branch_lengths = skele_data.loc[skele_data['branch-type'] == 1, 'branch-distance'].tolist()

        # Sum all such branches, then drop the longest one: what remains is
        # counted as lateral length.
        for current_branch in branch_lengths:
            lateral_len += current_branch
        lateral_len -= max(branch_lengths, default=0)

    lateral_lengths.append(lateral_len)
    images.append(filename.replace('.png', ''))

# Build the frame column-wise so the lengths stay numeric instead of being
# converted to strings alongside the IDs.
lateral_df = pd.DataFrame({'Plant ID': images, 'Length (px)': lateral_lengths})
lateral_df.to_csv(os.path.join(output_folder, 'lateral_root_length.csv'), index=False)

In [42]:
def predict_kaggle(folder):
    """
    Measure a root length for every image in a folder from its skeleton graph.

    For each non-blank image the mask is skeletonized, the skan branch summary
    is turned into a weighted graph, and the length is the shortest
    'branch-distance' path between the lowest and highest node id of the
    largest connected component (by node count).
    NOTE(review): using min/max node id as the two ends of the root presumably
    relies on skan numbering nodes in raster order -- confirm.

    Args:
    folder (str): Path to the folder containing root images, with or without
        a trailing path separator.

    Returns:
    pandas.DataFrame: 'Plant ID' (file name without '.png') and 'Length (px)'
        (numeric; 0 for blank or unreadable images and for skeletons without
        any branch), one row per file in directory-listing order.
    """
    images = []
    root_len = []

    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename), 0)

        # Default for blank/unreadable images and for edgeless skeletons.
        path_len = 0

        if img is not None and np.count_nonzero(img) != 0:
            # Skeletonize and summarise into one row per branch.
            skele_data = summarize(Skeleton(skeletonize(img)))

            # Nodes are skan node ids; edges carry the branch length.
            G = nx.from_pandas_edgelist(skele_data, source='node-id-src', target='node-id-dst', edge_attr='branch-distance')

            connected_components = list(nx.connected_components(G))
            if connected_components:
                # First component with the most nodes.
                largest_component = max(connected_components, key=len)
                path_len = nx.dijkstra_path_length(
                    G,
                    min(largest_component),
                    max(largest_component),
                    weight='branch-distance',
                )

        root_len.append(path_len)
        images.append(filename.replace('.png', ''))

    # Column-wise construction keeps the length column numeric.
    submission = pd.DataFrame({'Plant ID': images, 'Length (px)': root_len})
    return submission

# Measure every plant image in `plant_folder` and write the lengths to a CSV
# file inside `output_folder`.
submission = predict_kaggle(f'{plant_folder}/')
submission_path = f'{output_folder}/root_lengths.csv'
submission.to_csv(submission_path, index=False)