### Read in modules

In [1]:
# for reading in lists of files in folders 
from os import listdir 
from os.path import isfile, join, splitext

# for image analysis
# it gave me error first, but worked after
# !pip3 install opencv-python
import cv2

# for routine analysis
import numpy as np
import matplotlib.pyplot as plt

### Define functions

In [2]:
# a function to select the largest connected component

def undesired_objects(image, connectivity=4):
    """Keep only the largest connected foreground component of a mask.

    Parameters
    ----------
    image : array-like
        2-D mask. It is cast to ``uint8`` before being handed to
        ``cv2.connectedComponentsWithStats``, which treats non-zero pixels
        as foreground.
    connectivity : int, optional
        Pixel connectivity forwarded to OpenCV (4 or 8). Defaults to 4.

    Returns
    -------
    numpy.ndarray
        float64 array with the same shape as ``image``: 255 on the pixels of
        the largest foreground component, 0 everywhere else. If the mask has
        no foreground at all, the result is all zeros.
    """
    image = np.asarray(image).astype('uint8')
    nb_components, labels, stats, _centroids = cv2.connectedComponentsWithStats(
        image, connectivity=connectivity
    )

    largest_only = np.zeros(labels.shape)

    # Label 0 is the background, so fewer than two labels means there is no
    # foreground component to select (indexing stats[1] would fail here).
    if nb_components < 2:
        return largest_only

    # Pixel counts of the foreground components only (labels 1..n-1).
    # np.argmax returns the first maximum, i.e. the lowest label wins a tie.
    foreground_sizes = stats[1:, cv2.CC_STAT_AREA]
    max_label = 1 + int(np.argmax(foreground_sizes))

    largest_only[labels == max_label] = 255

    return largest_only

# a function to calculate the area of a polygon

def PolyArea(x,y):
    """Area of a simple polygon from its vertex coordinates (shoelace formula).

    Parameters
    ----------
    x, y : array-like
        1-D sequences of equal length with the vertex coordinates, listed in
        order around the polygon (either orientation).

    Returns
    -------
    numpy.float64
        The unsigned area enclosed by the polygon; 0.0 for empty input.
    """
    # Work in float64: with integer inputs (e.g. int32 contour coordinates)
    # np.dot accumulates in the input dtype and can silently wrap around.
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)
    return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))
    

### Set variables

In [3]:
## SET HSV THRESHOLDS
## HSV = HUE, SATURATION, VALUE 
## Lower and upper bounds handed to cv2.inRange on the HSV-converted image;
## pixels inside the range become the foreground of the mask.
## NOTE(review): OpenCV stores hue as 0-179 for 8-bit images, so 20-70
## presumably targets yellow-green to green tissue - confirm against the scans.
low_thresholds = (20, 25, 25)
high_thresholds = (70, 255, 255)

## SET ITERATIONS OF DILATION AND EROSION
## The same count is used for the dilation and for the erosion that follows it.
iters = 5

## FOLDER TO READ IN IMAGES - change the path to the folder with scanned images
images_path = "./01_images/" 

## FOLDER TO SAVE PLOTS - change the path to the folder where you want to save the new outlines
plot_path = "./02_plots/"

### Retrieve image names and filenames

In [4]:
## Name the files as:
## individualCode_totalNodeNumber_nodeAndLetter_numberOfleafLobes
## For the count of totalNodeNumber, we include nodes 1 and 2 even if they couldn't be sampled, but we don't include the
## final nodes close to the inflorescence that were too small to scan - only up to the transition to inflorescence.
## Example: AM15_4_11_8b_7.jpg

## retrieve a sorted list of image file names (regular files only, sub-folders are skipped)
## NOTE: the sort is plain string order, so e.g. "10" sorts before "2"
image_files = sorted(f for f in listdir(images_path) if isfile(join(images_path, f)))

## retrieve image names: the file name without its extension
## (splitext copes with extensions of any length, e.g. ".jpeg" or ".tiff",
## instead of assuming a dot plus exactly three characters)
image_names = [splitext(f)[0] for f in image_files]

### Process images

In [None]:
## create a 5x5 matrix as the kernel for dilation and erosion
## (it does not depend on the image, so it is built once before the loop)
kernel = np.ones((5, 5), np.uint8)

for f, (curr_file, curr_name) in enumerate(zip(image_files, image_names)):

    print(curr_file, f)

    ## read in image
    ## cv2.imread returns None (it does not raise) when the file is missing or
    ## is not a readable image, e.g. a stray non-image file in the folder
    img = cv2.imread(join(images_path, curr_file))
    if img is None:
        print("  skipped: could not be read as an image")
        continue

    ## convert to hsv
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    ## create mask binary image using color thresholds
    mask = cv2.inRange(hsv, low_thresholds, high_thresholds)

    ## apply dilation and erosion
    img_dilation = cv2.dilate(mask, kernel, iterations=iters)
    img_erosion = cv2.erode(img_dilation, kernel, iterations=iters)

    ## nothing passed the color thresholds: there is no component to trace
    if not img_erosion.any():
        print("  skipped: no pixels within the HSV thresholds")
        continue

    ## select the largest connected component
    leaf = undesired_objects(img_erosion)

    ## define contours of all connected components
    ## findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    ## (image, contours, hierarchy) in 3.x; index -2 is the contour list in both
    contour = cv2.findContours(np.array(leaf, np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2]

    ## flatten each contour from (points, 1, 2) to (points, 2) = x, y columns
    outlines = [np.reshape(c, (-1, 2)) for c in contour]

    ## calculate areas of all contours
    areas = [PolyArea(o[:, 0], o[:, 1]) for o in outlines]

    if not areas:
        print("  skipped: no contour found")
        continue

    ## find the index of the largest contour (first one wins a tie)
    max_ind = int(np.argmax(areas))

    ## select the largest contour
    leaf_contour = outlines[max_ind]

    ## save plot
    ## NOTE(review): image rows grow downwards while the plot's y axis grows
    ## upwards and is not inverted here, so the saved trace appears vertically
    ## mirrored relative to the scan - confirm this is intended
    fig, ax = plt.subplots(figsize=(40, 40))
    ax.plot(leaf_contour[:, 0], leaf_contour[:, 1], color="dodgerblue")
    ax.fill(leaf_contour[:, 0], leaf_contour[:, 1], color="lightgray")
    ax.set_aspect("equal")
    ax.axis('off')
    fig.savefig(join(plot_path, "trace_" + curr_file), bbox_inches='tight')
    ## close the figure explicitly so figures do not pile up in memory
    plt.close(fig)
