# Leaf Classification - Daniel Christodoulou 

In [2]:
# Importing libraries
import numpy as np
import os, os.path
from os import listdir
from skimage import io
from scipy.ndimage import binary_opening,binary_closing
from skimage.filters import threshold_mean
import cv2 as cv
import random as r

In [3]:
# Function to return a rectangle of an image that fully contains the leaf, this is used to create a template
def cropObject(im):
    """Return the smallest axis-aligned rectangle of ``im`` containing the leaf.

    The image is binarised with an adaptive Gaussian threshold, cleaned with a
    morphological opening followed by a closing (4x4 square structuring
    element), and the bounding box of the remaining foreground pixels is cut
    out of the original image.

    Parameters
    ----------
    im : 2-D array
        Single-channel image, as passed to ``cv.adaptiveThreshold``
        (presumably 8-bit grayscale, e.g. from ``cv.imread(path, 0)``).

    Returns
    -------
    2-D array
        The slice ``im[ymin:ymax + 1, xmin:xmax + 1]`` (a view, not a copy).
        Both bounds are inclusive so the crop fully contains the object.
        If no foreground pixel is found, ``im`` is returned unchanged.
    """
    # Convert image to binary. The threshold output is 0/1; subtracting 1 from
    # the unsigned result wraps 0 -> 255 and maps 1 -> 0, which inverts the
    # mask so that pixels below the local threshold become non-zero
    # (foreground) for the morphology below.
    im_2 = cv.adaptiveThreshold(im,1,cv.ADAPTIVE_THRESH_GAUSSIAN_C,cv.THRESH_BINARY,41,10) - 1
    # A square 4x4 structuring element
    SE = np.ones((4,4))
    # Remove salt noise by performing an opening followed by a closing with the structuring element
    im_2 = binary_opening(im_2,SE)
    im_2 = binary_closing(im_2,SE)

    # Locate bounds of object (leaf): indices of every row / column that
    # contains at least one foreground pixel.
    rows = np.flatnonzero(im_2.any(axis=1))
    cols = np.flatnonzero(im_2.any(axis=0))
    if rows.size == 0:
        # Nothing survived the clean-up; fall back to the whole image rather
        # than failing on undefined bounds.
        return im

    ymin, ymax = rows[0], rows[-1]
    xmin, xmax = cols[0], cols[-1]
    # Output is the smaller template image (max bounds are inclusive)
    return im[ymin:ymax + 1, xmin:xmax + 1]
    

In [38]:
# Create Template for each image in the database
def createTemplates(path, save_folder=None):
    """Create and save a cropped grayscale template for every image in the database.

    ``path`` is expected to contain one sub-directory per class; each image in
    a sub-directory is read as grayscale, cropped with ``cropObject`` and
    written to ``<save_folder>/<sub-directory>/template_<image name>``.

    Parameters
    ----------
    path : str
        Root directory of the image database.
    save_folder : str, optional
        Root directory for the generated templates. Defaults to the author's
        original absolute path; pass your own location to override it.
    """
    if save_folder is None:
        # Change path to what is relevant
        save_folder = "/Users/danielchristodoulou/Documents/GitHub/Leaf-Classification-Template-Matching/leaf/Templates_Gray"

    for species in listdir(path):
        # Some hidden files start with '.' so we ignore those
        if species.startswith('.'):
            continue
        species_dir = os.path.join(path, species)
        if not os.path.isdir(species_dir):
            # Stray files next to the class folders cannot be listed.
            continue

        # Make sure the destination exists before any template is written to it.
        out_dir = os.path.join(save_folder, species)
        os.makedirs(out_dir, exist_ok=True)

        for name in listdir(species_dir):
            if name.startswith('.'):
                continue
            image = cv.imread(os.path.join(species_dir, name), 0)
            if image is None:
                # Not a readable image; nothing to crop.
                continue
            template = cropObject(image)
            cv.imwrite(os.path.join(out_dir, 'template_' + name), template)

# Generate the templates for every image under "leaf/RGB" (relative to the current working directory)
createTemplates("leaf/RGB")

In [44]:
# Gets the mean pixel intensity of an image, normalised by 255 (for a 0/255 binary image this is the fraction of white pixels)
def aveIntensity(img):
    """Return the mean pixel value of ``img`` divided by 255.

    For an image whose pixels are either 0 or 255 this is the fraction of
    white pixels.

    Parameters
    ----------
    img : array-like
        Image pixel values.

    Returns
    -------
    float
        ``sum(img) / (255 * number_of_pixels)``; ``0.0`` for an empty image.
    """
    # Accumulate in float64 so small integer pixel types cannot overflow.
    pixels = np.asarray(img, dtype=np.float64)
    if pixels.size == 0:
        return 0.0
    return float(pixels.sum() / (255 * pixels.size))

In [45]:
# Function that uses template matching to classify images of leaves with a level of confidence
def classifyLeaf(img):
    """Classify a leaf image by template matching against "leaf/Templates_Gray".

    Every template in every class folder is matched against ``img`` with
    ``cv.TM_CCOEFF``. The best-matching rectangle and the template are both
    binarised around their mean, and a confidence is derived from their
    difference. Classes whose confidence exceeds 0.99 are candidates.

    Parameters
    ----------
    img : 2-D array
        Grayscale image to classify.

    Returns
    -------
    str
        ``"<genus> <species>"`` (2nd and 3rd space-separated words of the
        class folder name) of the candidate with the highest confidence, or
        ``"Could not classify the leaf in this image"`` if there is none.
    """
    template_root = "leaf/Templates_Gray"
    img_h, img_w = img.shape[:2]
    # Best confidence seen so far for each "<genus> <species>" label.
    storage = {}

    # Begins the loop to enter the database
    for i in listdir(template_root):
        # Some hidden files start with '.' so we ignore those
        if i.startswith('.'):
            continue
        # Begins the loop to enter each template in the database
        for j in listdir(template_root + "/" + i):
            if j.startswith('.'):
                continue
            template = cv.imread(template_root + "/" + i + "/" + j, 0)
            if template is None:
                # Not a readable image.
                continue
            w, h = template.shape[::-1]
            if h > img_h or w > img_w:
                # A template larger than the image cannot be matched.
                continue

            # Applying template matching
            res = cv.matchTemplate(img, template, cv.TM_CCOEFF)
            # minMaxLoc returns the coordinates of the rectangle with the greatest match to the template
            _, _, _, max_loc = cv.minMaxLoc(res)
            # Splice out rectangle
            crop_img = img[max_loc[1]:max_loc[1] + h, max_loc[0]:max_loc[0] + w]
            # Convert to binary (values 0 / 1)
            temp_bi = cv.threshold(template, threshold_mean(template), 1, cv.THRESH_BINARY)[1]
            test_bi = cv.threshold(crop_img, threshold_mean(crop_img), 1, cv.THRESH_BINARY)[1]
            # Take the difference between the binary versions of the template and
            # the best-matching rectangle to calculate the confidence level.
            # NOTE(review): if both arrays are uint8, 0 - 1 wraps to 255, so the
            # two mismatch directions are weighted very differently (255 vs 1)
            # by aveIntensity. The 0.99 threshold was tuned against this metric,
            # so it is left unchanged here - confirm whether this is intended.
            diff = test_bi - temp_bi
            confidence = 1 - aveIntensity(diff)
            if confidence > 0.99:
                parts = i.split(" ")
                label = parts[1] + " " + parts[2]
                # Keep the best score per label instead of the last one seen.
                if confidence > storage.get(label, 0):
                    storage[label] = confidence

    if storage:
        # Returns the species with the greatest confidence level
        # (max over the stored values, not over the label strings).
        return max(storage, key=storage.get)
    return "Could not classify the leaf in this image"

In [49]:
# Function to test accuracy of implementation
def testing(path,n):
    """Estimate classification accuracy on ``n`` randomly chosen database images.

    For each trial a random class folder under ``path`` and a random image
    inside it are picked, the image is classified with ``classifyLeaf`` and
    the result is compared with the 2nd and 3rd space-separated words of the
    folder name.

    Parameters
    ----------
    path : str
        Root directory of the image database (one sub-directory per class).
    n : int
        Number of random trials.

    Returns
    -------
    float
        Fraction of trials classified correctly, in [0, 1]; ``0.0`` when
        ``n <= 0``.

    Raises
    ------
    ValueError
        If ``path`` contains no non-hidden entries.
    """
    if n <= 0:
        return 0.0

    # Some hidden files start with '.' so we ignore those up front; this way a
    # trial is never wasted on a hidden entry.
    species_dirs = [d for d in os.listdir(path) if not d.startswith(".")]
    if not species_dirs:
        raise ValueError("No class directories found in " + str(path))

    correctCounter = 0
    for _ in range(n):
        # Chooses a random directory
        species = r.choice(species_dirs)
        species_dir = os.path.join(path, species)
        images = [f for f in os.listdir(species_dir) if not f.startswith(".")]
        if not images:
            # Nothing to test in this folder; the trial counts as incorrect.
            continue
        # Chooses a random image within the directory
        im_name = r.choice(images)
        # Read from the same database the image was chosen from.
        im = cv.imread(os.path.join(species_dir, im_name), 0)
        if im is None:
            # Unreadable image; the trial counts as incorrect.
            continue
        test = classifyLeaf(im)
        spec = species.split(" ")
        ref = spec[1] + " " + spec[2]
        # Tests the result of the classifyLeaf
        if test == ref:
            correctCounter += 1
    return correctCounter / n
# Absolute path to the image database; change it to the location on your machine
path = "/Users/danielchristodoulou/Documents/GitHub/Leaf-Classification-Template-Matching/leaf/RGB"
# The accuracy varies significantly between runs because the test images are sampled at random.
# Note that testing() returns a fraction in [0, 1], not a percentage.
print("The percentage accuracy of testing 50 images from database:"+str(testing(path,50)))

The percentage accuracy of testing 50 images from database:0.72
