# Imports

In [1]:
import json
import glob
import os
import pandas as pd
import math

import cv2
import numpy as np
from matplotlib import pyplot as plt

# Getting the File Paths of the JSON Files

In [2]:
def get_files(path):
    """Recursively collect the absolute paths of all ``*.json`` files under *path*.

    Args:
        path: Directory to search; every sub-directory is visited with
            ``os.walk``.

    Returns:
        list[str]: Absolute paths of the matching files, in the order in
        which ``os.walk`` and ``glob`` yield them.  The list is empty when
        *path* does not exist or contains no JSON files.
    """
    all_files = []
    for root, _dirs, _files in os.walk(path):
        # Escape the directory part so glob metacharacters in a folder name
        # (e.g. "run[1]") are matched literally instead of being treated as
        # a pattern, which would silently drop every file in that folder.
        pattern = os.path.join(glob.escape(root), '*.json')
        all_files.extend(os.path.abspath(match) for match in glob.glob(pattern))
    return all_files

# Resize the Image

In [3]:
def resizeImage(img):
    """Return *img* downscaled to a quarter of its width and height.

    Args:
        img: Image array whose first two dimensions are (rows, columns).

    Returns:
        The result of ``cv2.resize`` with both dimensions divided by 4 and
        truncated to integers.
    """
    # NOTE: countCircles divides the ground-truth coordinates by the same
    # factor of 4, so the two values must be changed together.
    SCALE_DIVISOR = 4
    new_width = int(img.shape[1] / SCALE_DIVISOR)
    new_height = int(img.shape[0] / SCALE_DIVISOR)
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (new_width, new_height))

# Convert the Image to Grayscale
The blue channel is used as the grayscale image, rather than the average intensity of the three channels, because the difference in intensity between the colonies and the dish itself is more apparent in this channel.

In [4]:
def getBlueChannel(img):
    """Return the first colour plane of a three-channel image.

    The image is split with ``cv2.split`` into exactly three planes and the
    first one is returned (the blue plane, assuming OpenCV's usual BGR
    channel order).  When the module-level ``DEBUGGING`` flag is truthy the
    plane is also displayed with matplotlib.
    """
    blue, _green, _red = cv2.split(img)

    if not DEBUGGING:
        return blue

    # Debug view of the single-channel image
    plt.subplots(figsize=(10, 10))
    plt.title("Grayscale Image (Blue Channel)")
    plt.imshow(blue, cmap=plt.cm.gray)
    plt.show()
    return blue

# Automatic Petri Dish Bounds Detection

In [5]:
def detectPetriDish(file_name):
    """Look up the petri-dish circle recorded for *file_name* in ``bounds.json``.

    ``bounds.json`` is read from the current working directory and must
    contain a ``"bounds"`` list whose entries have the keys ``file_name``,
    ``h``, ``k`` and ``r``.  Entries are matched against the base name of
    *file_name* with its extension removed.

    Args:
        file_name: Image file name or path (e.g. ``"12442.jpg"``).

    Returns:
        tuple[int, int, int]: ``(h, k, r)`` of the first matching entry,
        converted to ``int``, or ``(-1, -1, -1)`` when no entry matches.
    """
    target = os.path.splitext(os.path.basename(file_name))[0]

    # A context manager guarantees the handle is closed; this function is
    # called once per image, so a leaked handle would accumulate.
    with open("bounds.json") as f:
        bounds = json.load(f)['bounds']

    for d in bounds:
        if d['file_name'] == target:
            return int(d['h']), int(d['k']), int(d['r'])

    return -1, -1, -1

# Customized Histogram Equalization within Petri Dish Bounds

This customized form of histogram equalization (HE) builds the cumulative histogram using only the pixels within the bounds of the petri dish found through the Circular Hough Transform.

In [6]:
def histogramEqualization(img, h, k, r):
    """Equalize *img* using a histogram built only from pixels inside the dish.

    A pixel at ``img[row][col]`` belongs to the dish when
    ``(row - h)**2 + (col - k)**2 < r**2``.  The cumulative histogram of
    those pixels is stretched to ``[0, 255]`` and applied as a lookup table
    to the whole image.

    Args:
        img: 2-D array of integer intensities in ``[0, 255]``.
        h: Row coordinate of the dish centre.
        k: Column coordinate of the dish centre.
        r: Dish radius in pixels.

    Returns:
        A new ``uint8`` array with the same shape as *img*; *img* itself is
        not modified.  Intensities that do not exceed the smallest in-dish
        intensity map to 0.
    """
    # Build the in-dish mask and histogram in NumPy instead of a per-pixel
    # Python loop; the resulting counts are identical.
    rows, cols = np.ogrid[:img.shape[0], :img.shape[1]]
    inside_dish = (rows - h) ** 2 + (cols - k) ** 2 < r ** 2
    hist = np.bincount(img[inside_dish], minlength=256)
    cdf = hist.cumsum()

    # Show the CDF and histogram of the image
    if DEBUGGING:
        # Scaled only so the CDF is visible on the same axes as the histogram.
        cdf_normalized = cdf * float(hist.max()) / cdf.max()
        plt.plot(cdf_normalized, color = 'b')
        plt.hist(img.flatten(),256,[0,256], color = 'r')
        plt.xlim([0,256])
        plt.legend(('Cumulative Distribution Function','Histogram'), loc = 'upper left')
        plt.show()

    # Mask the empty leading part of the CDF so the stretch starts at the
    # first intensity that actually occurs inside the dish.
    cdf_m = np.ma.masked_equal(cdf, 0)
    cdf_m = (cdf_m - cdf_m.min()) * 255 / (cdf_m.max() - cdf_m.min())
    lookup = np.ma.filled(cdf_m, 0).astype('uint8')

    img_equalized = lookup[img]

    # Show the equalized image
    if DEBUGGING:
        plt.subplots(figsize = (10, 10))
        plt.title("Histogram Equalization")
        plt.imshow(img_equalized, cmap = plt.cm.gray)
        plt.show()

    return img_equalized

# Non-Local Means Denoising

In [7]:
def denoise(img):
    """Apply OpenCV's non-local means denoising to *img* and return the result.

    The filter is called with ``h = 31``.  When the module-level
    ``DEBUGGING`` flag is truthy the denoised image is also displayed with
    matplotlib.
    """
    # TODO: Change h-value?
    result = cv2.fastNlMeansDenoising(img, None, h=31)

    if not DEBUGGING:
        return result

    # Debug view of the denoised image
    plt.subplots(figsize=(10, 10))
    plt.title("Non-Local Means Denoising")
    plt.imshow(result, cmap=plt.cm.gray)
    plt.show()
    return result

# Circular Hough Transform

In [8]:
def detectCircles(img_bin, img_gray, h, k, r, file_name, sensitivity, nhood, upperthresh, accumulator, min_radius, max_radius):
    """Run the Hough circle transform on *img_bin* and score the detections.

    Every tuning argument is passed to ``cv2.HoughCircles`` incremented by
    one, so a value of 0 here corresponds to 1 in OpenCV.

    Args:
        img_bin: Single-channel image to search for circles.
        img_gray: Unused; kept so existing callers keep working.
        h, k, r: Petri-dish centre and radius, forwarded to ``countCircles``.
        file_name: Image file name, forwarded to ``countCircles``.
        sensitivity: Third positional argument of ``cv2.HoughCircles`` minus one.
        nhood: Fourth positional argument of ``cv2.HoughCircles`` minus one.
        upperthresh: ``param1`` minus one.
        accumulator: ``param2`` minus one.
        min_radius: ``minRadius`` minus one.
        max_radius: ``maxRadius`` minus one.

    Returns:
        Whatever ``countCircles`` returns for the detected circles.
    """
    from skimage.color import gray2rgb
    img_color = gray2rgb(img_bin)

    # Find circles in the image with Hough Circle Transform
    circles = cv2.HoughCircles(img_bin, cv2.HOUGH_GRADIENT, sensitivity+1, nhood+1, param1=upperthresh+1,
                                param2=accumulator+1, minRadius=min_radius+1, maxRadius=max_radius+1)

    # HoughCircles yields None when nothing is found.  Test for that
    # explicitly instead of wrapping the call in a bare ``except``, which
    # would also hide genuine errors raised inside countCircles and run it
    # a second time with an empty list.
    detected = [] if circles is None else circles[0, :]
    return countCircles(img_color, file_name, detected, h, k, r)
            
def countCircles(img, file_name, circles, h, k, r):
    """Score detected circles against the labelled colonies of one image.

    The labels are read from ``Sample Set/<base name of file_name>.json``
    (key ``labels``; each label uses ``id``, ``x``, ``y`` and ``height``).
    Only colonies of the kind selected by ``DETECT`` are considered:
    ``height < 60`` for "OPAQUE", ``height >= 60`` for "TRANSLUCENT".

    A detected circle is counted when its centre lies inside the petri
    dish.  It is a true positive when its centre is closer than half the
    radius of a not-yet-matched labelled colony to that colony's centre.

    Side effects: prints one CSV line
    (``file, F1, precision, recall, actual, counted, TP, FP, FN``) and
    writes an annotated copy of *img* to
    ``OPTIMIZED PARAMETER RESULTS/CHT NEW <DETECT>/<file_name>``.

    Args:
        img: Image on which the circles are drawn; a copy is annotated.
        file_name: Image file name, used for the label path and the output.
        circles: Iterable of ``(x, y, radius)`` detections; may be empty.
        h: Row coordinate of the dish centre.
        k: Column coordinate of the dish centre.
        r: Dish radius.

    Returns:
        tuple: ``(actual_count, counted, tp, fp, fn)``.
    """
    # Select what to detect
    DETECT = "OPAQUE"
    size_threshold = 60
    # Label coordinates refer to the full-size image, while detection runs
    # on the image produced by resizeImage, which shrinks it by this factor.
    RESIZE_FACTOR = 4
    UNMATCHED_COLOR = (0, 0, 255)
    MATCHED_COLOR = (0, 255, 0)

    label_path = f"Sample Set/{os.path.splitext(os.path.basename(file_name))[0]}.json"
    with open(label_path) as f:
        data = json.load(f)

    img_show = img.copy()

    # Labelled colonies of the selected kind as (id, x, y, radius), already
    # remapped to the resized image.  Built once instead of per circle.
    targets = []
    for colony in data['labels']:
        is_small = colony['height'] < size_threshold
        if (DETECT == "OPAQUE" and is_small) or (DETECT == "TRANSLUCENT" and not is_small):
            radius = int(colony['height'] / 2)
            targets.append((
                colony['id'],
                (colony['x'] + radius) / RESIZE_FACTOR,
                (colony['y'] + radius) / RESIZE_FACTOR,
                radius / RESIZE_FACTOR,
            ))
    actual_count = len(targets)

    matched_ids = set()
    counted = 0
    for circle in circles:
        cx, cy, cr = circle[0], circle[1], circle[2]

        # Do not count this circle if it is outside of the petri dish
        if not ((cy - h)**2 + (cx - k)**2 < r**2):
            continue
        counted += 1

        # TRUE POSITIVE: the detected centre lies within 50 % of the labelled
        # radius of a colony that has no match yet.  (A radius-error check
        # existed in an earlier version but is disabled.)
        color = UNMATCHED_COLOR
        for colony_id, x, y, radius in targets:
            max_center_dist = radius * 0.5
            if (cx - x)**2 + (cy - y)**2 < max_center_dist**2 and colony_id not in matched_ids:
                matched_ids.add(colony_id)
                color = MATCHED_COLOR
                break

        # Every counted circle is drawn exactly once, in its final colour.
        cv2.circle(img_show, (int(cx), int(cy)), int(cr), color, 2)

    tp = len(matched_ids)

    # FALSE POSITIVE = COUNTED - TRUE POSITIVE
    # FALSE NEGATIVE = ACTUAL - COUNTED (never negative)
    # NOTE(review): the conventional definition would be ACTUAL - TRUE
    # POSITIVE; the existing definition is kept so reported numbers do not
    # change. Confirm which one is intended.
    fp = counted - tp
    fn = max(actual_count - counted, 0)

    # Undefined ratios (zero denominator) are reported as 0.00%.
    precision = tp / (tp + fp) if (tp + fp) else None
    recall = tp / (tp + fn) if (tp + fn) else None
    if precision is None or recall is None or (precision + recall) == 0:
        fscore = None
    else:
        fscore = (2 * precision * recall) / (precision + recall)

    def as_percent(value):
        return "0.00%" if value is None else "{:.2%}".format(value)

    f1 = as_percent(fscore)
    prec = as_percent(precision)
    rec = as_percent(recall)

    # ID, F, P, R, ACTUAL, COUNTED, TP, FP, FN
    print(f"{file_name}, {str(f1)}, {str(prec)}, {str(rec)}, {actual_count}, {counted}, {tp}, {fp}, {fn}")
    cv2.imwrite(f"OPTIMIZED PARAMETER RESULTS/CHT NEW {DETECT}/" + file_name, img_show)

    return actual_count, counted, tp, fp, fn


# Complete Detection Function

In [9]:
def countColonies(path, file_name, sensitivity, nhood, upperthresh, accumulator, min_radius, max_radius):
    """Run the full detection pipeline on one image.

    Steps: read the image, resize it, take the blue channel, look up the
    petri-dish bounds, equalize the histogram inside the dish, denoise, and
    finally detect and score circles.

    Args:
        path: Path of the image file to read.
        file_name: Image file name, used to look up bounds and labels.
        sensitivity, nhood, upperthresh, accumulator, min_radius, max_radius:
            Tuning values forwarded unchanged to ``detectCircles``.

    Returns:
        The value returned by ``detectCircles``, or ``None`` when
        ``detectPetriDish`` reports no bounds (``(-1, -1, -1)``) for
        *file_name*.  Callers must handle the ``None`` case.

    Raises:
        FileNotFoundError: If the image at *path* cannot be read.
    """
    img_orig = cv2.imread(path)
    if img_orig is None:
        # cv2.imread returns None for a missing or unreadable file; fail here
        # with the offending path instead of an AttributeError further down.
        raise FileNotFoundError(f"Could not read image: {path}")

    img_resized = resizeImage(img_orig)
    img_gray = getBlueChannel(img_resized)
    h, k, r = detectPetriDish(file_name)

    if h == -1 and k == -1 and r == -1:
        # No bounds recorded for this image: nothing can be scored.
        return None

    img_equalized = histogramEqualization(img_gray, h, k, r)
    img_denoised = denoise(img_equalized)

    # TODO: Sharpen image?
    return detectCircles(img_denoised, img_gray, h, k, r, file_name, sensitivity, nhood, upperthresh, accumulator, min_radius, max_radius)

# Main Program

In [10]:
# Folder holding the sample images and their JSON annotations.  It is
# resolved relative to the current working directory, which is what the
# previous os.path.dirname("__file__") expression (a string literal that
# always evaluates to '') amounted to.
folder = 'Sample Set'

# Absolute paths of every JSON file below the sample folder.  The builtin
# ``dir`` is no longer shadowed by a module-level variable.
json_file = get_files(folder)

In [11]:
# Keep only the annotation files whose 'background' field is 'vague'.
cleaned_data = []
for json_path in json_file:
    with open(json_path, "r") as handle:
        annotation = json.load(handle)
    if annotation['background'] != 'vague':
        continue
    cleaned_data.append(json_path)

In [12]:
# Set to True to display the intermediate images of every pipeline step.
DEBUGGING = False

# Values forwarded to countColonies; the *_LIST entries are swept.
SENSITIVITY = 0
MIN_DIST_LIST = [5]
UPPERTHRESH = 100
ACCUMULATOR_LIST = [7]
MIN_RADIUS_LIST = [3]
MAX_RADIUS_LIST = [3]

# Iterate through all possible combinations of parameters

import itertools
import sys


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is 0."""
    return numerator / denominator if denominator else 0


for params in itertools.product(MIN_DIST_LIST, ACCUMULATOR_LIST, MIN_RADIUS_LIST, MAX_RADIUS_LIST):

    # For each set of parameters, detect colonies in all images
    min_dist, accumulator, min_radius, max_radius = params

    if max_radius < min_radius:
        continue

    total_actual_count = 0
    total_counted = 0
    total_tp = 0
    total_fp = 0
    total_fn = 0

    total_precision = 0
    total_recall = 0
    total_f1 = 0

    # Number of images that actually produced a score; used as the divisor
    # of the averages instead of a hard-coded sample size.
    images_scored = 0

    for file in cleaned_data:
        file_name = os.path.splitext(os.path.basename(file))[0] + ".jpg"
        result = countColonies(folder + "/" + file_name, file_name, SENSITIVITY, min_dist, UPPERTHRESH, accumulator, min_radius, max_radius)

        if result is None:
            # countColonies returns None when no petri-dish bounds exist for
            # the image; report it on stderr so the CSV on stdout stays clean.
            print(f"Skipping {file_name}: no petri dish bounds available", file=sys.stderr)
            continue

        actual_count, counted, tp, fp, fn = result
        images_scored += 1

        # Per-image metrics; an undefined ratio contributes 0 to the totals.
        image_precision = _ratio(tp, tp + fp)
        image_recall = _ratio(tp, tp + fn)
        total_precision += image_precision
        total_recall += image_recall
        total_f1 += _ratio(2 * image_precision * image_recall, image_precision + image_recall)

        total_actual_count += actual_count
        total_counted += counted
        total_tp += tp
        total_fp += fp
        total_fn += fn

    # Macro averages over the scored images (0.00% when none were scored).
    precision = "{:.2%}".format(_ratio(total_precision, images_scored))
    recall = "{:.2%}".format(_ratio(total_recall, images_scored))
    f1 = "{:.2%}".format(_ratio(total_f1, images_scored))

    # P, R, F, ACTUAL, COUNT, TP, FP, FN, MN_DIST, ACCUM, MN_RAD, MX_RAD
    print(f"{precision}, {recall}, {f1}, {str(total_actual_count)}, {str(total_counted)}, {str(total_tp)}, {str(total_fp)}, {str(total_fn)}, {min_dist}, {accumulator}, {min_radius}, {max_radius}")

12442.jpg, 76.19%, 68.57%, 85.71%, 39, 35, 24, 11, 4
12444.jpg, 17.14%, 13.04%, 25.00%, 32, 23, 3, 20, 9
12452.jpg, 66.67%, 50.00%, 100.00%, 17, 18, 9, 9, 0
12454.jpg, 58.82%, 41.67%, 100.00%, 10, 12, 5, 7, 0
12455.jpg, 64.86%, 48.00%, 100.00%, 23, 25, 12, 13, 0
12456.jpg, 29.63%, 17.39%, 100.00%, 6, 23, 4, 19, 0
12457.jpg, 73.91%, 65.38%, 85.00%, 29, 26, 17, 9, 3
12460.jpg, 55.17%, 38.10%, 100.00%, 12, 21, 8, 13, 0
12461.jpg, 62.50%, 45.45%, 100.00%, 10, 11, 5, 6, 0
12463.jpg, 68.57%, 52.17%, 100.00%, 21, 23, 12, 11, 0
12465.jpg, 39.02%, 38.10%, 40.00%, 33, 21, 8, 13, 12
12466.jpg, 71.43%, 55.56%, 100.00%, 27, 27, 15, 12, 0
12470.jpg, 62.50%, 47.62%, 90.91%, 22, 21, 10, 11, 1
12471.jpg, 6.67%, 6.25%, 7.14%, 29, 16, 1, 15, 13
12475.jpg, 54.12%, 71.88%, 43.40%, 62, 32, 23, 9, 30
12476.jpg, 80.00%, 66.67%, 100.00%, 24, 27, 18, 9, 0
12478.jpg, 53.33%, 36.36%, 100.00%, 17, 22, 8, 14, 0
12479.jpg, 75.00%, 79.41%, 71.05%, 45, 34, 27, 7, 11
12480.jpg, 33.33%, 28.57%, 40.00%, 20, 14, 4, 10, 6
