In [None]:
import numpy as np
import cv2
from PIL import Image, ImageDraw

# 
# 
def getScaleBar(path, isShowPlot=False, isNegative=False):
    """
    Detect the scale bar by thresholding the image and picking the widest
    dark connected component.

    inputs:
    path: path of the image file
    isShowPlot: boolean to show the intermediate images and the detection or not
    isNegative: takes the negative of the image (for white scale bars) and
        uses a higher threshold (40 instead of 7)

    outputs:
    scalebar_stats: stats of the detected component. When a component is
        found this is the OpenCV stats row
        [x top left corner, y top left corner, width, height, area];
        when nothing is found it is the placeholder list [0, 0, 10, 10].
    isTrueScaleBar: boolean that is True when the detected object is more
        than 10 times wider than it is high, False otherwise (and always
        False when no component was found).
    """
    # Load as 8-bit grayscale; the context manager releases the file handle.
    with Image.open(path) as source_image:
        Im_grayscale = np.array(source_image.convert('L'))

    t = 7
    if isNegative:
        Im_grayscale = 255 - Im_grayscale
        # NOTE: a gamma-corrected copy used to be computed here but was stored
        # under a misspelled name and never used, so the threshold below has
        # always been applied to the plain negative. The dead computation is
        # dropped to keep the detection results unchanged.
        t = 40

    if isShowPlot:
        Image.fromarray(Im_grayscale).show()

    # Binarise into a fresh array (pixels below t -> 0, the rest -> 255)
    # instead of overwriting the grayscale image in place.
    Im_threshold = np.where(Im_grayscale < t, 0, 255).astype(np.uint8)

    if isShowPlot:
        Image.fromarray(Im_threshold).show()

    # Invert so that the dark pixels become the foreground to be labelled.
    img_inv = cv2.bitwise_not(Im_threshold)
    _, thresh = cv2.threshold(img_inv, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    connectivity = 4
    # Returns (number of labels, label matrix, stats matrix, centroids);
    # only the stats matrix is needed. Row 0 describes the background.
    _, _, stats, _ = cv2.connectedComponentsWithStats(thresh, connectivity, cv2.CV_32S)

    print(stats[:, 4])
    print(stats[:, 2])

    if stats.shape[0] < 2:
        # Only the background was labelled: report a placeholder box.
        stats_bar = [0, 0, 10, 10]
        isTrueScaleBar = False
    else:
        # Widest component, explicitly skipping the background row. Excluding
        # it by index (rather than taking the "second largest width") avoids
        # returning the background when a component is as wide as it.
        foreground_widths = stats[1:, cv2.CC_STAT_WIDTH]
        bar_label = 1 + int(np.argmax(foreground_widths))
        stats_bar = stats[bar_label].copy()

        bar_width = stats_bar[cv2.CC_STAT_WIDTH]
        bar_height = stats_bar[cv2.CC_STAT_HEIGHT]
        isTrueScaleBar = bool(bar_width > 10 * bar_height)

    if isShowPlot:
        # Build the rectangle here so it is also defined for the placeholder
        # box (previously this path raised a NameError).
        x, y, w, h = (int(value) for value in stats_bar[:4])
        shape = [(x, y), (x + w, y + h)]
        colour = "green" if isTrueScaleBar else "red"
        with Image.open(path) as Im_color:
            ImageDraw.Draw(Im_color).rectangle(shape, fill=colour, outline=colour)
            Im_color.show()

    return stats_bar, isTrueScaleBar
    
# Folder that holds the original images, and the image to analyse.
path_images = "/Users/vivien.gaillet/Library/CloudStorage/OneDrive-NordAngliaEducation/CS_433_Machine_Learning/original/"
image_filename = "jdm32-001087-2_p_1.jpg"  # white scale bar example 1

# Other sample images (uncomment one to try it):
# image_filename = "jdm32-001249-2_p_1.jpg"  # white scale bar example 1
# image_filename = "inbiocri002281627_p_1.jpg"  # black scale bar example (with legend text)
# image_filename = "jdm32-001458-1_p_1.jpg"  # white scale bar example 3: failure false positive
# image_filename = "anic32-046417_p_1.jpg"  # black scale bar example with large ends

path = f"{path_images}{image_filename}"

# First pass: look for a dark scale bar, showing the intermediate plots.
scalebar_stats, isTrueScaleBar = getScaleBar(path, isShowPlot=True)

# When the first pass fails to detect a scale bar (which is only the case
# with white scale bars), run again with isNegative=True so that the
# detection works on the negative of the image.
if not isTrueScaleBar:
    scalebar_stats, isTrueScaleBar = getScaleBar(path, isShowPlot=True, isNegative=True)

[971813      2      2      3      8   1741      1      1      2     12
     12      3      1      8      4     24      2      1]
[1205    1    1    2    3   75    1    1    2    5    4    3    1    3
    2    8    1    1]
[936325      8      1      2      1      5      3      8      2      1
      2     12     22      1      5      1      2      6      1      1
      3      2      2      1      2      1      1      3      6      4
      1      1      1      1      3      2      4      3     23      1
     65  10705      9      3     11      2   9407      1      1     33
      4      2      2      3     56      1      6     13    207      1
      3      2      8     36      1      3      2      2      5   1023
      1      5    510      2      8      1      2      4    271      1
      3   3572      1      1      2      5   2210      1      3   4217
     14      1      7      2     16      1      1      7      2      1
      1     75      1      3      3      2      1      5      2     