# Data Extraction

## Parsing Y- axis Ticks and Y- vals 
This code parses the x and y axes (horizontal and vertical axes) detection for the y-ticks, calculates the ratio to normalize the figure contents, specifically the bar plots with respect to the y-ticks and finally connects the evaluated y-values with their correspinding legends using clustering algorithm. 
* Note that all the figures recorded have their respective dois as a unique key of identification.

In [1]:
import cv2, imutils, re
import xlsxwriter, json, os
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from matplotlib import rcParams

In [2]:
# Directory of images to run the code on
img_dir = '../AllData/processed_images/Part2'

# Directory to save the output images
save_dir = 'out'

### Using json to parse the captions and dois for each image

In [3]:
with open('../data/urldata.json') as handler:
    urldata = json.load(handler)

In [4]:
with open('../data/doi_data.json') as doihandler:
    doidata = json.load(doihandler)

In [5]:
with open('../data/ocr-image-text.json') as ocrhandler:
    images_text = json.load(ocrhandler)

### Function to detect x-axis and y-axis for a plot

In [6]:
def findMaxConsecutiveOnes(nums) -> int:
    count = maxCount = 0
    
    for i in range(len(nums)):
        if nums[i] == 1:
            count += 1
        else:
            maxCount = max(count, maxCount)
            count = 0
                
    return max(count, maxCount)

In [7]:
def detectAxes(filepath, threshold=None, debug=False):
    if filepath is None:
        return None, None
    
    if threshold is None:
        threshold = 10
    
    image = cv2.imread(filepath)
    height, width, channels = image.shape
    
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    maxConsecutiveOnes = [findMaxConsecutiveOnes(gray[:, idx] < 200) for idx in range(width)]
    start_idx, maxindex, maxcount = 0, 0, max(maxConsecutiveOnes)
    while start_idx < width:
        if abs(maxConsecutiveOnes[start_idx] - maxcount) <= threshold:
            maxindex = start_idx
            break
            
        start_idx += 1
           
    yaxis = (maxindex, 0, maxindex, height)
    
    if debug:
        fig, ax = plt.subplots(1, 2)

        ax[0].imshow(image)

        ax[1].plot(maxConsecutiveOnes, color = 'k')
        ax[1].axhline(y = max(maxConsecutiveOnes) - 10, color = 'r', linestyle = 'dashed')
        ax[1].axhline(y = max(maxConsecutiveOnes) + 10, color = 'r', linestyle = 'dashed')
        ax[1].vlines(x = maxindex, ymin = 0.0, ymax = maxConsecutiveOnes[maxindex], color = 'b', linewidth = 4)

        plt.show()

    maxConsecutiveOnes = [findMaxConsecutiveOnes(gray[idx, :] < 200) for idx in range(height)]
    start_idx, maxindex, maxcount = 0, 0, max(maxConsecutiveOnes)
    while start_idx < height:
        if abs(maxConsecutiveOnes[start_idx] - maxcount) <= threshold:
            maxindex = start_idx
            
        start_idx += 1
            
    cv2.line(image, (0, maxindex), (width, maxindex),  (255, 0, 0), 2)
    xaxis = (0, maxindex, width, maxindex)
    
    if debug:
        rcParams['figure.figsize'] = 15, 8

        fig, ax = plt.subplots(1, 1)
        ax.imshow(image, aspect = 'auto')
        
    return xaxis, yaxis

In [8]:
def getProbableLabels(image, image_text, xaxis, yaxis):
    y_labels = []
    x_labels = []
    legends = []
    y_text_list = []
    
    height, width, channels = image.shape
    
    (x1, y1, x2, y2) = xaxis
    (x11, y11, x22, y22) = yaxis
    
    for text, (textx, texty, w, h) in image_text:
        text = text.strip()
            
        # To the left of y-axis and top of x-axis
        if (np.sign((x2 - x1) * (texty - y1) - (y2 - y1) * (textx - x1)) == -1 and
            np.sign((x22 - x11) * (texty - y11) - (y22 - y11) * (textx - x11)) == 1):
            
            numbers = re.findall(r'^[+-]?\d+(?:\.\d+)?$', text)
            if bool(numbers):
                y_labels.append((text, (textx, texty, w, h)))
            else:
                y_text_list.append((text, (textx, texty, w, h)))
            
        # To the right of y-axis and bottom of x-axis
        elif (np.sign((x2 - x1) * (texty - y1) - (y2 - y1) * (textx - x1)) == 1 and
            np.sign((x22 - x11) * (texty - y11) - (y22 - y11) * (textx - x11)) == -1):
            x_labels.append((text, (textx, texty, w, h)))
            
        # Top of x-axis and to the right of y-axis
        elif (np.sign((x2 - x1) * (texty - y1) - (y2 - y1) * (textx - x1)) == -1 and
            np.sign((x22 - x11) * (texty - y11) - (y22 - y11) * (textx - x11)) == -1):
            
            # Consider non-numeric only for legends
            legends.append((text, (textx, texty, w, h)))
    
    # Get the y-labels by finding the maximum
    # intersections with the sweeping line
    maxIntersection = 0
    maxList = []
    for i in range(x11):
        count = 0
        current = []
        for index, (text, rect) in enumerate(y_labels):
            if lineIntersectsRectX(i, rect):
                count += 1
                current.append(y_labels[index])
                            
        if count > maxIntersection:
            maxIntersection = count
            maxList = current
    
    y_labels_list = maxList.copy()
    
    y_labels = []
    for text, (textx, texty, w, h) in maxList:
        y_labels.append(text)
        
    # Get the x-labels by finding the maximum
    # intersections with the sweeping line
    maxIntersection = 0
    maxList = []
    for i in range(y1, height):
        count = 0
        current = []
        for index, (text, rect) in enumerate(x_labels):
            if lineIntersectsRectY(i, rect):
                count += 1
                current.append(x_labels[index])
                            
        if count > maxIntersection:
            maxIntersection = count
            maxList = current
    
    x_labels = []
    for text, (textx, texty, w, h) in maxList:
        x_labels.append(text)
    
    # Get possible legend text
    # For this, we need to search both top to
    # bottom and also from left to right.
    
    legends_and_numbers = mergeTextBoxes(legends)
    
    legends = []
    for text, (textx, texty, w, h) in legends_and_numbers:
        if not re.search(r'^[+-]?\d+(?:\.\d+)?[%]?$', text):
            legends.append((text, (textx, texty, w, h)))
    
    maxIntersection = 0
    maxList = []
    for i in range(y1):
        count = 0
        current = []
        for index, (text, rect) in enumerate(legends):
            if lineIntersectsRectY(i, rect):
                count += 1
                current.append(legends[index])
                            
        if count > maxIntersection:
            maxIntersection = count
            maxList = current
            
    for i in range(x11, width):
        count = 0
        current = []
        for index, (text, rect) in enumerate(legends):
            if lineIntersectsRectX(i, rect):
                count += 1
                current.append(legends[index])
                            
        if count > maxIntersection:
            maxIntersection = count
            maxList = current
        
    legends = []
    for text, (textx, texty, w, h) in maxList:
        legends.append(text)
    
    return image, x_labels, y_labels, y_labels_list, y_text_list, legends, maxList

In [9]:
def lineIntersectsRectX(candx, rect):
    (x, y, w, h) = rect
    
    if x <= candx <= x + w:
        return True
    else:
        return False
    
def lineIntersectsRectY(candy, rect):
    (x, y, w, h) = rect
    
    if y <= candy <= y + h:
        return True
    else:
        return False

## Getting the Ratio for y-value matching 

Similar to the label detection logic, y-ticks are detected: 

Y-ticks 

*  Check only the numerical boxes which are to the left of y-axis and to the top of x-axis.
*  Run a line sweep from left end of the image to the y-axis position, and check when the sweeping line intersects with the maximum number of numerical boxes.
* The numerical boxes are then used as bounding boxes for calculating the y-ticks.

Ratio:
* Difference between the y-ticks is then calculated.  
* Only consider the mean difference between the y-ticks, rejecting the outliers from the calculated values.
* The final ratio is: <br> ``normalize_ratio := ticks_diff.mean() / y-ticks.mean()``.

In [10]:
def getRatio(path, xaxis, yaxis):
    list_text = []
    list_ticks = []
    
    filepath = img_dir + "/" + path.name
    
    image = cv2.imread(filepath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, channels = image.shape

    image_text = images_text[path.name]
    
    for text, (textx, texty, w, h) in image_text:
        text = text.strip()
                    
        (x1, y1, x2, y2) = xaxis
        (x11, y11, x22, y22) = yaxis
        
        # To the left of y-axis and top of x-axis
        if (np.sign((x2 - x1) * (texty - y1) - (y2 - y1) * (textx - x1)) == -1 and
            np.sign((x22 - x11) * (texty - y11) - (y22 - y11) * (textx - x11)) == 1):
            
            # Consider numeric only for ticks on y-axis
            numbers = re.findall(r'\d+(?:\.\d+)?', text)
            if bool(numbers):
                list_text.append((numbers[0], (textx, texty, w, h)))
                          
    # Get the y-labels by finding the maximum
    # intersections with the sweeping line
    maxIntersection = 0
    maxList = []
    for i in range(x11):
        count = 0
        current = []
        for index, (text, rect) in enumerate(list_text):
            if lineIntersectsRectX(i, rect):
                count += 1
                current.append(list_text[index])
                            
        if count > maxIntersection:
            maxIntersection = count
            maxList = current
    
    # Get list of text and ticks
    list_text = []
    for text, (textx, texty, w, h) in maxList:
        list_text.append(float(text))
        list_ticks.append(float(texty + h))
        
    text_sorted = (sorted(list_text))
    ticks_sorted  = (sorted(list_ticks))
    
    ticks_diff = ([ticks_sorted[i] - ticks_sorted[i-1] for i in range(1, len(ticks_sorted))])
    text_diff = ([text_sorted[i] - text_sorted[i-1] for i in range(1, len(text_sorted))])
    print("[get text-to-tick ratio] ticks_diff: {0}, text_diff: {1}".format(ticks_diff, text_diff))
    
    # Detected text may not be perfect! Remove the outliers.
    ticks_diff = reject_outliers(np.array(ticks_diff), m=1)
    text_diff = reject_outliers(np.array(text_diff), m=1)
    print("[reject_outliers] ticks_diff: {0}, text_diff: {1}".format(ticks_diff, text_diff))
    
    normalize_ratio = np.array(text_diff).mean() / np.array(ticks_diff).mean()

    return text_sorted, normalize_ratio

In [11]:
def lineIntersectsRectX(candx, rect):
    (x, y, w, h) = rect
    
    if x <= candx <= x + w:
        return True
    else:
        return False
    
def lineIntersectsRectY(candy, rect):
    (x, y, w, h) = rect
    
    if y <= candy <= y + h:
        return True
    else:
        return False

In [12]:
def reject_outliers(data, m=1):
    return data[abs(data - np.mean(data)) <= m * np.std(data)]

## Writing to Excel workbook

In [13]:
def addToExcel(dataname, data, row):
    col = 0

    worksheet.write(row, col, dataname)
    for content in data:
        col += 1
        worksheet.write(row, col, content)

## Matching the ratio for final data extraction 

Y-val data:
* The height of each bounding box is calculated by cluster count estimation method. 
* This ratio previously calculated is then used to calculate the <br> ``y_values :=  v_value = normalize_ratio x height of bounding box``.

In [14]:
def mergeRects(contours):
    rects = []
    rectsUsed = []

    # Just initialize bounding rects and set all bools to false
    for cnt in contours:
        rects.append(cv2.boundingRect(cnt))
        rectsUsed.append(False)

    # Sort bounding rects by x coordinate
    def getXFromRect(item):
        return item[0]

    rects.sort(key = getXFromRect)

    # Array of accepted rects
    acceptedRects = []

    # Merge threshold for x coordinate distance
    xThr = 5
    yThr = 5

    # Iterate all initial bounding rects
    for supIdx, supVal in enumerate(rects):
        if (rectsUsed[supIdx] == False):

            # Initialize current rect
            currxMin = supVal[0]
            currxMax = supVal[0] + supVal[2]
            curryMin = supVal[1]
            curryMax = supVal[1] + supVal[3]

            # This bounding rect is used
            rectsUsed[supIdx] = True

            # Iterate all initial bounding rects
            # starting from the next
            for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):

                # Initialize merge candidate
                candxMin = subVal[0]
                candxMax = subVal[0] + subVal[2]
                candyMin = subVal[1]
                candyMax = subVal[1] + subVal[3]

                # Check if x distance between current rect
                # and merge candidate is small enough
                if (candxMin <= currxMax + xThr):

                    if not nearbyRectangle((candxMin, candyMin, candxMax - candxMin, candyMax - candyMin),
                                           (currxMin, curryMin, currxMax - currxMin, curryMax - curryMin), yThr):
                        break

                    # Reset coordinates of current rect
                    currxMax = candxMax
                    curryMin = min(curryMin, candyMin)
                    curryMax = max(curryMax, candyMax)

                    # Merge candidate (bounding rect) is used
                    rectsUsed[subIdx] = True
                else:
                    break

            # No more merge candidates possible, accept current rect
            acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])

    #for rect in acceptedRects:
    #    img = cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
    
    return acceptedRects

In [15]:
def mergeTextBoxes(textboxes):
    rects = []
    rectsUsed = []
    
    # Just initialize bounding rects and set all bools to false
    for box in textboxes:
        rects.append(box)
        rectsUsed.append(False)

    # Sort bounding rects by x coordinate
    def getXFromRect(item):
        return item[1][0]
    
    def getYFromRect(item):
        return item[1][1]

    rects.sort(key = lambda x: (getYFromRect, getXFromRect))

    #print(rects)
    #print("----------------------------------")
    
    # Array of accepted rects
    acceptedRects = []

    # Merge threshold for x coordinate distance
    xThr = 10
    yThr = 0

    # Iterate all initial bounding rects
    for supIdx, supVal in enumerate(rects):
        if (rectsUsed[supIdx] == False):

            # Initialize current rect
            currxMin = supVal[1][0]
            currxMax = supVal[1][0] + supVal[1][2]
            curryMin = supVal[1][1]
            curryMax = supVal[1][1] + supVal[1][3]
            currText = supVal[0]

            # This bounding rect is used
            rectsUsed[supIdx] = True

            # Iterate all initial bounding rects
            # starting from the next
            for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):

                # Initialize merge candidate
                candxMin = subVal[1][0]
                candxMax = subVal[1][0] + subVal[1][2]
                candyMin = subVal[1][1]
                candyMax = subVal[1][1] + subVal[1][3]
                candText = subVal[0]

                # Check if x distance between current rect
                # and merge candidate is small enough
                if (candxMin <= currxMax + xThr):

                    if not nearbyRectangle((candxMin, candyMin, candxMax - candxMin, candyMax - candyMin),
                                           (currxMin, curryMin, currxMax - currxMin, curryMax - curryMin), yThr):
                        break

                    # Reset coordinates of current rect
                    currxMax = candxMax
                    curryMin = min(curryMin, candyMin)
                    curryMax = max(curryMax, candyMax)
                    currText = currText + ' ' + candText
                    
                    # Merge candidate (bounding rect) is used
                    rectsUsed[subIdx] = True
                else:
                    break

            # No more merge candidates possible, accept current rect
            acceptedRects.append([currText, (currxMin, curryMin, currxMax - currxMin, curryMax - curryMin)])

    #for rect in acceptedRects:
    #    img = cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
    
    return acceptedRects

In [16]:
def nearbyRectangle(current, candidate, threshold):
    (currx, curry, currw, currh) = current
    (candx, candy, candw, candh) = candidate
    
    currxmin = currx
    currymin = curry
    currxmax = currx + currw
    currymax = curry + currh
    
    candxmin = candx
    candymin = candy
    candxmax = candx + candw
    candymax = candy + candh
    
    # If candidate is on top, and is close
    if candymax <= currymin and candymax + threshold >= currymin:
        return True
    
    # If candidate is on bottom and is close
    if candymin >= currymax and currymax + threshold >= candymin:
        return True
    
    # If intersecting at the top, merge it
    if candymax >= currymin and candymin <= currymin:
        return True
    
    # If intersecting at the bottom, merge it
    if currymax >= candymin and currymin <= candymin:
        return True
    
    # If intersecting on the sides or is inside, merge it
    if (candymin >= currymin and
        candymin <= currymax and
        candymax >= currymin and
        candymax <= currymax):
        return True
    
    return False

In [17]:
def euclidean(v1, v2):
    return sum((p - q) ** 2 for p, q in zip(v1, v2)) ** .5

def angle_between(p1, p2):
    
    deltaX = p1[0] - p2[0]
    deltaY = p1[1] - p2[1]

    return math.atan2(deltaY, deltaX) / math.pi * 180
    
def RectDist(rectA, rectB):
    (rectAx, rectAy, rectAw, rectAh) = rectA
    (rectBx, rectBy, rectBw, rectBh) = rectB
    
    angle = angle_between((rectAx + rectAw / 2, rectAy + rectAh / 2), (rectBx + rectBw / 2, rectBy + rectBh / 2))
    
    if (angle >= -5 and angle <= 5) or (angle <= -175 and angle >= -185) or (angle >= 175 and angle <= 185):
        if (rectBx > rectAx + rectAw):
            return euclidean([rectAx + rectAw / 2, rectAy + rectAh / 2], [rectBx + rectBw / 2, rectBy + rectBh / 2])
        else:
            return sys.maxsize
    else:
        return sys.maxsize

In [18]:
def getProbableYLabels(image, contours, xaxis, yaxis):
    y_labels = []
    
    height, width, channels = image.shape
    
    (x1, y1, x2, y2) = xaxis
    (x11, y11, x22, y22) = yaxis
    
    # Get the y-labels by finding the maximum
    # intersections with the sweeping line
    maxIntersection = 0
    maxList = []
    for i in range(x11):
        count = 0
        current = []
        for index, rect in enumerate(contours):
            if lineIntersectsRectX(i, rect):
                count += 1
                current.append(contours[index])
                            
        if count > maxIntersection:
            maxIntersection = count
            maxList = current
                    
    return image, maxList

## Saving y-values in our data excel sheet 

In [19]:
import ast, cv2, re, sys
import math
import json

images = []
texts = []

def getYVal(img_dir):
    yValueDict = {}
    for index, path in enumerate(Path(img_dir).iterdir()):
        if path.name.endswith('.png') or path.name.endswith('.jpg'):
            filepath = img_dir + "/" + path.name
            img = cv2.imread(filepath)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            xaxis, yaxis = detectAxes(filepath)
            
            for (x1, y1, x2, y2) in [xaxis]:
                xaxis = (x1, y1, x2, y2)

            for (x1, y1, x2, y2) in [yaxis]:
                yaxis = (x1, y1, x2, y2)

            image_text = images_text[path.name]
            img, x_labels, _, _, _, legends, legendBoxes = getProbableLabels(img, image_text, xaxis, yaxis)
            try:
                list_text, normalize_ratio = getRatio(path, xaxis, yaxis)
                print("[getYVal] legends: {0}".format(legends))
                print("[{0}] path: {1}, ratio: {2}".format(index, path.name, normalize_ratio), end='\n\n')

                # Color detection
                h, w, _ = np.shape(img)

                image = img.reshape((h * w, 3))

                # Remove white and near-by pixels
                threshold = 5
                for r in range(255 - threshold, 256):
                    for g in range(255 - threshold, 256):
                        for b in range(255 - threshold, 256):
                            image = image[np.where((image != [r, g, b]).any(axis = 1))]

                values, counts = np.unique(image, axis = 0, return_counts = True)

                greater_counts = np.where(counts >= 100)
                values, counts = values[greater_counts], counts[greater_counts]

                sort_indices = np.argsort(-counts)
                values, counts = values[sort_indices], counts[sort_indices]

                groups = []
                groupcounts = []

                for idx, value in enumerate(values):
                    grouped = False

                    for groupid, group in enumerate(groups):
                        for member in group:
                            r, g, b = member
                            vr, vg, vb = value

                            if (abs(vr.astype(np.int16) - r.astype(np.int16)) <= 5 and
                                abs(vg.astype(np.int16) - g.astype(np.int16)) <= 5 and
                                abs(vb.astype(np.int16) - b.astype(np.int16)) <= 5):
                                group.append(value)
                                groupcounts[groupid] += counts[idx]
                                grouped = True
                                break

                        if grouped:
                            break

                    if not grouped:
                        groups.append([value])
                        groupcounts.append(counts[idx])

                groupcounts = np.array(groupcounts)
                sort_indices = np.argsort(-groupcounts)
                new_groups = [groups[i] for i in sort_indices]
                groups = new_groups
                
                for i in range(len(groups[:4])):

                    img = cv2.imread(img_dir + "/" + path.name)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                    mask = None
                    for value in groups[i]:
                        COLOR_MIN = np.array([value[0], value[1], value[2]], np.uint8)
                        COLOR_MAX = np.array([value[0], value[1], value[2]], np.uint8)

                        if mask is None:
                            mask = cv2.inRange(img, COLOR_MIN, COLOR_MAX)
                        else:
                            mask = mask | cv2.inRange(img, COLOR_MIN, COLOR_MAX)

                    image = cv2.bitwise_and(img, img, mask = mask)
                    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, (3, 3))

                    edged = cv2.Canny(image, 0, 250)
                    contours, _ = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    contours = [contour for contour in contours if cv2.contourArea(contour) > 0.]

                    # Remove noisy ones!
                    if len(contours) == 0 or len(contours) > 100:
                        continue

                    contours = [cv2.approxPolyDP(contour, 3, True) for contour in contours]

                    rects = mergeRects(contours)
                    textBoxes = []
                    legends = []
                    
                    for rectBox in rects:
                        min_distance = sys.maxsize
                        closestBox = None
                        legendtext = None

                        for text, textBox in legendBoxes:    
                            if RectDist(rectBox, textBox) < min_distance:
                                closestBox = textBox
                                min_distance = RectDist(rectBox, textBox)
                                legendtext = text

                        if closestBox is not None:
                            textBoxes.append(closestBox)

                        if legendtext is not None:
                            legends.append(legendtext)

                    list_len = []
                    
                    for rect in rects:
                        list_len.append(float(rect[3]))

                    # y-values will be a product of the normalize ratio and each length              
                    y_val = [round(l* normalize_ratio, 1) for l in list_len]

                    if path.name in yValueDict:
                        if legends:
                            yValueDict[path.name].append([legends[-1]])
                        else:
                            yValueDict[path.name].append([''])
                            
                        yValueDict[path.name].append(y_val)
                    else:
                        if legends:
                            yValueDict[path.name] = [legends[-1]]
                        else:
                            yValueDict[path.name] = ['']
                            
                        yValueDict[path.name].append(y_val)
                        
                    for box in textBoxes:
                        (x, y, w, h) = box
                        
            except Exception as e:
                print(e)
                continue
            
    return yValueDict

In [20]:
yValueDict = getYVal(img_dir)

[get text-to-tick ratio] ticks_diff: [44.0, 45.0, 47.0, 44.0, 46.0, 44.0], text_diff: [15.0, 15.0, 15.0, 15.0, 15.0, 15.0]
[reject_outliers] ticks_diff: [44. 45. 44. 46. 44.], text_diff: [15. 15. 15. 15. 15. 15.]
[getYVal] legends: ['Aqueous Phase', 'Gaseous Phase', 'Solid Residue']
[0] path: 08813216-Figure1-1.png, ratio: 0.3363228699551569

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['A', 'B']
[1] path: 08813216-Figure4-1.png, ratio: nan



  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


[get text-to-tick ratio] ticks_diff: [34.0, 34.0, 33.0, 34.0, 35.0, 34.0, 33.0, 35.0, 33.0], text_diff: [0.1, 0.09999999999999998, 0.10000000000000003, 0.09999999999999998, 0.09999999999999998, 0.09999999999999998, 0.10000000000000009, 0.09999999999999998, 0.09999999999999998]
[reject_outliers] ticks_diff: [34. 34. 34. 34.], text_diff: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
[getYVal] legends: ['SR', 'HSP', 'MSP', 'WSP']
[2] path: 1-s2.0-S0009250913008336-main-Figure10-1.png, ratio: 0.0029411764705882353

[get text-to-tick ratio] ticks_diff: [33.0, 34.0, 31.0, 34.0, 34.0, 34.0, 31.0, 35.0, 32.0], text_diff: [0.1, 0.09999999999999998, 0.10000000000000003, 0.09999999999999998, 0.09999999999999998, 0.09999999999999998, 0.10000000000000009, 0.09999999999999998, 0.09999999999999998]
[reject_outliers] ticks_diff: [33. 34. 34. 34. 34. 32.], text_diff: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
[getYVal] legends: ['0.9-1.0cm', 'SR', 'HSP', 'MSP', 'WSP']
[3] path: 1-s2.0-S0009250913008336-main-Figure11-1.png,

[get text-to-tick ratio] ticks_diff: [65.0, 189.0], text_diff: [60.0, 20.0]
[reject_outliers] ticks_diff: [ 65. 189.], text_diff: [60. 20.]
[getYVal] legends: ['Conversion', 'Yield of paraffin']
[25] path: 1-s2.0-S0016236115011345-main-Figure2-1.png, ratio: 0.31496062992125984

[get text-to-tick ratio] ticks_diff: [70.0, 70.0], text_diff: [10.0, 10.0]
[reject_outliers] ticks_diff: [70. 70.], text_diff: [10. 10.]
[getYVal] legends: ['Raw Algae', 'Crude Lipids', 'Residual Solids', 'AC-Purified Lipids', 'K10-Purified Lipids']
[26] path: 1-s2.0-S0016236116302447-main-Figure1-1.png, ratio: 0.14285714285714285

[get text-to-tick ratio] ticks_diff: [43.0, 43.0, 44.0, 44.0, 42.0, 44.0, 42.0, 45.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [43. 43. 44. 44. 44.], text_diff: [10. 10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['Raw Algae', 'Residual Solids', 'Crude Lipids', 'AC-Purified Lipids']
[27] path: 1-s2.0-S0016236116302447-main-Figure2-1.p

[get text-to-tick ratio] ticks_diff: [51.0, 50.0, 51.0, 51.0, 52.0, 49.0, 51.0, 51.0, 50.0, 50.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [51. 50. 51. 51. 51. 51. 50. 50.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[49] path: 1-s2.0-S0016236118304496-main-Figure5-1.png, ratio: 0.09876543209876543

[get text-to-tick ratio] ticks_diff: [94.0, 93.0, 93.0, 94.0, 94.0], text_diff: [20.0, 20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [94. 94. 94.], text_diff: [20. 20. 20. 20. 20.]
[getYVal] legends: ['Chlorella', 'Pavlova Spirulina platensis', 'cruentum Isochrysis Entermorpha Nannochloropsis', '& Nannochloropsis Nannochloropsis Nannochloropsis Nannochloropsist Nannochloropsis Nannochloropsis Spirulina', 'Porphyridium Spirulinad']
[50] path: 1-s2.0-S0016236118304502-main-Figure4-1.png, ratio: 0.2127659574468085

[get text-to-tick ratio] ticks_diff: [66.0, 63.0, 132.0, 66.0], text_diff: [0.0, 0.2, 0.39999999999

[get text-to-tick ratio] ticks_diff: [39.0, 41.0, 39.0, 40.0, 40.0, 40.0, 40.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [40. 40. 40. 40.], text_diff: [10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['80 (%)', 'g of esterification', 'capable', 'oil', 'N crude', 'of', 'Percentage']
[73] path: 1-s2.0-S0016236119307495-main-Figure1-1.png, ratio: 0.25

[get text-to-tick ratio] ticks_diff: [69.0, 71.0], text_diff: [10.0, 10.0]
[reject_outliers] ticks_diff: [69. 71.], text_diff: [10. 10.]
[getYVal] legends: ['HFO 42.5', 'DAO 418']
[74] path: 1-s2.0-S0016236119308063-main-Figure4-1.png, ratio: 0.14285714285714285

[get text-to-tick ratio] ticks_diff: [27.0, 26.0, 26.0, 26.0, 26.0, 25.0, 28.0, 25.0, 27.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [27. 26. 26. 26. 26. 27.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[75] path: 1-s2.0-S0016236119312037-main-Figure1-1.png, ratio: 0.189873

[get text-to-tick ratio] ticks_diff: [257.0], text_diff: [2.95]
[reject_outliers] ticks_diff: [257.], text_diff: [2.95]
[getYVal] legends: ['Xylose', '4-0-Methyl glucuronic acid', 'Glucose', 'Galactose', 'Arabinose']
[101] path: 1-s2.0-S014139101630026X-main-Figure2-1.png, ratio: 0.011478599221789884

[get text-to-tick ratio] ticks_diff: [91.0, 192.0, 48.0], text_diff: [0.05, 0.75, 0.3999999999999999]
[reject_outliers] ticks_diff: [91.], text_diff: [0.4]
[getYVal] legends: []
[102] path: 1-s2.0-S014139101630026X-main-Figure3-1.png, ratio: 0.004395604395604395

[get text-to-tick ratio] ticks_diff: [53.0, 51.0, 207.0], text_diff: [20.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [53. 51.], text_diff: [5. 5.]
[getYVal] legends: ['bezene derivatives', 'phenol derivatives', 'alkancs', 'othcr oxy-compounds', 'nitro-compounds']
[103] path: 1-s2.0-S016523701300123X-main-Figure4-1.png, ratio: 0.09615384615384616

[get text-to-tick ratio] ticks_diff: [60.0, 61.0, 63.0, 61.0, 62.0, 61.0, 62.0, 61.0,

[get text-to-tick ratio] ticks_diff: [27.0, 25.0, 26.0, 25.0, 27.0, 26.0, 25.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [26. 26.], text_diff: [10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['I Char', 'Gas', 'Total oil']
[127] path: 1-s2.0-S0196890416301984-main-Figure1-1.png, ratio: 0.38461538461538464

[get text-to-tick ratio] ticks_diff: [54.0, 163.0, 53.0], text_diff: [20.0, 60.0, 20.0]
[reject_outliers] ticks_diff: [54. 53.], text_diff: [20. 20.]
[getYVal] legends: ['3.2MPa', '5.8MPa', '9.7MPa', '21.2MPa', '32.9MIPa']
[128] path: 1-s2.0-S0196890416306021-main-Figure3-1.png, ratio: 0.37383177570093457

[get text-to-tick ratio] ticks_diff: [54.0], text_diff: [20.0]
[reject_outliers] ticks_diff: [54.], text_diff: [20.]
[getYVal] legends: ['6.3MPa', '7.2MPa', '9.7MPa', '16.1MPa', '20.3MPa', '100 I']
[129] path: 1-s2.0-S0196890416306021-main-Figure4-1.png, ratio: 0.37037037037037035

[get text-to-tick ratio] ticks_diff: [55.0, 163.0], text_

[get text-to-tick ratio] ticks_diff: [103.0], text_diff: [10.0]
[reject_outliers] ticks_diff: [103.], text_diff: [10.]
[getYVal] legends: ['Glucose', 'Glycine']
[154] path: 1-s2.0-S0196890417312049-main-Figure2-1.png, ratio: 0.0970873786407767

[get text-to-tick ratio] ticks_diff: [85.0, 84.0, 44.0], text_diff: [5.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [85. 84.], text_diff: [10. 10.]
[getYVal] legends: ['Ethanol', 'Acetate', 'Propionate', 'Butyrate', 'Valerate', 'Caproate', 'Total SMPs']
[155] path: 1-s2.0-S0196890417312049-main-Figure4-1.png, ratio: 0.11834319526627218

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Hydrogen', 'Methane']
[156] path: 1-s2.0-S0196890417312049-main-Figure6-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [27.0, 31.0, 30.0, 60.0, 60.0, 31.0, 30.0, 30.0], text_diff: [9.0, 10.0, 10.0, 10.0, 20.0, 20.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [27. 31. 30. 31. 30. 30.]

[get text-to-tick ratio] ticks_diff: [81.0, 39.0, 42.0, 39.0, 39.0, 41.0, 40.0], text_diff: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0]
[reject_outliers] ticks_diff: [39. 42. 39. 39. 41. 40.], text_diff: [1. 1. 1. 1. 1. 1.]
[getYVal] legends: ['Hydrochar yield', 'Approximate equilibrium moisture content', 'Anne 58']
[179] path: 1-s2.0-S0306261913003887-main-Figure3-1.png, ratio: 0.025

[get text-to-tick ratio] ticks_diff: [26.0, 27.0, 25.0, 27.0, 27.0, 27.0, 28.0, 27.0, 26.0, 28.0], text_diff: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
[reject_outliers] ticks_diff: [26. 27. 27. 27. 27. 27. 26.], text_diff: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
[getYVal] legends: []
[180] path: 1-s2.0-S0306261914002840-main-Figure3-1.png, ratio: 0.01871657754010695

[get text-to-tick ratio] ticks_diff: [35.0, 37.0, 33.0, 37.0, 36.0, 37.0, 34.0], text_diff: [0.2, 0.2, 0.19999999999999996, 0.4, 0.19999999999999996, 0.19999999999999996, 78.6]
[reject_outliers] ticks_diff: [35. 37. 37. 36. 37.], text_di

[get text-to-tick ratio] ticks_diff: [24.0, 26.0, 22.0, 24.0, 26.0, 25.0, 25.0, 23.0, 25.0, 25.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [24. 24. 25. 25. 25. 25.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[203] path: 1-s2.0-S0306261919313431-main-Figure5-1.png, ratio: 0.2027027027027027

[get text-to-tick ratio] ticks_diff: [24.0, 24.0, 45.0, 24.0, 23.0, 45.0, 24.0, 22.0], text_diff: [5.0, 5.0, 10.0, 5.0, 5.0, 10.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [24. 24. 24. 23. 24. 22.], text_diff: [5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[204] path: 1-s2.0-S0306261919313431-main-Figure8-1.png, ratio: 0.2127659574468085

[get text-to-tick ratio] ticks_diff: [64.0, 66.0, 65.0, 65.0, 65.0, 65.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [65. 65. 65. 65.], text_diff: [10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['BC-EA', 'BC-THF', 'BC-PE']
[205] path: 1-s2.0-S0306261919313662-main

[get text-to-tick ratio] ticks_diff: [57.0, 57.0, 56.0, 55.0, 56.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [56. 56.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: []
[230] path: 1-s2.0-S0360544214013401-main-Figure2-1.png, ratio: 0.17857142857142858

[get text-to-tick ratio] ticks_diff: [33.0, 32.0, 33.0, 34.0, 33.0, 31.0, 34.0, 33.0, 32.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [33. 32. 33. 33. 33. 32.], text_diff: [10. 10. 10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['Diesel-insolvable', 'Diesel-solvable']
[231] path: 1-s2.0-S0360544215000511-main-Figure6-1.png, ratio: 0.30612244897959184

[get text-to-tick ratio] ticks_diff: [56.0, 174.0], text_diff: [60.0, 20.0]
[reject_outliers] ticks_diff: [ 56. 174.], text_diff: [60. 20.]
[getYVal] legends: ['light oil+gas', 'solid residue', 'heavy oil']
[232] path: 1-s2.0-S0360544215004429-main-Figure8-1.png, ratio: 0.34782608695652173

[get t

[get text-to-tick ratio] ticks_diff: [92.0, 92.0], text_diff: [20.0, 20.0]
[reject_outliers] ticks_diff: [92. 92.], text_diff: [20. 20.]
[getYVal] legends: ['R, 50%, R, 0.1 g/mL, t 30 Bio-oil', 'min.']
[251] path: 1-s2.0-S0360544218302147-main-Figure6-1.png, ratio: 0.21739130434782608

[get text-to-tick ratio] ticks_diff: [43.0, 41.0, 42.0, 84.0, 41.0, 42.0], text_diff: [10.0, 10.0, 20.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [43. 41. 42. 41. 42.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: ['Esters', 'Nitrogenous compounds', 'Acids', 'Other compounds']
[252] path: 1-s2.0-S0360544218302147-main-Figure7-1.png, ratio: 0.23923444976076558

[get text-to-tick ratio] ticks_diff: [52.0, 53.0, 52.0, 54.0, 56.0, 52.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [52. 53. 52. 54. 52.], text_diff: [10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['effect(%)', '&n Synergistic']
[253] path: 1-s2.0-S0360544218303803-main-Figure10-1.png, ratio: 0.19011

[get text-to-tick ratio] ticks_diff: [36.0, 36.0, 37.0, 36.0, 35.0, 35.0, 36.0, 35.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [36. 36. 36. 36.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: ['Experimental results', '20 Synergistic effect']
[277] path: 1-s2.0-S0360544219302749-main-Figure7-1.png, ratio: 0.1388888888888889

[get text-to-tick ratio] ticks_diff: [49.0, 101.0, 49.0, 51.0], text_diff: [20.0, 20.0, 40.0, 20.0]
[reject_outliers] ticks_diff: [49. 49. 51.], text_diff: [20. 20. 20.]
[getYVal] legends: ['Conversion rate', 'I', 'I', 'a', 'C']
[278] path: 1-s2.0-S0360544219302865-main-Figure3-1.png, ratio: 0.4026845637583893

[get text-to-tick ratio] ticks_diff: [51.0, 51.0, 50.0, 51.0, 51.0], text_diff: [20.0, 20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [51. 51. 51. 51.], text_diff: [20. 20. 20. 20. 20.]
[getYVal] legends: ['Conversion rate', 'I', 'I', 'b', 'b']
[279] path: 1-s2.0-S0360544219302865-main-Figure4-1.png, 

[get text-to-tick ratio] ticks_diff: [80.0, 165.0], text_diff: [40.0, 20.0]
[reject_outliers] ticks_diff: [ 80. 165.], text_diff: [40. 20.]
[getYVal] legends: ['no catalyst', 'Fe,03', 'catalyst', 'catalyst']
[303] path: 1-s2.0-S0378382015301028-main-Figure5-1.png, ratio: 0.24489795918367346

[get text-to-tick ratio] ticks_diff: [60.0, 29.0, 32.0, 60.0, 30.0], text_diff: [10.0, 20.0, 10.0, 10.0, 20.0]
[reject_outliers] ticks_diff: [29. 32. 30.], text_diff: [10. 10. 10.]
[getYVal] legends: ['C1', 'C3', 'C6']
[304] path: 1-s2.0-S0378382015302022-main-Figure6-1.png, ratio: 0.32967032967032966

[get text-to-tick ratio] ticks_diff: [43.0, 44.0, 44.0, 45.0, 42.0], text_diff: [20.0, 20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [43. 44. 44.], text_diff: [20. 20. 20. 20. 20.]
[getYVal] legends: ['other %', '% Zn', '% Fe', '% Mg', '% P', '% N', '0%H']
[305] path: 1-s2.0-S0378382015302046-main-Figure5-1.png, ratio: 0.45801526717557256

[get text-to-tick ratio] ticks_diff: [], text_diff: [

[get text-to-tick ratio] ticks_diff: [84.0, 82.0, 166.0], text_diff: [40.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [84. 82.], text_diff: [20. 20.]
[getYVal] legends: ['Loss', 'Extracted by ashing, dissolution of alkaline', 'salt in distilled water, filtration,', 'crystallization and drying', 'Seperated by dilution/washing with', 'methanol, centrifugation and drying']
[332] path: 1-s2.0-S0378382018310336-main-Figure5-1.png, ratio: 0.24096385542168675

[get text-to-tick ratio] ticks_diff: [178.0, 179.0], text_diff: [25.0, 25.0]
[reject_outliers] ticks_diff: [178. 179.], text_diff: [25. 25.]
[getYVal] legends: ['Na3PO4']
[333] path: 1-s2.0-S0378382018310336-main-Figure9-1.png, ratio: 0.1400560224089636

[get text-to-tick ratio] ticks_diff: [48.0, 47.0, 48.0, 50.0, 46.0, 48.0, 47.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [48. 47. 48. 48. 47.], text_diff: [5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[334] path: 1-s2.0-S037838201831275X-main-Figure3-

[get text-to-tick ratio] ticks_diff: [26.0, 26.0, 26.0, 24.0, 27.0, 26.0, 25.0, 27.0, 26.0, 25.0], text_diff: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
[reject_outliers] ticks_diff: [26. 26. 26. 26. 25. 26. 25.], text_diff: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
[getYVal] legends: ['CH4', 'C02', 'H2', 'CO', 'GE']
[358] path: 1-s2.0-S0896844616300651-main-Figure7-1.png, ratio: 0.019444444444444445

[get text-to-tick ratio] ticks_diff: [27.0, 27.0, 26.0, 28.0, 26.0, 27.0, 27.0, 28.0, 26.0, 27.0], text_diff: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
[reject_outliers] ticks_diff: [27. 27. 27. 27. 27.], text_diff: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
[getYVal] legends: ['CH4', 'C02', 'H2', 'CO', 'GE']
[359] path: 1-s2.0-S0896844616300651-main-Figure9-1.png, ratio: 0.018518518518518517

[get text-to-tick ratio] ticks_diff: [38.0, 41.0, 39.0, 39.0, 40.0, 39.0, 41.0, 38.0], text_diff: [0.2, 0.2, 0.19999999999999996, 0.20000000000000007, 0.19999999999999996, 0.19999999

[get text-to-tick ratio] ticks_diff: [39.0, 40.0, 40.0, 40.0, 39.0], text_diff: [0.2, 0.2, 0.19999999999999996, 0.20000000000000007, 0.19999999999999996]
[reject_outliers] ticks_diff: [40. 40. 40.], text_diff: [0.2 0.2]
[getYVal] legends: ['aromatic', 'aliphatic', 'unreacted C12 12']
[383] path: 1-s2.0-S0920586118306837-main-Figure6-1.png, ratio: 0.005

[get text-to-tick ratio] ticks_diff: [30.0, 61.0, 120.0, 64.0, 29.0], text_diff: [10.0, 20.0, 40.0, 20.0, 10.0]
[reject_outliers] ticks_diff: [30. 61. 64. 29.], text_diff: [10. 20. 20. 10.]
[getYVal] legends: ['(mmoles)', 'production', 'H,']
[384] path: 1-s2.0-S0920586119305371-main-Figure11-1.png, ratio: 0.32608695652173914

[get text-to-tick ratio] ticks_diff: [41.0, 41.0, 43.0], text_diff: [20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [41. 41.], text_diff: [20. 20. 20.]
[getYVal] legends: ['AD', 'HTL', 'max max-min range']
[385] path: 1-s2.0-S0921344914000974-main-Figure2-1.png, ratio: 0.4878048780487805

[get text-to-tick ratio] 

[get text-to-tick ratio] ticks_diff: [144.0, 49.0], text_diff: [100.0, 300.0]
[reject_outliers] ticks_diff: [144.  49.], text_diff: [100. 300.]
[getYVal] legends: ['J-(Hydroxymethyl)-furtural', '2. levulinic acid', '3.2-pentanone,4-hydroxy-4-methyl-', '4. 1-methyl-3-propylbenzene', '5. 1-tetralone', '6. benzofuran,2-methyl-', '7. 2-furancarboxaldehyde,5-methyl.', '8. isocaproic acid', '9 diisooctyl ester', '10. dimethyl sebacate', '7 11. C-17 n-alkane', '12. iminodiacetonitrile']
[404] path: 1-s2.0-S0956053X1500361X-main-Figure2-1.png, ratio: 2.0725388601036268

[get text-to-tick ratio] ticks_diff: [65.0, 63.0, 63.0, 63.0, 64.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [63. 63. 63. 64.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: []
[405] path: 1-s2.0-S0956053X16303658-main-Figure1-1.png, ratio: 0.15810276679841898

[get text-to-tick ratio] ticks_diff: [43.0, 41.0, 89.0], text_diff: [40.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [43. 41.], tex

[get text-to-tick ratio] ticks_diff: [69.0, 210.0], text_diff: [30.0, 10.0]
[reject_outliers] ticks_diff: [ 69. 210.], text_diff: [30. 10.]
[getYVal] legends: ['CA yield of pretreated samples', 'CA yield of untreated sample']
[429] path: 1-s2.0-S0959652617330123-main-Figure3-1.png, ratio: 0.14336917562724014

[get text-to-tick ratio] ticks_diff: [52.0, 50.0, 52.0, 51.0], text_diff: [20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [52. 52. 51.], text_diff: [20. 20. 20. 20.]
[getYVal] legends: ['Catl0%', 'Cal20%', 'Cat30%', "T'emp270", 'Temp290', 'Temp310', 'Temp330', 'Temp350(Ly-stagel)', '-stage2 D', 'Ly-stagel', 'w-stage2']
[430] path: 1-s2.0-S0959652618327124-main-Figure3-1.png, ratio: 0.38709677419354843

[get text-to-tick ratio] ticks_diff: [57.0, 57.0, 57.0, 57.0], text_diff: [20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [57. 57. 57. 57.], text_diff: [20. 20. 20. 20.]
[getYVal] legends: ['Biocrude', 'Biochar', 'Gas']
[431] path: 1-s2.0-S0959652619312260-main-Figure4

[get text-to-tick ratio] ticks_diff: [39.0, 43.0, 39.0, 41.0, 40.0, 81.0, 41.0, 40.0, 40.0], text_diff: [10.0, 10.0, 10.0, 20.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [39. 43. 39. 41. 40. 41. 40. 40.], text_diff: [10. 10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['Biocrude', 'Solid', 'Gas', 'Extract', 'Others']
[457] path: 1-s2.0-S0960148119313230-main-Figure2-1.png, ratio: 0.2476780185758514

[get text-to-tick ratio] ticks_diff: [69.0, 216.0], text_diff: [25.0, 0.0]
[reject_outliers] ticks_diff: [ 69. 216.], text_diff: [25.  0.]
[getYVal] legends: ['Velocity', 'Efficiency of nitrogen recovery', 'Efficiency of paddlewheel']
[458] path: 1-s2.0-S0960148119316428-main-Figure2-1.png, ratio: 0.08771929824561403

[get text-to-tick ratio] ticks_diff: [66.0, 211.0], text_diff: [150.0, 0.0]
[reject_outliers] ticks_diff: [ 66. 211.], text_diff: [150.   0.]
[getYVal] legends: ['Velocity', 'Efficiency of paddlewheel', 'Efficiency of N recovery']
[459] path: 1-s2.0-S0960148

[get text-to-tick ratio] ticks_diff: [26.0, 27.0, 25.0, 27.0, 27.0, 26.0, 26.0, 28.0, 26.0], text_diff: [0.1, 0.09999999999999998, 0.10000000000000003, 0.09999999999999998, 0.09999999999999998, 0.09999999999999998, 0.10000000000000009, 0.09999999999999998, 0.09999999999999998]
[reject_outliers] ticks_diff: [26. 27. 27. 27. 26. 26. 26.], text_diff: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
[getYVal] legends: ['+5 g/L Glucose', 'g/L Glucose', '+3 g/L Glucose', 'g/L Glucose', '+1 g/L Glucose', 'No Glucose']
[483] path: 1-s2.0-S0960852413004422-main-Figure5-1.png, ratio: 0.0037837837837837837

[get text-to-tick ratio] ticks_diff: [35.0, 35.0, 35.0, 35.0, 35.0, 36.0, 33.0, 36.0, 36.0, 35.0], text_diff: [0.1, 0.1, 0.09999999999999998, 0.10000000000000003, 0.09999999999999998, 0.09999999999999998, 0.09999999999999998, 0.10000000000000009, 0.09999999999999998, 0.09999999999999998]
[reject_outliers] ticks_diff: [35. 35. 35. 35. 35. 35.], text_diff: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
[getYVal] legend

[get text-to-tick ratio] ticks_diff: [54.0, 57.0], text_diff: [10.0, 10.0]
[reject_outliers] ticks_diff: [54. 57.], text_diff: [10. 10.]
[getYVal] legends: ['1:3', '1:6', '1:12']
[509] path: 1-s2.0-S0960852414012358-main-Figure2.3-1.png, ratio: 0.18018018018018017

[get text-to-tick ratio] ticks_diff: [37.0, 36.0, 37.0, 35.0, 37.0, 37.0, 37.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [37. 36. 37. 37. 37. 37.], text_diff: [5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: ['Thermal', 'IN K2CO3', 'IN KOH']
[510] path: 1-s2.0-S0960852414012358-main-Figure2.4-1.png, ratio: 0.13574660633484162

[get text-to-tick ratio] ticks_diff: [46.0, 92.0, 46.0, 46.0], text_diff: [10.0, 10.0, 20.0, 10.0]
[reject_outliers] ticks_diff: [46. 46. 46.], text_diff: [10. 10. 10.]
[getYVal] legends: ["250'C", "280'C", "300'C"]
[511] path: 1-s2.0-S0960852414012358-main-Figure3.1-1.png, ratio: 0.21739130434782608

[get text-to-tick ratio] ticks_diff: [44.0, 42.0, 45.0, 41.0, 45.0, 42.0

[get text-to-tick ratio] ticks_diff: [138.0], text_diff: [40.0]
[reject_outliers] ticks_diff: [138.], text_diff: [40.]
[getYVal] legends: ['Fast HTL', 'Isothermal HTL', 'I']
[533] path: 1-s2.0-S0960852416300918-main-Figure1-1.png, ratio: 0.2898550724637681

[get text-to-tick ratio] ticks_diff: [23.0, 22.0, 23.0, 24.0, 23.0, 23.0, 23.0, 25.0, 23.0, 23.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [23. 23. 23. 23. 23. 23. 23.], text_diff: [10. 10. 10. 10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['Energy in bio-char', 'Energy in bio-oil', 'Energy in gas']
[534] path: 1-s2.0-S0960852416301833-main-Figure2-1.png, ratio: 0.43478260869565216

[get text-to-tick ratio] ticks_diff: [23.0, 22.0, 25.0, 22.0, 23.0, 22.0, 24.0, 25.0, 23.0, 23.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [23. 23. 24. 23. 23.], text_diff: [10. 10. 10. 10. 10. 10. 10. 10. 10. 10.]
[getYVal] legen

[get text-to-tick ratio] ticks_diff: [29.0, 29.0, 30.0, 30.0, 30.0, 30.0], text_diff: [50.0, 50.0, 50.0, 50.0, 50.0, 50.0]
[reject_outliers] ticks_diff: [30. 30. 30. 30.], text_diff: [50. 50. 50. 50. 50. 50.]
[getYVal] legends: ['072h', '0120 h', '240 h']
[558] path: 1-s2.0-S0960852416314055-main-Figure2-1.png, ratio: 1.6666666666666667

[get text-to-tick ratio] ticks_diff: [34.0, 35.0, 34.0, 36.0], text_diff: [20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [34. 35. 34.], text_diff: [20. 20. 20. 20.]
[getYVal] legends: ['ICOD reduction (%)']
[559] path: 1-s2.0-S0960852416314055-main-Figure3-1.png, ratio: 0.5825242718446602

[get text-to-tick ratio] ticks_diff: [35.0, 35.0, 37.0, 37.0, 35.0, 38.0], text_diff: [20.0, 20.0, 20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [35. 35. 37. 37. 35.], text_diff: [20. 20. 20. 20. 20. 20.]
[getYVal] legends: ['Biocrude', 'Biochar', 'WSC', 'Gases+loss']
[560] path: 1-s2.0-S0960852416314286-main-Figure1-1.png, ratio: 0.558659217877095

[

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[583] path: 1-s2.0-S0960852417316413-main-Figure1-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [46.0, 45.0, 48.0, 46.0, 45.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [46. 45. 46. 45.], text_diff: [5. 5. 5. 5. 5.]
[getYVal] legends: ['180C', '200C', '220C', '240C', '260C', 'Pyrrolidine/pyrrolidinedione']
[584] path: 1-s2.0-S0960852417316413-main-Figure3-1.png, ratio: 0.10989010989010989

[get text-to-tick ratio] ticks_diff: [92.0, 281.0], text_diff: [60.0, 20.0]
[reject_outliers] ticks_diff: [ 92. 281.], text_diff: [60. 20.]
[getYVal] legends: ['Liquid', 'Hydrochar']
[585] path: 1-s2.0-S0960852417316553-main-Figure1-1.png, ratio: 0.21447721179624665

[get text-to-tick ratio] ticks_diff: [57.0], text_diff: [20.0]
[reject_outliers] ticks_diff: [57.], text_diff: [20.]
[getYVal] legends: ['(%)', 'conversion', 'Liquefaction']
[586

[get text-to-tick ratio] ticks_diff: [35.0, 37.0, 34.0, 38.0, 37.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [35. 37. 37.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: []
[612] path: 1-s2.0-S0960852419307308-main-Figure1-1.png, ratio: 0.2752293577981651

[get text-to-tick ratio] ticks_diff: [36.0, 32.0, 32.0, 34.0, 34.0, 31.0, 32.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [32. 32. 34. 34. 32.], text_diff: [10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: []
[613] path: 1-s2.0-S0960852419307308-main-Figure3-1.png, ratio: 0.30487804878048785

[get text-to-tick ratio] ticks_diff: [62.0, 62.0, 30.0, 33.0, 31.0, 30.0, 31.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [30. 33. 31. 30. 31.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: []
[614] path: 1-s2.0-S0960852419307308-main-Figure4-1.png, ratio: 0.3225806451612903

[get text-to-tick ratio] ticks_diff: [39.0

[get text-to-tick ratio] ticks_diff: [53.0], text_diff: [200.0]
[reject_outliers] ticks_diff: [53.], text_diff: [200.]
[getYVal] legends: ['Solid', 'Hexane', 'TOC water', 'TIC water', 'Gas']
[638] path: 1-s2.0-S0961953412005284-main-Figure6-1.png, ratio: 3.7735849056603774

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['C2-C4', 'CHA', 'H2', 'I']
[639] path: 1-s2.0-S0961953412005284-main-Figure7-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [73.0, 74.0, 71.0, 74.0], text_diff: [10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [73. 74. 74.], text_diff: [10. 10. 10. 10.]
[getYVal] legends: ['SR', 'Bio oil', 'Carbon in AP']
[640] path: 1-s2.0-S0961953413004443-main-Figure2-1.png, ratio: 0.13574660633484162

[get text-to-tick ratio] ticks_diff: [29.0, 29.0, 186.0], text_diff: [175.0, 100.0, 100.0]
[reject_outliers] ticks_diff: [29. 29.], text_diff: [100. 100.]
[getYVal] legends: ['Chloride', 'Nitrate

[get text-to-tick ratio] ticks_diff: [54.0, 57.0, 54.0, 55.0], text_diff: [20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [54. 54. 55.], text_diff: [20. 20. 20. 20.]
[getYVal] legends: ['Aromatics and short alkenes', 'Long chain alkenes', 'DOxygenated hydrocarbons']
[661] path: 1-s2.0-S0961953417300120-main-Figure4-1.png, ratio: 0.36809815950920244

[get text-to-tick ratio] ticks_diff: [73.0, 74.0, 73.0, 74.0, 73.0, 72.0], text_diff: [50.0, 50.0, 50.0, 50.0, 50.0, 50.0]
[reject_outliers] ticks_diff: [73. 73. 73.], text_diff: [50. 50. 50. 50. 50. 50.]
[getYVal] legends: ['Oil C', 'Oil B', 'Oil']
[662] path: 1-s2.0-S0961953417300399-main-Figure3-1.png, ratio: 0.684931506849315

[get text-to-tick ratio] ticks_diff: [20.0, 18.0, 18.0, 19.0, 35.0, 36.0, 19.0, 18.0, 17.0, 20.0], text_diff: [9.0, 0.0, 20.0, 20.0, 0.0, 20.0, 0.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [20. 18. 18. 19. 19. 18. 17. 20.], text_diff: [ 9. 20. 20. 20. 20. 20. 20.]
[getYVal] legends: ['PTE CYH', 'TOL

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Al', 'Ca', 'Cd', 'Cr', 'Cu', 'Fe', 'K', 'Mg', 'Mn', 'Na', 'Ni', 'P', 'Pb', 'Zn']
[686] path: 1-s2.0-S0961953420300386-main-Figure11-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Al', 'Ca', 'Cd', 'Cr', 'Cu', 'Fe', 'K', 'Mg', 'Mn', 'Na', 'Ni', 'P', 'Pb', 'S', 'Zn']
[687] path: 1-s2.0-S0961953420300386-main-Figure12-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[688] path: 1-s2.0-S0961953420300386-main-Figure13-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['350-None', '350-Catalytic', '400-None', '400-Catalytic']
[689] path: 1-s2.0-S0961953420300386-main-Figure3-1.png, ratio: nan



[get text-to-tick ratio] ticks_diff: [31.0, 31.0, 29.0, 31.0, 33.0, 28.0, 31.0, 30.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [31. 31. 31. 31. 30.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: ['550g', '850gr', '1150g']
[712] path: 1-s2.0-S1364032118301461-main-Figure18-1.png, ratio: 0.16233766233766234

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Catalytic Processes', 'Commercial Demo']
[713] path: 1-s2.0-S1364032118301461-main-Figure19-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [45.0, 90.0, 44.0, 45.0, 92.0], text_diff: [39.0, 0.0, 80.0, 280.0, 600.0]
[reject_outliers] ticks_diff: [45. 44. 45.], text_diff: [ 39.   0.  80. 280.]
[getYVal] legends: []
[714] path: 1-s2.0-S1364032118301461-main-Figure20-1.png, ratio: 2.2332089552238807

[get text-to-tick ratio] ticks_diff: [38.0, 40.0, 39.0, 41.0, 81.0, 40.0, 39.0, 40.0], text_diff: [10.0, 10

[get text-to-tick ratio] ticks_diff: [55.0, 216.0], text_diff: [60.0, 15.0]
[reject_outliers] ticks_diff: [ 55. 216.], text_diff: [60. 15.]
[getYVal] legends: ['C18', 'C17', 'C16', 'C15']
[735] path: 1-s2.0-S1381116915301217-main-Figure11-1.png, ratio: 0.2767527675276753

[get text-to-tick ratio] ticks_diff: [66.0, 77.0, 58.0, 67.0, 64.0], text_diff: [0.0, 0.0, 99.0, 0.0, 0.0]
[reject_outliers] ticks_diff: [66. 67. 64.], text_diff: [0. 0. 0. 0.]
[getYVal] legends: ['Phenol', 'Acetic acid', 'Glucose']
[736] path: 1-s2.0-S1383586616313132-main-Figure5-1.png, ratio: 0.0

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Arabinose', 'Xylose', 'Glucose']
[737] path: 1-s2.0-S1383586617320841-main-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [89.0, 180.0], text_diff: [40.0, 20.0]
[reject_outliers] ticks_diff: [ 89. 180.], text_diff: [40. 20.]
[getYVal] legends: ['I I', 'I', '8.0 I', 'I', 'I I', 'I']
[

[get text-to-tick ratio] ticks_diff: [31.0, 34.0, 33.0, 33.0, 34.0, 32.0, 35.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [34. 33. 33. 34. 32.], text_diff: [10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['Bio-Char', 'Methane', 'Algae Oil']
[763] path: 1-s2.0-S2211926414000502-main-Figure5-1.png, ratio: 0.3012048192771084

[get text-to-tick ratio] ticks_diff: [49.0, 48.0, 48.0, 47.0, 47.0], text_diff: [50.0, 50.0, 50.0, 50.0, 50.0]
[reject_outliers] ticks_diff: [48. 48.], text_diff: [50. 50. 50. 50. 50.]
[getYVal] legends: ['Dividends', 'Principal', 'Interest', 'Total Variable Costs']
[764] path: 1-s2.0-S2211926414000502-main-Figure6-1.png, ratio: 1.0416666666666667

[get text-to-tick ratio] ticks_diff: [33.0, 32.0, 34.0, 33.0, 34.0, 33.0, 33.0, 34.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [33. 33. 33. 33.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: ['Dividends', 'Interest', 'Costs']
[

[get text-to-tick ratio] ticks_diff: [45.0, 46.0, 45.0, 44.0, 47.0, 45.0], text_diff: [1.0, 0.5, 0.5, 0.5, 0.5, 2.0]
[reject_outliers] ticks_diff: [45. 46. 45. 45.], text_diff: [1.  0.5 0.5 0.5 0.5]
[getYVal] legends: ['MI', 'MI']
[789] path: 1-s2.0-S2211926416302661-main-Figure6-1.png, ratio: 0.013259668508287293

[get text-to-tick ratio] ticks_diff: [472.0], text_diff: [40.0]
[reject_outliers] ticks_diff: [472.], text_diff: [40.]
[getYVal] legends: ['N2', 'CO2']
[790] path: 1-s2.0-S2211926416307834-main-Figure4-1.png, ratio: 0.0847457627118644

[get text-to-tick ratio] ticks_diff: [114.0], text_diff: [20.0]
[reject_outliers] ticks_diff: [114.], text_diff: [20.]
[getYVal] legends: ['Gas', 'Liquid', 'Soild']
[791] path: 1-s2.0-S2211926416307834-main-Figure5-1.png, ratio: 0.17543859649122806

[get text-to-tick ratio] ticks_diff: [42.0, 45.0, 45.0, 43.0, 45.0, 46.0, 43.0, 44.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [45. 45. 43. 45. 43.

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[815] path: 1-s2.0-S221192641830804X-main-Figure4-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[816] path: 1-s2.0-S221192641830804X-main-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [70.0, 72.0, 68.0, 71.0, 72.0], text_diff: [1.0, 1.0, 1.0, 1.0, 1.0]
[reject_outliers] ticks_diff: [70. 72. 71. 72.], text_diff: [1. 1. 1. 1. 1.]
[getYVal] legends: ['costs', 'chemicals,', 'OFixed costs', 'tax', 'Nutrient recycle']
[817] path: 1-s2.0-S221192641830804X-main-Figure7-1.png, ratio: 0.014035087719298246

[get text-to-tick ratio] ticks_diff: [45.0, 46.0, 46.0, 46.0, 44.0, 46.0, 46.0], text_diff: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
[reject_outliers] ticks_diff: [45. 46. 46. 46. 46. 46.], text_diff: [2. 2. 2. 2. 2. 2. 2.]
[getYVal] legends: ['MFSP', 'Conversio

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[842] path: acs.energyfuels.5b02136-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[843] path: acs.energyfuels.5b02136-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[844] path: acs.energyfuels.5b02962-Figure4-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[845] path: acs.energyfuels.6b00096-Figure2-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Bio-oil', 'Solid residue', 'Gas']
[846] path: acs.energyfuels.6b00201-Figure4-1.png, ratio: nan

[get text-to-tick ratio] ti

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[878] path: acs.energyfuels.7b02994-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[879] path: acs.energyfuels.7b02994-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[880] path: acs.energyfuels.7b03144-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [241.0, 121.0], text_diff: [9.0, 990.0]
[reject_outliers] ticks_diff: [241. 121.], text_diff: [  9. 990.]
[getYVal] legends: ['low water loading', 'medium water loading', 'high water loading']
[881] path: acs.energyfuels.7b03144-Figure6-1.png, ratio: 2.7596685082872927

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['Orga

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[912] path: acs.energyfuels.9b01473-Figure11-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[913] path: acs.energyfuels.9b01473-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: ['others', 'guaiacols', 'diphenols', 'monophenols']
[914] path: acs.energyfuels.9b01501-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [70.0, 70.0], text_diff: [4.0, 4.0]
[reject_outliers] ticks_diff: [70. 70.], text_diff: [4. 4.]
[getYVal] legends: ['Hydrocarbons', 'Esters', 'Acids', 'Phenols&Alcohols', 'Ketones']
[915] path: acs.energyfuels.9b01846-Figure6-1.png, ratio: 0.05714285714285714

[get text-to-tick ratio] ticks_diff: [46.0, 47.0, 46.0, 48.0, 45.0, 48.0], text_diff: [1

[get text-to-tick ratio] ticks_diff: [34.0, 36.0, 33.0, 34.0, 34.0, 37.0, 34.0, 34.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [34. 34. 34. 34. 34.], text_diff: [10. 10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['ABT', 'DCM', 'Acetone', 'MTBE']
[945] path: acs.iecr.9b02442-Figure3-1.png, ratio: 0.29411764705882354

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[946] path: acs.iecr.9b03497-Figure1-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [58.0, 59.0, 57.0, 58.0, 58.0], text_diff: [500.0, 500.0, 500.0, 500.0, 500.0]
[reject_outliers] ticks_diff: [58. 58. 58.], text_diff: [500. 500. 500. 500. 500.]
[getYVal] legends: ['Experimental', 'Correlated']
[947] path: acs.iecr.9b03497-Figure2-1.png, ratio: 8.620689655172415

[get text-to-tick ratio] ticks_diff: [62.0], text_diff: [60.0]
[reject_outliers] ticks_diff: [62.], text_diff: [60.]
[getYVal] leg

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[980] path: acssuschemeng.8b03156-Figure1-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[981] path: acssuschemeng.8b03156-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[982] path: acssuschemeng.8b03156-Figure5-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[983] path: acssuschemeng.8b03841-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [68.0, 139.0], text_diff: [50.0, 25.0]
[reject_outliers] ticks_diff: [ 68. 139.], text_diff: [50. 25.]
[getYVal] legends: ['G', 'L', 'S']
[984] path: acssuschemeng.8b05147-Figure1-1.png, ratio: 0.36231884057971014

[get

[get text-to-tick ratio] ticks_diff: [43.0, 43.0, 41.0, 43.0, 43.0, 43.0, 42.0, 40.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [43. 43. 43. 43. 43. 42.], text_diff: [5. 5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[1008] path: Alper2019_Article_HydrothermalAndSupercriticalEt-Figure7-1.png, ratio: 0.11673151750972761

[get text-to-tick ratio] ticks_diff: [48.0, 48.0, 45.0, 49.0, 47.0, 47.0, 46.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [48. 48. 47. 47. 46.], text_diff: [5. 5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[1009] path: Alper2019_Article_HydrothermalAndSupercriticalEt-Figure8-1.png, ratio: 0.1059322033898305

[get text-to-tick ratio] ticks_diff: [43.0, 46.0, 132.0, 45.0], text_diff: [10.0, 30.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [43. 46. 45.], text_diff: [10. 10. 10.]
[getYVal] legends: []
[1010] path: apj.2353-Figure1-1.png, ratio: 0.22388059701492538

[get text-to-tick ratio] ticks_diff: [222.

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1033] path: c3ra23453c-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [41.0, 40.0, 42.0, 40.0, 39.0, 83.0], text_diff: [20.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [41. 40. 42. 40. 39.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: ['Alkanes', 'Unsaturated bond/heteroatoms (3.0-1.5 ppm)', 'Alcohols. Methylene-Dibenzene (4.4-3.0 ppm)', 'Methoxy, bhydrates (6.0-4.4 ppm)', 'Aromatics/Hetero-Aromatics (8.0-6.0 ppm)']
[1034] path: c3ra46607h-Figure7-1.png, ratio: 0.24752475247524752

[get text-to-tick ratio] ticks_diff: [24.0, 145.0, 24.0, 24.0], text_diff: [10.0, 10.0, 60.0, 10.0]
[reject_outliers] ticks_diff: [24. 24. 24.], text_diff: [10. 10. 10.]
[getYVal] legends: ['Carbon', 'Nitrogen']
[1035] path: C4RA11662C-Figure2-1.png, ratio: 0.4166666666666667

[get text-to-tick ratio] ticks_diff: [60.0], text_diff: [20.0]
[re

[get text-to-tick ratio] ticks_diff: [41.0, 44.0, 39.0, 41.0], text_diff: [12.0, 20.0, 40.0, 20.0]
[reject_outliers] ticks_diff: [41. 41.], text_diff: [20. 20.]
[getYVal] legends: ['Solids', 'Aqueous', 'Oil', 'Gas', 'Loss']
[1059] path: C7SE00090A-Figure2-1.png, ratio: 0.4878048780487805

[get text-to-tick ratio] ticks_diff: [37.0, 35.0, 36.0, 33.0, 38.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [37. 35. 36.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: ['Bio-crude', 'No catalyst', 'Niw', 'NiMo-LL', 'ONiMo-HL']
[1060] path: C7SE00090A-Figure6-1.png, ratio: 0.2777777777777778

[get text-to-tick ratio] ticks_diff: [46.0, 49.0, 47.0, 48.0, 47.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [47. 48. 47.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: ['33.72 33.50 33.32', '34 33.11', 'HHV']
[1061] path: C8RA07090C-Figure2-1.png, ratio: 0.2112676056338028

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1087] path: ef201417e-Figure7-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1088] path: ef201417e-Figure9-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [48.0, 50.0, 99.0, 50.0, 49.0, 49.0], text_diff: [10.0, 10.0, 10.0, 20.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [48. 50. 50. 49. 49.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: ['4.89 3.07']
[1089] path: ef201966w-Figure5-1.png, ratio: 0.2032520325203252

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1090] path: ef300391b-Figure1-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1091] path: ef300391b-Figure2-1.png, ratio

[get text-to-tick ratio] ticks_diff: [26.0, 27.0, 29.0, 27.0, 28.0, 28.0, 28.0, 27.0], text_diff: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[reject_outliers] ticks_diff: [27. 27. 28. 28. 28. 27.], text_diff: [1. 1. 1. 1. 1. 1. 1. 1.]
[getYVal] legends: []
[1122] path: ef5018708-Figure5-1.png, ratio: 0.03636363636363636

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1123] path: ef502574b-Figure6-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1124] path: ef502773w-Figure2-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [39.0, 38.0, 39.0, 39.0], text_diff: [5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [39. 39. 39.], text_diff: [5. 5. 5. 5.]
[getYVal] legends: ['CH4', 'H2']
[1125] path: ef700497d-Figure6-1.png, ratio: 0.1282051282051282

[get text-to-tick ratio] ticks_diff: [80.0, 160.0, 82.0], text_dif

[get text-to-tick ratio] ticks_diff: [27.0, 31.0, 28.0, 31.0, 177.0, 27.0], text_diff: [10.0, 60.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [27. 31. 28. 31. 27.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: []
[1149] path: ep.12490-Figure2-1.png, ratio: 0.3472222222222222

[get text-to-tick ratio] ticks_diff: [31.0, 29.0, 207.0], text_diff: [14.0, 2.0, 2.0]
[reject_outliers] ticks_diff: [31. 29.], text_diff: [2. 2.]
[getYVal] legends: ['SPME', 'DAE']
[1150] path: ep.12490-Figure4-1.png, ratio: 0.06666666666666667

[get text-to-tick ratio] ticks_diff: [27.0, 28.0, 28.0, 28.0, 28.0, 27.0, 28.0, 29.0, 26.0], text_diff: [0.1, 0.1, 0.09999999999999998, 0.10000000000000003, 0.09999999999999998, 0.09999999999999998, 0.09999999999999998, 0.10000000000000009, 0.09999999999999998]
[reject_outliers] ticks_diff: [27. 28. 28. 28. 28. 27. 28.], text_diff: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
[getYVal] legends: ['Liquid', 'Gas']
[1151] path: ep.12713-Figure10-1.png, ratio: 0.003608

[get text-to-tick ratio] ticks_diff: [38.0, 40.0, 37.0, 39.0, 39.0, 38.0, 39.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [38. 39. 39. 38. 39.], text_diff: [10. 10. 10. 10. 10. 10. 10.]
[getYVal] legends: ['No H2', 'High-Pressurel H2']
[1177] path: ie100758s-Figure2-1.png, ratio: 0.25906735751295334

[get text-to-tick ratio] ticks_diff: [57.0, 54.0, 54.0, 55.0], text_diff: [20.0, 20.0, 20.0, 20.0]
[reject_outliers] ticks_diff: [54. 54. 55.], text_diff: [20. 20. 20. 20.]
[getYVal] legends: ['N2', 'C2H6', 'C2H4', 'CH4', '0CO2']
[1178] path: ie100758s-Figure8-1.png, ratio: 0.36809815950920244

[get text-to-tick ratio] ticks_diff: [53.0, 162.0, 54.0], text_diff: [20.0, 60.0, 20.0]
[reject_outliers] ticks_diff: [53. 54.], text_diff: [20. 20.]
[getYVal] legends: ['C2H6', 'BCH4', 'C02']
[1179] path: ie100758s-Figure9-1.png, ratio: 0.37383177570093457

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: 

[get text-to-tick ratio] ticks_diff: [], text_diff: []
[reject_outliers] ticks_diff: [], text_diff: []
[getYVal] legends: []
[1206] path: Onoja2019_Article_OilPalmElaeisGuineensisBiomass-Figure3-1.png, ratio: nan

[get text-to-tick ratio] ticks_diff: [50.0, 51.0, 51.0, 53.0], text_diff: [5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [51. 51.], text_diff: [5. 5. 5. 5.]
[getYVal] legends: []
[1207] path: Onwudili2014_Chapter_HydrothermalGasificationOfBiom-Figure10.10-1.png, ratio: 0.09803921568627451

[get text-to-tick ratio] ticks_diff: [51.0, 51.0, 51.0, 53.0, 51.0, 52.0], text_diff: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
[reject_outliers] ticks_diff: [51. 51. 51. 51. 52.], text_diff: [5. 5. 5. 5. 5. 5.]
[getYVal] legends: []
[1208] path: Onwudili2014_Chapter_HydrothermalGasificationOfBiom-Figure10.11-1.png, ratio: 0.09765625

[get text-to-tick ratio] ticks_diff: [63.0, 62.0, 62.0, 63.0, 61.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [62. 62.], text_diff: [1

[get text-to-tick ratio] ticks_diff: [59.0, 57.0, 60.0, 60.0, 59.0], text_diff: [10.0, 10.0, 10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [59. 60. 60. 59.], text_diff: [10. 10. 10. 10. 10.]
[getYVal] legends: ['THERMAL', 'KOH', 'K ,CO, 2']
[1233] path: Singh2015_Article_HydrothermalLiquefactionOfAgri-Figure5-1.png, ratio: 0.16806722689075632

[get text-to-tick ratio] ticks_diff: [258.0], text_diff: [40.0]
[reject_outliers] ticks_diff: [258.], text_diff: [40.]
[getYVal] legends: ['THERMAL', 'KOH', 'K K,CO,']
[1234] path: Singh2015_Article_HydrothermalLiquefactionOfAgri-Figure6-1.png, ratio: 0.15503875968992248

[get text-to-tick ratio] ticks_diff: [70.0, 72.0, 72.0], text_diff: [10.0, 10.0, 10.0]
[reject_outliers] ticks_diff: [72. 72.], text_diff: [10. 10. 10.]
[getYVal] legends: ['THERMAL', 'KOH', 'K,CO,']
[1235] path: Singh2015_Article_HydrothermalLiquefactionOfAgri-Figure7-1.png, ratio: 0.1388888888888889

[get text-to-tick ratio] ticks_diff: [256.0], text_diff: [40.0]
[reject_out

In [None]:
workbook = xlsxwriter.Workbook('../results/FigureData1.xlsx')

for index, path in enumerate(Path(img_dir).iterdir()):
    if path.name.endswith('.png') or path.name.endswith('.jpg') or path.name.endswith('.jpeg'):
        if path.name in yValueDict:
            filepath = img_dir + "/" + path.name
            print("[{0}] path: {1}".format(index, path.name))
            
            image = cv2.imread(filepath)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            height, width, channels = image.shape
            xaxis, yaxis = detectAxes(filepath)
            y_text = []

            for (x1, y1, x2, y2) in [xaxis]:
                xaxis = (x1, y1, x2, y2)

            for (x1, y1, x2, y2) in [yaxis]:
                yaxis = (x1, y1, x2, y2)
                
            image_text = images_text[path.name]
            image, x_labels, y_labels, y_labels_list, y_text_list, legends, _ = getProbableLabels(image,
                                                                                                  image_text,
                                                                                                  xaxis,
                                                                                                  yaxis)
            
            # Sort bounding rects by y coordinate
            def getYFromRect(item):
                return item[1][1]

            y_labels_list.sort(key = getYFromRect)
            y_text_list.sort(key = getYFromRect, reverse=True)
            
            for text, (textx, texty, w, h) in y_text_list:
                y_text.append(text)

            # Append doi values for each image.
            if path.name[:-16]+'.pdf' in doidata['Dois']:
                doi = doidata['Dois'][path.name[:-16]+'.pdf']
            else:
                doi = ''
             
            # Write to Excel
            worksheet = workbook.add_worksheet()            
            
            addToExcel("doi", doi, 0)
            addToExcel("file name", [path.name], 1)
            addToExcel("x-labels", x_labels, 2)
            addToExcel("y-text", y_text, 3)
            addToExcel("y-labels", y_labels, 4)
            addToExcel("legends", legends, 5)
            
            for col_num, data in enumerate(yValueDict[path.name]):
                print(col_num, data)
                if type(data) != list:
                    worksheet.write(9 + col_num, 0, "Y-values:" + "\n\n" + str(data))
                else:
                    for pos, d in enumerate(data):
                        worksheet.write(9 + col_num, pos, str(d))

            # Print the output here!
            print("file name    :  ", path.name)
            print("doi          :  ", doi)
            print("x-labels     :  ", x_labels)
            print("y-text       :  ", y_text)
            print("y-labels     :  ", y_labels)
            print("legends      :  ", legends)
            print("Y- Values    :  ", ' '.join([str(elem) for elem in yValueDict[path.name]]), end = "\n\n")

            # Insert the image
            worksheet.insert_image('J21', filepath)

# Close the excel workbook!
workbook.close()