In [11]:
# Using OpenCV template matching to find each individual market line
import cv2
import numpy as np
import pytesseract as tess
from PIL import Image as im


In [12]:
# Importing images
market_img = cv2.imread('cache/buy-orders/green_wood.png')
dashes_img = cv2.imread('resources/dashes.png')
order_img = cv2.imread('resources/order_green_wood.png')

# Will be filled with images found
found_orders = []

In [13]:
# # Display market image for debugging
# cv2.imshow('Market', dashes_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [14]:
# Seartching for dashes in market image

# There are 6 comparison methods to choose from:
# TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
# You can see the differences at a glance here:
# https://docs.opencv.org/master/d4/dc6/tutorial_py_template_matching.html
result = cv2.matchTemplate(market_img, dashes_img, cv2.TM_CCOEFF_NORMED)

# Strictness of matching
threshold = 0.95

# Offsets to line box up with order line
# Calculated by hand by finding coords of order boxes when found with open-cv and dash boxes, drawing it out, then subtracting differences.
x_offset = 555
y_offset = 32

# Size of box to draw around matches
w = order_img.shape[1]
h = order_img.shape[0]

# Finding coordinates of where to draw box
yloc, xloc = np.where(result >= threshold)

# Drawing box around each found location, also cropping found boxes and adding to found_orders array
for (x, y) in zip(xloc, yloc):
    
    x -= x_offset
    y -= y_offset
    
    
    top_corner = (x, y)
    bottom_corner = (x+w, y+h)
    
    cv2.rectangle(market_img, top_corner, bottom_corner, (255,0,0), 2)
    found_orders.append(market_img[y:y+h, x:x+w])

In [15]:
len(found_orders)

8

In [16]:
# # Showing market image after rectangles are added
# cv2.imshow('Market', market_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [17]:
# # Showing one order from the orders found
# cv2.imshow('Order', found_orders[0])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [19]:
# This whole block is temporary to find best methods
# Getting df from a single image within found images

# Changing data type from array to PIL Image object
temp_image = im.fromarray(found_orders[1])

# Getting the size of the current object
w, h = temp_image.size

# Upscaling the image at same aspect ratio to improve tesseract accuracy
w *= 2
h *= 2

# Creating tuple that must be passed into resize function
newsize = (w, h)

# Resizing image using Bicubic upscaling
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
temp_image = temp_image.resize(newsize, resample=3)

# Extracting text from upscaled image
text_df = tess.image_to_data(temp_image, output_type=tess.Output.DATAFRAME)

# Dropping any rows with a null value extracted
text_df = text_df.dropna()

text_df


Unnamed: 0,level,page_num,block_num,par_num,line_num,word_num,left,top,width,height,conf,text
4,5,1,1,1,1,1,0,0,2402,8,95.0,
8,5,1,2,1,1,1,24,23,98,95,61.54586,~
9,5,1,2,1,1,2,130,47,13,7,0.0,~
10,5,1,2,1,1,3,156,59,81,24,94.334473,Green
11,5,1,2,1,1,4,245,59,84,24,96.067886,Wood
12,5,1,2,1,1,5,490,30,58,60,58.531807,&
13,5,1,2,1,1,6,818,44,58,51,96.344368,0.53
14,5,1,2,1,1,7,1001,56,8,28,93.60788,I
15,5,1,2,1,1,8,1119,73,21,7,28.199379,=
16,5,1,2,1,1,9,1241,71,21,4,71.622574,=


In [20]:
# The prices seems to always correlate with the left value being above 810
# This may be a problematic solution, as it only gets the first value that's true. By limiting lower end we limit the range in which a false positive can appear.
# As long as this is balanced to still be below where price consistently appears everything should work.

price = text_df.loc[text_df['left'] > 810 , ['text']]

price = float(price.iat[0, 0])

price



0.53

In [21]:
# Iterates through each found image and reads image.

prices = []

for order in found_orders:
    
    # Changing data type from array to PIL Image object
    img = im.fromarray(order)

    # Getting the size of the current object
    w, h = img.size

    # Upscaling the image at same aspect ratio to improve tesseract accuracy
    w *= 2
    h *= 2

    # Creating tuple that must be passed into resize function
    newsize = (w, h)

    # Resizing image using Bicubic upscaling
    # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
    img = img.resize(newsize, resample=3)

    # Extracting text from upscaled image
    text_df = tess.image_to_data(img, output_type=tess.Output.DATAFRAME)
    
    # Extracting price from the dataframe
    price = text_df.loc[text_df['left'] > 810, ['text']]
    
    # Takes extracted price and converts from dataframe to float
    price = float(price.iat[0, 0])
    
    # Adding found price to list of prices
    prices.append(price)

    # Dropping any rows with a null value extracted
    text_df = text_df.dropna()
    

In [26]:
# Ensuring prices list doesn't have any nan values
prices = [price for price in prices if price == price]
prices

[0.53, 0.53, 0.54, 0.54, 0.54, 0.68, 0.69]