In [1]:
# Using OpenCV template matching to find each individual market line
import cv2
import numpy as np
import pytesseract as tess
from PIL import Image as im


In [2]:
# Importing images
market_img_path = 'cache/buy-orders/green_wood.png'
order_img_path = 'resources/order_green_wood.png'

market_img = cv2.imread(market_img_path)
order_img = cv2.imread(order_img_path)

# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise only surface later as an obscure matchTemplate error.
if market_img is None:
    raise FileNotFoundError(f'Could not read market image: {market_img_path}')
if order_img is None:
    raise FileNotFoundError(f'Could not read order template image: {order_img_path}')

# Will be filled with images found
found_imgs = []

In [3]:
# # Display market image for debugging
# cv2.imshow('Market', order_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [4]:
# Searching for orders in the market image

# OpenCV offers 6 comparison methods:
# TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
# A side-by-side comparison of them is in the tutorial:
# https://docs.opencv.org/master/d4/dc6/tutorial_py_template_matching.html
match_method = cv2.TM_CCOEFF_NORMED
result = cv2.matchTemplate(market_img, order_img, match_method)

In [5]:
# Minimum match score a location must reach to count as a match
threshold = 0.75

# Template dimensions (shape is rows/height first, then columns/width);
# used as the size of the box around each match
h, w = order_img.shape[:2]

# Row (y) and column (x) indices of every score at or above the threshold
yloc, xloc = np.nonzero(result >= threshold)

In [6]:
# Drawing a box around each found location on market_img, and adding a crop
# of each match to the found_imgs list.

# Crops are taken from an undrawn snapshot: slicing market_img directly gives
# views that pick up every rectangle drawn on it (including the match's own
# border), which pollutes the images later handed to OCR.
# NOTE: if this cell is re-run, reload market_img first, otherwise the
# snapshot already contains the previously drawn rectangles.
clean_market_img = market_img.copy()

# Emptying the list (same object) so a re-run does not append duplicates
found_imgs.clear()

for (x, y) in zip(xloc, yloc):
    # Plain ints: some OpenCV builds reject numpy integer types in point tuples
    x, y = int(x), int(y)
    found_imgs.append(clean_market_img[y:y+h, x:x+w])
    # (0,255,255) is yellow in OpenCV's BGR channel order; line thickness 2
    cv2.rectangle(market_img, (x, y), (x + w, y + h), (0,255,255), 2)

In [7]:
# # Showing market image after rectangles are added
# cv2.imshow('Market', market_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [8]:
# # Showing one order from the orders found
# cv2.imshow('Order', found_imgs[0])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [13]:
# This whole block is temporary to find best methods
# Getting df from a single image within found images

# Index of the found order image used for this experiment
sample_index = 3

# Factor applied to both dimensions (keeps the aspect ratio) before OCR
upscale_factor = 2

# OpenCV arrays are in BGR channel order while PIL assumes RGB, so the
# channels are swapped before changing the array into a PIL Image object
temp_image = im.fromarray(cv2.cvtColor(found_imgs[sample_index], cv2.COLOR_BGR2RGB))

# Getting the size of the current object.
# Deliberately NOT named w/h: those hold the template dimensions used by the
# cropping cell and must not be overwritten here.
crop_w, crop_h = temp_image.size

# Creating tuple that must be passed into resize function
newsize = (crop_w * upscale_factor, crop_h * upscale_factor)

# Upscaling the image with Bicubic resampling to improve tesseract accuracy
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
temp_image = temp_image.resize(newsize, resample=im.BICUBIC)

# Extracting text from upscaled image
text_df = tess.image_to_data(temp_image, output_type=tess.Output.DATAFRAME)

# Dropping any rows with a null value extracted
text_df = text_df.dropna()


In [16]:
# Iterates through each found image and reads its text with tesseract.

def extract_order_text(order_bgr, scale=2):
    """Run tesseract OCR on one cropped order image.

    Parameters
    ----------
    order_bgr : numpy.ndarray
        Image array in OpenCV's BGR channel order.
    scale : int, optional
        Factor applied to both width and height (aspect ratio preserved)
        before OCR, to improve tesseract accuracy. Defaults to 2.

    Returns
    -------
    pandas.DataFrame
        Output of ``pytesseract.image_to_data`` with every row that contains
        a null value removed.
    """
    # PIL assumes RGB, so swap the channels before converting the array
    order_pil = im.fromarray(cv2.cvtColor(order_bgr, cv2.COLOR_BGR2RGB))

    # Local names only: the global w/h hold the template dimensions
    crop_w, crop_h = order_pil.size

    # Resizing image using Bicubic upscaling
    # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
    order_pil = order_pil.resize((crop_w * scale, crop_h * scale), resample=im.BICUBIC)

    # Extracting text from upscaled image
    ocr_df = tess.image_to_data(order_pil, output_type=tess.Output.DATAFRAME)

    # Dropping any rows with a null value extracted
    return ocr_df.dropna()


# One OCR DataFrame per found image, in the same order as found_imgs.
# text_df is still assigned each pass (it ends up holding the last result).
found_text_dfs = []
for img in found_imgs:
    text_df = extract_order_text(img)
    found_text_dfs.append(text_df)
