In [1]:
# Using OpenCV template matching to find each individual market line
import cv2
import numpy as np
import pytesseract as tess
from PIL import Image as im


In [2]:
# Importing images
market_img = cv2.imread('cache/buy-orders/green_wood.png')
order_img = cv2.imread('resources/order_green_wood.png')

# Will be filled with images found
found_imgs = []

In [3]:
# # Display market image for debugging
# cv2.imshow('Market', order_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [4]:
# Seartching for orders in market image

# There are 6 comparison methods to choose from:
# TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
# You can see the differences at a glance here:
# https://docs.opencv.org/master/d4/dc6/tutorial_py_template_matching.html
result = cv2.matchTemplate(market_img, order_img, cv2.TM_CCOEFF_NORMED)

In [5]:
# Strictness of matching
threshold = 0.65

# Size of box to draw around matches
w = order_img.shape[1]
h = order_img.shape[0]

# Drawing a box around all matching objects?
yloc, xloc = np.where(result >= threshold)

In [6]:
# Drawing box around each found location, also cropping found boxes and adding to found_imgs array
for (x, y) in zip(xloc, yloc):
    cv2.rectangle(market_img, (x, y), (x + w, y + h), (0,255,255), 2)
    found_imgs.append(market_img[y:y+h, x:x+w])

In [7]:
# # Showing market image after rectangles are added
cv2.imshow('Market', market_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [8]:
# # Showing one order from the orders found
# cv2.imshow('Order', found_imgs[0])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [9]:
len(found_imgs)

11

In [10]:
# This whole block is temporary to find best methods
# Getting df from a single image within found images

# Changing data type from array to PIL Image object
temp_image = im.fromarray(found_imgs[0])

# Getting the size of the current object
w, h = temp_image.size

# Upscaling the image at same aspect ratio to improve tesseract accuracy
w *= 2
h *= 2

# Creating tuple that must be passed into resize function
newsize = (w, h)

# Resizing image using Bicubic upscaling
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
temp_image = temp_image.resize(newsize, resample=3)

# Extracting text from upscaled image
text_df = tess.image_to_data(temp_image, output_type=tess.Output.DATAFRAME)

# What to do with that text
text_df = text_df.drop(columns=['page_num'])

# Dropping any rows with a null value extracted
text_df = text_df.dropna()

text_df


Unnamed: 0,level,block_num,par_num,line_num,word_num,left,top,width,height,conf,text
4,5,1,1,1,1,0,0,2402,42,95.0,
8,5,2,1,1,1,24,24,98,94,79.685265,~
9,5,2,1,1,2,131,48,10,6,0.0,~
10,5,2,1,1,3,156,59,81,24,93.16613,Green
11,5,2,1,1,4,245,59,84,24,96.23822,Wood
12,5,2,1,1,5,490,30,58,57,6.822617,¢
13,5,2,1,1,6,818,45,58,49,96.404366,0.53
14,5,2,1,1,7,1001,56,8,28,79.053375,|
15,5,2,1,1,8,1119,73,21,7,12.732323,=
16,5,2,1,1,9,1241,71,21,4,72.891159,=


In [11]:
# This whole block is temporary to find best methods
# Getting df from a single image within found images
# Duplicate of above block, used to compare found data

# Changing data type from array to PIL Image object
temp_image = im.fromarray(found_imgs[1])

# Getting the size of the current object
w, h = temp_image.size

# Upscaling the image at same aspect ratio to improve tesseract accuracy
w *= 2
h *= 2

# Creating tuple that must be passed into resize function
newsize = (w, h)

# Resizing image using Bicubic upscaling
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
temp_image = temp_image.resize(newsize, resample=3)

# Extracting text from upscaled image
text_df2 = tess.image_to_data(temp_image, output_type=tess.Output.DATAFRAME)

# What to do with that text
text_df2 = text_df2.drop(columns=['page_num'])

# Dropping any rows with a null value extracted
text_df2 = text_df2.dropna()

text_df2

Unnamed: 0,level,block_num,par_num,line_num,word_num,left,top,width,height,conf,text
4,5,1,1,1,1,0,0,2402,8,95.0,
8,5,2,1,1,1,31,28,11,14,26.111977,P/
10,5,2,1,2,1,30,27,86,77,49.790382,Pa
11,5,2,1,2,2,157,60,79,22,96.167603,Green
12,5,2,1,2,3,246,60,81,22,96.51091,Wood
13,5,2,1,2,4,819,56,57,28,96.601486,0.53
14,5,2,1,2,5,1002,56,7,28,69.625992,|
15,5,2,1,2,6,1119,75,21,4,46.137878,-
16,5,2,1,2,7,1242,72,20,2,66.149323,=
17,5,2,1,2,8,1376,72,22,4,54.356438,-


In [12]:
# The prices seems to always correlate with the left value being 817.
# This may be a problematic solution, but I believe if the pixel value of the images stay consistent so will this value.

price = text_df2.loc[text_df2['left'] == 817, ['text']]

price = float(price.iat[0, 0])

price



IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Iterates through each found image and reads image.

prices = []

for img in found_imgs:
    
    # Changing data type from array to PIL Image object
    img = im.fromarray(img)

    # Getting the size of the current object
    w, h = img.size

    # Upscaling the image at same aspect ratio to improve tesseract accuracy
    w *= 2
    h *= 2

    # Creating tuple that must be passed into resize function
    newsize = (w, h)

    # Resizing image using Bicubic upscaling
    # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.BICUBIC
    img = img.resize(newsize, resample=3)

    # Extracting text from upscaled image
    text_df = tess.image_to_data(img, output_type=tess.Output.DATAFRAME)
    
    # Extracting price from the dataframe
    price = text_df.loc[text_df['left'] == 817, ['text']]
    
    # Takes extracted price and converts from dataframe to float
    price = float(price.iat[0, 0])
    
    # Adding found price to list of prices
    prices.append(price)

    # Dropping any rows with a null value extracted
    text_df = text_df.dropna()


In [None]:
prices