#120 :: Refactor number recognition to use new DotToDotImage class
JackBuck committed Mar 22, 2017
1 parent 9ed490b commit df3663a
Showing 4 changed files with 146 additions and 85 deletions.
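In short, the commit replaces the module-level recognise_number / recognise_rotated_number helpers with a single DotToDotImage class that first loads an image and then runs the whole recognition pipeline (clean, find spots, pick the central spot, mask, rotate, OCR). A minimal usage sketch, assuming the new API shown in the diff below; the image path is hypothetical and the real call site is scripts/recognise_number.py:

from roboplot.dottodot.number_recognition import DotToDotImage

dot_img = DotToDotImage.load_image_from_file('example_dot_to_dot.jpg')  # hypothetical path
number = dot_img.process_image()

print(number.numeric_value)    # recognised integer, or None if the OCR result was rejected
print(number.dot_location_yx)  # location of the spot closest to the image centre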
174 changes: 131 additions & 43 deletions roboplot/dottodot/number_recognition.py
@@ -1,6 +1,5 @@
 import PIL.Image as Image
 import re
-import warnings

 import cv2
 import numpy as np
@@ -23,61 +22,148 @@ def __init__(self, numeric_value: int, dot_location_yx: tuple):


 class DotToDotImage:
-    @staticmethod
-    def process_file(file_path: str):
-        img = read_image(file_path)
-        return DotToDotImage.process_image(img)
+    """A class to process dot-to-dot images."""

     @staticmethod
-    def process_image(img: np.ndarray) -> tuple:
+    def load_image_from_file(file_path: str):
         """
-        A factory method to process an image as a dot-to-dot image.
+        Load an image from a supplied file path.
         Args:
-            img (np.ndarray): the image to process
+            file_path (str): path to the image to return
         Returns:
-            tuple[Number, DotToDotImage]: The first return value contains the number closest to the centre.
-                                          The second return value contains the DotToDotImage instance used to analyse
-                                          the number. This contains references to intermediate results which can be
-                                          useful when debugging.
+            DotToDotImage: the loaded (unprocessed) image
         """

-        # TODO: Refactor these into member methods which act in sequence on a single member variable img,
-        #       but which also save their results to other member variables for debugging purposes.
-        dot2dot_img = DotToDotImage(img)
-        dot2dot_img.clean_image = _clean_image(dot2dot_img.original_image)

-        dot2dot_img.spots = _extract_spots_from_clean_image(dot2dot_img.clean_image)

-        dot2dot_img.central_contours = _extract_contours_close_to(dot2dot_img.clean_image,
-                                                                  dot2dot_img.closest_spot_to_centre.pt,
-                                                                  maximum_pixels_between_contours=9)
-        dot2dot_img.masked_image = dot2dot_img.clean_image.copy()
-        _mask_with_contours(dot2dot_img.masked_image, dot2dot_img.central_contours)

-        # TODO: Finish refactoring the functions into this class
+        img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
+        if img is not None:
+            return DotToDotImage(img)
+        else:
+            raise TypeError("Could not open image file: {}".format(file_path))

     def __init__(self, original_img):
         """
-        Use one of the static factory methods provided, as opposed to this initialiser.
+        Create an unprocessed dot-to-dot image.
         Args:
             original_img (np.ndarray): the image to process
         """
-        self.original_image = original_img
-        self.clean_image = None
-        self.spots = None
-        self.central_contours = None
-        self.masked_image = None
+        self._img = original_img
+        self.original_image = self._img.copy()

-    @property
-    def closest_spot_to_centre(self):
-        if self.spots is None or len(self.spots) == 0:
-            return None

-        image_centre = np.array(self.original_image.shape) / 2
-        return min(self.spots, key=lambda s: np.linalg.norm(s.pt - image_centre))
+    def process_image(self) -> Number:
+        """
+        Process the dot-to-dot image.
+        Returns:
+            Number: the number whose spot is closest to the centre of the image
+        """
+        self._clean_image()
+        self._extract_spots()
+        self._find_closest_spot_to_centre()
+        self._extract_central_contours(maximum_pixels_between_contours=9)
+        self._mask_using_central_contours()
+        self._rotate_centre_spot_to_bottom_right()
+        self._recognise_number_text()
+        self._extract_number_from_recognised_text()
+        return Number(self.recognised_numeric_value, self.centre_spot.pt)

+    def _clean_image(self):
+        self._img = cv2.medianBlur(self._img, ksize=3)
+        self._img = cv2.adaptiveThreshold(self._img, maxValue=255, adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                          thresholdType=cv2.THRESH_BINARY, blockSize=11, C=2)
+        self.clean_image = self._img.copy()

+    def _extract_spots(self):
+        # Dilate and Erode to 'clean' the spot (note that this harms the number itself, so we only do it to extract spots)
+        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+        img = cv2.dilate(self._img, kernel, iterations=1)
+        img = cv2.erode(img, kernel, iterations=1)

+        # Perform a simple blob detect
+        params = cv2.SimpleBlobDetector_Params()
+        params.filterByArea = True
+        params.minArea = 20  # The dot in 20pt font has area of about 30
+        params.filterByCircularity = True
+        params.minCircularity = 0.7
+        params.filterByConvexity = True
+        params.minConvexity = 0.8
+        params.filterByInertia = True
+        params.minInertiaRatio = 0.6
+        detector = cv2.SimpleBlobDetector_create(params)
+        self.spot_keypoints = detector.detect(img)

+    def _find_closest_spot_to_centre(self):
+        if self.spot_keypoints is None or len(self.spot_keypoints) == 0:
+            self.centre_spot = None
+        else:
+            image_centre = np.array(self.original_image.shape) / 2
+            self.centre_spot = min(self.spot_keypoints, key=lambda s: np.linalg.norm(s.pt - image_centre))

+    def _extract_central_contours(self, maximum_pixels_between_contours: float):
+        self.central_contours = None
+        if self.centre_spot is not None:
+            self.central_contours = self._extract_contours_close_to(self.centre_spot.pt,
+                                                                    maximum_pixels_between_contours)

+    def _extract_contours_close_to(self, target_point, maximum_pixels_between_contours: float):
+        img_inverted = 255 - self._img
+        _, all_contours, _ = cv2.findContours(img_inverted, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE)

+        def dist_between_contours(cnt1, cnt2):
+            return min([min(np.linalg.norm(cnt1 - pt, axis=2)) for pt in cnt2])

+        # all_contours = [cv2.convexHull(c, returnPoints=True) for c in all_contours]

+        target_point_as_contour = np.reshape(target_point, (-1, 1, 2))
+        contours_near_target = [target_point_as_contour]

+        still_adding_contours = True
+        while still_adding_contours:
+            still_adding_contours = False

+            for i in reversed(range(len(all_contours))):
+                dist_from_central_contours = min(
+                    [dist_between_contours(all_contours[i], c) for c in contours_near_target])
+                if dist_from_central_contours <= maximum_pixels_between_contours:
+                    contours_near_target.append(all_contours.pop(i))
+                    still_adding_contours = True

+        return contours_near_target[1:]

+    def _mask_using_central_contours(self):
+        if self.central_contours is not None:
+            self._img = self._mask_using_contours(self.central_contours)
+            self.masked_image = self._img.copy()

+    def _mask_using_contours(self, contours):
+        img = self._img.copy()
+        mask = np.zeros(img.shape, np.uint8)
+        cv2.drawContours(mask, contours, contourIdx=-1, color=255, thickness=-1)
+        img[np.where(mask == 0)] = 255
+        return img

+    def _rotate_centre_spot_to_bottom_right(self):
+        self.rotated_image = None
+        if self.centre_spot is not None:
+            current_angle = _estimate_degrees_from_number_centre_to_spot(self._img, self.centre_spot)
+            desired_angle = -30
+            self._img = _rotate_image(desired_angle - current_angle, self._img)
+            self.rotated_image = self._img.copy()

+    def _recognise_number_text(self):
+        img = Image.fromarray(self._img)

+        # psm 8 => single word;
+        # digits => use the digits config file supplied with the software
+        self.recognised_text = pytesseract.image_to_string(img, config='-psm 8, digits')

+    def _extract_number_from_recognised_text(self):
+        # Forcing a terminating period helps us to filter out bad results
+        match = re.match(r'(\d+)\.$', self.recognised_text)
+        self.recognised_numeric_value = None
+        if match is not None:
+            self.recognised_numeric_value = int(match.group(1))


 def read_image(file_path: str) -> np.ndarray:

@@ -282,13 +368,15 @@ def draw_image_with_keypoints(img, keypoints, window_title="Image with keypoints"):
     # cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the circle corresponds to the size of blob
     img_with_keypoints = cv2.drawKeypoints(img, keypoints, outImage=np.array([]), color=(0, 0, 255),
                                            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
-    cv2.imshow(window_title, img_with_keypoints)
-    cv2.waitKey(0)
+    draw_image(img_with_keypoints, window_title)


-def draw_image_with_contours(img, contours):
+def draw_image_with_contours(img, contours, window_title="Image with contours"):
     img_colour = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
     cv2.drawContours(img_colour, contours, contourIdx=-1, color=(0, 0, 255), thickness=1)
-    cv2.imshow("Image with contours", img_colour)
+    draw_image(img_colour, window_title)


+def draw_image(img, window_title="Image"):
+    cv2.imshow(window_title, img)
+    cv2.waitKey(0)
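Aside: the _extract_contours_close_to step above is a greedy clustering. It seeds a cluster with the centre spot, then repeatedly absorbs any contour lying within maximum_pixels_between_contours of the contours gathered so far, stopping once a full pass adds nothing new. A standalone sketch of the same idea, with made-up helper names and toy point data:

import numpy as np

def min_distance_between_contours(cnt1, cnt2):
    # Smallest distance between two contours stored as (N, 1, 2) point arrays.
    return min(np.linalg.norm(cnt1 - pt, axis=2).min() for pt in cnt2)

def grow_contour_cluster(all_contours, seed_point, max_gap):
    # Seed the cluster with the target point, then keep absorbing any contour
    # that comes within max_gap pixels of the cluster, until a pass adds nothing.
    cluster = [np.reshape(seed_point, (-1, 1, 2))]
    remaining = list(all_contours)
    still_adding = True
    while still_adding:
        still_adding = False
        for i in reversed(range(len(remaining))):
            if min(min_distance_between_contours(remaining[i], c) for c in cluster) <= max_gap:
                cluster.append(remaining.pop(i))
                still_adding = True
    return cluster[1:]  # drop the artificial seed "contour"

# Toy data: two contours near the seed point, one far away.
near_a = np.array([[[10, 10]], [[12, 10]]])
near_b = np.array([[[15, 11]], [[16, 12]]])
far_away = np.array([[[80, 80]], [[82, 81]]])
print(len(grow_contour_cluster([near_a, near_b, far_away], seed_point=(11, 10), max_gap=9)))  # prints 2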
2 changes: 1 addition & 1 deletion scripts/recognise_all_numbers
@@ -16,6 +16,6 @@ fi
 for i in "$@"
 do
     echo `basename $i .jpg`
-    $script_directory/recognise_number.py $i -r || { echo ERROR: recognise_number.py failed; exit 1; }
+    $script_directory/recognise_number.py $i || { echo ERROR: recognise_number.py failed; exit 1; }
     echo
 done
23 changes: 10 additions & 13 deletions scripts/recognise_number.py
@@ -10,21 +10,18 @@
 # Commandline arguments
 parser = argparse.ArgumentParser(description='Recognise a supplied number.')
 parser.add_argument('input_file', type=str, help='the path to the file containing the number to recognise.')
-parser.add_argument('-r', '--possibly-rotated', action='store_true',
-                    help='set this flag to indicate that the number could be rotated')
+parser.add_argument('-d', '--display-images', action='store_true', help='display intermediate results.')
 args = parser.parse_args()

-# Load the image
-img = number_recognition.read_image(args.input_file)

-# Extract the numeric value
-if args.possibly_rotated:
-    try:
-        recognised_number = number_recognition.recognise_rotated_number(img)
-    except ValueError:
-        recognised_number = "No spot!!"
-else:
-    recognised_number = number_recognition.recognise_number(img)
+# Load and process the image
+img = number_recognition.DotToDotImage.load_image_from_file(args.input_file)
+recognised_number = img.process_image()

 print("Recognised number: {!r}".format(recognised_number.numeric_value))
 print("Probable spot location: {!r}".format(recognised_number.dot_location_yx))

+# Display images
+if args.display_images:
+    number_recognition.draw_image_with_keypoints(img.clean_image, [img.centre_spot], "Clean with centre spot")
+    number_recognition.draw_image_with_contours(img.clean_image, img.central_contours, "Clean with centre contours")
+    number_recognition.draw_image(img.masked_image, "Masked image")
32 changes: 4 additions & 28 deletions testing/test_number_recognition.py
@@ -9,37 +9,13 @@

 import context
 import roboplot.config as config
-import roboplot.dottodot.number_recognition as number_recognition
+from roboplot.dottodot.number_recognition import DotToDotImage


 class NumberRecognitionRegressionTests(unittest.TestCase):
     test_data_directory = os.path.join(config.test_data_dir, 'number_recognition')

-    def test_basic_number_recognition(self):
-        """Regression test number recognition on images which don't require rotation."""
-        file_glob = os.path.join(self.test_data_directory, '*.jpg')
-        for img_file in glob.glob(file_glob):
-            filename = os.path.basename(img_file)
-            file_name_match = re.match(
-                r'(?P<numeric_value>\d+)_(?P<fontsize>\d+)pt_(?P<angle>\d+)deg_y(?P<spot_y>\d+)_x(?P<spot_x>\d+)',
-                filename)
-            if float(file_name_match.group('angle')) == 0:
-                with self.subTest(filename=filename):
-                    # Perform the number recognition
-                    img = number_recognition.read_image(img_file)
-                    number = number_recognition.recognise_number(img)

-                    # Extract expected results
-                    expected_number = int(file_name_match.group('numeric_value'))
-                    expected_spot_location = (int(file_name_match.group('spot_y')),
-                                              int(file_name_match.group('spot_x')))

-                    # Compare
-                    self.assertEqual(number.numeric_value, expected_number)
-                    self.assertAlmostEqual(number.dot_location_yx[0], expected_spot_location[0], delta=2)
-                    self.assertAlmostEqual(number.dot_location_yx[1], expected_spot_location[1], delta=2)

-    def test_rotated_number_recognition(self):
+    def test_on_selection_of_sizes(self):
         """Regression test number recognition on potentially rotated images."""
         file_glob = os.path.join(self.test_data_directory, '*.jpg')
         for img_file in glob.glob(file_glob):
@@ -49,8 +25,8 @@ def test_rotated_number_recognition(self):
                 filename)
             with self.subTest(filename = filename):
                 # Perform the number recognition
-                img = number_recognition.read_image(img_file)
-                number = number_recognition.recognise_rotated_number(img)
+                img = DotToDotImage.load_image_from_file(img_file)
+                number = img.process_image()

                 # Extract expected results
                 expected_number = int(file_name_match.group('numeric_value'))