# Proof of concept for template matching
## Goals
- Faster and more accurate template matching
- Smaller size of load

In [52]:
import cv2 as cv
import os
from statistics import mean

In [53]:
# Sample image paths; presumably the image to search in (haystack) and the
# avatar to look for (needle).
# NOTE(review): neither constant is referenced by the code visible in this
# notebook - confirm whether they are still needed.
HAYSTACK = "./img/2.jpg"
NEEDLE = "./img/avatars/char_120_hibisc.png"

In [54]:
def get_features(img):
    """Detect AKAZE keypoints in ``img`` and compute their descriptors.

    Args:
        img: Image array handed straight to ``detectAndCompute``.

    Returns:
        The ``(keypoints, descriptor)`` pair produced by
        ``detectAndCompute(img, None)`` on a new AKAZE detector.
    """
    # A fresh detector is created on every call; the second argument is the
    # optional mask, which is not used here.
    detector = cv.AKAZE_create()
    return detector.detectAndCompute(img, None)

def brute_force_matcher(descriptor_query, descriptor_train, sort=False):
    """Match two descriptor sets with a cross-checked Hamming matcher.

    Args:
        descriptor_query: Descriptors of the query image, or ``None``.
        descriptor_train: Descriptors of the train image, or ``None``.
        sort: When truthy, order the matches by ascending ``distance``.

    Returns:
        The matches from ``cv.BFMatcher.match``, as a list sorted by
        distance when ``sort`` is truthy. An empty list when either
        descriptor set is ``None``.
    """
    # Feature detectors hand back ``None`` instead of an empty array when an
    # image yields no keypoints; there is nothing to match in that case.
    if descriptor_query is None or descriptor_train is None:
        return []

    matcher = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
    matches = matcher.match(descriptor_query, descriptor_train)

    if sort:
        matches = sorted(matches, key=lambda match: match.distance)

    return matches

def find_anchor(img):
    """Return the width of the largest box found in the lower part of ``img``.

    The rows from ``int(height * 0.65)`` down to the bottom edge are run
    through ``blur_and_canny`` and the result is passed to ``find_box``.
    Only the width of the returned bounding box is kept.
    """
    height = img.shape[0]
    top = int(height * 0.65)

    # region of interest: every column, bottom rows only
    edges = blur_and_canny(img[top:height, :])
    _x, _y, box_width, _h = find_box(edges)

    return box_width

def sliding_window(img, stride):
    """Yield ten fixed-width column windows of ``img``, centre outwards.

    The image is split into a left and a right half. Five windows are taken
    from the right half first, walking from the centre towards the right
    edge, then five from the left half, walking from the centre towards the
    left edge. Numbering the ten slots 1-10 from left to right, the 6th slot
    is therefore yielded first (index 0) and the 1st slot last (index 9).

    Args:
        img: Image array with columns on axis 1.
        stride: Anchor width. It only selects the window width: 100 when
            below 80, 105 when below 90, otherwise 120.

    Yields:
        Slices covering all rows and one window of columns each.
    """
    left_img, right_img = crop_image_half(img)

    # The window width does not scale with stride; stride merely picks one of
    # three fixed widths.
    if stride < 80:
        shift = 100
    elif stride < 90:
        shift = 105
    else:
        shift = 120

    # NOTE: assumes each half is at least 5 * shift columns wide. On a
    # narrower half the right-hand slices are cut short and the left-hand
    # start index goes negative, which numpy reads as counting from the end.
    for i in range(5):
        yield right_img[:, shift * i:shift * (i + 1)]

    # Index the width instead of unpacking .shape so arrays with a channel
    # axis are accepted as well.
    wi = left_img.shape[1]
    for i in range(5):
        yield left_img[:, wi - shift * (i + 1):wi - shift * i]

def crop_image_half(img):
    """Split ``img`` down the middle into a left and a right half.

    Args:
        img: Array with rows on axis 0 and columns on axis 1; any further
            axes (for example colour channels) are kept as they are.

    Returns:
        ``(left_half, right_half)``. For an odd width the extra column goes
        to the right half.
    """
    # Index the width instead of unpacking ``img.shape`` so arrays with more
    # than two dimensions are accepted as well.
    middle = img.shape[1] // 2

    return img[:, :middle], img[:, middle:]

def resize_image(img, width=1280, height=720):
    """Resize ``img`` to exactly ``width`` x ``height`` with ``cv.resize``.

    Args:
        img: Image array to resize.
        width: Target width in pixels (default 1280).
        height: Target height in pixels (default 720).

    Returns:
        The resized image returned by ``cv.resize``.
    """
    # cv.resize takes the target size as (width, height)
    target_size = (width, height)
    return cv.resize(img, target_size)

def blur_and_canny(img):
    """Smooth ``img`` with a Gaussian blur and return its Canny edge map.

    Args:
        img: Image array to process.

    Returns:
        The edge image produced by ``cv.Canny`` with thresholds 120 and 255.
    """
    # 7x7 kernel, sigmaX = 1
    blurred = cv.GaussianBlur(img, (7, 7), 1)

    # The fourth positional parameter of cv.Canny is the optional output
    # array ``edges``, not a tuning value, so only the two hysteresis
    # thresholds are passed.
    canny = cv.Canny(blurred, 120, 255)

    return canny

def find_box(img):
    """Return ``(x, y, w, h)`` of the bounding box of the biggest contour.

    Args:
        img: Image passed to ``cv.findContours`` (``find_anchor`` passes the
            edge map from ``blur_and_canny``).

    Returns:
        The ``cv.boundingRect`` of the external contour with the greatest
        ``cv.contourArea``.

    Raises:
        IndexError: If no contour is found in ``img``.
    """
    found = cv.findContours(img, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    # Depending on the OpenCV version the result has two or three items; the
    # contour list is the first item of two, or the second of three.
    contours = found[0] if len(found) == 2 else found[1]

    if len(contours) == 0:
        # Same exception type as indexing an empty list, with a message that
        # says what actually went wrong.
        raise IndexError("find_box: no contours found in image")

    # Only the largest contour is needed, so take the maximum rather than
    # sorting the whole list.
    biggest = max(contours, key=cv.contourArea)

    return cv.boundingRect(biggest)


In [55]:
# Collect AKAZE keypoints and descriptors for every file in the avatar folder.
DIR = "./img/avatars"

keypoints = []
descriptors = []
for file in os.listdir(DIR):
    # IMREAD_GRAYSCALE is the named form of flag 0
    img = cv.imread(f"{DIR}/{file}", cv.IMREAD_GRAYSCALE)
    kp, desc = get_features(img)
    keypoints.append(kp)
    descriptors.append(desc)

In [56]:
# Load every entry directly under ./img (except the avatars folder) as a
# grayscale image resized to the resize_image() defaults.
train_images = []
# os.listdir() returns entries in arbitrary order; sort them so that an index
# into train_images always refers to the same file.
for file in sorted(os.listdir("./img")):
    if file == "avatars":
        # sub-folder with the avatar templates, handled separately
        continue

    im = "./img/" + file
    print(im)
    img = cv.imread(im, 0)  # 0 = load as grayscale
    img = resize_image(img)
    train_images.append(img)

./img/1.jpeg
./img/10.jpeg
./img/2.jpg
./img/3.jpg
./img/4.jpg
./img/5.jpg
./img/6.png
./img/7.jpg
./img/8.png
./img/9.jpg


In [57]:
# Build a lookup of avatar features keyed by operator name, i.e. the avatar
# file name without its extension.
DIR = "./img/avatars"

operators = {}
for file in os.listdir(DIR):
    # splitext() strips the extension whatever its length, unlike a fixed
    # four-character slice.
    name = os.path.splitext(file)[0]

    img = cv.imread(DIR + "/" + file, 0)  # 0 = load as grayscale
    kp, desc = get_features(img)

    operators[name] = {
        "name": name,
        # keep only the (x, y) coordinates of each keypoint
        "keypoints": [point.pt for point in kp],
        "descriptors": desc,
    }


In [59]:
# Slice one training image into windows and, for each window, report the
# operator whose avatar descriptors match it with the lowest mean distance.
train_img = train_images[9]
anchor = find_anchor(train_img)
generators = sliding_window(train_img, anchor)

for i, window in enumerate(generators):
    kp, desc = get_features(window)

    prediction = {"name": "", "min": 0}
    # Lowest score seen so far; None until the first candidate is scored.
    best_score = None
    for name, operator in operators.items():
        # No descriptors on either side means there is nothing to compare.
        if desc is None or operator["descriptors"] is None:
            continue

        matches = brute_force_matcher(operator["descriptors"], desc, sort=True)
        if not matches:
            continue

        # Score = mean distance of the ten closest matches, computed once.
        score = mean(match.distance for match in matches[:10])

        # Tracking "unset" separately from the score lets the first candidate
        # record its name too, and keeps a score of exactly 0 valid.
        if best_score is None or score < best_score:
            best_score = score
            prediction["min"] = score
            prediction["name"] = name

    print(f"window {i} = {prediction['name']} ({prediction['min']})")

window 0 = char_378_asbest (38.1)
window 1 = char_120_hibisc (59.5)
window 2 = char_282_catap (46.1)
window 3 = char_120_hibisc (55.5)
window 4 = char_123_fang (48.1)
window 5 = char_209_ardign (54.9)
window 6 = char_423_blemsh (62.7)
window 7 = char_452_bstalk (53.5)
window 8 = char_149_scave (73.0)
window 9 = char_479_sleach (41.0)


## Problem
- Object detection is expensive
- Uploaded images will have various sizes

## Solution
- Sliding window with anchor based on region of interest

## Consideration
- Current processing time is ~500 ms per image