In [None]:
import os
import re
import cv2
import glob
import time
import hashlib
import collections
import matplotlib.pyplot as plt

In [None]:
def matchCrop(imageObject, cropObject, threshold=0.8, showPlots=True):
    """Search for ``cropObject`` inside ``imageObject`` by multi-scale template matching.

    Both inputs are Gaussian-blurred, the crop is resized to the largest size
    that still fits inside the image at its own aspect ratio, and
    ``cv2.matchTemplate`` (``TM_CCOEFF_NORMED``) is run repeatedly while the
    crop is scaled down to 86% of its size per step. The search ends when a
    scale fails to beat the best accepted score, after 20 scales, or once the
    integer image/crop size ratio exceeds 8 along either axis.

    Parameters
    ----------
    imageObject : array or None
        Image to search in, as returned by ``cv2.imread`` (treated as BGR).
    cropObject : array or None
        Candidate crop, same conventions as ``imageObject``.
    threshold : float, optional
        Score a match has to exceed to be accepted. Defaults to 0.8.
    showPlots : bool, optional
        When True (default), the two inputs and, for every scale, the
        correlation map and the annotated image are drawn with matplotlib.

    Returns
    -------
    list or int
        ``[maxVal, topLeft, bottomRight]`` for the best accepted match, both
        corners being ``(x, y)`` tuples in image coordinates, or ``-1`` when
        nothing exceeded ``threshold``, when the crop is too small relative to
        the image, or when either input is ``None``.
    """
    # cv2.imread yields None for a missing or undecodable file; report "no match"
    # instead of failing inside GaussianBlur.
    if imageObject is None or cropObject is None:
        return -1

    maxSizeRatio = 8     # give up once the image is this many times larger than the crop
    shrinkFactor = 0.86  # scale applied to the crop between two attempts
    maxScales = 20       # upper bound on the number of scales tried

    imageObject = cv2.GaussianBlur(imageObject, (5, 5), 0)
    cropObject = cv2.GaussianBlur(cropObject, (5, 5), 0)

    if showPlots:
        # matplotlib expects RGB while the inputs are BGR, so both previews are
        # converted; `image` is also the canvas the match rectangles are drawn on.
        image = cv2.cvtColor(imageObject, cv2.COLOR_BGR2RGB)
        crop = cv2.cvtColor(cropObject, cv2.COLOR_BGR2RGB)

        fig, (imageAx, cropAx) = plt.subplots(1, 2, figsize=(14, 8))
        imageAx.set_title("Original image")
        imageAx.imshow(image)
        cropAx.set_title("Cropped image")
        cropAx.imshow(crop)
        plt.show()
        plt.close(fig)

    imageHeight, imageWidth = imageObject.shape[:2]
    cropHeight, cropWidth = cropObject.shape[:2]

    if imageHeight // cropHeight > maxSizeRatio or imageWidth // cropWidth > maxSizeRatio:
        return -1

    # Start from the largest crop that fits in the image: match the image height
    # when the image is relatively wider than the crop, otherwise match its width.
    # max(1, ...) keeps extremely thin crops from collapsing to a 0-pixel side.
    if imageWidth / imageHeight > cropWidth / cropHeight:
        startSize = (max(1, int(cropWidth * imageHeight / cropHeight)), imageHeight)
    else:
        startSize = (imageWidth, max(1, int(cropHeight * imageWidth / cropWidth)))
    cropObject = cv2.resize(cropObject, startSize)

    bestMatch = None
    # Sentinel below the documented [-1, 1] range of TM_CCOEFF_NORMED scores, so
    # the search keeps going until the first score above `threshold` is recorded.
    bestVal = -2

    for scale in range(maxScales):

        cropHeight, cropWidth = cropObject.shape[:2]

        if imageHeight // cropHeight > maxSizeRatio or imageWidth // cropWidth > maxSizeRatio:
            break

        resultArray = cv2.matchTemplate(imageObject, cropObject, cv2.TM_CCOEFF_NORMED)

        _, maxVal, _, maxLoc = cv2.minMaxLoc(resultArray)
        print("Maxmimum match: ", maxVal)
        print("Location of match: ", maxLoc)

        topLeft = maxLoc
        bottomRight = (maxLoc[0] + cropWidth, maxLoc[1] + cropHeight)

        if showPlots:
            cv2.rectangle(image, topLeft, bottomRight, 255, 2)

            # One figure per scale, shown and closed right away so figures do
            # not pile up over the many image/crop combinations.
            fig, (resultAx, matchAx) = plt.subplots(1, 2, figsize=(14, 8))
            resultAx.set_title("Result array")
            resultAx.imshow(resultArray, cmap='gray')
            matchAx.set_title("Best match per scale")
            matchAx.imshow(image)
            plt.show()
            plt.close(fig)

        if maxVal > bestVal:
            # Only scores above the threshold raise the bar; weaker scores let
            # the search continue at the next scale without being recorded.
            if maxVal > threshold:
                bestVal = maxVal
                bestMatch = [maxVal, maxLoc, bottomRight]
        else:
            # The score stopped improving: smaller scales are not tried.
            break

        nextHeight = int(cropHeight * shrinkFactor)
        nextWidth = int(cropWidth * shrinkFactor)
        if nextHeight < 1 or nextWidth < 1:
            # cv2.resize rejects an empty target size (only reachable on tiny images).
            break
        cropObject = cv2.resize(cropObject, (nextWidth, nextHeight))

    if bestMatch is None:
        return -1
    return bestMatch

In [None]:
def md5(fileName):
    """Return the hexadecimal MD5 digest of the file at ``fileName``.

    The file is opened in binary mode and consumed in 4096-byte pieces, so it
    is never held in memory as a whole.
    """
    digest = hashlib.md5()
    with open(fileName, "rb") as handle:
        while True:
            piece = handle.read(4096)
            if not piece:
                # read() returns b"" at end of file
                break
            digest.update(piece)
    return digest.hexdigest()

In [None]:
def getUniqueCrops(cropsPath):
    """Group the files matched by ``cropsPath`` by the MD5 checksum of their content.

    Parameters
    ----------
    cropsPath : str
        Glob pattern selecting the crop files, e.g. ``'crops/*'``.

    Returns
    -------
    dict
        Maps each checksum (hex string from ``md5``) to the list of matched
        file paths that produced it, in sorted path order. Empty when the
        pattern matches no regular file.
    """
    uniqueCropsDict = {}

    # glob's ordering depends on the filesystem; sorting keeps the grouping
    # (and which duplicate ends up first in each list) reproducible.
    for fileName in sorted(glob.glob(cropsPath)):
        # A '*' pattern also matches sub-directories, which cannot be opened for hashing.
        if not os.path.isfile(fileName):
            continue

        uniqueCropsDict.setdefault(md5(fileName), []).append(fileName)

    return uniqueCropsDict
    
def getCropsAssociation(imagePath, keys):
    """Match every unique crop against one image and collect the hits.

    Relies on two module-level names that must exist before the call:
    ``uniqueCrops`` (checksum -> list of crop file paths) is read, and
    ``foundCrops`` (list of checksums) gets every matching checksum appended
    once.

    Parameters
    ----------
    imagePath : str
        Path of the image to search in.
    keys : iterable
        Checksums (keys of ``uniqueCrops``) to try against the image.

    Returns
    -------
    list
        One ``(cropFileName, [x1, y1, x2, y2])`` tuple per crop file whose
        checksum matched, where the box is the top-left and bottom-right
        corner reported by ``matchCrop``. Empty when nothing matched or the
        image could not be read.
    """
    matchesList = []
    imageObject = cv2.imread(imagePath)

    # cv2.imread returns None rather than raising when a file cannot be decoded.
    if imageObject is None:
        print("Could not read image: ", imagePath)
        return matchesList

    for key in keys:
        cropPaths = uniqueCrops[key]

        # Files grouped under one checksum are duplicates, so only the first is matched.
        cropObject = cv2.imread(cropPaths[0])
        if cropObject is None:
            print("Could not read crop: ", cropPaths[0])
            continue

        result = matchCrop(imageObject, cropObject)

        if result != -1:
            maxVal, topLeft, bottomRight = result

            if key not in foundCrops:
                foundCrops.append(key)

            box = [topLeft[0], topLeft[1], bottomRight[0], bottomRight[1]]
            for cropImg in cropPaths:
                # basename() yields the file name for absolute and relative paths
                # alike; split('/')[1] returned the first directory of an absolute path.
                cropImageName = os.path.basename(cropImg)
                matchesList.append((cropImageName, list(box)))

    return matchesList

In [None]:
# Main Cell
# Hashes the crops, matches every unique crop against every image and builds
# matchDict: image file name -> list of (crop file name, [x1, y1, x2, y2]),
# plus an 'na' entry listing the crops that were not found in any image.

CROPSPATH = '/content/drive/MyDrive/crops/'
IMAGESPATH = '/content/drive/MyDrive/images/'

foundCrops = []  # checksums matched in at least one image; filled by getCropsAssociation
notFound = []    # (crop file name, []) for every crop whose checksum never matched
matchDict = {}

uniqueCrops = getUniqueCrops(CROPSPATH + '*')
keys = uniqueCrops.keys()

# Start time
t1 = time.time()

# basename() gives the file name regardless of how the directory is called.
images = [os.path.basename(path) for path in glob.glob(IMAGESPATH + '*')]

for index, imageName in enumerate(images):
    print(index)
    print(IMAGESPATH + imageName)
    matchDict[imageName] = getCropsAssociation(IMAGESPATH + imageName, keys)

for key in keys:
    if key not in foundCrops:
        for path in uniqueCrops[key]:
            # Crop paths live under CROPSPATH, so splitting them on 'images/'
            # has no second element; take the file name directly.
            notFound.append((os.path.basename(path), []))

matchDict['na'] = notFound

# End time
t2 = time.time()

print("Total time ", t2-t1," secs")