# C1 - Content Based Image Retrieval
### Team 8 - Week 3

In [1]:
import re, os, glob, math, tqdm, pickle, itertools
import matplotlib.pyplot as plt

import numpy as np
import cv2

import pytesseract
from Levenshtein import distance as levenshtein_distance

import pywt
from skimage.feature import local_binary_pattern
from scipy.fftpack import dctn

import utils

#autoreload modules when code is run
%load_ext autoreload
%autoreload 2

# Path to the OCR executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Luis\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'

In [2]:
# Create a set to store unique names of authors.
name_bag = set()

for folder in ['BBDD', 'qsd1_w4']:
    # Loop through each .txt file inside the folder.
    for text_file in glob.glob(f'data/{folder}/*.txt'):
        # Extract the specific text pattern from the file and add it to the set.
        name_bag.add(utils.get_text_bbdd(text_file))

In [3]:
# load pickle file 'data/qsd1_w4/text_boxes.pkl'
with open('data/qsd1_w4/text_boxes.pkl', 'rb') as f:
    text_boxes = pickle.load(f)

# load pickle file 'data/qsd1_w4/frames.pkl'
with open('data/qsd1_w4/frames.pkl', 'rb') as f:
    frames = pickle.load(f)

In [4]:
class DataLoader():
    def __init__(self, folder_path):
        self.folder_path = folder_path

    # Obtain the painting image removing the background. 
    # It returns the mask where 1 means painting image and 0 background.
    def get_mask(self, gray, threshold_area=65000):
     
        # Empty mask definition
        mask = np.zeros(gray.shape, dtype=np.uint8)

        # Applying gaussian blurring and define an intelligent gradient threshold depending on 13x13 boxes
        blur = cv2.GaussianBlur(gray, (13,13), 0)
        # Threshold based on local pixel neighborhood (11x11 block size)
        thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

        # Two pass dilate with horizontal and vertical kernel
        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,5))
        dilate = cv2.dilate(thresh, horizontal_kernel, iterations=2)
        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,1))
        dilate = cv2.dilate(dilate, vertical_kernel, iterations=2)

        # Find contours, filter using contour threshold area, and draw rectangle
        cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]

        # Filtering the found contours by size
        counter = 0
        areas = []
        coordinates = []
        for c in cnts:
            # Shoelace formula for convex shapes
            area = cv2.contourArea(c) 
            if area > threshold_area:
                x,y,w,h = cv2.boundingRect(c) 
                areas.append((area, (x,y,w,h)))
                counter += 1

        # Sort areas and positions by area
        areas = sorted(areas, key=lambda x: x[0], reverse=True)[:3]

        # Draw bounding box on mask
        for i in range(len(areas)-1,-1,-1):
            if i > 0 and abs(areas[i][1][0] - areas[i-1][1][0]) < 190 and abs(areas[i][1][1] - areas[i-1][1][1]) < 150:
                # print('Skipping! Two masks in the same painting!')
                continue
            x,y,w,h = areas[i][1]
            coordinates.append((x,y,w,h))
            mask[y:y+h, x:x+w] = 255
        
        # Catching the 0 contours error
        if counter == 0:
            print('Error! No paintings in this image!')
            plt.imshow(gray)
            plt.show()
            plt.imshow(mask, cmap='gray')
            plt.show()

        return mask, coordinates
    
    # Obtain the painting image removing the text. 
    # It returns the mask where 1 means painting image and 0 text.
    def get_mask_text(self, gray, name_bag):

        # Apply morphological opening and closing to enhance text-like features using a 9x9 kernel
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
        opening = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
        
        #thresholding the difference to get (hopefully) only the text
        x = closing-opening
        x = (x>125).astype(np.uint8) 

        # Dilation to further enhance the text features using a 13x13 kernel
        kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (13,13))
        dilated = cv2.dilate(x, kernel2, iterations=2)

        # Find contours 
        ctns = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Loop through the contours and find rectangular bounding boxes that likely represent text areas
        areas = []
        for c in ctns[0]:
            x,y,w,h = cv2.boundingRect(c)
            # Filter out rectangles based on certain geometric criteria
            n, m = gray.shape
            ratio = w/h
            relative_area = (w*h)/(n*m)
            if w > h and ratio < 12 and ratio > 1.5 and relative_area < 0.25 and x+w < m and y+h < n and h >= 30:
                # Shoelace formula for convex shapes
                areas.append((cv2.contourArea(c), (x,y,w,h)))
        
        if len(areas) == 0:
            return 0, 0, 0, 0, 'Unknown'
        areas = sorted(areas, key=lambda x: x[0], reverse=True)
        x, y, w, h = areas[0][1]

        # Merge shapes close to the main detected text region (e.g., text broken into separate regions)
        for _, shape in areas:
            if y > shape[1]-10 and y < shape[1]+10:
                if shape[0] < x:
                    w = (x+w) - shape[0]
                    x = shape[0]
                else:
                    w = (shape[0]+shape[2]) - x


        min_word = utils.get_text(gray, name_bag, x, y, x+w, y+h)
        # Return the bounding box of the detected text region and the closest matching name
        return [x, y, x+w, y+h, min_word]

    # Divide the image into blocks
    def create_blocks_array(self, image, blockNumber):
    
        # Set number of slices per axis
        axisSlice = int(math.sqrt(blockNumber))

        blocksArray = []
        # Split the image into vertical blocks
        split_h = np.array_split(image, axisSlice, axis = 0)
        
        for i in range(axisSlice):
            for j in range(axisSlice):
                # Split vertical blocks into square blocks
                split_hv = np.array_split(split_h[i], axisSlice, axis = 1)
                blocksArray.append(split_hv[j])
        return blocksArray

    # Compute the histogram of the image
    def create_histogram(self, block, mask, d_hist, bins):
        
        channels = cv2.split(block)
        range_a, range_b = 256, 256

        if d_hist == 1:
            if mask is None:
                # Compute 1D histograms for each channel separately
                hist = [cv2.calcHist([chan], [0], None, [bins], [0, range_a if i == 0 else range_b]) for i,chan in enumerate(channels)]
            else:
                # Compute 1D histograms for each channel separately
                hist = [cv2.calcHist([chan[mask!=0]], [0], None, [bins], [0, range_a if i == 0 else range_b]) for i,chan in enumerate(channels)]

        elif d_hist == 2:
            if mask is None:
                # Compute 2D joint histograms for each pair of channels
                hist = [cv2.calcHist([channels[i], channels[j]], [0, 1], None, [bins, bins], [0, range_a if i == 0 else range_b, 0, range_b])
                            for i in range(len(channels)) for j in range(i+1, len(channels))]
            else:
                # Compute 2D joint histograms for each pair of channels
                hist = [cv2.calcHist([channels[i][mask!=0], channels[j][mask!=0]], [0, 1], None, [bins, bins], [0, range_a if i == 0 else range_b, 0, range_b])
                            for i in range(len(channels)) for j in range(i+1, len(channels))]

        else:
            if mask is None:
                # Compute 3D joint histogram for all three channels
                hist, _ = np.histogramdd([c.flatten() for c in channels], bins=(bins, bins, bins), range=[(0, range_a), (0, range_b), (0, range_b)])
            else:
                # Compute 3D joint histogram for all three channels
                hist, _ = np.histogramdd([c[mask != 0] for c in channels], bins=(bins, bins, bins), range=[(0, range_a), (0, range_b), (0, range_b)])

        return hist
    
    # Compute the color histogram of the image by blocks
    def get_color_features_by_blocks(self, image, level, d_hist, bins, mask_text):

        # Get blocks using multi-level resolution
        blocksArray = []
        for lvl in range(level+1):
            for b in self.create_blocks_array(image, (2**lvl)*(2**lvl)):
                blocksArray.append(b)

        if mask_text is not None:
            blocksMasks = []

            # We create a mask image blocking the bbox of the text
            # That image will be used to compute the histogram of the image without the text
            mask_text_image = np.ones(image.shape[:2], dtype=np.uint8)
            mask_text_image[mask_text[1]:mask_text[3], mask_text[0]:mask_text[2]] = 0

            # It is necessary to create the blocks of the mask image too
            for lvl in range(level+1):
                for b in self.create_blocks_array(mask_text_image, (2**lvl)*(2**lvl)):
                    blocksMasks.append(b)
        else:
            blocksMasks = [None]*len(blocksArray)

        histograms = []
        for block, mask_text_block in zip(blocksArray, blocksMasks):
            # Compute the histogram of the channel and append it to the list
            hist = self.create_histogram(block, mask_text_block, d_hist, bins)
            if isinstance(hist, list):
                for h in hist:
                    histograms.append(h.flatten() / (block.shape[0]*block.shape[1]))
            else:
                histograms.append(hist.flatten()  / (block.shape[0]*block.shape[1]))
            
        # Concatenate all histograms into a single feature vector
        return np.concatenate(histograms)

    def zigzag_scan(self, image):
        rows, cols = image.shape
        solution = [[] for _ in range(rows + cols - 1)]
        
        for i in range(rows):
            for j in range(cols):
                sum_idx = i + j
                if (sum_idx % 2 == 0):
                    # add at beginning if even index
                    solution[sum_idx].insert(0, image[i,j])
                else:
                    # add at the end if odd index
                    solution[sum_idx].append(image[i,j])

        # flatten the result
        result = np.array([num for sublist in solution for num in sublist])
        return result

    # Compute different texture features by blocks
    def get_texture_features_by_blocks(self, image, level, bins, mask_text):
        
        # Get blocks using multi-level resolution
        blocksArray = []
        for lvl in range(level+1):
            for b in self.create_blocks_array(image, (2**lvl)*(2**lvl)):
                blocksArray.append(b)

        if mask_text is not None:
            blocksMasks = []

            # We create a mask image blocking the bbox of the text
            # That image will be used to compute the histogram of the image without the text
            mask_text_image = np.ones(image.shape[:2], dtype=np.uint8)

            # Assign zero to the region corresponding to the text
            mask_text_image[mask_text[1]:mask_text[3], mask_text[0]:mask_text[2]] = 0

            # It is necessary to create the blocks of the mask image too
            for lvl in range(level+1):
                for b in self.create_blocks_array(mask_text_image, (2**lvl)*(2**lvl)):
                    blocksMasks.append(b)
        else:
            blocksMasks = [None]*len(blocksArray)

        histograms = []
        # For each block and its corresponding text mask block, compute texture features
        for block, mask_text_block in zip(blocksArray, blocksMasks):
            
            details = pywt.dwt2(block, 'bior1.3')
            approx, (h, v, d) = details # approx captures bigger details (more smooth than the original img), (h, v, d) capture de horizontal, vertical and diagonal "smaller" details
            
            if mask_text_block is not None:
                # Resize the text mask to the size of the wavelet's resulting images
                new_mask = cv2.resize(mask_text_block, approx.shape[::-1]).astype(bool) 

            # Create an histogram for each wavelet "image" and concatenate all of them
            final_hist = []
            for wt_img in [approx, h, v, d]:
                hist = np.histogram(wt_img if mask_text_block is None else wt_img[new_mask != 0], bins=bins, range=(0, 256))[0]
                final_hist.append(hist.flatten() / (wt_img.shape[0]*wt_img.shape[1]))
            histograms.append(np.concatenate(final_hist))
            
        # Concatenate all histograms into a single feature vector
        return np.concatenate(histograms)

    def get_features_by_keypoints(self, gray, mode, n_features, mask):
        
        if mode == 'sift':
            # SIFT Detector
            sift = cv2.SIFT_create(nfeatures=n_features)
            _, des = sift.detectAndCompute(gray, mask)

        elif mode == 'orb':
            # ORB Detector
            orb = cv2.ORB_create(nfeatures=n_features)
            _, des = orb.detectAndCompute(gray, mask)

        elif mode == 'akaze':
            thres = 0.005
            # AKAZE Detector
            akaze = cv2.AKAZE_create(threshold=thres)
            _, des = akaze.detectAndCompute(gray, mask)

            while des is None or des.shape[0] < n_features:
                if str(thres)[-1] == '1': 
                    thres = thres / 2
                else:
                    thres /= 5
                
                akaze = cv2.AKAZE_create(threshold=thres)
                _, des = akaze.detectAndCompute(gray, mask)

                if thres < 1e-6:
                    break

        return des

    def clean_noise(self, image, k):
        return cv2.medianBlur(image, k)
    
    # Load data, calculate background and text masks (if necessary) and compute features
    def load_data(self, level = 3, d_hist = 1, bins = 8, n_features=2048, keypoint_mode='sift', remove_background=False, remove_text=False, features_mode='color_features'):
        # Get a list of all image file names in the folder
        image_files = sorted(glob.glob(self.folder_path+'/*.jpg'))

        # Initialize an empty list to store the processed images and masks
        processed_features = dict()
        masks, masks_text = [], []

        # Iterate over each image file
        for f in tqdm.tqdm(image_files):
            
            # Get the image id from the file name. Depending on the OS, the path separator is different
            try:
                img_id = int(f.split('\\')[-1].split('.')[0].split('_')[-1])
            except:
                img_id = int(f.split('/')[-1].split('.')[0].split('_')[-1])

            # Load the image in BGR format
            image = cv2.imread(f)

            # Clean noise of the image using median filter
            image = self.clean_noise(image, k=3)

            # Convert the image to grayscale
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            features_rgb, features_wavelet, features_text, features_keypoints = [], [], [], []

            # Remove background (there can be 2 paintings in the same image)
            if remove_background:
                mask_image, coordinates = self.get_mask(image_gray)
                n_paintings = len(coordinates)
               
                # Remove the text from each image
                if remove_text:
                    coordinates = sorted(coordinates, key=lambda x: (x[0], x[1]))
                    masks_text_i = [[None]]*n_paintings
                    # coordinates contains the coordinates of the paintings mask in the image
                    # We iterate over each masked painting and get the text mask for each one
                    # We hace to recover the original coordinates for the text mask
                    for i, (x,y,w,h) in enumerate(coordinates):
                        x_text, y_text, x_text_max, y_text_max, text = self.get_mask_text(image_gray[y:y+h, x:x+w], name_bag=name_bag)
                        features_text.append(text)
                        masks_text_i[i] = [x+x_text, y+y_text, x+x_text_max, y+y_text_max]
                    masks_text.append(masks_text_i)
                else:
                    masks_text.extend([[None] for _ in range(n_paintings)])

            else:
                mask_image = None

                # if there is no background, the mask is the whole image
                coordinates = [[0,0,image.shape[1],image.shape[0]]]
                n_paintings = 1
                if remove_text:
                    x_text, y_text, x_text_max, y_text_max, text = self.get_mask_text(image_gray, name_bag=name_bag)
                    features_text.append(text)
                    masks_text.append([[x_text, y_text, x_text_max, y_text_max]])
                else:
                    masks_text.append([None])

            masks.append(mask_image)
        
            for i in range(n_paintings):
                x,y,w,h = coordinates[i]

                mask_painting = np.zeros(image.shape[:2], dtype=np.uint8)
                mask_painting[y:y+h, x:x+w] = 255

                relative_mask_text = None
                if masks_text[-1][i] is not None:
                    relative_mask_text = [masks_text[-1][i][0]-x, masks_text[-1][i][1]-y, masks_text[-1][i][2]-x, masks_text[-1][i][3]-y]
                    # Remove the text from the image 
                    mask_painting[masks_text[-1][i][1]:masks_text[-1][i][3], masks_text[-1][i][0]:masks_text[-1][i][2]] = 0

                if features_mode == 'color_features' or features_mode == 'combined':    
                    # Get the features of every masked image
                    f = self.get_color_features_by_blocks(image[y:y+h, x:x+w], level, d_hist, bins, mask_text=relative_mask_text)
                    features_rgb.append(f)

                if features_mode == 'texture_features' or features_mode == 'combined':
                    f_wavelet = self.get_texture_features_by_blocks(image_gray[y:y+h, x:x+w], level, bins, mask_text=relative_mask_text)
                    features_wavelet.append(f_wavelet)

                if features_mode == 'keypoint':
                    f_keypoints = self.get_features_by_keypoints(image_gray, keypoint_mode, n_features, mask_painting)
                    features_keypoints.append(f_keypoints)
            
            # Append the features to the dict
            if features_mode == 'texture_features':
                processed_features[img_id] = features_wavelet
            
            elif features_mode == 'text_features':
                processed_features[img_id] = features_text

            elif features_mode == 'color_features':
                processed_features[img_id] = features_rgb
            
            elif features_mode == 'combined':
                if n_paintings > 1:
                    assert len(features_rgb) == len(features_wavelet) == len(features_text), 'The number of features must be the same for each mode!'
                    processed_features[img_id] = [[features_rgb[i], features_wavelet[i], features_text[i]] for i in range(n_paintings)]
                else:
                    processed_features[img_id] = [[features_rgb, features_wavelet, features_text]]
            
            elif features_mode == 'keypoint':
                processed_features[img_id] = features_keypoints
            
        return processed_features, masks, masks_text
        

In [5]:
# Copied from https://github.com/benhamner/Metrics -> Metrics.Python.ml_metrics.average_precision.py
def apk(actual, predicted, k=10):
    """
    Computes the average precision at k.

    This function computes the average prescision at k between two lists of
    items.

    Parameters
    ----------
    actual : list
             A list of elements that are to be predicted (order doesn't matter)
    predicted : list
                A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The average precision at k over the input lists

    """
    if len(predicted)>k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i,p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i+1.0)

    if not actual:
        return 0.0

    return score / min(len(actual), k)

# Copied from https://github.com/benhamner/Metrics -> Metrics.Python.ml_metrics.average_precision.py
def mapk(actual, predicted, k=10):
    """
    Computes the mean average precision at k.

    This function computes the mean average prescision at k between two lists
    of lists of items.

    Parameters
    ----------
    actual : list
             A list of lists of elements that are to be predicted 
             (order doesn't matter in the lists)
    predicted : list
                A list of lists of predicted elements
                (order matters in the lists)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The mean average precision at k over the input lists

    """
    result = []
    for a,p in zip(actual, predicted):
        for a_i, p_i in zip(a,p):
            result.append(apk([a_i],p_i,k))
    return np.mean(result)

# compute the histogram intersection between two feature vectors
def histogram_intersection(hist1, hist2, normalized=False):
    if normalized:
        return np.sum(np.minimum(hist1, hist2)) / np.sum(np.maximum(hist1, hist2))
    else:
        return np.sum(np.minimum(hist1, hist2))

# compute the chi-squared distance between two feature vectors
def chi_squared_distance(hist1, hist2):
    return np.sum(np.square(hist1 - hist2) / (hist1 + hist2 + 1e-10))

# compute the euclidean distance between two feature vectors
def euclidean_distance(hist1, hist2):
    return np.sqrt(np.sum(np.square(hist1 - hist2)))

def custom_leveshtein_distance(s1, s2, normalized=False):
    if normalized:
        return (max(len(s1), len(s2)) - levenshtein_distance(s1, s2)) / max(len(s1), len(s2))
    else:
        return levenshtein_distance(s1, s2)

In [20]:
# Obtain the closest k DDBB image for query images determined by the similarity function. 
# The features have been previously calculated from the developed method.
# It returns a list of lists with the k closest images for each query image. 
def compare_images(query_features, bbdd_features, k, sim_func, param=None, filter=False, combine=False):
    
    result = []
    for id1,f1 in query_features.items():
        result_i = []
        for f_i in f1:
            distances = []
            for id2,f2 in bbdd_features.items():
                text_bd = utils.get_text_bbdd(f'data/BBDD/bbdd_{str(id2).zfill(5)}.txt')

                # Use the provided similarity function for comparing both paintings
                if not filter and not combine:
                    distances.append((id2, sim_func(f_i,f2)))
                    continue
                
                for f2_i in f2:
                    # First filter those paintings that have the same author. Then, compute similarity/distance for retrieval
                    if filter and text_bd == f_i[-1][0]:
                       distances.append((id2, param*sim_func(f_i[0][0], f2_i[0][0], normalized=True) + (1-param)*sim_func(f_i[1][0], f2_i[1][0], normalized=True)))
                    
                    # Use weighted sum between the color, texture and text scores as the similarity score for the retrieval
                    elif combine:
                        # If the similarity function is a distance 
                        if sim_func in [chi_squared_distance, levenshtein_distance]:
                            distances.append(
                                (id2, 
                                param[0]*sim_func(f_i[0][0], f2_i[0][0], normalized=True) + 
                                param[1]*sim_func(f_i[1][0], f2_i[1][0], normalized=True) +
                                param[2]*(1 - custom_leveshtein_distance(f_i[2][0], text_bd, normalized=True))
                                ))
                        else:
                            distances.append(
                                (id2, 
                                param[0]*sim_func(f_i[0][0], f2_i[0][0], normalized=True) + 
                                param[1]*sim_func(f_i[1][0], f2_i[1][0], normalized=True) +
                                param[2]*custom_leveshtein_distance(f_i[2][0], text_bd, normalized=True)
                                ))
                
            #get k smallest values from distances   
            if sim_func in [chi_squared_distance, levenshtein_distance]:
                k_smallest = sorted(distances, reverse=False, key=lambda x: x[1])[:k]
            else:
                k_smallest = sorted(distances, reverse=True, key=lambda x: x[1])[:k]
            result_i.append((id1, k_smallest))
            
        result.append(result_i)
    
    # Transform the result into the required format
    result2 = []
    for x in result:
        result2_i = []
        for y in x:
            result2_i.append([z[0] for z in y[1]])
        result2.append(result2_i)
    
    return result2

def compare_keypoints(features_query, features_db, k, sim_func, threshold_matches1=190):    
    bf = cv2.BFMatcher(sim_func, crossCheck=False)
    result = []
    for id_q, f_query in tqdm.tqdm(features_query.items(), desc='Computing matches'):
        result_i = []
        for f in f_query:
            number_matches = []
            for id_db, f_db in features_db.items():
                
                matches = bf.knnMatch(f, f_db[0], k=2)

                good_matches = []
                for m, n in matches:
                    if m.distance < 0.7 * n.distance:
                        good_matches.append(m)
                number_matches.append((id_db, len(good_matches)))

            number_matches = sorted(number_matches, reverse=True, key=lambda x: x[1])[:k]

            # If the number of matches is below a certain threshold, we consider that the query image is not in the database
            if number_matches[0][1] < threshold_matches1 and len(f_query) == 1:
                result_i.append((id_q, [[-1]]))
            else:
                result_i.append((id_q, number_matches))
                
        result.append(result_i)
    
    # Transform the result into the required format
    result2 = []
    for x in result:
        result2_i = []
        for y in x:
            result2_i.append([z[0] for z in y[1]])
        result2.append(result2_i)

    return result2

In [21]:
# Create DataLoader objects for both the database and the queries
data_loader = DataLoader('data/BBDD')
data_loader_qsd1_w4 = DataLoader('data/qsd1_w4')

# Load ground truth files for each query
with open('data/qsd1_w4/gt_corresps.pkl', 'rb') as f:
    gt_w4 = pickle.load(f)

## Validation results

### Task 1: 


In [22]:
k = 5
features_mode = 'keypoint'
keypoint_mode, sim_func = 'sift', cv2.NORM_L2 
n_features = 2048
# Compute features for the database and the query images
features, _, _ = data_loader.load_data(features_mode=features_mode, n_features=n_features, keypoint_mode=keypoint_mode, remove_background=False)
features_q1_w4, _, _ = data_loader_qsd1_w4.load_data(features_mode=features_mode, n_features=n_features, keypoint_mode=keypoint_mode, remove_background=True, remove_text=True,)

result = compare_keypoints(features_q1_w4, features, k, sim_func, threshold_matches1=190)
mapk_1 = mapk(gt_w4, result, k)

print(f'Sift, L2, {n_features} features = mAP@{k}: {mapk_1}')

100%|██████████| 286/286 [04:01<00:00,  1.19it/s]
 13%|█▎        | 4/30 [00:06<00:37,  1.46s/it]

Skipping! Two masks in the same painting!


 67%|██████▋   | 20/30 [00:30<00:14,  1.40s/it]

Skipping! Two masks in the same painting!


 90%|█████████ | 27/30 [00:40<00:03,  1.26s/it]

Skipping! Two masks in the same painting!


100%|██████████| 30/30 [00:46<00:00,  1.56s/it]
Computing matches: 100%|██████████| 30/30 [07:38<00:00, 15.28s/it]

Sift, L2, 2048 features = mAP@5: 0.8421052631578947





In [23]:
k = 5
features_mode = 'keypoint'
keypoint_mode, sim_func = 'orb', cv2.NORM_HAMMING2 
n_features = 2048
# Compute features for the database and the query images
features, _, _ = data_loader.load_data(features_mode=features_mode, n_features=n_features, keypoint_mode=keypoint_mode, remove_background=False)
features_q1_w4, _, _ = data_loader_qsd1_w4.load_data(features_mode=features_mode, n_features=n_features, keypoint_mode=keypoint_mode, remove_background=True, remove_text=True,)

result = compare_keypoints(features_q1_w4, features, k, sim_func, threshold_matches1=100)
mapk_1 = mapk(gt_w4, result, k)

print(f'Orb, Hamming, {n_features} features = mAP@{k}: {mapk_1}')

100%|██████████| 286/286 [00:59<00:00,  4.83it/s]
 13%|█▎        | 4/30 [00:04<00:21,  1.20it/s]

Skipping! Two masks in the same painting!


 67%|██████▋   | 20/30 [00:15<00:06,  1.65it/s]

Skipping! Two masks in the same painting!


 90%|█████████ | 27/30 [00:19<00:01,  1.61it/s]

Skipping! Two masks in the same painting!


100%|██████████| 30/30 [00:22<00:00,  1.32it/s]
Computing matches: 100%|██████████| 30/30 [06:55<00:00, 13.86s/it]

Orb, Hamming, 2048 features = mAP@5: 0.8421052631578947





In [24]:
k = 5
features_mode = 'keypoint'
keypoint_mode, sim_func = 'akaze', cv2.NORM_HAMMING2 
n_features = 512
# Compute features for the database and the query images
features, _, _ = data_loader.load_data(features_mode=features_mode, n_features=n_features, keypoint_mode=keypoint_mode, remove_background=False)
features_q1_w4, _, _ = data_loader_qsd1_w4.load_data(features_mode=features_mode, n_features=n_features, keypoint_mode=keypoint_mode, remove_background=True, remove_text=True,)

result = compare_keypoints(features_q1_w4, features, k, sim_func, threshold_matches1=40)
mapk_1 = mapk(gt_w4, result, k)

print(f'Akaze, Hamming, {n_features} features = mAP@{k}: {mapk_1}')

100%|██████████| 286/286 [10:45<00:00,  2.26s/it]
 13%|█▎        | 4/30 [00:10<00:59,  2.31s/it]

Skipping! Two masks in the same painting!


 67%|██████▋   | 20/30 [01:29<01:11,  7.15s/it]

Skipping! Two masks in the same painting!


 90%|█████████ | 27/30 [01:53<00:10,  3.39s/it]

Skipping! Two masks in the same painting!


100%|██████████| 30/30 [02:02<00:00,  4.09s/it]
Computing matches:   0%|          | 0/30 [00:00<?, ?it/s]