In [1]:
import numpy as np
import pandas as pd
import cv2
import os
import collections
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
from typing import List, Tuple, Dict, Any


# Get statistics of descriptor matching using Mutual Nearest Neighbour Matching (NN2W)

In [4]:
#####################
### SETTINGS
#####################
# Root of the local project checkout; the input folders below are derived from it.
root_dir = '/home/mizzade/Workspace/diplom/code'

# image_dir holds the image collections, data_dir the precomputed outputs
# (the descriptor files are read from below data_dir), and output_dir is
# where the resulting statistics CSV is written.
image_dir = os.path.join(root_dir, 'data')
data_dir = os.path.join(root_dir, 'outputs')
output_dir = '/home/mizzade/Workspace/diplom/outputs/descriptors'

# Collection to evaluate and its folders inside data_dir / image_dir.
collection_name = 'eisert'
collection_path_data = os.path.join(data_dir, collection_name)
collection_path_img = os.path.join(image_dir, collection_name)

# Every sub-directory of the collection's image folder is treated as one set.
set_names = sorted(
    entry for entry in os.listdir(collection_path_img)
    if os.path.isdir(os.path.join(collection_path_img, entry))
)
# Only files whose name contains this substring are loaded as descriptor files.
file_scheme = '_10000.csv'

# Number of leading descriptor rows that are used per evaluation run.
kpts_thresholds = [1000, 5000, 10000]
# NOTE(review): not referenced anywhere in the visible part of this notebook.
ratio_threshold = 0.7
# nn2w, maximal distance for two descriptors to be a match.
# Also used further down as the normaliser of the 'accuracy' statistic.
desc_distance_threshold = np.sqrt(2)
# Thresholds handed to the matcher; the evaluation code below reassigns this list.
desc_distance_thresholds = [desc_distance_threshold]

#####################
### FUNCTIONS
#####################
def normalize_descriptors(desc: np.ndarray) -> np.ndarray:
    """Scale every descriptor (row) to unit L2 length.

    Args:
        desc: 2-D array with one descriptor per row.

    Returns:
        Array of the same shape in which every row with a non-zero norm has
        L2 norm 1. Rows whose norm is zero are returned as all-zero rows
        instead of NaN rows.
    """
    desc = np.asarray(desc)
    # keepdims=True gives a column vector that broadcasts against the rows.
    norms = np.linalg.norm(desc, axis=1, ord=2, keepdims=True)
    # A zero-norm row would evaluate 0/0 and produce NaNs, which np.argmin
    # picks up during matching. Dividing such rows by 1 keeps them at zero.
    norms[norms == 0] = 1

    return desc / norms

def nn_match_two_way(desc1, desc2, nn_thresh):
    """
    Mutual (two-way) nearest-neighbour matching of two descriptor sets.

    A pair (i, j) is reported only if descriptor j is the nearest neighbour
    of descriptor i within desc2, descriptor i is the nearest neighbour of
    descriptor j within desc1, and their distance is strictly below nn_thresh.

    Inputs:
      desc1 - N1xM numpy matrix of N1 M-dimensional descriptors (image 1).
      desc2 - N2xM numpy matrix of N2 M-dimensional descriptors (image 2).
              The distance is derived from the dot product, which equals the
              L2 distance only if the rows of both matrices are unit vectors.
      nn_thresh - Descriptor distance below which a pair counts as a match.

    Returns:
      matches - Lx3 float numpy array of L matches, one match per row:
                [index into desc1, index into desc2, distance]

    Raises:
      AssertionError - if the descriptor dimensions differ.
      ValueError - if nn_thresh is negative (only checked for non-empty input).
    """
    # Both sets must live in the same descriptor space.
    assert desc1.shape[1] == desc2.shape[1]

    n_first, n_second = desc1.shape[0], desc2.shape[0]
    # An image without keypoints has no descriptors and hence no matches.
    if n_first == 0 or n_second == 0:
        return np.zeros((0, 3))
    if nn_thresh < 0.0:
        raise ValueError("'nn_thresh' should be non-negative")

    # For unit vectors ||a - b||^2 = 2 - 2 * <a, b>; the clip guards the
    # square root against dot products slightly outside [-1, 1].
    similarity = np.clip(np.dot(desc1, desc2.T), -1, 1)
    distances = np.sqrt(2 - 2 * similarity)

    rows = np.arange(n_first)
    # Nearest neighbour of each desc1 row in desc2, and the reverse direction.
    forward_nn = np.argmin(distances, axis=1)
    backward_nn = np.argmin(distances, axis=0)
    forward_dist = distances[rows, forward_nn]

    # Keep pairs that are close enough and agree in both directions.
    is_mutual = backward_nn[forward_nn] == rows
    selected = np.logical_and(forward_dist < nn_thresh, is_mutual)

    # Assemble the Lx3 result: [desc1 index, desc2 index, distance].
    matches = np.empty((int(selected.sum()), 3))
    matches[:, 0] = rows[selected]
    matches[:, 1] = forward_nn[selected]
    matches[:, 2] = forward_dist[selected]
    return matches

def save_stats(
    path_output:str,
    collection_name:str,
    df:pd.DataFrame,
    fast_eval:bool=False) -> None:
    """Write the statistics table as a UTF-8 CSV file without the index.

    The file is stored in path_output (created if it does not exist) as
    'descriptor_matching_<collection_name>_nn2w.csv', or with the suffix
    '_nn2w_fast_eval.csv' when fast_eval is set.

    Args:
        path_output: Directory that receives the CSV file.
        collection_name: Name of the collection, inserted into the file name.
        df: Table to store.
        fast_eval: Selects the '_fast_eval' file name variant.
    """
    if fast_eval:
        name_template = 'descriptor_matching_{}_nn2w_fast_eval.csv'
    else:
        name_template = 'descriptor_matching_{}_nn2w.csv'
    target_file = os.path.join(path_output, name_template.format(collection_name))

    # Create the output folder (including parents) on first use.
    if not os.path.exists(path_output):
        os.makedirs(path_output, exist_ok=True)

    df.to_csv(target_file, index=False, encoding='utf-8')
    
#####################
### MAIN
#####################
# Evaluates two-way nearest-neighbour matching (nn2w) for every available
# descriptor/detector combination and stores one statistics row per run.
#
# Columns of the statistics table:
#   collection_name:str      Name of the collection
#   set_name:str             Name of the set in collection
#   kpts_threshold:float     Number of used features
#   descriptor_name:str      Name of descriptor
#   detector_name:str        Name of detector
#   matching_method:str      Name of matching method
#   desc_distance_threshold  Maximal distance of two descriptors to match for nn2w-method
#   max_num_matches:int      Maximal number of possible matches
#   num_matches:int          Actual number of matches
#   matchability:float       Ratio of num_matches and max_num_matches
#   accuracy:float           Mean of 1 - (score of match / desc_distance_threshold)
column_names = ['collection_name', 'set_name', 'kpts_threshold',
                'descriptor_name', 'detector_name', 'matching_method',
                'desc_distance_threshold', 'max_num_matches',
                'num_matches', 'matchability', 'accuracy']

# Target: Iterate over all sets and evaluate all descriptor/detector combinations available.
# With fast_eval only the first set, the first keypoint threshold and the first
# detector per descriptor are evaluated. Slicing with [:1] (instead of indexing
# with [0]) keeps this working when one of the lists is empty.
fast_eval = True
set_names = set_names[:1] if fast_eval else set_names
kpts_thresholds = kpts_thresholds[:1] if fast_eval else kpts_thresholds

# Alternative for a threshold sweep: np.linspace(0.1, 1, 10)
desc_distance_thresholds = [0.7] # determined empirically

# Rows are collected as plain dicts and turned into a DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
records = []

for set_name in tqdm(set_names):
    # 1. Open folder of set
    set_path_2_desc = os.path.join(collection_path_data, set_name, 'descriptors')
    descriptor_names = sorted(
        x for x in os.listdir(set_path_2_desc)
        if os.path.isdir(os.path.join(set_path_2_desc, x)))

    for descriptor_name in descriptor_names:
        set_path_2_dets = os.path.join(set_path_2_desc, descriptor_name)

        detector_names = sorted(
            x for x in os.listdir(set_path_2_dets)
            if os.path.isdir(os.path.join(set_path_2_dets, x)))
        detector_names = detector_names[:1] if fast_eval else detector_names

        for detector_name in detector_names:
            set_path_2_files = os.path.join(set_path_2_dets, detector_name)
            file_names = sorted(x for x in os.listdir(set_path_2_files) if file_scheme in x)

            # Matching needs the descriptor files of two images. Skip the
            # combination instead of failing with an IndexError, which would
            # discard all statistics collected so far.
            if len(file_names) < 2:
                print('Skip {} with {}: found {} descriptor file(s) in {}, need 2'.format(
                    descriptor_name, detector_name, len(file_names), set_path_2_files))
                continue

            # Load descriptors of the first two files (in sorted order)
            desc1 = pd.read_csv(os.path.join(set_path_2_files, file_names[0]),
                                sep=',', comment='#', header=None).values
            desc2 = pd.read_csv(os.path.join(set_path_2_files, file_names[1]),
                                sep=',', comment='#', header=None).values

            # Normalize descriptors; the matcher computes distances from dot
            # products and therefore expects unit vectors.
            desc1 = normalize_descriptors(desc1)
            desc2 = normalize_descriptors(desc2)

            for kpts_thresh in kpts_thresholds:
                # Get the first N descriptors (from the best first N detectors)
                d1 = desc1[:kpts_thresh]
                d2 = desc2[:kpts_thresh]

                # Each descriptor can take part in at most one mutual match.
                max_num_matches = min(len(d1), len(d2))

                for desc_dist_thresh in desc_distance_thresholds:
                    res_nn2w = nn_match_two_way(d1, d2, desc_dist_thresh)

                    num_matches = len(res_nn2w)
                    matchability = 0 if max_num_matches == 0 else num_matches / max_num_matches

                    # NOTE(review): the scores are normalised by the global
                    # desc_distance_threshold from the settings, not by the
                    # desc_dist_thresh used for matching - confirm this is intended.
                    accuracy = 0 if num_matches == 0 else 1.0 - np.mean(res_nn2w[:, 2] / desc_distance_threshold)

                    records.append({
                        'collection_name': collection_name,
                        'set_name': set_name,
                        'kpts_threshold': kpts_thresh,
                        'descriptor_name': descriptor_name,
                        'detector_name': detector_name,
                        'matching_method': 'nn2w',
                        'desc_distance_threshold': desc_dist_thresh,
                        'max_num_matches': max_num_matches,
                        'num_matches': num_matches,
                        'matchability': matchability,
                        'accuracy': accuracy
                    })

# Passing columns explicitly fixes the column order and keeps the header even
# when no combination could be evaluated.
df = pd.DataFrame(records, columns=column_names)

save_stats(
        output_dir,
        collection_name,
        df,
        fast_eval=fast_eval)

  0%|          | 0/1 [00:00<?, ?it/s]

Evaluate doap with lift
Evaluate lift with lift
Evaluate sift with lift
Evaluate superpoint with superpoint
Evaluate tfeat with lift


100%|██████████| 1/1 [00:08<00:00,  8.86s/it]


In [None]:
# Preview the first rows of the collected matching statistics.
df.head()