In [None]:
import numpy as np
import pandas as pd
import os
from typing import List, Dict, Tuple
from tqdm import tqdm

# Evaluate Detector Models

In [None]:
def nn_match_two_way(kpts1, kpts2, dist):
    """
    Performs two-way (mutual) nearest neighbor matching of two sets of
    keypoints, such that the match from keypoints A->B must equal the match
    from B->A.

    Inputs:
      kpts1 - NxM numpy matrix of N M-dimensional keypoints.
      kpts2 - KxM numpy matrix of K M-dimensional keypoints.
      dist - Distance threshold, in the same units as the keypoint
             coordinates. Two keypoints are only considered a match if their
             L2 distance is strictly below this value. Must be > 0.

    Returns:
      matches - Lx4 numpy array of L matches, where L <= min(N, K) and each
                row is a match of two keypoints, d_i in image 1 and d_j' in
                image 2:
                [d_i index, d_j' index, l2 distance, accuracy]
                with accuracy = 1 - (l2 distance / dist), i.e. a value in
                (0, 1] that is 1 for a perfect overlap.

    Raises:
      ValueError - If the keypoint dimensions M differ, or if both sets are
                   non-empty and dist is not greater than zero.
    """
    # Check that keypoint dimensions match. A real exception is used instead
    # of an assert so the check survives `python -O`; otherwise mismatching
    # shapes could silently broadcast below.
    if kpts1.shape[1] != kpts2.shape[1]:
        raise ValueError(
            'Keypoint dimensions differ: {} vs {}'.format(
                kpts1.shape[1], kpts2.shape[1]))

    # Return zero matches if one image does not have any keypoints.
    if kpts1.shape[0] == 0 or kpts2.shape[0] == 0:
        return np.zeros((0, 4))
    if dist <= 0.0:
        raise ValueError('\'dist\' should be greater than zero')

    # Pairwise L2 distances via broadcasting: dmat[i, j] is the distance
    # between kpts1[i] and kpts2[j]. Note that this materializes an
    # intermediate array of shape (N, K, M).
    dmat = np.linalg.norm(kpts2 - kpts1[:, np.newaxis], axis=2)

    # For every keypoint in set 1: index of and distance to its nearest
    # neighbor in set 2.
    idx = np.argmin(dmat, axis=1)
    scores = dmat[np.arange(dmat.shape[0]), idx]

    # Threshold the NN matches.
    keep = scores < dist

    # Check if nearest neighbor goes both directions and keep those:
    # idx2[j] is the nearest neighbor in set 1 of keypoint j in set 2.
    idx2 = np.argmin(dmat, axis=0)
    keep_bi = np.arange(len(idx)) == idx2[idx]
    keep = np.logical_and(keep, keep_bi)
    idx = idx[keep]
    scores = scores[keep]

    # Get the surviving point indices.
    m_idx1 = np.arange(kpts1.shape[0])[keep]
    m_idx2 = idx

    # Populate the final Lx4 match data structure.
    matches = np.zeros((int(keep.sum()), 4))
    matches[:, 0] = m_idx1
    matches[:, 1] = m_idx2
    matches[:, 2] = scores
    matches[:, 3] = 1.0 - (scores / dist)
    return matches

def stats_for_imagepair(kpts1:np.ndarray, kpts2:np.ndarray, t:float, kp_thresh:int) -> Tuple[int, int, float, float]:
    """
    Computes matching statistics for the keypoints of one image pair.

    Inputs:
      kpts1 - NxM numpy matrix with the keypoints of the first image.
      kpts2 - KxM numpy matrix with the keypoints of the second image.
      t - Distance threshold handed to nn_match_two_way.
      kp_thresh - Unused by this function; kept so existing callers that
                  pass it keep working.

    Returns:
      max_num_matches - min(N, K), the largest possible number of matches.
      num_matches - Number of mutual nearest neighbor matches found.
      repeatability - num_matches / max_num_matches, or 0 if
                      max_num_matches is 0.
      accuracy - Mean of the accuracy column of all matches, or NaN if
                 there are no matches.
    """
    matches = nn_match_two_way(kpts1, kpts2, t)

    max_num_matches = np.min([kpts1.shape[0], kpts2.shape[0]])
    num_matches = len(matches)

    repeatability = 0 if max_num_matches == 0 else num_matches / max_num_matches

    # np.mean of an empty slice yields NaN but also emits RuntimeWarnings.
    # Return the same NaN explicitly so pairs without matches stay quiet.
    accuracy = np.mean(matches[:, 3]) if num_matches > 0 else np.nan

    return max_num_matches, num_matches, repeatability, accuracy

def get_set_names(data_dir:str, sort_output:bool=True) -> List[str]:
    """
    Lists the names of all sub-directories directly inside `data_dir`.

    Inputs:
      data_dir - Path of the directory to scan.
      sort_output - If True, the names are returned in sorted order,
                    otherwise in the order reported by os.listdir.

    Returns:
      List of directory names (not full paths).
    """
    def _is_subdir(name):
        return os.path.isdir(os.path.join(data_dir, name))

    subdirs = list(filter(_is_subdir, os.listdir(data_dir)))

    if not sort_output:
        return subdirs
    return sorted(subdirs)

def get_file_names_in_set(path_set:str, file_scheme:str, sort_output:bool=True) -> List[str]:
    """
    Lists the regular files directly inside `path_set` whose name contains
    `file_scheme`.

    Inputs:
      path_set - Path of the directory to scan.
      file_scheme - Substring a file name must contain to be selected.
      sort_output - If True, the names are returned in sorted order,
                    otherwise in the order reported by os.listdir.

    Returns:
      List of matching file names (not full paths).
    """
    # Only regular files whose name fits the file scheme are selected.
    matching = [
        name for name in os.listdir(path_set)
        if os.path.isfile(os.path.join(path_set, name)) and file_scheme in name
    ]

    return sorted(matching) if sort_output else matching

def _load_keypoints(path_file:str) -> np.ndarray:
    """
    Loads the first two columns of a keypoint .csv file as float32 array.

    Lines starting with '#' are treated as comments. A file without any
    data rows is returned as an empty (0, 2) array, so that images without
    keypoints are evaluated as "no matches" instead of aborting the run.
    """
    try:
        return pd.read_csv(path_file,
                           sep=',',
                           header=None,
                           usecols=[0, 1],
                           comment='#').values.astype('float32')
    except pd.errors.EmptyDataError:
        return np.zeros((0, 2), dtype='float32')


def evaluate_detector(
    df:pd.DataFrame,
    detector_name:str,
    collection_name:str,
    path_collection:str,
    set_names:List[str],
    file_scheme:str,
    keypoint_thresholds:List[int],
    dist_error_thresholds:List[float],
    fast_eval:bool=False) -> pd.DataFrame:
    """
    Evaluates one detector on all image pairs of all sets of a collection.

    For every set, the keypoint files are read from
    <path_collection>/<set_name>/keypoints/<detector_name>. Every unordered
    pair of files (i < j) is evaluated for each combination of keypoint
    threshold and distance threshold, and one result row is produced per
    combination.

    Inputs:
      df - Dataframe the result rows are appended to. It is not modified.
      detector_name - Name of the detector (also the keypoint sub-folder).
      collection_name - Name of the collection, only written to the rows.
      path_collection - Path of the collection directory.
      set_names - Names of the sets inside the collection to evaluate.
      file_scheme - Substring a keypoint file name must contain.
      keypoint_thresholds - For each value k only the first k keypoints of
                            each file are used.
      dist_error_thresholds - Distance thresholds for a match.
      fast_eval - If True, only the first (at most) 5 files per set are
                  used.

    Returns:
      A dataframe with the rows of `df` followed by the new result rows.
      If no rows were produced, `df` itself is returned.
    """
    # Rows are collected in a list and concatenated once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []

    for set_name in set_names:
        path_set = os.path.join(path_collection, set_name, 'keypoints', detector_name)
        file_names = get_file_names_in_set(path_set, file_scheme)

        # Never use more files than the set actually contains.
        num_files = min(5, len(file_names)) if fast_eval else len(file_names)

        # Read every file once instead of re-reading the second image of
        # each pair from disk.
        keypoints = [
            _load_keypoints(os.path.join(path_set, file_name))
            for file_name in file_names[:num_files]
        ]

        for i in tqdm(range(num_files)):
            f1 = keypoints[i]
            num_kpts_i = f1.shape[0]

            for j in range(i + 1, num_files):
                f2 = keypoints[j]
                num_kpts_j = f2.shape[0]

                for kp_thresh in keypoint_thresholds:
                    # Only use the first kp_thresh keypoints of each image.
                    _f1 = f1[:kp_thresh]
                    _f2 = f2[:kp_thresh]

                    for dist_threshold in dist_error_thresholds:
                        max_num_matches, num_matches, repeatability, accuracy = \
                            stats_for_imagepair(_f1, _f2, dist_threshold, kp_thresh)

                        # lambda term. Note: lambda is a reserved word in python.
                        lambdaa = max_num_matches / kp_thresh

                        rows.append({
                            'collection_name': collection_name,
                            'set_name': set_name,
                            'detector_name': detector_name,
                            'image_i': file_names[i],
                            'image_j': file_names[j],
                            'num_kpts_i': num_kpts_i,
                            'num_kpts_j': num_kpts_j,
                            'keypoint_threshold': kp_thresh,
                            'dist_threshold': dist_threshold,
                            'max_num_matches': max_num_matches,
                            'num_matches': num_matches,
                            'repeatability': repeatability,
                            'accuracy': accuracy,
                            'lambda': lambdaa
                            })

    if not rows:
        return df

    return pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

def save(
    path_output:str,
    collection_name:str,
    df:pd.DataFrame,
    fast_eval:bool=False) -> None:
    """
    Writes the result dataframe as .csv file into `path_output`.

    Inputs:
      path_output - Target directory. It is created if it does not exist.
      collection_name - Name of the collection, used in the file name.
      df - Dataframe to write (without its index, utf-8 encoded).
      fast_eval - If True, the file name gets the suffix '_fast'.
    """
    name_template = 'repeatability_{}_fast.csv' if fast_eval else 'repeatability_{}.csv'
    path_file = os.path.join(path_output, name_template.format(collection_name))

    if not os.path.exists(path_output):
        os.makedirs(path_output, exist_ok=True)

    df.to_csv(path_file, index=False, encoding='utf-8')


#################################
### DATAFRAME
#################################
# 'collection_name':str           Name of the collection.
# 'set_name':str                  Name of the set.         
# 'detector_name':str             Name of the detector.
# 'image_i':str                   Name of the first (left) image.
# 'image_j':str                   Name of the second (right) image.
# 'num_kpts_i':int                Number of keypoints found in first 
#                                 image.
# 'num_kpts_j':int                Number of keypoints found in the 
#                                 second image.
# 'keypoint_threshold':int        Number of keypoints to use. 
#                                 [1000, 5000, 10000].
# 'dist_threshold':float          Maximal distance in pixels between two 
#                                 keypoints to count as a match.
#                                 [3]
# 'max_num_matches':int           Maximal number of possible matches  
#                                 under current conditions.
# 'num_matches':int               Actual number of matches.
# 'repeatability':float           Ratio of number of matched keypoints 
#                                 to max_num_matches.
# 'accuracy':float                Mean accuracy for all matches.
# 'lambda':float                  Ratio of max_num_matches to keypoint_threshold.
#                                 [0, 1]
# Column order of the result dataframe. The names are the same as the keys
# of the row dictionaries built in evaluate_detector.
column_names = ['collection_name','set_name', 'detector_name', 
                'image_i', 'image_j', 'num_kpts_i', 'num_kpts_j', 
                'keypoint_threshold', 'dist_threshold', 
                'max_num_matches', 'num_matches', 
                'repeatability', 'accuracy', 'lambda']

#################################
### PARAMS
#################################
# Adjust this accordingly

# root_dir = '/Users/mirkolauff/Workbench/diplom/notebooks'
# NOTE: machine-specific absolute path; has to be changed on other systems.
data_dir = '/home/mizzade/Workspace/diplom/outputs'
# Directory the result .csv file is written to by save().
output_dir = os.path.join(data_dir, 'eval_detectors')

collection_name = 'webcam'
path_collection = os.path.join(data_dir, collection_name)

# Only files whose name contains this substring are evaluated.
file_scheme = '_10000.csv'
detector_names = ['sift', 'lift', 'tcovdet' , 'tilde', 'superpoint']

# Every sub-directory of the collection is treated as one set.
set_names = get_set_names(path_collection, sort_output=True)
# For each value k, only the first k keypoints of every file are used.
keypoint_thresholds = [1000, 5000, 10000]
dist_error_thresholds = [3] # in pixels

#################################
### MAIN
#################################

# If True: evaluate_detector only uses the first 5 files of every set, the
# last detector is skipped (see below) and save() appends '_fast' to the
# name of the output file.
fast_eval = False

# Create output dataframe.
df = pd.DataFrame(columns=column_names)

# Skip superpoint when fast eval. This keeps the first four entries only and
# therefore relies on 'superpoint' being the last element of detector_names.
detector_names = detector_names[:4] if fast_eval else detector_names

# Evaluate the detectors one after another; the rows of all detectors are
# accumulated in the same dataframe.
for detector_name in detector_names:
    print('Start evaluation of detector {}.'.format(detector_name))
    df = evaluate_detector(
     df,
     detector_name,
     collection_name,
     path_collection,
     set_names,
     file_scheme,
     keypoint_thresholds,
     dist_error_thresholds,
     fast_eval=fast_eval)
    
    print('Evaluation of detector {} complete.'.format(detector_name))
    
# Write the results of all detectors into a single .csv file.
save(
    output_dir, 
    collection_name, 
    df,
    fast_eval=fast_eval)
