In [1]:
import numpy as np
import pandas as pd
import os
from typing import List, Dict, Tuple
from tqdm import tqdm


### Detektor Evaluation
- Finde Repeatability und Accuracy für die verschiedenen Models

In [11]:


def get_set_names(data_dir:str, sort_output:bool=True) -> List[str]:
    set_names = [x for x in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, x))]

    if sort_output:
        set_names = sorted(set_names)
    
    return set_names

def get_file_names_in_set(path_set:str, file_sheme:str, sort_output:bool=True) -> List[str]:
    file_names = [x for x in os.listdir(path_set) if os.path.isfile(os.path.join(path_set, x))]
 
    # get the correct files with fitting file scheme.
    file_names = [x for x in file_names if file_scheme in x]

    if sort_output:
        file_names = sorted(file_names)

    return file_names

def evaluate_detector(
    detector_name:str,
    collection_name:str,
    path_collection:str,
    set_names:List[str],
    file_scheme:str,
    thresholds:List[int],
    column_names:List[str],
    threshold_distance:int=None) -> pd.DataFrame:

    # Create output dataframe.
    df = pd.DataFrame(columns=column_names)

    for set_name in set_names:
        path_set = os.path.join(path_collection, set_name, 'keypoints', detector_name)
        file_names = get_file_names_in_set(path_set, file_scheme)
        num_files = len(file_names)
        for i in tqdm(range(num_files)):
            path_f1 = os.path.join(path_set, file_names[i])
            f1 = pd.read_csv(path_f1, sep=',', header=None, usecols=[0, 1], comment='#').values.astype('float32')
            num_kpts_i = f1.shape[0]

            for j in range(i+1,num_files,1):
                path_f2 = os.path.join(path_set, file_names[j])
                f2 = pd.read_csv(path_f2, sep=',', header=None, usecols=[0, 1], comment='#').values.astype('float32')
                num_kpts_j = f2.shape[0]


                for t in thresholds:
                    _f1 = f1[:t]
                    _f2 = f2[:t]

                    # Number of maximal possible matches for first t keypoints.
                    max_num_matches = np.min([len(_f1), len(_f2)])

                    # Each row k contains the differences f2_k - f1, for all
                    # f1_l in f1.
                    # [[f2_0 - f1_0, f2_0 - f1_1, ..., f2_0 - f1_l],
                    #  [...]
                    #  [f2_m - f1_0, f2_m - f1_1, ..., f2_m - f1_l]]
                    d = np.linalg.norm(_f2 - _f1[:, np.newaxis], axis=2)

                    # Get the index of the lowest squared difference for each row
                    sorted_idx = d.argsort(axis=1)
                    nn = sorted_idx[:, 0]
        
                    # Get the corresponding d value
                    d = d[:, nn][:, 0]

                    # Find duplicates
                    _, u_idx = np.unique(nn, return_index=True)

                    # Remove duplicates
                    d = d[u_idx]

                    if threshold_distance is not None:
                        # Find entries that are further away than <max_distance> and
                        # Remove those entries that violate distance threshold
                        idx_dist_thresh = (d <= threshold_distance)
                        d = d[idx_dist_thresh]


                    # mean distance of all hit
                    min_dist = np.min(d)
                    max_dist = np.max(d)
                    mean_dist = np.mean(d)
                    std_dist = np.std(d)

                    # repeatability: ratio of matches and number of maximal possible
                    # matches:
                    num_matches = len(d)
                    repeatability = 0 if max_num_matches == 0 else num_matches / max_num_matches

                    # Append new row to dataframe.
                    df = df.append({
                        'collection_name': collection_name,
                        'set_name': set_name,
                        'detector_name': detector_name,
                        'image_i': file_names[i],
                        'image_j': file_names[j],
                        'num_kpts_i': num_kpts_i,
                        'num_kpts_j': num_kpts_j,
                        'threshold': t,
                        'max_num_matches': max_num_matches,
                        'num_matches': num_matches,
                        'mean_dist': mean_dist,
                        'std_dist': std_dist,
                        'min_dist': min_dist,
                        'max_dist': max_dist,
                        'min_dist': min_dist,
                        'max_dist': max_dist,
                        'repeatability': repeatability
                        }, ignore_index=True)
    return df

def save_output_for_detector(
    path_output:str,
    detector_name:str,
    collection_name:str,
    df:pd.DataFrame) -> None:

    fout_name = '{}_{}.csv'.format(detector_name, collection_name)
    if not os.path.exists(path_output):
        os.makedirs(path_output, exist_ok=True)

    df.to_csv(os.path.join(path_output, fout_name), 
              index=False, 
              encoding='utf-8')


#################################
### PARAMS
#################################

root_dir = '/home/mizzade/Workspace/diplom/code' # Adjust this accordingly
data_dir = 'outputs'
output_dir = 'output_evaluation'
path_output = os.path.join(root_dir, output_dir, 'detectors')

collection_name = 'webcam'
path_collection = os.path.join(root_dir, data_dir, collection_name)

file_scheme = '_10000.csv'
detector_names = ['sift', 'lift', 'tcovdet' , 'tilde', 'superpoint']
detector_name = 'sift'

set_names = get_set_names(path_collection, sort_output=True)
thresholds = [1000, 5000, 10000]
threshold_distance = None # pixels. Set to None to skip

column_names = ['collection_name', 'set_name', 'detector_name', 
                'image_i', 'image_j', 'num_kpts_i', 'num_kpts_j', 
                'threshold', 'max_num_matches', 'num_matches', 
                'mean_dist', 'std_dist', 'min_dist', 'max_dist', 
                'repeatability']

# Test
# detector_name = 'lift'
# collection_name = 'example'
# set_names = ['v_xxl']
# path_collection = os.path.join(root_dir, data_dir, collection_name)


#################################
### MAIN
#################################


for detector_name in ['lift']:
    print('Start evaluation of detector {}.'.format(detector_name))
    df = evaluate_detector(
     detector_name,
     collection_name,
     path_collection,
     set_names,
     file_scheme,
     thresholds,
     column_names,
     threshold_distance=None)

    save_output_for_detector(
        path_output, 
        detector_name, 
        collection_name, 
        df)

    print('Evaluation of detector {} complete.'.format(detector_name))

  0%|          | 0/40 [00:00<?, ?it/s]

Start evaluation of detector lift.


100%|██████████| 40/40 [05:44<00:00,  8.62s/it]
100%|██████████| 50/50 [05:27<00:00,  6.55s/it]
100%|██████████| 40/40 [16:20<00:00, 24.50s/it]
100%|██████████| 40/40 [03:33<00:00,  5.34s/it]
100%|██████████| 40/40 [05:34<00:00,  8.36s/it]
100%|██████████| 40/40 [06:01<00:00,  9.04s/it]


Evaluation of detector lift complete.
