In [None]:
# import all we need to compute the mAP and produce some plots
# implementing mAP as documented in The PASCAL Visual Object Classes (VOC) Challenge

import numpy as np
import pickle
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Collect every computed result file in the results folder
# (.pkl format from "darknet_evaluation_post_inference.py").

input_path = "D:/WOLO/HPC_trained_models/WOLO_DETECT/RESULTS"

# Full paths of all pickles directly inside input_path, in sorted order.
input_files = sorted(
    os.path.join(input_path, entry)
    for entry in os.listdir(input_path)
    if entry.endswith(".pkl")
)

print("Found {} evaluation files.".format(len(input_files)))
for position, result_file in enumerate(input_files):
    print(position, result_file)

# Index into input_files of the result file that is loaded for inspection.
use_state = 0

In [None]:
# NOTE: unpickling can execute arbitrary code; only load result files you produced yourself.
with open(input_files[use_state], 'rb') as pkl_handle:
    data = pickle.load(pkl_handle)

# Layout of the loaded results, as described by the original note:
#
# - data[0][0]
#   training_data_&_training_state
#
# - - data[0][1][0]
#     threshold (for first dataset)
#
# - - - data[0][1][1][0 1  2    3   4   5                  6]
#       dataset_name,   GT, TP, FN, FP, Average Precision, Recall

# examples:
# The training state is the text after the last "_" of each entry's name,
# once everything from the first "." onwards has been cut off.
all_training_states = sorted(
    entry[0].split(".")[0].split("_")[-1] for entry in data
)
print(all_training_states)

The goal is to retrieve the mean Average Precision (mAP) over 13 confidence thresholds ranging from 0.2 to 0.8. A detection counts as correct if its centre lies within a Euclidean distance of 10% of the image width from a ground-truth detection; multiple detections of the same object are disregarded, as they would be suppressed by non-maximum suppression at run-time. We use this metric, adjusted from the original, because the actual intersection over union is secondary to the agreement of centres: different methods have been used to assign bounding boxes. Synthetically generated bounding boxes are defined as the smallest rectangle including all projected 2D keypoints in the rendered images, whereas hand-annotated bounding boxes are fixed, square detections, as a custom-written centre tracking tool (BlenderMotionExport) was used to semi-automatically produce these datasets.

As an example, we will plot the precision over recall for these 13 thresholds for the first snapshot of the imported data, and compute the mAP as in the official [scikit-learn implementation](https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/metrics/_ranking.py#L111).

(m)AP summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight:

$$\mathrm{AP} = \sum_n (R_n - R_{n-1}) P_n$$
    
where $P_n$ and $R_n$ are the precision and recall at the $n$th threshold. Thresholds are visited in decreasing order. The recall before the first threshold, $R_0$, is set to 0 because no detections are returned when the threshold is maximal; with no positives returned, the corresponding precision $P_0$ is by definition equal to 1.

*This implementation is not interpolated and is different from computing the area under the precision-recall curve with the trapezoidal rule, which uses linear interpolation and can be too optimistic.*

**Note:** this implementation is restricted to the binary classification task or multilabel classification task.

In [None]:
def clean_dataset_name(file_name, verbose=False):
    """
    Return the name of the dataset without the split extension.

    The directory part of file_name is dropped and everything from the
    first "_RESULTS" onwards is cut off. With verbose=True the resulting
    name is printed as well.
    """
    stem, _, _ = os.path.basename(file_name).partition("_RESULTS")
    if verbose:
        print(stem)
    return stem

print("Found {} evaluation files.".format(len(input_files)))

# Every per-threshold entry holds the threshold at index 0 followed by one
# record per evaluation dataset at indices 1..NUM_EVAL_DATASETS.
NUM_EVAL_DATASETS = 14


def _average_precision(threshold_entries, dataset_idx):
    """
    Non-interpolated average precision of one evaluation dataset.

    threshold_entries: the per-threshold results of one trained model
        (model[1:]). For every entry, [dataset_idx][5] is read as the
        precision (labelled "Average Precision" in the results layout)
        and [dataset_idx][6] as the recall.
    dataset_idx: position of the evaluation dataset inside each entry.

    Returns sum_n (R_n - R_{n-1}) * P_n with the initial recall set to 0.
    Entries are visited from last to first; presumably they are stored with
    ascending thresholds so that recall grows along the way - TODO confirm.
    """
    ap = 0
    prev_recall = 0
    for entry in reversed(threshold_entries):
        precision = entry[dataset_idx][5]
        recall = entry[dataset_idx][6]
        ap += (recall - prev_recall) * precision
        prev_recall = recall
    return ap


# Row labels of the final table: one cleaned dataset name per result file.
training_datasets = [clean_dataset_name(f) for f in input_files]

# all_nets_all_APs[file][dataset] -> sorted list of
# [evaluation dataset name, parent folder of the model path, AP].
all_nets_all_APs = []

# The loop uses its own names so that `use_state`, `data` and
# `all_training_states` from the inspection cell are not overwritten.
for results_file in input_files:
    # NOTE: unpickling can execute arbitrary code; only load trusted result files.
    with open(results_file, 'rb') as pkl_handle:
        file_results = pickle.load(pkl_handle)

    final_AP = []
    for dataset_idx in range(1, NUM_EVAL_DATASETS + 1):
        all_AP = [
            [
                model[1][dataset_idx][0],
                # NOTE(review): assumes Windows-style "\\" separators in the stored model path.
                model[0].split("\\")[-2],
                _average_precision(model[1:], dataset_idx),
            ]
            for model in file_results
        ]
        all_AP.sort()
        final_AP.append(all_AP)

    all_nets_all_APs.append(final_AP)

output_AP = []
# Bound before the loop so the cell still runs when no result files were found.
categories = []

for net_results in all_nets_all_APs:
    # Only the first row of each sorted per-dataset list is reported.
    first_rows = [per_dataset[0] for per_dataset in net_results]
    ap_values = [row[2] for row in first_rows]

    print("\n\nAP scores for:", first_rows[0][1])

    model_mAP = np.mean(ap_values)
    model_stdAP = np.std(ap_values)
    # Column labels: dataset names with their last six characters removed
    # (presumably a fixed-length suffix - TODO confirm).
    categories = [row[0][:-6] for row in first_rows]

    print("mAP:", model_mAP, "+/-" ,model_stdAP,"\n")

    dataset_APs = []
    for row in first_rows:
        print("dataset", row[0][:18], "  AP", row[2])
        dataset_APs.append(round(row[2], 4))

    dataset_APs.extend([round(model_mAP, 4), round(model_stdAP, 4)])
    output_AP.append(dataset_APs)

categories.append("mAP")
# Label spelling kept unchanged so existing exports and their readers keep matching.
categories.append("sdtAP")

In [None]:
# One row per result file, one column per entry of `categories`.
final_dataframe = pd.DataFrame(
    data=output_AP,
    index=training_datasets,
    columns=categories,
)
final_dataframe

In [None]:
custom_name = "results_mAP"

# Both exports are written next to the result pickles and share one base path.
output_base = os.path.join(input_path, custom_name)

final_dataframe.to_csv(output_base + ".csv")

# `key` is passed by keyword: pandas deprecated positional arguments after the
# path in DataFrame.to_hdf (2.1) and makes them keyword-only in 3.0.
final_dataframe.to_hdf(
    output_base + ".h5",
    key="df_with_missing",
    format="table",
    mode="w")
    