In [1]:
# import all we need to compute the mAP and produce some plots
# implementing mAP as documented in The PASCAL Visual Object Classes (VOC) Challenge

import numpy as np
import pickle
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Collect every pickled evaluation result
# (.pkl files written by "darknet_evaluation_post_inference.py").

input_path = "D:/WOLO/HPC_trained_models/WOLO_DETECT/RESULTS"

# Full paths of all result files, in alphabetical order.
input_files = sorted(
    os.path.join(input_path, name)
    for name in os.listdir(input_path)
    if name.endswith(".pkl")
)

print("Found {} evaluation files.".format(len(input_files)))
for position, results_path in enumerate(input_files):
    print(position, results_path)

# Index into input_files of the results file that is inspected next.
use_state = 0

In [3]:
# Load the evaluation results of the file selected through use_state.
# NOTE: pickle.load executes arbitrary code - only open trusted result files.
with open(input_files[use_state], 'rb') as results_file:
    data = pickle.load(results_file)

"""
- data[0][0]
- training_data_&_training_state

- - data[0][1][0]
- - threshold (for first dataset)
  
- - - data[0][1][1][0 1  2    3   4   5                  6] 
- - - dataset_name,   GT, TP, FN, FP, Average Precision, Recall
"""
#examples:

# The training state is the last "_"-separated token of each entry's name,
# taken from the part of the name that precedes the first ".".
all_training_states = sorted(
    entry[0].split(".")[0].split("_")[-1] for entry in data
)
print(all_training_states)

['last']


The goal is to retrieve the mean Average Precision (mAP) over 13 confidence thresholds ranging from 0.2 to 0.8. A detection is classified as correct when its centre lies within 10% (of the image width) Euclidean distance of a ground truth detection; multiple detections of the same object are disregarded, as they would be suppressed by non-maximum suppression at run-time. We use this adjusted version of the original metric because the actual intersection over union is secondary to the agreement of centres: different methods have been used to assign bounding boxes. Synthetically generated bounding boxes are defined as the smallest rectangle including all projected 2D keypoints in the rendered images, whereas hand-annotated bounding boxes are fixed, square detections, as a custom-written centre tracking tool (BlenderMotionExport) was used to semi-automatically produce these datasets.

As an example, we will plot the precision over recall for these 13 thresholds for the first snapshot of the imported data and compute the mAP as in the official [scikit-learn implementation](https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/metrics/_ranking.py#L111).

(m)AP summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight:

$${AP} = \sum_n (R_n - R_{n-1}) P_n$$
    
where $P_n$ and $R_n$ are the precision and recall at the $n$-th threshold. The thresholds are visited in decreasing order. For the first threshold, the preceding recall $R_{n-1}$ is set to 0, because no detections are returned when the threshold is maximal. With no positives returned, the corresponding precision $P_{n-1}$ is by definition equal to 1.

*This implementation is not interpolated and is different from computing the area under the precision-recall curve with the trapezoidal rule, which uses linear interpolation and can be too optimistic.*

**Note:** this implementation is restricted to the binary classification task or multilabel classification task.

In [4]:
def clean_dataset_name(file_name, verbose=False):
    """
    Return the dataset name encoded in a results file name.

    file_name : path (or bare name) of a results file
    verbose   : when True, additionally print the extracted name

    The directory part is dropped and the base name is cut at the first
    occurrence of "_RESULTS"; if that marker is absent, the whole base
    name is returned.
    """
    stem, _, _ = os.path.basename(file_name).partition("_RESULTS")
    if verbose:
        print(stem)
    return stem

print("Found {} evaluation files.".format(len(input_files)))

# Number of evaluation datasets stored in every threshold entry: index 0 of
# an entry holds the confidence threshold, indices 1..NUM_EVAL_DATASETS hold
# the per-dataset results.
NUM_EVAL_DATASETS = 15

# Row labels of the results table: one cleaned name per results file.
training_datasets = [clean_dataset_name(f) for f in input_files]


def _average_precision(threshold_entries, dataset_idx):
    """
    Non-interpolated AP of one model on one dataset:

        AP = sum_n (R_n - R_{n-1}) * P_n

    threshold_entries : sequence with one entry per confidence threshold;
                        entry[dataset_idx][5] is used as the precision P_n
                        (labelled "Average Precision" in the pickle layout
                        note of the loading cell) and entry[dataset_idx][6]
                        as the recall R_n
    dataset_idx       : position of the dataset inside each threshold entry

    Returns the accumulated AP, starting from a recall of 0.

    NOTE(review): the entries are walked back to front, which presumably
    corresponds to decreasing confidence thresholds (i.e. growing recall) -
    confirm against the script that wrote the .pkl files.
    """
    AP = 0
    prev_recall = 0
    for entry in reversed(threshold_entries):
        precision = entry[dataset_idx][5]
        recall = entry[dataset_idx][6]
        AP += (recall - prev_recall) * precision
        prev_recall = recall
    return AP


# all_nets_all_APs[file][dataset] is a sorted list of
# [dataset_name, model_name, AP] entries (one per model in that file).
all_nets_all_APs = []

# Iterate over the paths directly so that use_state (the file selected for
# inspection in the configuration cell) is left untouched by this loop.
for results_path in input_files:
    # NOTE: pickle.load executes arbitrary code - only open trusted files.
    with open(results_path, 'rb') as f:
        data = pickle.load(f)

    final_AP = []

    for dataset_idx in range(1, NUM_EVAL_DATASETS + 1):
        all_AP = []

        for model in data:
            AP = _average_precision(model[1:], dataset_idx)
            # model[0] is split on backslashes and its second-to-last
            # component is used as the model name (presumably the parent
            # folder of a Windows-style weights path - TODO confirm).
            all_AP.append([model[1][dataset_idx][0], model[0].split("\\")[-2], AP])

        all_AP.sort()
        final_AP.append(all_AP)

    all_nets_all_APs.append(final_AP)

# One row per results file: the per-dataset APs followed by mAP and std.
output_AP = []

for net_results in all_nets_all_APs:
    print("\n\nAP scores for:", net_results[0][0][1])

    # Every per-dataset list is sorted; only its first entry is reported.
    first_entry_APs = [dataset[0][2] for dataset in net_results]
    model_mAP = np.mean(first_entry_APs)
    model_stdAP = np.std(first_entry_APs)

    print("mAP:", model_mAP, "+/-" ,model_stdAP,"\n")

    dataset_APs = []
    for dataset in net_results:
        print("dataset", dataset[0][0][:18], "  AP", dataset[0][2])
        dataset_APs.append(round(dataset[0][2],4))

    dataset_APs.append(round(model_mAP,4))
    dataset_APs.append(round(model_stdAP,4))

    output_AP.append(dataset_APs)

# Column labels: the dataset names of the last results file with their final
# six characters removed, followed by the two summary columns. Built once,
# after the loop, so the name also exists when no results were found.
categories = []
if all_nets_all_APs:
    categories = [dataset[0][0][:-6] for dataset in all_nets_all_APs[-1]]

categories.append("mAP")
# "sdtAP" (sic) is kept unchanged because it is the column label that ends
# up in the exported files.
categories.append("sdtAP")

Found 14 evaluation files.


AP scores for: DETECT_MultiCamAnts-and-synth-all_20
mAP: 0.9538463510533615 +/- 0.04470586737401497 

dataset BROWN_FOREST_C920_   AP 0.892925
dataset BROWN_FOREST_DSLR_   AP 0.8865873128689287
dataset BROWN_FOREST_FLOOR   AP 0.9271953116967312
dataset DRY_LEAVES_BACKGRO   AP 0.991725
dataset DRY_LEAVES_C920_SY   AP 0.993575
dataset DRY_LEAVES_DSLR_SY   AP 0.996475
dataset DRY_SOIL_2023-04-1   AP 0.9257541277684742
dataset DRY_SOIL_C920_SYNC   AP 0.8839661125623457
dataset DRY_SOIL_DSLR_SYNC   AP 0.8956729505925504
dataset PLAIN_2023-04-10_1   AP 0.9994
dataset PLAIN_C920_SYNCHRO   AP 0.998999978729261
dataset PLAIN_DSLR_SYNCHRO   AP 0.99825
dataset PLAIN_FRAGMENTS_C9   AP 0.9681876370802855
dataset PLAIN_FRAGMENTS_DS   AP 0.9732480994430552
dataset PLAIN_LEAF_FRAGMEN   AP 0.9757337350587933


AP scores for: DETECT_MultiCamAnts-and-synth-all_5
mAP: 0.9659195182903877 +/- 0.03765518355759547 

dataset BROWN_FOREST_C920_   AP 0.9100247418084145
dataset BROWN_

In [5]:
# Results table: one row per results file, one column per entry of
# categories (the evaluation datasets followed by the summary columns).
final_dataframe = pd.DataFrame(
    data=output_AP,
    index=training_datasets,
    columns=categories,
)
final_dataframe

Unnamed: 0,BROWN_FOREST_C920_SYNCHRONISED,BROWN_FOREST_DSLR_SYNCHRONISED,BROWN_FOREST_FLOOR_2023-04-10_16-05-32-05S,DRY_LEAVES_BACKGROUND_2023-04-11_14-13-59-13S,DRY_LEAVES_C920_SYNCHRONISED,DRY_LEAVES_DSLR_SYNCHRONISED,DRY_SOIL_2023-04-12_12-20-03-20S,DRY_SOIL_C920_SYNCHRONISED,DRY_SOIL_DSLR_SYNCHRONISED,PLAIN_2023-04-10_13-29-27-29S,PLAIN_C920_SYNCHRONISED,PLAIN_DSLR_SYNCHRONISED,PLAIN_FRAGMENTS_C920_SYNCHRONISED,PLAIN_FRAGMENTS_DSLR_SYNCHRONISED,PLAIN_LEAF_FRAGMENTS_2023-04-11_12-12-23-12S,mAP,sdtAP
DETECT_MultiCamAnts-and-synth-all_20,0.8929,0.8866,0.9272,0.9917,0.9936,0.9965,0.9258,0.884,0.8957,0.9994,0.999,0.9982,0.9682,0.9732,0.9757,0.9538,0.0447
DETECT_MultiCamAnts-and-synth-all_5,0.91,0.9238,0.9437,0.9956,0.9994,0.9997,0.9149,0.9199,0.9129,0.9999,0.9999,1.0,0.9867,0.9939,0.9886,0.9659,0.0377
DETECT_MultiCamAnts-and-synth-simple_20,0.9101,0.9088,0.9538,0.9962,0.9963,0.9966,0.9457,0.9219,0.9127,0.9999,0.9998,0.999,0.9747,0.9802,0.9838,0.9653,0.0351
DETECT_MultiCamAnts-and-synth-simple_5,0.9264,0.9314,0.9655,0.9969,0.9977,0.9999,0.9666,0.946,0.9269,1.0,1.0,1.0,0.987,0.9909,0.9847,0.9747,0.0278
DETECT_MultiCamAnts-and-synth-standard_20,0.9228,0.9276,0.9462,0.9957,0.9958,0.9972,0.9013,0.9212,0.9078,0.9997,0.999,0.9998,0.9628,0.9623,0.9718,0.9607,0.0357
DETECT_MultiCamAnts-and-synth-standard_5,0.9056,0.9289,0.9623,0.9981,0.999,0.9983,0.968,0.9309,0.8889,1.0,1.0,1.0,0.9828,0.9827,0.9798,0.9684,0.0361
DETECT_MultiCamAnts_20,0.9126,0.9214,0.9558,0.9945,0.9986,0.9972,0.9674,0.9237,0.948,0.9997,0.9989,0.9998,0.9696,0.9659,0.9723,0.9684,0.0297
DETECT_MultiCamAnts_5,0.9315,0.9324,0.9534,0.9968,0.9984,1.0,0.9714,0.9427,0.9433,0.9999,0.9998,1.0,0.9908,0.9865,0.9778,0.975,0.026
DETECT_synth-all_20,0.13,0.3134,0.2636,0.5027,0.3521,0.5856,0.4144,0.3515,0.4934,0.8416,0.5953,0.8331,0.3781,0.7597,0.8098,0.5083,0.2161
DETECT_synth-all_5,0.2512,0.632,0.5038,0.6898,0.5565,0.8005,0.5503,0.4933,0.6157,0.8856,0.6753,0.8651,0.6179,0.8499,0.8808,0.6579,0.1723


In [6]:
# Base name (without extension) shared by both exported files; they are
# written next to the evaluation results in input_path.
custom_name = "results_mAP"
output_base = os.path.join(input_path, custom_name)

final_dataframe.to_csv(output_base + ".csv")

# The HDF key is passed by keyword: positional arguments other than the
# path are deprecated in recent pandas releases.
final_dataframe.to_hdf(
    output_base + ".h5",
    key="df_with_missing",
    format="table",
    mode="w")
    