In [1]:
import numpy as np
import os
import json
import pandas as pd

from utils.evaluation import *
data_path="/home/emartini/nas/MAEVE/dataset/panoptic-toolbox/trtpose3D/"
sequence_name = "161029_sports1"

CONTINUOUS_STATE_PARTS = [
            "nose", "left_ear", "right_ear", "left_shoulder", "right_shoulder", 
            "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", 
            "right_knee", "left_ankle", "right_ankle", "neck"]

# Load the json for comparison

mapping = [12, 7, 10, 4,  5, 9, 6, 8, 11, 3, 14, 13]

header = ["frame_id"]+[CONTINUOUS_STATE_PARTS[m] for m in mapping]



In [2]:
# Load ground truth
GT = {}
with open(os.path.join(data_path,sequence_name+".gt.json"), "r") as f:
    ground_truth = json.load(f)
    for frame in ground_truth:
        GT[frame["frame_id"]] = frame        

In [16]:
# Baseline (compute HOTA, MOTA, and MOTP)
cameras = [6,7,8,9,10] # average the value across all cameras
# cameras = [6] # average the value across all cameras
cameras_results = []
for camera in cameras:
    print("CAM",camera)
    # Load camera file
    DUT = {}
    with open(os.path.join(data_path,sequence_name+"."+ str(camera) +".json"), "r") as f:
        file = json.load(f)
    for frame in file:
        DUT[frame["frame_id"]] = frame    
    
    ## Build the triple nested list (shape: n_frames, n_people, n_joints, 3) and IDs (shape: n_frames, n_people)
    # Set unique ids (union of both gt and camera)
    ids = list(GT.keys())
    for frame_dut in list(DUT.keys()):
        if frame_dut not in ids:
            ids.append(frame_dut) 
            
    predicted_keypoints = []
    predicted_ids = []
    ground_truth_keypoints = []
    ground_truth_ids = []
    for id in sorted(ids):
        predicted_keypoints_per_frame = []
        predicted_ids_per_frame = []
        ground_truth_keypoints_per_frame = []
        ground_truth_ids_per_frame = []
        if id in GT.keys():
            for pp in GT[id]['continuousState']:
                s = np.array([ [np.nan,np.nan,np.nan] if not f else f for f in pp])
                s = s[mapping,:]
                ground_truth_keypoints_per_frame.append(s)
            for i in GT[id]['track_ids']:
                ground_truth_ids_per_frame.append(id)
        if id in DUT.keys():
            for i,pp in enumerate(DUT[id]['continuousState']):
                s = np.array([ [np.nan,np.nan,np.nan] if not f else f for f in pp])
                s = s[mapping,:]
                if not np.isnan(s).all():
                    predicted_keypoints_per_frame.append(s)
                    predicted_ids_per_frame.append(i)

            # This doesn't work for cameras
            # for id in DUT[id]['track_ids']:
            #     predicted_ids_per_frame.append(id)
            
        predicted_keypoints.append(predicted_keypoints_per_frame)
        predicted_ids.append(predicted_ids_per_frame)
        ground_truth_keypoints.append(ground_truth_keypoints_per_frame)
        ground_truth_ids.append(ground_truth_ids_per_frame)
    
    hota = hota_3d(predicted_keypoints, predicted_ids, ground_truth_keypoints, ground_truth_ids,distance_threshold=0.25)
    mota,motp = mota_motp(predicted_keypoints, predicted_ids, ground_truth_keypoints, ground_truth_ids,distance_threshold=0.25)
    print("HOTA:",hota,'\t',"MOTA:",round(mota,1),'\t',"MOTP:",round(motp,2))
    cameras_results.append([hota,mota,motp])
print(np.mean(np.array(cameras_results),axis=0))

CAM 6


HOTA: 0.0 	 MOTA: 0.1 	 MOTP: 0.14
CAM 7
HOTA: 0.0 	 MOTA: -0.0 	 MOTP: 0.16
CAM 8
HOTA: 0.0 	 MOTA: -0.1 	 MOTP: 0.17
CAM 9


KeyboardInterrupt: 

In [18]:
# Compared methods (HOTA, MOTA, and MOTP)
# cameras = [6,7,8,9,10] # average the value across all cameras
cameras = [6,7] # average the value across all cameras
method = "cometh"

result = []
print("CAM",camera)
# Load camera file
DUT = {}
with open(os.path.join(data_path,sequence_name+"."+ method+"." +".".join(map(str, cameras)) +".json" ), "r") as f:
    file = json.load(f)
for frame in file:
    DUT[frame["frame_id"]] = frame    

## Build the triple nested list (shape: n_frames, n_people, n_joints, 3) and IDs (shape: n_frames, n_people)
# Set unique ids (union of both gt and camera)
ids = list(GT.keys())
for frame_dut in list(DUT.keys()):
    if frame_dut not in ids:
        ids.append(frame_dut) 
        
predicted_keypoints = []
predicted_ids = []
ground_truth_keypoints = []
ground_truth_ids = []
for id in sorted(ids):
    predicted_keypoints_per_frame = []
    predicted_ids_per_frame = []
    ground_truth_keypoints_per_frame = []
    ground_truth_ids_per_frame = []
    if id in GT.keys():
        for pp in GT[id]['continuousState']:
            s = np.array([ [np.nan,np.nan,np.nan] if not f else f for f in pp])
            s = s[mapping,:]
            ground_truth_keypoints_per_frame.append(s)
        for i in GT[id]['track_ids']:
            ground_truth_ids_per_frame.append(id)
    if id in DUT.keys():
        for i,pp in enumerate(DUT[id]['continuousState']):
            s = np.array([ [np.nan,np.nan,np.nan] if not f else f for f in pp])
            s = s[mapping,:]
            if not np.isnan(s).all():
                predicted_keypoints_per_frame.append(s)
                # predicted_ids_per_frame.append(i)

        # This doesn't work for cameras
        for id in DUT[id]['track_ids']:
            predicted_ids_per_frame.append(id)
        
    predicted_keypoints.append(predicted_keypoints_per_frame)
    predicted_ids.append(predicted_ids_per_frame)
    ground_truth_keypoints.append(ground_truth_keypoints_per_frame)
    ground_truth_ids.append(ground_truth_ids_per_frame)

hota = hota_3d(predicted_keypoints, predicted_ids, ground_truth_keypoints, ground_truth_ids,distance_threshold=0.25)
mota,motp = mota_motp(predicted_keypoints, predicted_ids, ground_truth_keypoints, ground_truth_ids,distance_threshold=0.25)
print("HOTA:",hota,'\t',"MOTA:",round(mota,1),'\t',"MOTP:",round(motp,2))
# cameras_results.append([hota,mota,motp])
# print(np.mean(np.array(cameras_results),axis=0))

CAM 9
HOTA: 0.0 	 MOTA: -0.8 	 MOTP: 0.21


In [None]:
# Elaborates the tables
for camera in cameras_results:
    print(cameras_results[camera].median())