In [None]:
import os
import pandas as pd
import json
import networkx as nx 
import matplotlib.pyplot as plt 
import itertools
import math
import numpy as np

In [None]:
# Root directory that is walked recursively for pose files whose names end
# with 'alphapose_tracked_person.json'.
POSE_DIR = './raw_data/UBnormal/poses_stg'

In [None]:
# Keypoint index -> joint name table.
# Source https://github.com/MVIG-SJTU/AlphaPose/blob/master/docs/output.md
# Each entry is an ordered (index, name) tuple. Tuples are used instead of
# brace-delimited literals because Python parses {0, "Nose"} as an unordered
# set, which loses the "index first, name second" structure of the pair.
raw_order = [
    (0,  "Nose"),
    (1,  "LEye"),
    (2,  "REye"),
    (3,  "LEar"),
    (4,  "REar"),
    (5,  "LShoulder"),
    (6,  "RShoulder"),
    (7,  "LElbow"),
    (8,  "RElbow"),
    (9,  "LWrist"),
    (10, "RWrist"),
    (11, "LHip"),
    (12, "RHip"),
    (13, "LKnee"),
    (14, "Rknee"),
    (15, "LAnkle"),
    (16, "RAnkle"),
]
# Joint names in the order they are listed above.
order = [joint_name for _joint_index, joint_name in raw_order]

In [None]:
def get_files_with_suffix(directory: str, suffix: str):
    """Collect the paths of all files below ``directory`` (searched
    recursively) whose file name ends with ``suffix``.

    Args:
        directory (str): Directory tree to walk.
        suffix (str): Ending a file name must have to be included.

    Returns:
        list: Paths built by joining the containing folder and the file name,
            in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(suffix)
    ]

def generate_pose_df(pose_dir: str):
    """Load all poses into a single dataframe

    Every file below ``pose_dir`` whose name ends with
    ``alphapose_tracked_person.json`` is read. Each file is read as a mapping
    of object id -> frame id -> ``{'keypoints': ..., 'scores': ...}`` and one
    row is produced per (object, frame) entry.

    Args:
        pose_dir (str): Directory where the pose files are stored

    Returns:
        dataframe: Dataframe containing the pose data with the columns
            ``video``, ``image_id``, ``keypoints`` and ``score``. When no pose
            data is found at all, an empty dataframe with these columns is
            returned instead of failing inside ``pd.concat``.
    """
    columns = ['video', 'image_id', 'keypoints', 'score']
    dfs = []
    json_files = get_files_with_suffix(pose_dir, 'alphapose_tracked_person.json')
    for file in json_files:
        # os.path.basename honours the platform's path separator, whereas
        # splitting on '/' leaves directory parts in the name on Windows.
        # Dropping the last three '_' tokens removes the
        # 'alphapose_tracked_person.json' suffix.
        video_name = '_'.join(os.path.basename(file).split('_')[:-3])
        with open(file) as f:
            tracking_dict = json.load(f)
        # The object id is not part of the output, so only the per-frame
        # entries of each tracked object are visited.
        pose_data = [
            {
                'video': video_name,
                'image_id': frame_id,
                'keypoints': kp_data['keypoints'],
                'score': kp_data['scores']
            }
            for frames in tracking_dict.values()
            for frame_id, kp_data in frames.items()
        ]
        if not pose_data:
            print(f'No pose data found for {video_name}')
        else:
            dfs.append(pd.DataFrame(pose_data, columns=columns))
    if not dfs:
        # pd.concat raises "No objects to concatenate" on an empty list.
        print(f'No pose data found in {pose_dir}')
        return pd.DataFrame(columns=columns)
    return pd.concat(dfs, ignore_index=True)

def get_statistics(arr: list) -> dict:
    """Calculate the quartiles, variance, number of items and number of low scoring items
    for the given array

    The low/high bounds are the usual boxplot whisker limits:
    ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` with ``IQR = Q3 - Q1``.

    Args:
        arr (list): List of scores or confidence values

    Returns:
        dict: Dictionary containing the statistics with the keys
            ``quatiles`` (dict mapping 0.25 / 0.5 / 0.75 to the quantile
            value), ``upper_bound``, ``lower_bound``, ``variance``,
            ``n_items`` and ``n_low_scoring_items`` (number of values that
            are less than or equal to ``lower_bound``).
    """
    values = np.asarray(arr)
    quantiles = {q: np.quantile(values, q) for q in (0.25, 0.50, 0.75)}

    # Get number of outliers in boxplot
    # Source: https://www.geeksforgeeks.org/finding-the-outlier-points-from-matplotlib/
    # The interquartile range is Q3 - Q1; subtracting Q3 from itself would
    # always give 0 and collapse both bounds onto the quartiles.
    iqr = quantiles[0.75] - quantiles[0.25]
    upper_bound = quantiles[0.75] + (1.5 * iqr)
    lower_bound = quantiles[0.25] - (1.5 * iqr)
    n_low_scoring_items = int(np.count_nonzero(values <= lower_bound))

    statistics = {
        # The key spelling 'quatiles' is kept so existing consumers of the
        # returned dictionary keep working.
        'quatiles': quantiles,
        'upper_bound': upper_bound,
        'lower_bound': lower_bound,
        'variance': np.var(values),
        'n_items': len(values),
        'n_low_scoring_items': n_low_scoring_items
    }
    return statistics

In [None]:
# Load every pose entry found below POSE_DIR into one dataframe
# (one row per tracked object per frame).
df_overview = generate_pose_df(POSE_DIR)

In [None]:
# Summary statistics and boxplot for the `score` column of the pose dataframe
score_values = df_overview['score'].tolist()
print(get_statistics(score_values))
df_overview.boxplot(column='score')
plt.show()

In [None]:
# Summary statistics and boxplot for the keypoint confidences.
# For each of the 17 keypoints the value at offset 2 + 3 * joint of the
# keypoint list is collected (presumably the list is laid out as
# x, y, confidence triplets; TODO confirm against the pose file format).
confidences = []
for flat_keypoints in df_overview['keypoints'].tolist():
    confidences.extend(flat_keypoints[2 + (joint * 3)] for joint in range(17))

print(get_statistics(confidences))
plt.boxplot(confidences)
plt.show()

In [None]:
def generate_tracking_df(pose_dir):
    """Summarise for how many frames each tracked object is present.

    Every file below ``pose_dir`` whose name ends with
    ``alphapose_tracked_person.json`` is read as a mapping of
    object id -> per-frame data, and one record is produced per object.

    Args:
        pose_dir (str): Directory where the pose files are stored

    Returns:
        dataframe: One row per tracked object with the columns ``video``,
            ``obj_id`` and ``n_frames`` (number of frame entries stored for
            that object). The columns are present even when nothing is found.
    """
    columns = ['video', 'obj_id', 'n_frames']
    # Search the directory that was passed in rather than the notebook-level
    # POSE_DIR constant, so the parameter is actually honoured.
    json_files = get_files_with_suffix(pose_dir, 'alphapose_tracked_person.json')
    tracking_records = []
    for json_file in json_files:
        # Same derivation as in generate_pose_df so that the `video` values of
        # both dataframes agree: take the file name (platform-independent) and
        # drop the three '_' tokens of 'alphapose_tracked_person.json'.
        video_name = '_'.join(os.path.basename(json_file).split('_')[:-3])
        with open(json_file) as f:
            tracking_dict = json.load(f)
        tracking_records.extend(
            {
                'video': video_name,
                'obj_id': obj_id,
                'n_frames': len(frame_data)
            }
            for obj_id, frame_data in tracking_dict.items()
        )
    return pd.DataFrame(tracking_records, columns=columns)
# Build the per-object tracking summary (video, object id, number of frames).
df_tracking = generate_tracking_df(POSE_DIR)

In [None]:
# Report how many objects were tracked and how many of them have fewer than
# 12 frames, then plot the distribution of track lengths.
# NOTE(review): 12 is presumably the minimum track length needed downstream
# and 500 // 12 the matching bin count; confirm both.
total_tracks = df_tracking.shape[0]
short_tracks = df_tracking.loc[df_tracking['n_frames'] < 12]
print('Total number of tracked objects:', total_tracks)
print('Number of tracked objects with less than 12 frames:', short_tracks.shape[0])
df_tracking['n_frames'].plot.hist(bins=500 // 12)
plt.show()