In [14]:
import pandas as pd
import os
import numpy as np
from src.tracker.data_track import load_detection_from_txt
sequence_dir = "results\\tracker\\07-04-2022_18-30\\MOT16-02"
events_path = os.path.join(sequence_dir, "events.csv")
events_df = pd.read_csv(events_path)
track_path = os.path.join(sequence_dir, "track.txt")
track = load_detection_from_txt(track_path, vis_threshold=0.0, mode="track")


from src.tracker.data_track_precomputed import MOT16SequencePrecomputed
from src.tracker.tracker import MyTracker
import json
import argparse
eval_config_path = os.path.join(
    os.path.dirname(sequence_dir), "eval_config.json"
)
with open(eval_config_path, "r") as f:
    eval_config = json.load(f)
args = argparse.Namespace(**eval_config)

with open(args.tracker_config_path, "r") as f:
    tracker_hyperparams = json.load(f)

tracker = MyTracker.from_config(tracker_hyperparams)

seq_name = os.path.basename(sequence_dir)
sequence = MOT16SequencePrecomputed(
    seq_name=seq_name,
    original_data_root_dir=args.original_data_root_dir,
    precomputed_seq_dir=os.path.join(
        args.precomputed_data_root_dir, seq_name
    ),
    vis_threshold=args.vis_threshold,
    return_det_segmentation=tracker.assign_model.use_segmentation,
    return_gt_segmentation=tracker.assign_model.use_segmentation,
    return_statistical_info=True
)


In [15]:
from src.tracker.tracking_analysis import find_fp, find_tp, find_misses, find_switches

fp_sequences = find_fp(events_df)
num_fp = len([idx for seq in fp_sequences for idx in seq])

miss_sequences, last_match_sequences = find_misses(events_df)
not_empty_last_match_sequences = [seq for seq in last_match_sequences if seq]
lost_track_sequences = [miss_seq for (miss_seq, last_match_seq) in zip(miss_sequences, last_match_sequences) if last_match_seq]
inherent_miss_sequences = [miss_seq for (miss_seq, last_match_seq) in zip(miss_sequences, last_match_sequences) if not last_match_seq]
num_miss = len([idx for seq in miss_sequences for idx in seq])


switch_idxs, last_match_idxs, ascend_bool, active_switch_bool = find_switches(events_df)
num_switch = len(switch_idxs)
num_ascend = len([boolean for boolean in ascend_bool if boolean])
num_ascend / num_switch
num_active_switch = len([boolean for boolean in active_switch_bool if boolean])
num_active_switch / num_switch

num_hypotheses = len(events_df.dropna(subset="HId"))
num_objects = len(events_df.dropna(subset="OId"))
num_tp = num_hypotheses - num_fp 
precision = num_tp / (num_tp + num_fp)
recall = num_tp / (num_tp + num_miss)
rel_switch = num_switch / num_tp


In [3]:
def split_sequences_by_visibility(sequence, events_df, idx_sequences, vis_threshold):
    split_sequences ={"visible":[], "invisible":[]}
    for idxs in idx_sequences:
        vis = item_of_mot_event(sequence, events_df, idxs[0], key="vis")
        if vis > vis_threshold:
            split_sequences["visible"].append(idxs)
        else:
            split_sequences["invisible"].append(idxs)
    return split_sequences

def item_of_mot_event(sequence, events_df, df_idx, key="vis"):
    frame_id = events_df.loc[df_idx, "FrameId"]
    oid = events_df.loc[df_idx, "OId"]
    if key == "img":
        item = sequence[frame_id][key]
    else:
        item = sequence[frame_id][key][oid]
    return item


def miss_analysis_chart(sequence, events_df, lost_track_sequences, inherent_miss_sequences, vis_threshold=0.0, count_event="per_sequence"):    
    """
    count_event

        "per_sequence"
        One way is to count a sequence of misses on one object as one single miss. 
        This is useful in terms of error reason analysis.

        "per_frame"
        The other way is to count a sequence of misses multiplied by the length of the sequence.
        This is less useful, but it corresponds to the MOTA metric, which counts events per frame.
    """

    lost_track_sequences_split = split_sequences_by_visibility(sequence, events_df, lost_track_sequences, vis_threshold)
    inherent_miss_sequences_split = split_sequences_by_visibility(sequence, events_df, inherent_miss_sequences, vis_threshold)
    if count_event == "per_sequence":
        num_lost_track_miss_vis = len(lost_track_sequences_split["visible"])
        num_lost_track_miss_invis = len(lost_track_sequences_split["invisible"])
        num_inherent_miss_vis = len(inherent_miss_sequences_split["visible"])
        num_inherent_miss_invis = len(inherent_miss_sequences_split["invisible"])

    elif count_event == "per_frame":
        num_lost_track_miss_vis = len([idx for seq in lost_track_sequences_split["visible"] for idx in seq])
        num_lost_track_miss_invis = len([idx for seq in lost_track_sequences_split["invisible"] for idx in seq])
        num_inherent_miss_vis = len([idx for seq in inherent_miss_sequences_split["visible"] for idx in seq])
        num_inherent_miss_invis = len([idx for seq in inherent_miss_sequences_split["invisible"] for idx in seq])


    miss_df = pd.DataFrame(
        index=["visible", "invisible"], 
        columns=["inherent_miss", "lost_track_miss"]
    )
    miss_df.loc["visible", "inherent_miss"] = num_inherent_miss_vis
    miss_df.loc["invisible", "inherent_miss"] = num_inherent_miss_invis
    miss_df.loc["visible", "lost_track_miss"] = num_lost_track_miss_vis
    miss_df.loc["invisible", "lost_track_miss"] = num_lost_track_miss_invis
    miss_df.loc[:, "total"] = miss_df.sum(axis=1)
    miss_df.loc["total", :] = miss_df.sum(axis=0)
    return miss_df


In [79]:
# contrast and luminosity only need to be looked at if the visbility is high enough

from collections import defaultdict


def get_sequences_items(sequence, events_df, idx_sequences, keys=["vis", "contrast", "luminosity", "area"]):
    items = {key : [] for key in keys}
    for idxs in idx_sequences:
        first_idx = idxs[0]
        for key in keys:
            item = item_of_mot_event(sequence, events_df, first_idx, key=key)
            items[key].append(item) 
    return items

def split_items_by_vis(items, vis_tresholds = [0.0, 0.1, 0.3, 0.7, 1.0]):
    items_per_vis = defaultdict(dict)
    for i in range(len(vis_tresholds)-1):
        mask = np.logical_and(np.array(items["vis"]) > vis_tresholds[i], np.array(items["vis"]) < vis_tresholds[i+1])
        for key in items.keys():
            items[key] = np.array(items[key])
            items_per_vis[vis_tresholds[i+1]][key] = items[key][mask]
    return items_per_vis

def split_item_pairs_by_vis(item_pairs, vis_tresholds = [0.0, 0.1, 0.3, 0.7, 1.0]):
    """
    item_pairs: tuple
    """
    split_item_pairs = []
    

    for items in item_pairs:
        items_per_vis = defaultdict(dict)
        for i in range(len(vis_tresholds)-1):
            mask = np.logical_and(
                np.array(item_pairs[0]["vis"]) > vis_tresholds[i], 
                np.array(item_pairs[0]["vis"]) < vis_tresholds[i+1]
            )
            
            for key in items.keys():
                items[key] = np.array(items[key])
                items_per_vis[vis_tresholds[i+1]][key] = items[key][mask]
        split_item_pairs.append(items_per_vis)
    return split_item_pairs



# Statistical Error Analysis

In order to win the MOT16 tracking challenge, the author of this repo decides first execute a statistical error analysis, in order to identify the opportunities to improve tracking performance.

### WHAT kind of errors happen
The MOT16 challenge primarily uses the MOTA metric to rank candidates. 
MOTA is an accumulation of errors of the following categories.

| MOT16 Events | description|
|-|-|
| MATCH | A match between a object and hypothesis was found
| SWITCH | A match between a object and hypothesis was found but differs from previous assignment. <br> This is only counted, if one object is assigned to multiple hypothesis ids. It is not counted as a mistake, if the opposite happens, when one hypothesis consists of multiple objects.
| MISS |  No match for an object was found
| FP | No match for an hypothesis was found (spurious detections)

### WHEN does the error happen
The first step to identify potential for tracker improvement, is to catch the specific part of the tracker, that is responsible for the error. The author of this repo decides to further split the above defined events, such that they become more useful for error analysis.

### HOW OFTEN does the error happen

In [None]:
miss_analysis_chart(sequence, events_df, lost_track_sequences, inherent_miss_sequences, vis_threshold=0.0, count_event="per_sequence")

## **WHY does the error happen**
### **Motivation**
The endgoal of this analysis is fix the error causes. In order to achieve the author of this repo decides to find ways to make detection misses predictable. This can be achieved by correlation analysis of missed detections with apriori quantifiable variables. If strong correlations between missed detections and apriori quantifiable variables can be established, the tracker can use alternative detection methods for those specific cases.<br>
### **t-test**



In [None]:
from scipy.stats import ttest_ind
from collections import defaultdict
from scipy.stats import ttest_rel


def do_t_test(items_group_a, items_group_b, alpha=0.2, alternative='two-sided', dependent_groups=False):
    """
    Arguments
    ---------
    items_group_a
        dict of group a, each key contains the values of a different variable  

    items_group_b
        dict of group b, each key contains the values of a different variable  

    Interpretation
    -----------------------
    t > 0: mean(a) > mean(b)
    t < 0: mean(a) < mean(b)
    """
    t_test_dict = defaultdict(dict)
    for (key, item_group_a), (_, item_group_b) in zip(items_group_a.items(), items_group_b.items()):
        if dependent_groups:
            t, p = ttest_rel(item_group_a, item_group_b, alternative=alternative)   
        else:
            t, p = ttest_ind(item_group_a, item_group_b, alternative=alternative)
        t_test_dict[key]["t"] = t
        t_test_dict[key]["p"] = p

        if alternative == 'two-sided':
            thresh = alpha / 2
        else: 
            thresh = alpha
            
        t_test_dict[key]["significant"] = p < thresh
    return pd.DataFrame(t_test_dict)

### Dependent group t-test: Miss

In [41]:
last_match_items = get_sequences_items(sequence, events_df, idx_sequences=not_empty_last_match_sequences)
lost_track_items = get_sequences_items(sequence, events_df, idx_sequences=lost_track_sequences)
do_t_test(last_match_items, lost_track_items, dependent_groups=True)

In [89]:
last_match_items_by_vis, lost_track_items_by_vis = split_item_pairs_by_vis([last_match_items, lost_track_items])
do_t_test(last_match_items_by_vis[1], lost_track_items_by_vis[1], dependent_groups=True)

Unnamed: 0,vis,contrast,luminosity,area
t,8.349743,2.622564,0.870366,1.082307
p,0.001125,0.058641,0.433217,0.340001
significant,True,True,False,False


### Indepencent Group t-test: Miss

In [90]:
tp_items = get_sequences_items(sequence, events_df, idx_sequences=find_tp(events_df))
miss_items = get_sequences_items(sequence, events_df, idx_sequences=miss_sequences)
do_t_test(tp_items, miss_items, dependent_groups=False)

KeyboardInterrupt: 

### Independent Group Test: FP


In [None]:
fp_sequences

In [51]:
tp_items_by_vis = split_items_by_vis(tp_items)
miss_items_by_vis = split_items_by_vis(miss_items)

NameError: name 'tp_items' is not defined

### Correlation Analysis
- visibility-area<br> The correlation between visibility

In [191]:
miss_correlation_df = pd.DataFrame(
    index=miss_items.keys(), 
    columns=miss_items.keys(), 
    data=np.corrcoef(list(miss_items.values())
))
miss_correlation_df

Unnamed: 0,vis,contrast,luminosity,area
vis,1.0,-0.002548,0.124465,0.247516
contrast,-0.002548,1.0,0.810561,-0.311167
luminosity,0.124465,0.810561,1.0,-0.22994
area,0.247516,-0.311167,-0.22994,1.0


In [190]:
tp_correlation_df = pd.DataFrame(
    index=tp_items.keys(), 
    columns=tp_items.keys(), 
    data=np.corrcoef(list(tp_items.values())
    ))
tp_correlation_df

Unnamed: 0,vis,contrast,luminosity,area
vis,1.0,-0.094326,0.032686,0.248366
contrast,-0.094326,1.0,0.837481,-0.332502
luminosity,0.032686,0.837481,1.0,-0.241056
area,0.248366,-0.332502,-0.241056,1.0
