# Log Paper Results

In [1]:
import csv, os, sys, re, string, json, glob, shutil, random, datetime, math

import cv2
import pandas as pd
import numpy as np
import pprint

from IPython.display import HTML as html_print
from IPython.display import Markdown
from IPython.display import clear_output

# from IPython.display import Image
from PIL import Image, ImageDraw, ImageFont
from ipywidgets import Video

import torch
from transformers import AutoTokenizer
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize

import warnings
warnings.filterwarnings('ignore')

import seaborn as sns
sns.set()
from matplotlib import pyplot as plt
import matplotlib.patches as patches
%pylab inline

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Use our own built-in customized COCO API.
sys.path.insert(0, "../cocoapi")

In [3]:
# from pycocotools.coco import COCO
# from pycocotools.cocoeval import COCOeval
from PythonAPI.pycocotools.coco import COCO
from PythonAPI.pycocotools.cocoeval import COCOeval

import inspect
print("COCO API at:     {}".format(inspect.getfile(COCO)))
print("COCOeval API at: {}".format(inspect.getfile(COCOeval)))

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%reload_ext autoreload
%autoreload 2

COCO API at:     /home/telinwu/research/project_jarvis/modeling/../cocoapi/PythonAPI/pycocotools/coco.py
COCOeval API at: /home/telinwu/research/project_jarvis/modeling/../cocoapi/PythonAPI/pycocotools/cocoeval.py


In [4]:
from coco_eval import calc_iou_individual

# Functions

In [5]:
def create_coco_gt_pred_dict(
    coco_gt_file,
    coco_pred_file,
    compensate_missing_pred=False,
):
    """Join COCO ground-truth boxes and predictions per image id.

    Args:
        coco_gt_file: Path to a COCO-format ground-truth JSON file. Only its
            ``"annotations"`` list is read.
        coco_pred_file: Path to a COCO-format results JSON file, i.e. a list
            of dicts carrying ``image_id``, ``bbox``, ``score`` and
            ``category_id``.
        compensate_missing_pred: If True, every ground-truth image that has no
            prediction receives one placeholder prediction
            (``bbox=[0, 0, 1, 1]``, ``score=1.0``, ``category_id=1``).
            If False, such images get an empty prediction list.

    Returns:
        dict: ``image_id -> {"gt_bboxes": [...], "pred_bboxes": [...]}`` for
        every image id that has ground truth. Predictions are sorted by
        descending score.

    Raises:
        AssertionError: If the prediction file covers more distinct image ids
            than the ground-truth file.
    """
    # Context managers make sure the file handles are closed after parsing.
    with open(coco_pred_file) as pred_fp:
        preds = json.load(pred_fp)
    with open(coco_gt_file) as gt_fp:
        gts = json.load(gt_fp)["annotations"]

    gt_dict = {}
    for gt in gts:
        gt_dict.setdefault(gt["image_id"], []).append(gt["bbox"])
    # NOTE: this counts distinct image ids that have GT, not annotations.
    print("Num GT Instances: {}".format(len(gt_dict)))

    preds_dict = {}
    for pred in preds:
        preds_dict.setdefault(pred["image_id"], []).append({
            "bbox": pred["bbox"],
            "score": pred["score"],
            "category_id": pred["category_id"],
            "image_id": pred["image_id"],
        })
    # NOTE: likewise, distinct image ids that have at least one prediction.
    print("Num Pred Instances: {}".format(len(preds_dict)))

    assert len(gt_dict) >= len(preds_dict), (
        "More prediction ids than gt ids, sure the correct file(s)?"
    )

    joint_dict = {}
    for image_id in sorted(gt_dict):
        image_preds = preds_dict.get(image_id)
        if image_preds is None:
            # Previously this path raised KeyError unless compensation was on.
            if compensate_missing_pred:
                image_preds = [{
                    "bbox": [0, 0, 1, 1],
                    "score": 1.0,
                    "category_id": 1,
                    "image_id": image_id,
                }]
            else:
                image_preds = []
        joint_dict[image_id] = {
            "gt_bboxes": gt_dict[image_id],
            "pred_bboxes": sorted(
                image_preds,
                key=lambda p: p["score"],
                reverse=True,
            ),
        }

    print("Num Joint Instances: {}".format(len(joint_dict)))
    return joint_dict


def get_ndarray_coco_results(coco_pred_file):
    """Convert COCO-style predictions to an ``[N x 7]`` numpy array.

    Each row is ``[image_id, x1, y1, w, h, score, category_id]``.

    Args:
        coco_pred_file: Either a path (``str``) to a COCO results JSON file or
            an already-loaded iterable of prediction dicts with the keys
            ``image_id``, ``bbox``, ``score`` and ``category_id``.

    Returns:
        np.ndarray: Array of shape ``(N, 7)``. For empty input the shape is
        ``(0, 7)`` so that column indexing such as ``arr[:, -1]`` keeps working.
    """
    if isinstance(coco_pred_file, str):
        with open(coco_pred_file) as pred_fp:
            preds = json.load(pred_fp)
    else:
        preds = coco_pred_file

    rows = [
        [
            pred["image_id"],
            pred["bbox"][0],  # x1
            pred["bbox"][1],  # y1
            pred["bbox"][2],  # w
            pred["bbox"][3],  # h
            pred["score"],
            pred["category_id"],
        ]
        for pred in preds
    ]
    if not rows:
        # np.asarray([]) would be 1-D (shape (0,)); keep the 2-D contract.
        return np.zeros((0, 7))
    return np.asarray(rows)

In [6]:
def get_per_frame_type_ndarray_coco_results(
    coco_gt_file,
    coco_pred_file,
    scod_clips,
    forced_frame_types=None,
    fully_narrated_image_ids=None,
    verbose=False
):
    """Split predictions and GT annotations by frame type (pre / pnr / post).

    Args:
        coco_gt_file: Either a path (``str``) to a COCO GT JSON file, or an
            ``(images, annotations)`` pair of already-loaded lists.
        coco_pred_file: Either a path (``str``) to a COCO results JSON file or
            an already-loaded list of prediction dicts.
        scod_clips: Iterable of clip dicts with ``"video_uid"`` and optional
            ``"pre_frame"`` / ``"pnr_frame"`` / ``"post_frame"`` entries, each
            holding a ``"frame_number"``.
        forced_frame_types: Optional container of short names (``"pre"``,
            ``"pnr"``, ``"post"``); frame types not listed are skipped when
            building the prediction arrays.
        fully_narrated_image_ids: Optional collection of image ids. When
            given, images, annotations and predictions are restricted to it.
        verbose: If True, print the number of predictions per frame type.

    Returns:
        tuple: ``(per_frame_type_pred_ndarrays, per_frame_type_anns)`` where
        the first maps ``"<fr>_frame"`` to the array built by
        ``get_ndarray_coco_results`` and the second maps ``"<fr>_frame"`` to
        the list of GT annotation dicts.

    Raises:
        ValueError: If a GT image's ``<video_uid>/<frame>`` file name is not
            covered by ``scod_clips``.
        KeyError: If a prediction refers to an image id missing from the GT
            images, or a requested frame type has no predictions at all.
    """
    short_frame_names = ("pre", "pnr", "post")

    if isinstance(coco_pred_file, str):
        with open(coco_pred_file) as pred_fp:
            preds = json.load(pred_fp)
    else:
        preds = coco_pred_file

    if isinstance(coco_gt_file, str):
        with open(coco_gt_file) as gt_fp:
            gt_data = json.load(gt_fp)
        gts, anns = gt_data["images"], gt_data["annotations"]
    else:
        gts, anns = coco_gt_file

    if fully_narrated_image_ids is not None:
        # Set lookup keeps the three filters linear instead of O(n * m).
        keep_ids = set(fully_narrated_image_ids)
        preds = [x for x in preds if x["image_id"] in keep_ids]
        gts = [x for x in gts if x["id"] in keep_ids]
        anns = [x for x in anns if x["image_id"] in keep_ids]

    # "<video_uid>_<frame_number>" -> frame types that frame plays in any clip.
    frame_type_dict = {}
    for scod in scod_clips:
        video_uid = scod["video_uid"]
        for fr in short_frame_names:
            frame_type = "{}_frame".format(fr)
            if frame_type not in scod:
                continue
            frame_num = scod[frame_type]["frame_number"]
            key = "{}_{}".format(video_uid, frame_num)
            types_for_key = frame_type_dict.setdefault(key, [])
            if frame_type not in types_for_key:
                types_for_key.append(frame_type)

    # COCO image id -> frame types, resolved through the image file name,
    # which is expected to look like "<video_uid>/<frame_number>.<ext>".
    id_to_frame_mappings = {}
    for gt in gts:
        file_name = gt["file_name"].split(".")[0]
        video_uid, frame_num = file_name.split("/")
        key = "{}_{}".format(video_uid, frame_num)
        if key not in frame_type_dict:
            raise ValueError(
                "Key {} not in scod clips, sure the right file(s)?".format(key)
            )
        id_to_frame_mappings.setdefault(gt["id"], []).extend(frame_type_dict[key])

    per_frame_type_anns = {}
    for ann in anns:
        for frame_type in id_to_frame_mappings[ann["image_id"]]:
            per_frame_type_anns.setdefault(frame_type, []).append(ann)

    per_frame_type_preds = {}
    for pred in preds:
        for frame_type in id_to_frame_mappings[pred["image_id"]]:
            per_frame_type_preds.setdefault(frame_type, []).append(pred)

    per_frame_type_pred_ndarrays = {}
    for fr in short_frame_names:
        frame_type = "{}_frame".format(fr)
        if forced_frame_types is not None and fr not in forced_frame_types:
            continue
        per_frame_type_pred_ndarrays[frame_type] = get_ndarray_coco_results(
            per_frame_type_preds[frame_type]
        )
        if verbose:
            print("{}'s cnt = {}".format(frame_type, len(per_frame_type_preds[frame_type])))

    return per_frame_type_pred_ndarrays, per_frame_type_anns

In [221]:
def check_gt_pred_match_on_file(
    coco_gt_file,
    coco_pred_file,
):
    """Verify that every predicted image id exists in the ground truth.

    Args:
        coco_gt_file: Path (``str``) to a COCO GT JSON file, or an
            already-loaded dict with an ``"images"`` list.
        coco_pred_file: Path (``str``) to a COCO results JSON file, a numpy
            array (converted through ``COCO.loadNumpyAnnotations``), or an
            already-loaded list of prediction dicts.

    Returns:
        None when all prediction image ids are present in the GT images.

    Raises:
        ValueError: If at least one prediction refers to an image id that is
            not listed in the GT ``"images"``.
    """
    if isinstance(coco_gt_file, str):
        with open(coco_gt_file) as gt_fp:
            coco_gt_data = json.load(gt_fp)
    else:
        coco_gt_data = coco_gt_file

    if isinstance(coco_pred_file, str):
        with open(coco_pred_file) as pred_fp:
            coco_pred_data = json.load(pred_fp)
    elif isinstance(coco_pred_file, np.ndarray):
        _coco = COCO(verbose=False)
        coco_pred_data = _coco.loadNumpyAnnotations(coco_pred_file)
    else:
        coco_pred_data = coco_pred_file

    gt_image_ids = {image["id"] for image in coco_gt_data["images"]}
    pred_image_ids = {pred["image_id"] for pred in coco_pred_data}
    # Subset test: any prediction id outside the GT ids means a mismatch.
    if not pred_image_ids <= gt_image_ids:
        raise ValueError(
            "\nGT file: {}\nPred file: {}\nThey do not match!?".format(coco_gt_file, coco_pred_file))
    return None


def naive_ego4d_scod_coco_results(
    coco_gt_file,
    coco_pred_file,
    original_coco_gt_file=None,
    top_k=None,
    ignore_summaries=None,
    do_not_summarize=False,
    verbose=False,
    iou_stats=False,
):
    """Run a single COCO bbox evaluation with the project's customized COCO API.

    Args:
        coco_gt_file: Ground truth. When ``original_coco_gt_file`` is None it
            is passed to ``COCO(annotation_file=...)``; otherwise it is loaded
            onto the original GT through ``loadRes`` (the callers in this
            notebook pass a list of annotation dicts in that case).
        coco_pred_file: Predictions in any form accepted by ``COCO.loadRes``.
        original_coco_gt_file: Optional full GT file used as the base dataset
            and for the GT/prediction image-id consistency check.
        top_k: If given, ``params.maxDets`` becomes ``[top_k, top_k, top_k]``.
        ignore_summaries: Forwarded to ``COCOeval.summarize`` (an extension of
            the customized COCO API).
        do_not_summarize: If True, skip ``summarize()``.
        verbose: Forwarded to ``COCO`` and ``COCOeval.params.verbose``.
        iou_stats: If True, print the mean / std / histogram of all IoUs.

    Returns:
        tuple: ``(coco_eval, perfs)`` where ``perfs`` is ``coco_eval.stats``,
        or ``None`` when ``do_not_summarize`` is True.

    Raises:
        ValueError: From ``check_gt_pred_match_on_file`` when predictions
            refer to image ids that are absent from the GT.
    """
    if original_coco_gt_file is not None:
        coco_gt = COCO(annotation_file=original_coco_gt_file, verbose=verbose)
        coco_gt = coco_gt.loadRes(coco_gt_file)
        check_gt_pred_match_on_file(original_coco_gt_file, coco_pred_file)
    else:
        coco_gt = COCO(annotation_file=coco_gt_file, verbose=verbose)
        check_gt_pred_match_on_file(coco_gt_file, coco_pred_file)
    coco_dt = coco_gt.loadRes(coco_pred_file)

    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    coco_eval.params.verbose = verbose
    if top_k is not None:
        coco_eval.params.maxDets = [top_k, top_k, top_k]
    coco_eval.evaluate()
    coco_eval.accumulate()

    # Bind `perfs` on every path; it used to be undefined (UnboundLocalError
    # at the return) whenever do_not_summarize=True.
    perfs = None
    if not do_not_summarize:
        coco_eval.summarize(ignore_summaries=ignore_summaries)
        perfs = coco_eval.stats

    if iou_stats:
        ious = []
        for key in coco_eval.ious:
            # Entries without any IoU flatten to an empty array and add nothing.
            ious.extend(np.asarray(coco_eval.ious[key]).flatten().tolist())
        print("Number of ious: {}".format(len(ious)))
        if ious:
            avg_ious = np.mean(ious)
            std_ious = np.std(ious)
            print("Mean-IOUs: {}  Std-IOUs: {}".format(avg_ious, std_ious))
            hist = np.histogram(ious, bins=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
            print(hist[1])
            print(hist[0])
            print(hist[0]/np.sum(hist[0])*100)
        # With no IoUs the statistics are undefined (NaN mean, 0/0 histogram
        # percentages), so only the count is reported.
    return coco_eval, perfs


def comprehensive_ego4d_scod_coco_results(
    coco_gt_file,
    coco_pred_file,
    scod_clips,
    top_k=None,
    forced_frame_types=None,
    ignore_summaries=None,
    return_eval_dicts=False,
    separate_classes=False,
    fully_narrated=False,
    verbose=False,
    markdown=False,
    iou_stats=False,
):
    """Evaluate predictions per frame type (pre / pnr / post) and print tables.

    Args:
        coco_gt_file: Path to the COCO-format GT JSON file.
        coco_pred_file: Path to a COCO results JSON file or a loaded list of
            prediction dicts.
        scod_clips: Clip dicts used to map each image to its frame type(s).
        top_k: Optional ``maxDets`` override forwarded to the evaluation.
        forced_frame_types: Optional container of ``"pre"`` / ``"pnr"`` /
            ``"post"``; frame types not listed are not evaluated and appear
            as ``nan`` in the summary table.
        ignore_summaries: Forwarded to the evaluation's summarize step.
        return_eval_dicts: If True, return per-frame-type COCOeval objects and
            GT dicts.
        separate_classes: If True, evaluate each category of the GT file
            separately; otherwise all categories are evaluated together under
            the label ``"OOC and/or Tool"``.
        fully_narrated: If True, restrict evaluation to images whose
            ``"caption"`` does not contain ``"object_of_change"``.
        verbose: Forwarded to the helper functions.
        markdown: If True, additionally print each summary as a markdown table.
        iou_stats: If True, print IoU statistics (joint evaluation only).

    Returns:
        ``(all_coco_evals, all_coco_gts)`` keyed by ``"<fr>_frame"`` when
        ``return_eval_dicts`` is True, otherwise ``None``.
    """
    metric_names = ("AP", "AP50", "AP75")
    frame_types = ["{}_frame".format(fr) for fr in ("pre", "pnr", "post")]

    # Load the GT once (it used to be opened twice and never closed).
    with open(coco_gt_file) as gt_fp:
        coco_gt_data = json.load(gt_fp)
    object_classes = coco_gt_data["categories"]

    fully_narrated_image_ids = None
    if fully_narrated:
        fully_narrated_image_ids = [x["id"] for x in coco_gt_data["images"]
                                    if "object_of_change" not in x["caption"]]
        print("Fully Narrated Image Counts: {} / {}".format(len(fully_narrated_image_ids),
                                                            len(coco_gt_data["images"])))

    # category id -> category name
    object_type_dict = {
        int(object_class["id"]): object_class["name"]
        for object_class in object_classes
    }

    per_frame_type_pred_ndarrays, per_frame_type_anns = get_per_frame_type_ndarray_coco_results(
        coco_gt_file=coco_gt_file,
        coco_pred_file=coco_pred_file,
        scod_clips=scod_clips,
        forced_frame_types=forced_frame_types,
        fully_narrated_image_ids=fully_narrated_image_ids,
        verbose=verbose,
    )

    all_coco_evals = {}
    all_coco_gts = {}

    if separate_classes:
        all_perfs = {object_class["name"]: {} for object_class in object_classes}
    else:
        all_perfs = {"OOC and/or Tool": {}}

    for frame_type in frame_types:
        if frame_type not in per_frame_type_pred_ndarrays:
            continue
        if frame_type != "pre_frame":
            print()
        print("---------- {} ----------".format(frame_type))
        curr_frame_type_anns = per_frame_type_anns[frame_type]
        curr_frame_type_pred_ndarrays = per_frame_type_pred_ndarrays[frame_type]

        if separate_classes:
            for object_id in sorted(object_type_dict):
                # The last column of the prediction array is the category id.
                object_id_indices = np.where(curr_frame_type_pred_ndarrays[:, -1] == object_id)
                object_id_frame_type_pred_ndarrays = curr_frame_type_pred_ndarrays[object_id_indices]
                object_id_frame_type_anns = [
                    ann for ann in curr_frame_type_anns if ann["category_id"] == object_id
                ]
                print("----- {} -----".format(object_type_dict[object_id]))
                _, perfs = naive_ego4d_scod_coco_results(
                    coco_gt_file=object_id_frame_type_anns,
                    coco_pred_file=object_id_frame_type_pred_ndarrays,
                    original_coco_gt_file=coco_gt_file,
                    top_k=top_k,
                    ignore_summaries=ignore_summaries,
                    verbose=verbose,
                )
                all_perfs[object_type_dict[object_id]][frame_type] = dict(
                    zip(metric_names, (perfs[0], perfs[1], perfs[2]))
                )
            # Only the COCOeval object is needed here; the returned perfs is ignored.
            if return_eval_dicts:
                curr_coco_eval, _ = naive_ego4d_scod_coco_results(
                    coco_gt_file=curr_frame_type_anns,
                    coco_pred_file=curr_frame_type_pred_ndarrays,
                    original_coco_gt_file=coco_gt_file,
                    top_k=top_k,
                    ignore_summaries=ignore_summaries,
                    do_not_summarize=True,
                    verbose=verbose,
                    iou_stats=iou_stats,
                )
        else:
            curr_coco_eval, perfs = naive_ego4d_scod_coco_results(
                coco_gt_file=curr_frame_type_anns,
                coco_pred_file=curr_frame_type_pred_ndarrays,
                original_coco_gt_file=coco_gt_file,
                top_k=top_k,
                ignore_summaries=ignore_summaries,
                verbose=verbose,
                iou_stats=iou_stats,
            )
            all_perfs["OOC and/or Tool"][frame_type] = dict(
                zip(metric_names, (perfs[0], perfs[1], perfs[2]))
            )

        if return_eval_dicts:
            # One id per annotation (duplicates kept, as handed to COCOeval).
            fr_image_ids = [x["image_id"] for x in curr_frame_type_anns]
            fr_image_id_set = set(fr_image_ids)  # O(1) membership for the filter
            fr_images = [x for x in coco_gt_data["images"] if x["id"] in fr_image_id_set]
            all_coco_gts[frame_type] = {
                "info": coco_gt_data["info"],
                "licenses": coco_gt_data["licenses"],
                "categories": coco_gt_data["categories"],
                "images": fr_images,
                "annotations": curr_frame_type_anns,
            }
            curr_coco_eval.params.imgIds = fr_image_ids
            curr_coco_eval._paramsEval.imgIds = fr_image_ids
            all_coco_evals[frame_type] = curr_coco_eval

    # LaTeX-style and markdown templates live under separate names. They used
    # to share names, so with markdown=True every class after the first
    # printed the markdown table twice.
    latex_header_str = "Pre                   & PNR                   & Post                 "
    latex_ap_str     = "AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 "
    latex_perf_str   = "{:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f}"
    md_header_str = "| **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |"
    md_column_str = "| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | "
    md_ap_str     = "| **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |"
    md_perf_str   = "| {:.3f} | {:.3f} | {:.3f} | {:.3f} | {:.3f} | {:.3f} | {:.3f} | {:.3f} | {:.3f} |"

    # Placeholder for frame types skipped via forced_frame_types, so the
    # nine-column table still prints instead of raising KeyError.
    not_evaluated = {name: float("nan") for name in metric_names}

    for obj_type in sorted(all_perfs):
        print()
        print("---------- For {} ----------".format(obj_type))
        perf_nums = []
        for frame_type in frame_types:
            curr_perfs = all_perfs[obj_type].get(frame_type, not_evaluated)
            # Metrics are fractions in [0, 1]; the tables report percentages.
            perf_nums.extend(curr_perfs[name] * 100 for name in metric_names)
        print(latex_header_str)
        print(latex_ap_str)
        print(latex_perf_str.format(*perf_nums))

        if markdown:
            print("-"*50)
            print(md_header_str)
            print(md_column_str)
            print(md_ap_str)
            print(md_perf_str.format(*perf_nums))

    if return_eval_dicts:
        return all_coco_evals, all_coco_gts
    return None


def get_mean_std_of_results(s, mode="markdown"):
    """Print the column-wise mean and std of several result rows.

    Args:
        s: Iterable of strings, each holding ``&``-separated numbers (nine per
            row: AP / AP50 / AP75 for Pre, PNR and Post).
        mode: ``"latex"`` prints ``&``-separated rows; ``"markdown"`` prints a
            markdown table.

    Returns:
        None. The table is written to stdout.
    """
    assert mode in ["markdown", "latex"]

    table = np.asarray(
        [[float(token.strip()) for token in row.split("&")] for row in s]
    )
    avg_nums = table.mean(axis=0)
    std_nums = table.std(axis=0)  # numpy default: population std (ddof=0)

    nine_cells = ["{:.2f}"] * 9
    if mode == "latex":
        row_template = " & ".join(nine_cells)
        print("Pre                   & PNR                   & Post                 ")
        print("AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 ")
        print(row_template.format(*avg_nums))
        print(row_template.format(*std_nums))
        return None

    cells_template = " | ".join(nine_cells)
    print("|           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |")
    print("| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | ")
    print("|           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |")
    print(("| Mean | " + cells_template + " |").format(*avg_nums))
    print(("| Std    | " + cells_template + " |").format(*std_nums))
    return None

## DATA

In [11]:
# Locations of the SCOD train / val annotation files.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
annots_root = "/local1/telinwu/research/resources/Ego4D/v1/annotations"

scod_train_file = os.path.join(annots_root, "fho_scod_train.json")
scod_val_file = os.path.join(annots_root, "fho_scod_val.json")

# Context managers close the annotation files once they are parsed.
with open(scod_train_file) as train_fp:
    scod_train_clips = json.load(train_fp)["clips"]
with open(scod_val_file) as val_fp:
    scod_val_clips = json.load(val_fp)["clips"]

# Combined list of train + val clips.
scod_clips = scod_train_clips + scod_val_clips

print("Train SCOD clips: {}".format(len(scod_train_clips)))
print("Val   SCOD clips: {}".format(len(scod_val_clips)))
print("Total SCOD clips: {}".format(len(scod_clips)))

Train SCOD clips: 19071
Val   SCOD clips: 12801
Total SCOD clips: 31872


# Results

### GPT+Conds.+Desc.

In [12]:
# GPT+Conds.+Desc. run: evaluate checkpoint model_0040000 (directory
# "eval_at_40K_no_drop_nulls") against the narrated validation GT of the
# "gpt_v1_srl_arg1_with_tool" variant, using only the validation clips.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma_defs_new_caps"
    "/eval_at_40K_no_drop_nulls/eval/model_0040000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the summarize step; presumably drops the small/medium/large
# area rows (the printed output only shows area=all) - TODO confirm.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# separate_classes=True reports each category (object_of_change, tool) separately.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    return_eval_dicts=False,
    separate_classes=True,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3817
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5761
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3947
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4373
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6353
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6689
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3827
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5913
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4262
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5187
 Average Recall     (AR) @[ IoU

In [23]:
# GPT+Conds.+Desc. run: same GT and model directory as the model_0040000
# evaluation, but for checkpoint model_0045000 ("eval_at_45K_no_drop_nulls").
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma_defs_new_caps"
    "/eval_at_45K_no_drop_nulls/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the summarize step; presumably drops the small/medium/large
# area rows (the printed output only shows area=all) - TODO confirm.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# separate_classes=True reports each category (object_of_change, tool) separately.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    return_eval_dicts=False,
    separate_classes=True,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3793
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5691
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3950
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4387
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6350
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6658
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3888
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5952
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4131
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4328
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5165
 Average Recall     (AR) @[ IoU

In [135]:
# GPT+Conds.+Desc. run: checkpoint model_0040000 of the model directory whose
# name ends in "_seed42" (presumably a re-run with a different random seed -
# TODO confirm), evaluated against the same narrated validation GT.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool.json"
)
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_no_comma_defs_new_caps_seed42"
    "/eval/model_0040000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the summarize step; presumably drops the small/medium/large
# area rows (the printed output only shows area=all) - TODO confirm.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

# separate_classes=True reports each category (object_of_change, tool) separately.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    return_eval_dicts=False,
    separate_classes=True,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3818
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5761
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3975
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4414
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6363
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6688
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3843
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5915
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4304
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5206
 Average Recall     (AR) @[ IoU

In [44]:
# Hand-copied result rows (percent): AP / AP50 / AP75 for Pre, PNR and Post.
# The leading values of rows 2 and 3 match the pre_frame object_of_change
# numbers printed by the model_0040000 and model_0045000 evaluations in this
# section; the run behind row 1 is not identifiable from the visible
# outputs - TODO confirm.
s = [
    "38.40 & 57.43 & 40.09 & 40.00 & 60.54 & 41.50 & 35.81 & 52.86 & 37.61",
    "38.17 & 57.61 & 39.47 & 40.04 & 61.00 & 41.30 & 35.39 & 52.95 & 36.92",
    "37.93 & 56.91 & 39.50 & 39.72 & 60.25 & 40.93 & 35.13 & 52.19 & 36.54",
]
# Print mean / std across the rows, first "&"-separated, then as markdown.
get_mean_std_of_results(s, mode="latex")
print("-"*50)
get_mean_std_of_results(s, mode="markdown")

Pre                   & PNR                   & Post                 
AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 
38.17 & 57.32 & 39.69 & 39.92 & 60.60 & 41.24 & 35.44 & 52.67 & 37.02
0.19 & 0.30 & 0.29 & 0.14 & 0.31 & 0.24 & 0.28 & 0.34 & 0.44
--------------------------------------------------
|           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
|           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| Mean | 38.17 | 57.32 | 39.69 | 39.92 | 60.60 | 41.24 | 35.44 | 52.67 | 37.02 |
| Std    | 0.19 | 0.30 | 0.29 | 0.14 | 0.31 | 0.24 | 0.28 | 0.34 | 0.44 |


### Full-Instr.

In [51]:
# Full-Instr. run ("full_sentence" variant): evaluate checkpoint model_0045000
# ("eval_at_45K") against the narrated validation GT. separate_classes is left
# at its default, so all categories are evaluated together.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/full_sentence/val_scod_all_frames_narrated_full_sentence.json"
)
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/full_sentence/ego4d_scod_all_frames_narrated_full_sentence"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the summarize step; presumably drops the small/medium/large
# area rows (the printed output only shows area=all) - TODO confirm.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3341
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5274
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3458
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3895
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5868
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6399

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3475
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5585
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3577
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3959
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5842
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [75]:
# Full-Instr. run ("full_sentence" variant): same GT and model directory as
# the model_0045000 evaluation, but for checkpoint model_0030000 ("eval_at_30K").
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/full_sentence/val_scod_all_frames_narrated_full_sentence.json"
)
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/full_sentence/ego4d_scod_all_frames_narrated_full_sentence"
    "/eval_at_30K/eval/model_0030000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the summarize step; presumably drops the small/medium/large
# area rows (the printed output only shows area=all) - TODO confirm.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3335
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5262
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3421
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.3954
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5917
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6369

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3507
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5609
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3574
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4051
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5940
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [76]:
# Hand-copied result rows (percent): AP / AP50 / AP75 for Pre, PNR and Post.
# The Pre and PNR values of rows 2 and 3 match the numbers printed by the
# model_0045000 and model_0030000 evaluations in this section; the run behind
# row 1 is not identifiable from the visible outputs - TODO confirm.
s = [
    "32.45 & 51.62 & 33.34 & 33.78 & 54.44 & 34.49 & 31.30 & 49.23 & 32.42",
    "33.41 & 52.74 & 34.58 & 34.75 & 55.85 & 35.77 & 30.71 & 47.80 & 32.21",
    "33.35 & 52.62 & 34.21 & 35.07 & 56.09 & 35.74 & 30.68 & 47.84 & 31.73",
]
# Print mean / std across the rows, first "&"-separated, then as markdown.
get_mean_std_of_results(s, mode="latex")
print("-"*50)
get_mean_std_of_results(s, mode="markdown")

Pre                   & PNR                   & Post                 
AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 
33.07 & 52.33 & 34.04 & 34.53 & 55.46 & 35.33 & 30.90 & 48.29 & 32.12
0.44 & 0.50 & 0.52 & 0.55 & 0.73 & 0.60 & 0.29 & 0.66 & 0.29
--------------------------------------------------
|           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
|           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| Mean | 33.07 | 52.33 | 34.04 | 34.53 | 55.46 | 35.33 | 30.90 | 48.29 | 32.12 |
| Std    | 0.44 | 0.50 | 0.52 | 0.55 | 0.73 | 0.60 | 0.29 | 0.66 | 0.29 |


### GPT.

In [39]:
# GPT. run ("gpt_v1_srl_arg1_wo_tool" variant): evaluate checkpoint
# model_0040000 ("eval_at_40K") against the narrated validation GT.
# separate_classes is left at its default, so all categories are evaluated
# together.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool.json"
)
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_wo_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_drop_null_ooc"
    "/eval_at_40K/eval/model_0040000/inference/narrated_ego4d_test/bbox.json"
)
# Evaluate all three frame types.
forced_frame_types = ["pre", "pnr", "post"]
# Forwarded to the summarize step; presumably drops the small/medium/large
# area rows (the printed output only shows area=all) - TODO confirm.
ignore_summaries = {
    "area": ["small", "medium", "large"],
}

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file,
    coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types,
    ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3724
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5560
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3883
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4330
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6389
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6748

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3935
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5947
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4034
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4443
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6370
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [58]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Validation ground truth for the `gpt_v1_srl_arg1_wo_tool` annotations.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v1_srl_arg1_wo_tool/val_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool.json"
)
# Predictions stored under `eval_at_45K` / `model_0045000` for the same setup.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_wo_tool/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_drop_null_ooc"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3701
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5518
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3862
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4341
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6326
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6630

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3906
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5891
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4006
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4450
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6338
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [59]:
# Per-run scores for the "GPT." section on a 0-100 scale, one "&"-separated row
# per run. Column order follows the header printed by the helper below:
# Pre (AP, AP50, AP75), PNR (AP, AP50, AP75), Post (AP, AP50, AP75).
# NOTE(review): rows appear to be hand-copied from evaluation outputs — confirm
# against the corresponding eval cells before reusing.
s = [
    "37.46 & 56.05 & 38.96 & 39.07 & 59.17 & 40.13 & 34.77 & 51.34 & 36.35",
    "37.24 & 55.60 & 38.83 & 39.35 & 59.47 & 40.34 & 34.72 & 51.14 & 36.58",
    "37.01 & 55.18 & 38.62 & 39.06 & 58.91 & 40.06 & 34.77 & 51.06 & 36.65",
]
# Report the column-wise mean/std twice: LaTeX rows first, then a markdown table.
get_mean_std_of_results(s, mode="latex")
print("-"*50)
get_mean_std_of_results(s, mode="markdown")

Pre                   & PNR                   & Post                 
AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 
37.24 & 55.61 & 38.80 & 39.16 & 59.18 & 40.18 & 34.75 & 51.18 & 36.53
0.18 & 0.36 & 0.14 & 0.13 & 0.23 & 0.12 & 0.02 & 0.12 & 0.13
--------------------------------------------------
|           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
|           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| Mean | 37.24 | 55.61 | 38.80 | 39.16 | 59.18 | 40.18 | 34.75 | 51.18 | 36.53 |
| Std    | 0.18 | 0.36 | 0.14 | 0.13 | 0.23 | 0.12 | 0.02 | 0.12 | 0.13 |


### GPT+Conds.

In [49]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Validation ground truth for the `gpt_v3_srl_arg1_with_tool` annotations.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json"
)
# Predictions stored under `eval_at_40K` / `model_0040000`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr"
    "/eval_at_40K/eval/model_0040000/inference/narrated_ego4d_test/bbox.json"
)

# `separate_classes=True`: the captured output reports object_of_change and
# tool in separate sub-sections.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    return_eval_dicts=False, separate_classes=True, fully_narrated=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3849
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5731
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3989
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4419
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6439
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6777
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.4304
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6502
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4514
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4843
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5845
 Average Recall     (AR) @[ IoU

In [71]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Validation ground truth for the `gpt_v3_srl_arg1_with_tool` annotations.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json"
)
# Predictions stored under `eval_at_45K` / `model_0045000`.
# NOTE(review): this prediction directory is `gpt_v3_v_srl_arg1_with_tool`,
# while the ground-truth file above uses `gpt_v3_srl_arg1_with_tool` — confirm
# that the extra `_v` is the intended model variant and not a typo.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_v_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_v_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr"
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json"
)

# `separate_classes=True`: the captured output reports object_of_change and
# tool in separate sub-sections.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    return_eval_dicts=False, separate_classes=True, fully_narrated=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3852
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5749
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3993
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4430
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6430
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6759
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.4343
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6642
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4526
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4907
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5964
 Average Recall     (AR) @[ IoU

In [72]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Validation ground truth for the `gpt_v3_srl_arg1_with_tool` annotations.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/val_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool.json"
)
# Predictions stored under `eval_at_35K` / `model_0035000`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v3_srl_arg1_with_tool/ego4d_scod_all_frames_narrated_gpt_v3_srl_arg1_with_tool_drop_null_ooc_tool_symb_conds_mask_new_condstr"
    "/eval_at_35K/eval/model_0035000/inference/narrated_ego4d_test/bbox.json"
)

# `separate_classes=True`: the captured output reports object_of_change and
# tool in separate sub-sections.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    top_k=None,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    return_eval_dicts=False, separate_classes=True, fully_narrated=False,
    verbose=False,
)

---------- pre_frame ----------
----- object_of_change -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3810
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5696
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3968
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4348
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6412
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6786
----- tool -----
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.4321
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.6496
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4459
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4801
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.5865
 Average Recall     (AR) @[ IoU

In [74]:
# Per-run scores for the "GPT+Conds." section on a 0-100 scale, one
# "&"-separated row per run (four runs here). Column order follows the header
# printed by the helper below:
# Pre (AP, AP50, AP75), PNR (AP, AP50, AP75), Post (AP, AP50, AP75).
# NOTE(review): rows appear to be hand-copied from evaluation outputs — confirm
# against the corresponding eval cells before reusing.
s = [
    "38.65 & 57.55 & 40.16 & 40.19 & 60.39 & 41.56 & 35.40 & 52.15 & 37.11",
    "38.49 & 57.31 & 39.89 & 40.12 & 60.38 & 41.56 & 35.34 & 52.00 & 37.03",
    "38.52 & 57.49 & 39.93 & 39.99 & 60.51 & 41.10 & 35.35 & 52.23 & 37.24",
    "38.10 & 56.96 & 39.68 & 39.67 & 60.26 & 41.17 & 35.02 & 51.86 & 36.66",
]
# Report the column-wise mean/std twice: LaTeX rows first, then a markdown table.
get_mean_std_of_results(s, mode="latex")
print("-"*50)
get_mean_std_of_results(s, mode="markdown")

Pre                   & PNR                   & Post                 
AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 
38.44 & 57.33 & 39.91 & 39.99 & 60.38 & 41.35 & 35.28 & 52.06 & 37.01
0.21 & 0.23 & 0.17 & 0.20 & 0.09 & 0.21 & 0.15 & 0.14 & 0.22
--------------------------------------------------
|           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
|           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| Mean | 38.44 | 57.33 | 39.91 | 39.99 | 60.38 | 41.35 | 35.28 | 52.06 | 37.01 |
| Std    | 0.21 | 0.23 | 0.17 | 0.20 | 0.09 | 0.21 | 0.15 | 0.14 | 0.22 |


### GT-SRL-ARG1

In [144]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# NOTE(review): this cell sits under the "GT-SRL-ARG1" heading but both paths
# below point at the `full_sentence` data/model — confirm this is the intended
# configuration for the numbers reported in this section.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/full_sentence/val_scod_all_frames_narrated_full_sentence.json"
)
# Predictions stored under `eval_at_50K` / `model_0050000`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/full_sentence/ego4d_scod_all_frames_narrated_full_sentence"
    "/eval_at_50K/eval/model_0050000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3702
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5645
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3824
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4241
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6257
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6619

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3860
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5957
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3970
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4350
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6232
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [147]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# NOTE(review): the ground truth is the `full_sentence` file while the
# predictions come from `gpt_v1_srl_arg1_wo_tool_no_stopwords` — confirm that
# evaluating this model against this ground-truth file is intended.
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_data/all_frames/narrated"
    "/full_sentence/val_scod_all_frames_narrated_full_sentence.json"
)
# Predictions stored under `eval_at_40K` / `model_0040000`.
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated"
    "/gpt_v1_srl_arg1_wo_tool_no_stopwords/ego4d_scod_all_frames_narrated_gpt_v1_srl_arg1_wo_tool_no_stopwords_symb_conds"
    "/eval_at_40K/eval/model_0040000/inference/narrated_ego4d_test/bbox.json"
)

comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    verbose=False,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3746
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5605
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.3896
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4342
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6410
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.6758

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.3907
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.5917
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.4013
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.4427
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.6382
 Average Recall     (AR) @[ IoU=0.50:0.95 | 

In [148]:
# Per-run scores for the "GT-SRL-ARG1" section on a 0-100 scale, one
# "&"-separated row per run. Column order follows the header printed by the
# helper below: Pre (AP, AP50, AP75), PNR (AP, AP50, AP75), Post (AP, AP50, AP75).
# NOTE(review): the third row is character-for-character identical to the first
# row of the list in the "GPT." section — confirm it belongs in both tables.
s = [
    "37.87 & 56.35 & 39.55 & 39.64 & 59.41 & 40.73 & 34.97 & 51.34 & 36.69",
    "37.02 & 56.45 & 38.24 & 38.60 & 59.57 & 39.70 & 34.20 & 51.27 & 35.72",
    "37.46 & 56.05 & 38.96 & 39.07 & 59.17 & 40.13 & 34.77 & 51.34 & 36.35",
]
# Report the column-wise mean/std twice: LaTeX rows first, then a markdown table.
get_mean_std_of_results(s, mode="latex")
print("-"*50)
get_mean_std_of_results(s, mode="markdown")

Pre                   & PNR                   & Post                 
AP    & AP50  & AP75  & AP    & AP50  & AP75  & AP    & AP50  & AP75 
37.45 & 56.28 & 38.92 & 39.10 & 59.38 & 40.19 & 34.65 & 51.32 & 36.25
0.35 & 0.17 & 0.54 & 0.43 & 0.16 & 0.42 & 0.33 & 0.03 & 0.40
--------------------------------------------------
|           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
|           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| Mean | 37.45 | 56.28 | 38.92 | 39.10 | 59.38 | 40.19 | 34.65 | 51.32 | 36.25 |
| Std    | 0.35 | 0.17 | 0.54 | 0.43 | 0.16 | 0.42 | 0.33 | 0.03 | 0.40 |


## Markdown Ver. (OpenReview)

| **Model** |           | **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----:| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
|           |           | **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| Full-Instr. | Mean | 33.07 | 52.33 | 34.04 | 34.53 | 55.46 | 35.33 | 30.90 | 48.29 | 32.12 |
| Full-Instr.  | Std. | 0.44 | 0.50 | 0.52 | 0.55 | 0.73 | 0.60 | 0.29 | 0.66 | 0.29 |
| GPT | Mean | 37.24 | 55.61 | 38.80 | 39.16 | 59.18 | 40.18 | 34.75 | 51.18 | 36.53 |
| GPT  | Std. | 0.18 | 0.36 | 0.14 | 0.13 | 0.23 | 0.12 | 0.02 | 0.12 | 0.13 |
| GT-SRL-ARG1 | Mean | 37.45 | 56.28 | 38.92 | 39.10 | 59.38 | 40.19 | 34.65 | 51.32 | 36.25 |
| GT-SRL-ARG1  | Std. | 0.35 | 0.17 | 0.54 | 0.43 | 0.16 | 0.42 | 0.33 | 0.03 | 0.40 |
| GPT+Conds. | Mean | 38.44 | 57.33 | 39.91 | 39.99 | 60.38 | 41.35 | 35.28 | 52.06 | 37.01 |
| GPT+Conds. | Std. | 0.21 | 0.23 | 0.17 | 0.20 | 0.09 | 0.21 | 0.15 | 0.14 | 0.22 |
| GPT+Conds.+Desc. | Mean | 38.17 | 57.32 | 39.69 | 39.92 | 60.60 | 41.24 | 35.44 | 52.67 | 37.02 |
| GPT+Conds.+Desc. | Std. | 0.19 | 0.30 | 0.29 | 0.14 | 0.31 | 0.24 | 0.28 | 0.34 | 0.44 |

# END

# Data for Voting Baseline

In [110]:
from collections import OrderedDict

In [None]:
from tqdm import tqdm

# Folder holding the Ego4D v1 annotation JSON files.
annots_root = "/local1/telinwu/research/resources/Ego4D/v1/annotations"

scod_train_file = os.path.join(annots_root, "fho_scod_train.json")
scod_val_file = os.path.join(annots_root, "fho_scod_val.json")

# Read each split inside a context manager so the file handle is closed as soon
# as the JSON has been parsed instead of lingering until garbage collection.
with open(scod_train_file) as f:
    scod_train_clips = json.load(f)["clips"]
with open(scod_val_file) as f:
    scod_val_clips = json.load(f)["clips"]

# Concatenation of both splits (train clips first).
scod_clips = scod_train_clips + scod_val_clips

print("Train SCOD clips: {}".format(len(scod_train_clips)))
print("Val   SCOD clips: {}".format(len(scod_val_clips)))
print("Total SCOD clips: {}".format(len(scod_clips)))

# Split name -> list of clips; this is the structure `create_voting_dataset`
# indexes with its `split` argument.
scod_clips_split = {
    "train": scod_train_clips,
    "val":   scod_val_clips,
}

In [101]:
def create_voting_dataset(
    scod_clips_split,
    split="train",
    output_folder=None,
):
    """Build the per-frame hand/object dictionary used by the voting baseline.

    For every clip of the requested split and each of its pre / pnr / post
    frames, the hand boxes (``left_hand`` / ``right_hand``) are collected with
    coordinates normalised by the frame width and height, and the first
    ``object_of_change`` box of the frame is attached to each of them as
    ``obj_bbox``. Frames without an ``object_of_change`` box are dropped.

    Args:
        scod_clips_split: Mapping from split name to a list of clip dicts. Each
            clip provides ``video_uid`` and ``{pre,pnr,post}_frame`` entries
            with ``frame_number``, ``height``, ``width`` and a ``bbox`` list
            whose items carry ``object_type`` and a ``bbox`` dict with
            ``x``, ``y``, ``width``, ``height``.
        split: Key of ``scod_clips_split`` to process.
        output_folder: Unused; kept so existing callers keep working.

    Returns:
        Dict mapping ``"<video_uid>/<frame_number>.jpg"`` to the list of hand
        entries of that frame (possibly empty when the frame has an object of
        change but no hand box). A later frame with the same key overwrites an
        earlier one.
    """
    # The progress bar is purely cosmetic: import it here so the function does
    # not depend on another notebook cell having been run, and degrade to plain
    # iteration when tqdm is not installed.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    # Only these two hand types are known. Resolving the side through a lookup
    # avoids reusing a stale (or unbound) `hand_side` for any other object type
    # that merely contains the substring "hand".
    hand_sides = {"left_hand": "l", "right_hand": "r"}

    dataset = {}
    for scod_clip in progress(scod_clips_split[split]):
        video_uid = scod_clip["video_uid"]
        for fr in ("pre", "pnr", "post"):
            frame = scod_clip["{}_frame".format(fr)]
            frame_number = frame["frame_number"]
            height = frame["height"]
            width = frame["width"]

            hands = []
            ooc_bbox = None
            for bbox_info in frame["bbox"]:
                object_type = bbox_info["object_type"]
                bbox = bbox_info["bbox"]
                # Corner coordinates normalised by the frame size.
                bbox_dict = {
                    "x1": bbox["x"] / width,
                    "y1": bbox["y"] / height,
                    "x2": (bbox["x"] + bbox["width"]) / width,
                    "y2": (bbox["y"] + bbox["height"]) / height,
                }
                hand_side = hand_sides.get(object_type)
                if hand_side is not None:
                    bbox_dict["contact_state"] = 1
                    bbox_dict["hand_side"] = hand_side
                    bbox_dict["width"] = width
                    bbox_dict["height"] = height
                    hands.append(bbox_dict)
                elif object_type == "object_of_change" and ooc_bbox is None:
                    # Only the first object-of-change box of a frame is kept.
                    ooc_bbox = bbox_dict
                # Every other object type (e.g. tools) is ignored.

            if ooc_bbox is None:
                continue  # Do not add frames with a null object-of-change box.

            for hand in hands:
                # Give each hand its own copy so entries do not share one
                # mutable dict.
                hand["obj_bbox"] = dict(ooc_bbox)

            file_name = "{}/{}.jpg".format(video_uid, frame_number)
            dataset[file_name] = hands
    print("Split {} -- Data count: {}".format(split, len(dataset)))
    return dataset

# Build the voting-baseline inputs for the train split and write them to disk.
train_dataset = create_voting_dataset(
    scod_clips_split,
    split="train",
    output_folder="/local1/telinwu/research/resources/Ego4D/voting_baseline"
)
# The context manager closes (and therefore flushes) the output file as soon as
# the dump finishes, instead of leaving an open write handle behind.
with open(
    "/local1/telinwu/research/resources/Ego4D/voting_baseline/"
    "ego4d_gt_hand_no_tool_drop_null_ouc_0823_train.json",
    "w",
) as f:
    json.dump(train_dataset, f, indent=4)

# Same for the val split.
val_dataset = create_voting_dataset(
    scod_clips_split,
    split="val",
    output_folder="/local1/telinwu/research/resources/Ego4D/voting_baseline"
)
with open(
    "/local1/telinwu/research/resources/Ego4D/voting_baseline/"
    "ego4d_gt_hand_no_tool_drop_null_ouc_0823_val.json",
    "w",
) as f:
    json.dump(val_dataset, f, indent=4)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 19071/19071 [00:00<00:00, 28477.78it/s]


Split train -- Data count: 53924


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 12801/12801 [00:00<00:00, 116590.92it/s]


Split val -- Data count: 36358


In [150]:
# COCO-style ground-truth file of the val split ("od" variant).
gt_val_od_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge"
    "/paper_data/all_frames/od/val_scod_all_frames_od.json"
)

# Read inside a context manager so the handle is closed right after parsing.
with open(gt_val_od_file) as f:
    gt_val_od = json.load(f)
gt_val_od_images = gt_val_od["images"]

# file_name -> image id; when a file name occurs more than once the first id
# seen is kept.
mapping = {}
for img in gt_val_od_images:
    mapping.setdefault(img["file_name"], img["id"])

# Closing the output file explicitly guarantees the mapping is fully written
# before any later cell reads it back.
with open(
    "/local1/telinwu/research/resources/Ego4D/voting_baseline/fileName2ImageID.json", "w"
) as f:
    json.dump(mapping, f, indent=4)

In [165]:
# Prediction file of the voting baseline that is converted below.
# Previously tried alternatives, kept for reference:
#   .../baselines/SequentialVotingDet/bryan_res/doh100_zero_shot_ego4d.json
#   /local1/telinwu/research/resources/Ego4D/voting_baseline/incomplete_preds.json
#   .../baselines/SequentialVotingDet/bryan_res/new_2/epoch_3.json
voting_pred_file = (
    "/local1/bryanzhou008/jarvis/clean_rebuttal/project_jarvis/baselines"
    "/SequentialVotingDet/bryan_res/new_2/epoch_30.json"
)
voting_input_file = (
    "/local1/telinwu/research/resources/Ego4D/voting_baseline"
    "/ego4d_gt_hand_no_tool_drop_null_ouc_0823_val.json"
)
mapping_file = (
    "/local1/telinwu/research/resources/Ego4D/voting_baseline/fileName2ImageID.json"
)

# Load every input inside a context manager so no file handle is left open.
# `json.load` keeps the key order of `voting_inputs` as stored in the file; an
# `object_pairs_hook=OrderedDict` variant was considered but is not used.
with open(mapping_file) as f:
    mappings = json.load(f)
with open(voting_input_file) as f:
    voting_inputs = json.load(f)
with open(voting_pred_file) as f:
    voting_preds = json.load(f)

# Group the predictions by their original `image_id`.
voting_preds_dict = {}
for voting_pred in voting_preds:
    voting_preds_dict.setdefault(voting_pred["image_id"], []).append(voting_pred)
print(len(voting_preds), len(voting_preds_dict))

# COCO image id of the i-th voting input, in file order.
mapped_image_ids = [mappings[file_name] for file_name in voting_inputs]

# Rewrite each prediction's `image_id` to the COCO image id.
# NOTE(review): this assumes the original `image_id` is the 0-based position of
# the frame in `voting_inputs` — confirm against the baseline's output format.
for i in sorted(voting_preds_dict):
    for pred in voting_preds_dict[i]:
        pred["image_id"] = mapped_image_ids[i]

# Same records, ordered by original image id.
voting_preds_all = []
for x in sorted(voting_preds_dict):
    voting_preds_all += voting_preds_dict[x]
print(len(voting_preds))

# The grouped lists hold the very same dict objects as `voting_preds`, so the
# in-place update above is already reflected in `voting_preds` (in its original
# order), which is what gets written out.
with open(
    "/local1/telinwu/research/resources/Ego4D/voting_baseline/voting_preds_v1.json", "w"
) as f:
    json.dump(voting_preds, f, indent=4)

62562 35508
62562


In [229]:
# mappings
voting_input_file_raw_id = (
    "/local1/bryanzhou008/jarvis/clean_rebuttal/project_jarvis/baselines"
    # "/SequentialVotingDet/bryan_res/new/epoch_15.json"
    "/SequentialVotingDet/bryan_res/new/epoch_30.json"
)
voting_inputs_raw_id = json.load(open(voting_input_file_raw_id))
for image in voting_inputs_raw_id:
    image["image_id"] = mappings[image["image_id"]]

json.dump(
    voting_preds,
    open("/local1/telinwu/research/resources/Ego4D/voting_baseline/voting_preds_v1.json", "w"),
    indent=4,
)

In [230]:
# Shared evaluation settings: evaluate the pre / pnr / post frames and pass the
# per-area (small / medium / large) entries as summaries to ignore.
forced_frame_types = ["pre", "pnr", "post"]
ignore_summaries = {"area": ["small", "medium", "large"]}

# Ground truth: the "od" val file. Alternative used before:
#   .../paper_data/all_frames/narrated/full_sentence/val_scod_all_frames_narrated_full_sentence.json
coco_gt_file = (
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge"
    "/paper_data/all_frames/od/val_scod_all_frames_od.json"
)
# Predictions: the converted voting-baseline file. Alternatives used before:
#   .../baselines/SequentialVotingDet/bryan_res/new_2/meccano_and_ego_priors.json
#   .../baselines/SequentialVotingDet/bryan_res/new_2/epoch_30.json
coco_pred_file = (
    "/local1/telinwu/research/resources/Ego4D/voting_baseline/voting_preds_v1.json"
)

# `markdown=True` and `iou_stats=True`: the captured output additionally shows
# IoU statistics and markdown tables.
comprehensive_ego4d_scod_coco_results(
    coco_gt_file=coco_gt_file, coco_pred_file=coco_pred_file,
    scod_clips=scod_val_clips,
    forced_frame_types=forced_frame_types, ignore_summaries=ignore_summaries,
    top_k=None, fully_narrated=False,
    verbose=False, markdown=True, iou_stats=True,
)

---------- pre_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.0001
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.0002
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.0000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.0013
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.0014
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.0014
Number of ious: 21179
Mean-IOUs: 0.048471741109324444  Std-IOUs: 0.0976610173430553
[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
[17606  1841   916   460   232    82    31     9     2     0]
[8.31295151e+01 8.69257283e+00 4.32503895e+00 2.17196279e+00
 1.09542471e+00 3.87175976e-01 1.46371406e-01 4.24949242e-02
 9.44331649e-03 0.00000000e+00]

---------- pnr_frame ----------
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.0020
 Average Precision  (A

- Mean and std of IOUs
| **Pre** | **Pre** | **PNR** | **PNR** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
| **Mean** | **Std** | **Mean**  | **Std** | **Mean** | **Std** |
| 0.0484 | 0.0976 | 0.0488 | 0.0993 | 0.0478 | 0.0967 |

- Histogram (%)
| **0.0-0.1**| **0.1-0.2** | **0.2-0.3** | **0.3-0.4** | **0.4-0.5** | **0.5-0.6** | **0.6-0.7** | **0.7-0.8** | **0.8-0.9** | **0.9-1.0** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
| 83.16 | 8.64 | 4.31 | 2.09 | 1.21 | 0.36 | 0.21 | 0.07 | 0.02 | 0.00 |

- Performance (%)
| **Pre** | **Pre** | **Pre** | **PNR** | **PNR** | **PNR** | **Post** | **Post** | **Post** |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 
| **AP** | **AP50** | **AP75**  | **AP** | **AP50** | **AP75** | **AP** | **AP50** | **AP75** |
| 0.008 | 0.021 | 0.000 | 0.198 | 0.330 | 0.330 | 0.005 | 0.026 | 0.001 |

In [131]:
# Assemble the prediction path of the `gt_srl_arg1` seed-42 model
# (`eval_at_45K` / `model_0045000`) and echo it for a visual check.
coco_pred_file = "".join((
    "/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated",
    "/gt_srl_arg1/ego4d_scod_all_frames_narrated_gt_srl_arg1_seed42",
    "/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json",
))
print(coco_pred_file)

/local1/telinwu/research/resources/Ego4D/ego4d_scod_challenge/paper_models/all_frames/narrated/gt_srl_arg1/ego4d_scod_all_frames_narrated_gt_srl_arg1_seed42/eval_at_45K/eval/model_0045000/inference/narrated_ego4d_test/bbox.json
